From 4d143aca324356e22ba546cb195e35f1eb816ece Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 3 May 2010 04:18:14 +0000 Subject: [PATCH] Pushed Rintze's Google Books update --- translators/Google Books.js | 136 ++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/translators/Google Books.js b/translators/Google Books.js index ca7432ea9..18d836599 100644 --- a/translators/Google Books.js +++ b/translators/Google Books.js @@ -1,19 +1,36 @@ { "translatorID":"3e684d82-73a3-9a34-095f-19b112d88bbf", - "translatorType":4, "label":"Google Books", "creator":"Simon Kornblith, Michael Berkowitz and Rintze Zelle", "target":"^http://(books|www)\\.google\\.[a-z]+(\\.[a-z]+)?/books\\?(.*id=.*|.*q=.*)", - "minVersion":"1.0.0b3.r1", + "minVersion":"2.0b7", "maxVersion":"", "priority":100, - "inRepository":true, - "lastUpdated":"2010-01-12 11:25:00" + "inRepository":"1", + "translatorType":4, + "lastUpdated":"2010-05-03 04:20:00" } +/* +The various types of Google Books URLs are: + +Search results - List view +http://books.google.com/books?q=asimov&btnG=Search+Books + +Search results - Cover view +http://books.google.com/books?q=asimov&btnG=Search%20Books&rview=1 + +Single item - URL with "id" +http://books.google.com/books?id=skf3LSyV_kEC&source=gbs_navlinks_s +http://books.google.com/books?hl=en&lr=&id=Ct6FKwHhBSQC&oi=fnd&pg=PP9&dq=%22Peggy+Eaton%22&ots=KN-Z0-HAcv&sig=snBNf7bilHi9GFH4-6-3s1ySI9Q#v=onepage&q=%22Peggy%20Eaton%22&f=false + +Single item - URL with "vid" (see http://code.google.com/apis/books/docs/static-links.html) +http://books.google.com/books?printsec=frontcover&vid=ISBN0684181355&vid=ISBN0684183951&vid=LCCN84026715#v=onepage&q&f=false + +*/ function detectWeb(doc, url) { - var re = new RegExp('^http://(books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i'); + var re = new RegExp('^http://(books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?(.*&)?(id|vid)=([^&]+)', 'i'); if(re.test(doc.location.href)) { return "book"; } else { @@ -34,9 +51,13 @@ function doWeb(doc, url) { var uri = doc.location.href; var newUris = new Array(); - var re = new RegExp('^http://(?:books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i'); + var re = new RegExp('^http://(?:books|www)\\.google\\.[a-z]+(?:\.[a-z]+)?/books\\?(?:.*&)?(id|vid)=([^&]+)', 'i'); var m = re.exec(uri); - if(m) { + if(m && m[1] == "id") { + newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]); + } else if (m && m[1] == "vid") { + var itemLinkWithID = doc.evaluate("/html/head/link", doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href; + var m = re.exec(itemLinkWithID); newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]); } else { var items = getItemArrayGB(doc, doc, 'google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)'); @@ -59,57 +80,66 @@ function doWeb(doc, url) { var itemUrlBase = "http://"+prefix+".google."+suffix+"/books?id="; - Zotero.Utilities.HTTP.doGet(newUris, function(text) { - // Remove xml parse instruction and doctype - text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""); + for (var i in newUris) { + var d = Zotero.Utilities.retrieveSource(newUris[i]); + //Zotero.debug(d); + parseXML(d, itemUrlBase); + } +} + +function parseXML(text, itemUrlBase) { + // Remove xml parse instruction and doctype + text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""); - var xml = new XML(text); + var xml = new XML(text); + + default xml namespace = "http://purl.org/dc/terms"; with ({}); - default xml namespace = "http://purl.org/dc/terms"; with ({}); - - var newItem = new Zotero.Item("book"); - - var authors = xml.creator; - for (var i in authors) { - newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i].toString(), "author")); + var newItem = new Zotero.Item("book"); + + var authors = xml.creator; + for (var i in authors) { + newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i].toString(), "author")); + } + + newItem.date = xml.date.toString(); + + var pages = xml.format.toString(); + var pagesRe = new RegExp(/(\d+)( pages)/); + var pagesMatch = pagesRe.exec(pages); + if (pagesMatch!=null) { + newItem.numPages = pagesMatch[1]; + } else { + newItem.numPages = pages; + } + + var ISBN; + var ISBN10Re = new RegExp(/(ISBN:)(\w{10})$/); + var ISBN13Re = new RegExp(/(ISBN:)(\w{13})$/); + var identifiers = xml.identifier; + for (var i in identifiers) { + var ISBN10Match = ISBN10Re.exec(identifiers[i].toString()); + var ISBN13Match = ISBN13Re.exec(identifiers[i].toString()); + if (ISBN10Match != null) { + ISBN = ISBN10Match[2]; } - - newItem.date = xml.date.toString(); - - var pages = xml.format.toString(); - var pagesRe = new RegExp(/(\d+)( pages)/); - var pagesMatch = pagesRe.exec(pages); - if (pagesMatch!=null) { - newItem.numPages = pagesMatch[1]; - } else { - newItem.numPages = pages; + if (ISBN13Match != null) { + ISBN = ISBN13Match[2]; } + } + newItem.ISBN = ISBN; + + if (xml.publisher[0]) { + newItem.publisher = xml.publisher[0].toString(); + } - var ISBN; - var identifiers = xml.identifier; - var identifiersRe = new RegExp(/(ISBN:)(\w+)/); - for (var i in identifiers) { - var identifierMatch = identifiersRe.exec(identifiers[i].toString()); - if (identifierMatch!=null && !ISBN) { - ISBN = identifierMatch[2]; - } else if (identifierMatch!=null){ - ISBN = ISBN + ", " + identifierMatch[2]; - } - } - newItem.ISBN = ISBN; - - if (xml.publisher[0]) { - newItem.publisher = xml.publisher[0].toString(); - } - - newItem.title = xml.title[0].toString(); - - var url = itemUrlBase + xml.identifier[0]; - newItem.attachments = [{title:"Google Books Link", snapshot:false, mimeType:"text/html", url:url}]; - - newItem.complete(); - }, function() { Zotero.done(); }, null); - Zotero.wait(); + newItem.title = xml.title[0].toString(); + + var url = itemUrlBase + xml.identifier[0]; + + newItem.attachments = [{title:"Google Books Link", snapshot:false, mimeType:"text/html", url:url}]; + + newItem.complete(); } /**