diff --git a/translators/Google Books.js b/translators/Google Books.js index 0ffa016c3..4248ac27a 100644 --- a/translators/Google Books.js +++ b/translators/Google Books.js @@ -2,15 +2,16 @@ "translatorID":"3e684d82-73a3-9a34-095f-19b112d88bbf", "translatorType":4, "label":"Google Books", - "creator":"Simon Kornblith and Michael Berkowitz", - "target":"^http://(books|www)\\.google\\.[a-z]+(\\.[a-z]+)?/books\\?(.*id=.*|.*q=.*)", + "creator":"Simon Kornblith, Michael Berkowitz and Rintze Zelle", + "target":"^http://(books|www)\.google\.[a-z]+(\.[a-z]+)?/books\?(.*id=.*|.*q=.*)", "minVersion":"1.0.0b3.r1", "maxVersion":"", "priority":100, "inRepository":true, - "lastUpdated":"2009-02-03 05:45:00" + "lastUpdated":"2009-05-14 05:45:00" } + function detectWeb(doc, url) { var re = new RegExp('^http://(books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i'); if(re.test(doc.location.href)) { @@ -19,8 +20,12 @@ function detectWeb(doc, url) { return "multiple"; } } - function doWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + // get local domain suffix var psRe = new RegExp("https?://(books|www)\.google\.([^/]+)/"); var psMatch = psRe.exec(url); @@ -32,9 +37,9 @@ function doWeb(doc, url) { var re = new RegExp('^http://(?:books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i'); var m = re.exec(uri); if(m) { - newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]); + newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]); } else { - var items = Zotero.Utilities.getItemArray(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)'); + var items = getItemArrayGB(doc, doc, 'http://'+prefix+'\\.google\\.' 
+ suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)'); // Drop " - Page" thing for(var i in items) { items[i] = items[i].replace(/- Page [0-9]+\s*$/, ""); @@ -47,66 +52,122 @@ function doWeb(doc, url) { for(var i in items) { var m = re.exec(i); - newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]); + newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]); } } - Zotero.debug(newUris); - Zotero.Utilities.processDocuments(newUris, function(newDoc) { - var newItem = new Zotero.Item("book"); - newItem.extra = ""; - - var namespace = newDoc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == 'x') return namespace; else return null; - } : null; + + var itemUrlBase = "http://"+prefix+".google."+suffix+"/books?id="; + + Zotero.Utilities.HTTP.doGet(newUris, function(text) { + // Remove xml parse instruction and doctype + text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""); - var xpath = '//h2[@class="title"]' - var elmt; - if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver, - XPathResult.ANY_TYPE, null).iterateNext()){ - var title = Zotero.Utilities.superCleanString(elmt.textContent); - newItem.title = title; - Zotero.debug("title: " + title); - } - xpath = '//div[@class="titlewrap"]/span[@class="addmd"]' - if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver, - XPathResult.ANY_TYPE, null).iterateNext()){ - var authors = Zotero.Utilities.superCleanString(elmt.textContent); - if (authors.substring(0, 3) == "By "){ - authors = authors.substring(3); - } - authors = authors.split(", "); - for(j in authors) { - newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "author")); - } + var xml = new XML(text); + + default xml namespace = "http://purl.org/dc/terms"; with ({}); + + var newItem = new Zotero.Item("book"); + + var authors = xml.creator; + for (var i in authors) { + newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i].toString(), 
"author")); } - xpath = '//td[2][@id="bookinfo"]/div[@class="bookinfo_sectionwrap"]/div'; - var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, - XPathResult.ANY_TYPE, null); - while(elmt = elmts.iterateNext()) { - var fieldelmt = newDoc.evaluate('.//text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if(fieldelmt) { - field = Zotero.Utilities.superCleanString(fieldelmt.nodeValue); - Zotero.debug("output: " + field); - if(field.substring(0,10) == "Published ") { - newItem.date = field.substring(field.length-4); - var publisher = newDoc.evaluate('..//a', fieldelmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if (publisher){ - publisher = Zotero.Utilities.superCleanString(publisher.textContent); - newItem.publisher = publisher; - } - } else if(field.substring(0,5) == "ISBN ") { - newItem.ISBN = field.substring(5); - } else if(field.substring(field.length-6) == " pages") { - newItem.pages = field.substring(0, field.length-6); - } else if(field.substring(0,12) == "Contributor ") { - newItem.creators.push(Zotero.Utilities.cleanAuthor(field.substring(12), "contributor")); - } + newItem.date = xml.date.toString(); + + var pages = xml.format.toString(); + var pagesRe = new RegExp(/(\d+)( pages)/); + var pagesMatch = pagesRe.exec(pages); + if (pagesMatch!=null) { + newItem.pages = pagesMatch[1]; + } else { + newItem.pages = pages; + } + + var ISBN; + var identifiers = xml.identifier; + var identifiersRe = new RegExp(/(ISBN:)(\w+)/); + for (var i in identifiers) { + var identifierMatch = identifiersRe.exec(identifiers[i].toString()); + if (identifierMatch!=null && !ISBN) { + ISBN = identifierMatch[2]; + } else if (identifierMatch!=null){ + ISBN = ISBN + ", " + identifierMatch[2]; } - } + } + newItem.ISBN = ISBN; + + newItem.publisher = xml.publisher[0].toString(); + + newItem.title = xml.title[0].toString(); + + newItem.url = itemUrlBase + xml.identifier[0]; + newItem.complete(); }, function() { Zotero.done(); }, null); - Zotero.wait(); +} 
+ +/** + * Grabs items based on URLs + * + * @param {Document} doc DOM document object + * @param {Element|Element[]} inHere DOM element(s) to process + * @param {RegExp} [urlRe] Regexp of URLs to add to list + * @param {RegExp} [rejectRe] Regexp of link text to reject + * @return {Object} Associative array of link => textContent pairs, suitable for passing to + * Zotero.selectItems from within a translator + */ +function getItemArrayGB (doc, inHere, urlRe, rejectRe) { + var availableItems = new Object(); // Technically, associative arrays are objects + + // Require link to match this + if(urlRe) { + if(urlRe.exec) { + var urlRegexp = urlRe; + } else { + var urlRegexp = new RegExp(); + urlRegexp.compile(urlRe, "i"); + } + } + // Do not allow text to match this + if(rejectRe) { + if(rejectRe.exec) { + var rejectRegexp = rejectRe; + } else { + var rejectRegexp = new RegExp(); + rejectRegexp.compile(rejectRe, "i"); + } + } + + if(!inHere.length) { + inHere = new Array(inHere); + } + + for(var j=0; j