parent
6006b597b1
commit
0f2ad9bed9
|
@ -2,15 +2,16 @@
|
|||
"translatorID":"3e684d82-73a3-9a34-095f-19b112d88bbf",
|
||||
"translatorType":4,
|
||||
"label":"Google Books",
|
||||
"creator":"Simon Kornblith and Michael Berkowitz",
|
||||
"target":"^http://(books|www)\\.google\\.[a-z]+(\\.[a-z]+)?/books\\?(.*id=.*|.*q=.*)",
|
||||
"creator":"Simon Kornblith, Michael Berkowitz and Rintze Zelle",
|
||||
"target":"^http://(books|www)\.google\.[a-z]+(\.[a-z]+)?/books\?(.*id=.*|.*q=.*)",
|
||||
"minVersion":"1.0.0b3.r1",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":true,
|
||||
"lastUpdated":"2009-02-03 05:45:00"
|
||||
"lastUpdated":"2009-05-14 05:45:00"
|
||||
}
|
||||
|
||||
|
||||
function detectWeb(doc, url) {
|
||||
var re = new RegExp('^http://(books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i');
|
||||
if(re.test(doc.location.href)) {
|
||||
|
@ -19,8 +20,12 @@ function detectWeb(doc, url) {
|
|||
return "multiple";
|
||||
}
|
||||
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == 'x') return namespace; else return null;
|
||||
} : null;
|
||||
|
||||
// get local domain suffix
|
||||
var psRe = new RegExp("https?://(books|www)\.google\.([^/]+)/");
|
||||
var psMatch = psRe.exec(url);
|
||||
|
@ -32,9 +37,9 @@ function doWeb(doc, url) {
|
|||
var re = new RegExp('^http://(?:books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i');
|
||||
var m = re.exec(uri);
|
||||
if(m) {
|
||||
newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]);
|
||||
newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]);
|
||||
} else {
|
||||
var items = Zotero.Utilities.getItemArray(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');
|
||||
var items = getItemArrayGB(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');
|
||||
// Drop " - Page" thing
|
||||
for(var i in items) {
|
||||
items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
|
||||
|
@ -47,66 +52,122 @@ function doWeb(doc, url) {
|
|||
|
||||
for(var i in items) {
|
||||
var m = re.exec(i);
|
||||
newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]);
|
||||
newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]);
|
||||
}
|
||||
}
|
||||
Zotero.debug(newUris);
|
||||
Zotero.Utilities.processDocuments(newUris, function(newDoc) {
|
||||
var newItem = new Zotero.Item("book");
|
||||
newItem.extra = "";
|
||||
|
||||
var namespace = newDoc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == 'x') return namespace; else return null;
|
||||
} : null;
|
||||
|
||||
var itemUrlBase = "http://"+prefix+".google."+suffix+"/books?id=";
|
||||
|
||||
Zotero.Utilities.HTTP.doGet(newUris, function(text) {
|
||||
// Remove xml parse instruction and doctype
|
||||
text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
|
||||
|
||||
var xpath = '//h2[@class="title"]'
|
||||
var elmt;
|
||||
if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||||
XPathResult.ANY_TYPE, null).iterateNext()){
|
||||
var title = Zotero.Utilities.superCleanString(elmt.textContent);
|
||||
newItem.title = title;
|
||||
Zotero.debug("title: " + title);
|
||||
}
|
||||
xpath = '//div[@class="titlewrap"]/span[@class="addmd"]'
|
||||
if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||||
XPathResult.ANY_TYPE, null).iterateNext()){
|
||||
var authors = Zotero.Utilities.superCleanString(elmt.textContent);
|
||||
if (authors.substring(0, 3) == "By "){
|
||||
authors = authors.substring(3);
|
||||
}
|
||||
authors = authors.split(", ");
|
||||
for(j in authors) {
|
||||
newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "author"));
|
||||
}
|
||||
var xml = new XML(text);
|
||||
|
||||
default xml namespace = "http://purl.org/dc/terms"; with ({});
|
||||
|
||||
var newItem = new Zotero.Item("book");
|
||||
|
||||
var authors = xml.creator;
|
||||
for (var i in authors) {
|
||||
newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i].toString(), "author"));
|
||||
}
|
||||
|
||||
xpath = '//td[2][@id="bookinfo"]/div[@class="bookinfo_sectionwrap"]/div';
|
||||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
while(elmt = elmts.iterateNext()) {
|
||||
var fieldelmt = newDoc.evaluate('.//text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
if(fieldelmt) {
|
||||
field = Zotero.Utilities.superCleanString(fieldelmt.nodeValue);
|
||||
Zotero.debug("output: " + field);
|
||||
if(field.substring(0,10) == "Published ") {
|
||||
newItem.date = field.substring(field.length-4);
|
||||
var publisher = newDoc.evaluate('..//a', fieldelmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
if (publisher){
|
||||
publisher = Zotero.Utilities.superCleanString(publisher.textContent);
|
||||
newItem.publisher = publisher;
|
||||
}
|
||||
} else if(field.substring(0,5) == "ISBN ") {
|
||||
newItem.ISBN = field.substring(5);
|
||||
} else if(field.substring(field.length-6) == " pages") {
|
||||
newItem.pages = field.substring(0, field.length-6);
|
||||
} else if(field.substring(0,12) == "Contributor ") {
|
||||
newItem.creators.push(Zotero.Utilities.cleanAuthor(field.substring(12), "contributor"));
|
||||
}
|
||||
newItem.date = xml.date.toString();
|
||||
|
||||
var pages = xml.format.toString();
|
||||
var pagesRe = new RegExp(/(\d+)( pages)/);
|
||||
var pagesMatch = pagesRe.exec(pages);
|
||||
if (pagesMatch!=null) {
|
||||
newItem.pages = pagesMatch[1];
|
||||
} else {
|
||||
newItem.pages = pages;
|
||||
}
|
||||
|
||||
var ISBN;
|
||||
var identifiers = xml.identifier;
|
||||
var identifiersRe = new RegExp(/(ISBN:)(\w+)/);
|
||||
for (var i in identifiers) {
|
||||
var identifierMatch = identifiersRe.exec(identifiers[i].toString());
|
||||
if (identifierMatch!=null && !ISBN) {
|
||||
ISBN = identifierMatch[2];
|
||||
} else if (identifierMatch!=null){
|
||||
ISBN = ISBN + ", " + identifierMatch[2];
|
||||
}
|
||||
}
|
||||
}
|
||||
newItem.ISBN = ISBN;
|
||||
|
||||
newItem.publisher = xml.publisher[0].toString();
|
||||
|
||||
newItem.title = xml.title[0].toString();
|
||||
|
||||
newItem.url = itemUrlBase + xml.identifier[0];
|
||||
|
||||
newItem.complete();
|
||||
}, function() { Zotero.done(); }, null);
|
||||
|
||||
Zotero.wait();
|
||||
}
|
||||
|
||||
/**
 * Grabs items based on URLs.
 *
 * Local variant of Zotero.Utilities.getItemArray that additionally falls back
 * to the "alt" attribute of a link's first child when the link has no text of
 * its own (the Google Books cover view wraps a cover image in each link).
 *
 * @param {Document} doc DOM document object (not used by this function; kept
 *                       so the signature matches Zotero.Utilities.getItemArray)
 * @param {Element|Element[]} inHere DOM element(s) to process
 * @param {RegExp|String} [urlRe] Regexp of URLs to add to list; a string is
 *                                compiled case-insensitively
 * @param {RegExp|String} [rejectRe] Regexp of link texts to reject; a string
 *                                   is compiled case-insensitively
 * @return {Object} Associative array of link => textContent pairs, suitable for passing to
 *	Zotero.selectItems from within a translator
 */
function getItemArrayGB (doc, inHere, urlRe, rejectRe) {
	// Accept either a ready-made RegExp (anything exposing .exec) or a pattern
	// string. The RegExp constructor replaces the deprecated
	// RegExp.prototype.compile used previously.
	function toRegExp(pattern) {
		if (!pattern) {
			return null;
		}
		return pattern.exec ? pattern : new RegExp(pattern, "i");
	}

	var availableItems = {};
	var urlRegexp = toRegExp(urlRe);		// require link href to match this
	var rejectRegexp = toRegExp(rejectRe);	// do not allow link text to match this

	// A single element/document is wrapped in a list. Testing for the method we
	// are about to call (instead of a falsy .length) keeps an empty list empty
	// rather than turning it into [[]], which used to throw below.
	var containers = (typeof inHere.getElementsByTagName === "function") ? [inHere] : inHere;

	for (var j = 0; j < containers.length; j++) {
		var links = containers[j].getElementsByTagName("a");
		for (var i = 0; i < links.length; i++) {
			var href = links[i].href;
			if (urlRegexp && !urlRegexp.test(href)) {
				continue;
			}

			var text = links[i].textContent;
			// Rintze Zelle: compatibility with Google Books cover view, where
			// the link holds only an image; use the image's alt text instead.
			if (!text) {
				var firstChild = links[i].firstChild;
				// Guard: a completely empty <a> has no first child at all.
				text = firstChild ? firstChild.alt : null;
			}
			if (!text) {
				continue;
			}

			text = Zotero.Utilities.trimInternal(text);
			if (rejectRegexp && rejectRegexp.test(text)) {
				continue;
			}

			if (!availableItems[href]) {
				availableItems[href] = text;
			} else if (text != availableItems[href]) {
				// Same URL seen again with different text: concatenate the labels.
				availableItems[href] += " " + text;
			}
		}
	}

	return availableItems;
}
|
Loading…
Reference in New Issue
Block a user