Adds a translator for Informaworld.
This commit is contained in:
parent
63f63de37a
commit
27e0887588
97
scrapers.sql
97
scrapers.sql
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-01-31 23:00:00'));
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-02-01 18:00:00'));
|
||||
|
||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||
'function detectWeb(doc, url) {
|
||||
|
@ -2088,6 +2088,101 @@ function getData(ids){
|
|||
Zotero.wait();
|
||||
}');
|
||||
|
||||
-- Informaworld translator. The target pattern is a regular expression, so the dots are escaped and the match is anchored at the start of the URL.
REPLACE INTO translators VALUES ('1885b93c-cf37-4b25-aef5-283f42eada9d', '1.0.0b4.r5', '', '2008-02-01 18:00:00', '0', '100', '4', 'Informaworld', 'Michael Berkowitz', '^http://www\.informaworld\.com',
|
||||
'function detectWeb(doc, url) {
|
||||
if (url.indexOf("quicksearch") != -1) {
|
||||
return "multiple";
|
||||
} else if (doc.evaluate(''//a[substring(text(), 2, 8) = "Download"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
if (doc.evaluate(''//div[@id="metahead"]/div/strong[text() = "Published in:"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var pubtype = doc.evaluate(''//img[substring(@title, 1, 17) = "Publication type:"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().title;
|
||||
if (pubtype.match("journal")) {
|
||||
return "journalArticle";
|
||||
} else if (pubtype.match("book")) {
|
||||
return "bookSection";
|
||||
}
|
||||
} else {
|
||||
return "book";
|
||||
}
|
||||
} else if (url.indexOf("content=g") != -1 ||
|
||||
doc.evaluate(''//div[@id="browse"]//tbody/tr/td[2]/a[2]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext() ||
|
||||
doc.evaluate(''//div[@id="title"]//td[2]/div/strong/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
return "multiple";
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
',
|
||||
'function doWeb(doc, url) {
|
||||
var links = new Array();
|
||||
if (detectWeb(doc, url) == "multiple") {
|
||||
var items = new Object();
|
||||
if (doc.evaluate(''//div[@id="quicksearch"]//tr/td/b/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var xpath = ''//div[@id="quicksearch"]//tr/td/b/a'';
|
||||
} else if (doc.evaluate(''//div[@id="title"]/table/tbody/tr[2]//strong/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var xpath = ''//div[@id="title"]/table/tbody/tr[2]//strong/a'';
|
||||
} else if (doc.evaluate(''//div[@id="browse"]//tbody/tr/td[2]/a[2]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var xpath = ''//div[@id="browse"]//tbody/tr/td[2]/a[2]'';
|
||||
} else if (doc.evaluate(''//div[@id="title"]//td[2]/div/strong/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var xpath = ''//div[@id="title"]//td[2]/div/strong/a'';
|
||||
}
|
||||
var titles = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
|
||||
var title = titles.iterateNext();
|
||||
while (title) {
|
||||
items[title.href] = title.textContent;
|
||||
title = titles.iterateNext();
|
||||
}
|
||||
items = Zotero.selectItems(items);
|
||||
for (var i in items) {
|
||||
links.push(i);
|
||||
}
|
||||
} else {
|
||||
links = [url];
|
||||
}
|
||||
Zotero.debug(links);
|
||||
|
||||
Zotero.Utilities.processDocuments(links, function(newDoc) {
|
||||
var xpath = ''//div[@id="metahead"]/div'';
|
||||
var stuff = newDoc.evaluate(xpath, newDoc, null, XPathResult.ANY_TYPE, null);
|
||||
var thing = stuff.iterateNext() ;
|
||||
while (thing) {
|
||||
if (thing.textContent.match(/DOI/)) {
|
||||
var doi = Zotero.Utilities.trimInternal(thing.textContent).match(/:\s+(.*)/)[1];
|
||||
}
|
||||
thing = stuff.iterateNext();
|
||||
}
|
||||
|
||||
var id = newDoc.location.href.match(/content=([\w\d]+)/);
|
||||
var post = ''tab=citation&selecteditems='' + id[1].substr(1) + ''&content='' + id[1] + ''&citstyle=refworks&showabs=false&format=file'';
|
||||
Zotero.Utilities.HTTP.doPost(''http://www.informaworld.com/smpp/content'', post, function(text) {
|
||||
text = text.replace(/RT/, "TY");
|
||||
text = text.replace(/VO/, "VL");
|
||||
text = text.replace(/LK/, "UR");
|
||||
text = text.replace(/YR/, "PY");
|
||||
text = text.replace(/([A-Z][A-Z\d]\s)/g, "$1 - ")
|
||||
var translator = Zotero.loadTranslator("import");
|
||||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||
translator.setString(text);
|
||||
translator.setHandler("itemDone", function(obj, item) {
|
||||
var type = text.match(/TY\s+\-\s+([^\n]*)/)[1];
|
||||
Zotero.debug(type);
|
||||
if (type == "Journal") {
|
||||
item.itemType = "journalArticle";
|
||||
} else if (type == "Book, Whole") {
|
||||
item.itemType = "book";
|
||||
} else if (type == "Book, Section") {
|
||||
item.itemType = "bookSection";
|
||||
}
|
||||
if (doi) {
|
||||
item.DOI = doi;
|
||||
}
|
||||
item.complete();
|
||||
});
|
||||
translator.translate();
|
||||
|
||||
});
|
||||
}, function() {Zotero.done;});
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('f880bf79-d42f-4337-b0d2-7a7de4a48b7d', '1.0.0b4.r5', '', '2008-01-31 20:00:00', '0', '100', '6', 'Library Catalog (X-OPAC)', 'Michael Berkowitz', '(xopac|hylib)',
|
||||
'function detectWeb(doc, url) {
|
||||
if (url.indexOf("&nd=") != -1) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user