-Changse ESA Journals translator to work with new site.
This commit is contained in:
parent
1188978174
commit
5470ed0a11
123
scrapers.sql
123
scrapers.sql
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-24 15:30:00'));
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-24 18:00:00'));
|
||||
|
||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
|
||||
'function detectWeb(doc, url) {
|
||||
|
@ -5275,7 +5275,7 @@ function doWeb(doc, url) {
|
|||
}
|
||||
');
|
||||
|
||||
REPLACE INTO translators VALUES ('5af42734-7cd5-4c69-97fc-bc406999bdba', '1.0.0b4.r5', '', '2008-01-06 23:55:00', '0', '100', '4', 'ESA Journals', 'Michael Berkowitz', '^http://www.esajournals.org/*',
|
||||
REPLACE INTO translators VALUES ('5af42734-7cd5-4c69-97fc-bc406999bdba', '1.0.0b4.r5', '', '2008-03-24 18:00:00', '1', '100', '4', 'ESA Journals', 'Michael Berkowitz', 'http://www.esajournals.org/',
|
||||
'function detectWeb(doc, url) {
|
||||
if (url.indexOf("get-toc") != -1 || url.indexOf("searchtype") != -1) {
|
||||
return "multiple";
|
||||
|
@ -5298,83 +5298,72 @@ REPLACE INTO translators VALUES ('5af42734-7cd5-4c69-97fc-bc406999bdba', '1.0.0b
|
|||
return sen.join("");
|
||||
}
|
||||
|
||||
function fixURL(doistr) {
|
||||
var swapTable = {
|
||||
"%2F":"/",
|
||||
"%28":"(",
|
||||
"%29":")",
|
||||
"%5B":"[",
|
||||
"%5D":"]",
|
||||
"%3A":":",
|
||||
"%3B":";"
|
||||
}
|
||||
for (var probstr in swapTable) {
|
||||
doistr = doistr.replace(probstr, swapTable[probstr]);
|
||||
}
|
||||
return doistr;
|
||||
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == ''x'') return namespace; else return null;
|
||||
} : null;
|
||||
|
||||
var items = new Array();
|
||||
if (url.indexOf("get-toc") != -1) {
|
||||
var titlesAr = new Array();
|
||||
var linksAr = new Array();
|
||||
|
||||
var group_xpath = ''//div[@class="group"]'';
|
||||
var articles = doc.evaluate(group_xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||
while (group = articles.iterateNext()) {
|
||||
//gets article titles
|
||||
titlesAr.push(doc.evaluate(''.//p[@class="title"]'', group, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent);
|
||||
|
||||
//gets full text links, or abstracts if that''s all that''s available
|
||||
var link_xpath = ''.//p[@class="link"]'';
|
||||
if (group.textContent.indexOf("Full Text") != -1) {
|
||||
link_xpath += ''/a[substring(text(), 1, 4) = "Full"]'';
|
||||
} else if (group.textContent.indexOf("Abstract") != -1) {
|
||||
link_xpath += ''/a[substring(text(), 1, 8) = "Abstract"]'';
|
||||
}
|
||||
linksAr.push(doc.evaluate(link_xpath, group, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href);
|
||||
var articles = new Array();
|
||||
if (detectWeb(doc, url) == "multiple") {
|
||||
var items = new Object();
|
||||
var resultItems = doc.evaluate(''//div[@class="nocolumn"][@id="content"]/div//*[@class="group"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||
var next_item;
|
||||
while (next_item = resultItems.iterateNext()) {
|
||||
var link = doc.evaluate(''.//a[1]'', next_item, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href;
|
||||
var title = senCase(doc.evaluate(''.//*[@class="title"]'', next_item, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent);
|
||||
items[link] = title;
|
||||
}
|
||||
|
||||
var articles = new Object();
|
||||
for (var i = 0 ; i < linksAr.length ; i++) {
|
||||
articles[linksAr[i]] = senCase(titlesAr[i]);
|
||||
}
|
||||
|
||||
articles = Zotero.selectItems(articles);
|
||||
|
||||
|
||||
|
||||
for (var i in articles) {
|
||||
items.push(i);
|
||||
items = Zotero.selectItems(items);
|
||||
for (var i in items) {
|
||||
articles.push(i);
|
||||
}
|
||||
} else {
|
||||
items.push(url);
|
||||
articles.push(url);
|
||||
}
|
||||
|
||||
for (var i = 0 ; i < items.length ; i++) {
|
||||
var re= /<a href=\"([^"]*)\"?>RefWorks Format/;
|
||||
var doi = items[i].split("doi=")[1];
|
||||
var URI = "http://www.esajournals.org/perlserv/?request=cite-builder&doi=" + doi;
|
||||
Zotero.Utilities.HTTP.doGet(URI, function(text) {
|
||||
var newURI = Zotero.Utilities.unescapeHTML(text.match(re)[1]);
|
||||
Zotero.Utilities.HTTP.doGet("http://" + doc.location.host + "/perlserv/" + newURI, function(text) {
|
||||
var translator = Zotero.loadTranslator("import");
|
||||
text = text.replace(/RT/, "TY");
|
||||
text = text.replace(/VO/, "VL");
|
||||
text = text.replace(/LK/, "UR");
|
||||
text = text.replace(/YR/, "PY");
|
||||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||
translator.setString(text.replace(/([A-Z][A-Z\d]\s)/g, "$1 - "));
|
||||
translator.setHandler("itemDone", function(obj, item) {
|
||||
item.attachments = [
|
||||
{url:item.url, title:"EAS Snapshot", mimeType:"text/html"},
|
||||
{url:"http://www.esajournals.org/perlserv/?request=res-loc&uri=urn%3Aap%3Apdf%3Adoi%3A" + doi, title:"EAS Full Text PDF", mimeType:"application/pdf"}
|
||||
];
|
||||
item.itemType = "journalArticle";
|
||||
item.title = item.title.replace(/\s+\-\s+/g, " ");
|
||||
if (item.title.indexOf("JF") != -1) {
|
||||
item.title = item.title.substring(0, item.title.indexOf("JF") - 1);
|
||||
}
|
||||
item.title = senCase(item.title);
|
||||
item.abstractNote = item.notes[0][''note''];
|
||||
item.complete();
|
||||
});
|
||||
translator.translate();
|
||||
Zotero.wait();
|
||||
Zotero.done();
|
||||
Zotero.Utilities.processDocuments(articles, function(newDoc) {
|
||||
var newlink = newDoc.evaluate(''//a[text() = "Create Reference"]'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href;
|
||||
var itemurl = newDoc.location.href;
|
||||
var doi = itemurl.match(/doi=([^&]+)/)[1];
|
||||
var issn = newDoc.evaluate(''//div[@id="pageTitle"]/p/a'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href.match(/issn=([^&]+)/)[1];
|
||||
newlink = newlink.replace(''cite-builder'', ''download-citation&t=refman&site=esaonline'');
|
||||
Zotero.Utilities.HTTP.doGet(newlink, function(text) {
|
||||
// load translator for RIS
|
||||
var translator = Zotero.loadTranslator("import");
|
||||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||
translator.setString(text);
|
||||
translator.setHandler("itemDone", function(obj, item) {
|
||||
item.url = fixURL(itemurl);
|
||||
item.DOI = fixURL(doi);
|
||||
var bits = new Array(issn, item.volume, item.issue);
|
||||
var pdfurl = ''http://www.esajournals.org/archive/'' + bits.join("/") + "/pdf/i" + bits.join("-") + "-" + item.pages.match(/\d+/)[0] + ".pdf";
|
||||
item.attachments = [
|
||||
{url:item.url, title:"ESA Journals Snapshot", mimeType:"text/html"},
|
||||
{url:pdfurl, title:"ESA Full Text PDF", mimeType:"application/pdf"}
|
||||
];
|
||||
item.complete();
|
||||
});
|
||||
translator.translate();
|
||||
|
||||
Zotero.done();
|
||||
});
|
||||
}
|
||||
}, function() {Zotero.done;});
|
||||
Zotero.wait();
|
||||
}');
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user