zotero/translators/ePrint IACR.js
2010-03-29 15:01:31 +00:00

110 lines
3.4 KiB
JavaScript

{
"translatorID":"04a23cbe-5f8b-d6cd-8eb1-2e23bcc8ae8f",
"translatorType":4,
"label":"ePrint IACR",
"creator":"Jonas Schrieb",
"minVersion":"1.0.0b3.r1",
"target":"^http://eprint\\.iacr\\.org/",
"maxVersion":"",
"priority":100,
"inRepository":false,
"lastUpdated":"2010-03-03 14:00:00"
}
/**
 * Classifies an ePrint IACR URL for Zotero.
 *
 * @param doc - The page document (not inspected here).
 * @param url - The page URL to classify.
 * @returns "report" for a single report page (YYYY/NNN or cgi-bin/print.pl),
 *          "multiple" for a listing or search page (complete, curr, YYYY,
 *          cgi-bin/search.pl), and undefined for anything else.
 */
function detectWeb(doc, url) {
	// Order matters: a report URL such as /2010/123 also matches the
	// per-year listing pattern, so the report rule must be tried first.
	var rules = [
		[/^http:\/\/eprint\.iacr\.org\/(\d{4}\/\d{3}|cgi-bin\/print\.pl)/, "report"],
		[/^http:\/\/eprint\.iacr\.org\/(complete|curr|\d{4}|cgi-bin\/search\.pl)/, "multiple"]
	];
	for (var k = 0; k < rules.length; k++) {
		if (rules[k][0].test(url)) {
			return rules[k][1];
		}
	}
}
/**
 * Scrapes one ePrint IACR report page and saves it as a Zotero "report" item.
 *
 * The report number (YYYY/NNN) is taken from the first node matching //h2,
 * the title from //p[1]/b, the authors from //p[2]/i (separated by " and "),
 * the abstract from the "Abstract" paragraph's text nodes plus child-less
 * paragraphs, and the tags from the "Category / Keywords" paragraph.
 * A snapshot and a PDF attachment are derived from the rebuilt report URL.
 *
 * @param doc - DOM document of the report page.
 * @param url - URL of the page; unused, the item URL is rebuilt from the
 *              report number found in the page.
 * @throws {Error} If the heading does not end in a YYYY/NNN report number.
 */
function scrape(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	var reportNoXPath = "//h2";
	var titleXPath = "//p[1]/b";
	var authorsXPath = "//p[2]/i";
	var abstractXPath = "//p[starts-with(b/text(),\"Abstract\")]/text() | //p[not(*)]";
	var keywordsXPath = "//p[starts-with(b/text(),\"Category\")]";

	// Text of the first node matching an XPath, or "" when nothing matches,
	// so a missing optional element does not abort the whole scrape.
	function firstText(xpath) {
		var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? node.textContent : "";
	}

	// True for undefined, empty, or whitespace-only strings.
	function isBlank(text) {
		return !text || /^\s*$/.test(text);
	}

	var reportNo = firstText(reportNoXPath).match(/(\d{4})\/(\d{3})$/);
	if (!reportNo) {
		throw new Error("ePrint IACR: could not find a report number (YYYY/NNN) in the page heading");
	}
	var year = reportNo[1];
	var no = reportNo[2];

	var title = firstText(titleXPath);
	var authors = firstText(authorsXPath).split(" and ");

	var abstr = "";
	var abstractLines = doc.evaluate(abstractXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var nextLine;
	while ((nextLine = abstractLines.iterateNext())) {
		abstr += nextLine.textContent;
	}

	// The category part ("<category> / ") is optional on the page; when it is
	// absent the first capture group is undefined and must not become a tag.
	var keywords = [];
	var tmp = firstText(keywordsXPath).match(/Category \/ Keywords: (?:([^\/]*) \/ )?([^\/]*)/);
	if (tmp) {
		keywords = tmp[2].split(", ");
		if (!isBlank(tmp[1])) {
			keywords.unshift(tmp[1]);
		}
	}

	var newItem = new Zotero.Item("report");
	newItem.date = year;
	newItem.reportNumber = no;
	newItem.url = "http://eprint.iacr.org/"+year+"/"+no;
	newItem.title = title;
	newItem.abstractNote = abstr;

	// Indexed loops: for...in on an array would also visit any enumerable
	// properties added to Array.prototype.
	for (var a = 0; a < authors.length; a++) {
		if (!isBlank(authors[a])) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[a], "author"));
		}
	}
	for (var k = 0; k < keywords.length; k++) {
		if (!isBlank(keywords[k])) {
			newItem.tags.push(keywords[k]);
		}
	}

	newItem.attachments = [
		{url:newItem.url, title:"ePrint IACR Snapshot", mimeType:"text/html"},
		{url:newItem.url+".pdf", title:"ePrint IACR Full Text PDF", mimeType:"application/pdf"}
	];
	newItem.complete();
}
/**
 * Translator entry point: decides which report pages to scrape and hands
 * them to scrape() via Zotero.Utilities.processDocuments.
 *
 * On a listing/search page (detectWeb returns "multiple") the titles
 * (//dl/dd/b) are paired with the first link of each entry (//dl/dt/a[1]),
 * offered to the user through Zotero.selectItems, and only the chosen URLs
 * are processed. On any other page the current URL itself is processed.
 *
 * @param doc - DOM document of the current page.
 * @param url - URL of the current page.
 * @returns true when the selection dialog returns nothing (e.g. cancelled),
 *          in which case nothing is processed; otherwise undefined.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	var articles = [];

	if (detectWeb(doc, url) == "multiple") {
		var titleXPath = "//dl/dd/b";
		var linkXPath = "//dl/dt/a[1]";

		var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		var links = doc.evaluate(linkXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);

		var items = {};
		var nextTitle;
		var nextLink; // declared locally; previously leaked as an implicit global
		// Stop as soon as either list runs out so a title without a matching
		// link cannot dereference null.
		while ((nextTitle = titles.iterateNext()) && (nextLink = links.iterateNext())) {
			items[nextLink.href] = nextTitle.textContent;
		}

		items = Zotero.selectItems(items);
		if (!items) {
			// Nothing selected / dialog cancelled: do not start processing.
			return true;
		}
		for (var href in items) {
			articles.push(href);
		}
	} else {
		articles = [url];
	}

	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
	Zotero.wait();
}