Migrate sciencedirect to use processAsync(); something is strange with the DOI xpath, so use the data from RIS and a regex instead
This commit is contained in:
parent
5e080b78f0
commit
fb104160d5
|
@ -58,9 +58,17 @@ function doWeb(doc, url) {
|
||||||
Zotero.debug('no items');
|
Zotero.debug('no items');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Zotero.Utilities.processDocuments(articles, function(newDoc) {
|
|
||||||
var doi = newDoc.evaluate('//div[@class="articleHeaderInner"][@id="articleHeader"]/a[contains(text(), "doi")]', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.substr(4);
|
|
||||||
|
|
||||||
|
var sets = [];
|
||||||
|
for each (article in articles) {
|
||||||
|
sets.push({article:article});
|
||||||
|
}
|
||||||
|
var first = function(set, next) {
|
||||||
|
|
||||||
|
var article = set.article;
|
||||||
|
|
||||||
|
|
||||||
|
Zotero.Utilities.processDocuments(article, function(newDoc) {
|
||||||
var tempPDF = newDoc.evaluate('//a[@class="noul" and div/div[contains(text(), "PDF")]]', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
var tempPDF = newDoc.evaluate('//a[@class="noul" and div/div[contains(text(), "PDF")]]', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
if (!tempPDF) { // PDF xpath failed, lets try another
|
if (!tempPDF) { // PDF xpath failed, lets try another
|
||||||
tempPDF = newDoc.evaluate('//a[@class="noul" and contains(text(), "PDF")]', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
tempPDF = newDoc.evaluate('//a[@class="noul" and contains(text(), "PDF")]', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
@ -86,6 +94,27 @@ function doWeb(doc, url) {
|
||||||
{url:url, title:"ScienceDirect Snapshot", mimeType:"text/html"},
|
{url:url, title:"ScienceDirect Snapshot", mimeType:"text/html"},
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
// This does not work, not sure why.
|
||||||
|
//var doi = newDoc.evaluate('//a[contains(text(), "doi")]/text()', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
//Zotero.debug(doi);
|
||||||
|
//doi = doi.textContent.substr(4);
|
||||||
|
|
||||||
|
|
||||||
|
// pass these values to the next function
|
||||||
|
//set.doi = doi;
|
||||||
|
set.url = url;
|
||||||
|
set.get = get;
|
||||||
|
set.attachments = attachments;
|
||||||
|
|
||||||
|
next();
|
||||||
|
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
var second = function(set, next) {
|
||||||
|
var url = set.url;
|
||||||
|
var get = set.get;
|
||||||
|
|
||||||
Zotero.Utilities.HTTP.doGet(get, function(text) {
|
Zotero.Utilities.HTTP.doGet(get, function(text) {
|
||||||
var md5 = text.match(/<input type=hidden name=md5 value=([^>]+)>/)[1];
|
var md5 = text.match(/<input type=hidden name=md5 value=([^>]+)>/)[1];
|
||||||
var acct = text.match(/<input type=hidden name=_acct value=([^>]+)>/)[1];
|
var acct = text.match(/<input type=hidden name=_acct value=([^>]+)>/)[1];
|
||||||
|
@ -101,6 +130,23 @@ function doWeb(doc, url) {
|
||||||
}
|
}
|
||||||
var post = "_ob=DownloadURL&_method=finish&_acct=" + acct + "&_userid=" + userid + "&_docType=FLA&" + docID + "&md5=" + md5 + "&count=1&JAVASCRIPT_ON=Y&format=cite-abs&citation-type=RIS&Export=Export&x=26&y=17";
|
var post = "_ob=DownloadURL&_method=finish&_acct=" + acct + "&_userid=" + userid + "&_docType=FLA&" + docID + "&md5=" + md5 + "&count=1&JAVASCRIPT_ON=Y&format=cite-abs&citation-type=RIS&Export=Export&x=26&y=17";
|
||||||
var baseurl = url.match(/https?:\/\/[^/]+\//)[0];
|
var baseurl = url.match(/https?:\/\/[^/]+\//)[0];
|
||||||
|
|
||||||
|
set.post = post;
|
||||||
|
set.baseurl = baseurl;
|
||||||
|
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
var third = function(set, next) {
|
||||||
|
var baseurl = set.baseurl;
|
||||||
|
var post = set.post;
|
||||||
|
var attachments = set.attachments;
|
||||||
|
//var doi = set.doi;
|
||||||
|
|
||||||
|
|
||||||
Zotero.Utilities.HTTP.doPost(baseurl + 'science', post, function(text) {
|
Zotero.Utilities.HTTP.doPost(baseurl + 'science', post, function(text) {
|
||||||
var translator = Zotero.loadTranslator("import");
|
var translator = Zotero.loadTranslator("import");
|
||||||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||||
|
@ -112,15 +158,22 @@ function doWeb(doc, url) {
|
||||||
item.abstractNote = item.notes[0].note;
|
item.abstractNote = item.notes[0].note;
|
||||||
item.notes = new Array();
|
item.notes = new Array();
|
||||||
}
|
}
|
||||||
if (doi) {
|
item.DOI = item.DOI.substr(10);
|
||||||
item.DOI = doi;
|
//if (doi) {
|
||||||
}
|
// item.DOI = doi;
|
||||||
|
//}
|
||||||
item.complete();
|
item.complete();
|
||||||
});
|
});
|
||||||
translator.translate();
|
translator.translate();
|
||||||
|
|
||||||
|
next();
|
||||||
}, false, 'windows-1252');
|
}, false, 'windows-1252');
|
||||||
});
|
|
||||||
}, function() {Zotero.done();});
|
|
||||||
|
};
|
||||||
|
var functioncallbacks = [first, second, third];
|
||||||
|
Zotero.Utilities.processAsync(sets, functioncallbacks, function() {Zotero.done()});
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
var articles = new Array();
|
var articles = new Array();
|
||||||
if (detectWeb(doc, url) == "multiple") {
|
if (detectWeb(doc, url) == "multiple") {
|
||||||
|
@ -149,7 +202,19 @@ function doWeb(doc, url) {
|
||||||
Zotero.debug('no items');
|
Zotero.debug('no items');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Zotero.Utilities.processDocuments(articles, function(doc2) {
|
|
||||||
|
|
||||||
|
var sets = [];
|
||||||
|
for each (article in articles) {
|
||||||
|
sets.push({article:article});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
var first = function(set, next) {
|
||||||
|
|
||||||
|
var article = set.article;
|
||||||
|
|
||||||
|
Zotero.Utilities.processDocuments(article, function(doc2) {
|
||||||
var item = new Zotero.Item("journalArticle");
|
var item = new Zotero.Item("journalArticle");
|
||||||
item.repository = "ScienceDirect";
|
item.repository = "ScienceDirect";
|
||||||
item.url = doc2.location.href;
|
item.url = doc2.location.href;
|
||||||
|
@ -161,11 +226,12 @@ function doWeb(doc, url) {
|
||||||
if (voliss.match(/Issues?\s+[^,]+/)) item.issue = voliss.match(/Issues?\s+([^,]+)/)[1];
|
if (voliss.match(/Issues?\s+[^,]+/)) item.issue = voliss.match(/Issues?\s+([^,]+)/)[1];
|
||||||
if (voliss.match(/(J|F|M|A|S|O|N|D)\w+\s+\d{4}/)) item.date = voliss.match(/(J|F|M|A|S|O|N|D)\w+\s+\d{4}/)[0];
|
if (voliss.match(/(J|F|M|A|S|O|N|D)\w+\s+\d{4}/)) item.date = voliss.match(/(J|F|M|A|S|O|N|D)\w+\s+\d{4}/)[0];
|
||||||
if (voliss.match(/Pages?\s+[^,^\s]+/)) item.pages = voliss.match(/Pages?\s+([^,^\s]+)/)[1];
|
if (voliss.match(/Pages?\s+[^,^\s]+/)) item.pages = voliss.match(/Pages?\s+([^,^\s]+)/)[1];
|
||||||
item.DOI = doc2.evaluate('//div[@class="articleHeaderInner"][@id="articleHeader"]/a[contains(text(), "doi")]', doc2, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.substr(4);
|
// why doesn't this work?
|
||||||
|
//item.DOI = doc2.evaluate('//a[contains(text(), "doi")]/text()', doc2, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.substr(4);
|
||||||
var abspath = '//div[@class="articleHeaderInner"][@id="articleHeader"]/div[@class="articleText"]/p';
|
var abspath = '//div[@class="articleHeaderInner"][@id="articleHeader"]/div[@class="articleText"]/p';
|
||||||
var absx = doc2.evaluate(abspath, doc2, nsResolver, XPathResult.ANY_TYPE, null);
|
var absx = doc2.evaluate(abspath, doc2, nsResolver, XPathResult.ANY_TYPE, null);
|
||||||
var ab;
|
var ab;
|
||||||
item.abstractNote = ""
|
item.abstractNote = "";
|
||||||
while (ab = absx.iterateNext()) {
|
while (ab = absx.iterateNext()) {
|
||||||
item.abstractNote += Zotero.Utilities.trimInternal(ab.textContent) + " ";
|
item.abstractNote += Zotero.Utilities.trimInternal(ab.textContent) + " ";
|
||||||
}
|
}
|
||||||
|
@ -182,7 +248,22 @@ function doWeb(doc, url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
item.attachments.push({url:doc2.location.href, title:"ScienceDirect Snapshot", mimeType:"text/html"});
|
item.attachments.push({url:doc2.location.href, title:"ScienceDirect Snapshot", mimeType:"text/html"});
|
||||||
|
|
||||||
|
set.item = item;
|
||||||
|
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
var second = function(set, next) {
|
||||||
|
|
||||||
|
var item = set.item;
|
||||||
|
|
||||||
Zotero.Utilities.HTTP.doGet(item.url, function(text) {
|
Zotero.Utilities.HTTP.doGet(item.url, function(text) {
|
||||||
|
item.DOI = text.match(/>doi:([^<]*)/)[1];
|
||||||
|
|
||||||
|
try {
|
||||||
var aus = text.match(/<strong>\s+<p>.*<\/strong>/)[0].replace(/<sup>/g, "$").replace(/<\/sup>/g, "$");
|
var aus = text.match(/<strong>\s+<p>.*<\/strong>/)[0].replace(/<sup>/g, "$").replace(/<\/sup>/g, "$");
|
||||||
aus = aus.replace(/\$[^$]*\$/g, "");
|
aus = aus.replace(/\$[^$]*\$/g, "");
|
||||||
aus = aus.replace(/<a[^>]*>/g, "$").replace(/<\/a[^>]*>/g, "$");
|
aus = aus.replace(/<a[^>]*>/g, "$").replace(/<\/a[^>]*>/g, "$");
|
||||||
|
@ -194,9 +275,16 @@ function doWeb(doc, url) {
|
||||||
item.creators.push(Zotero.Utilities.cleanAuthor(Zotero.Utilities.unescapeHTML(Zotero.Utilities.trimInternal(aus[a]), "author")));
|
item.creators.push(Zotero.Utilities.cleanAuthor(Zotero.Utilities.unescapeHTML(Zotero.Utilities.trimInternal(aus[a]), "author")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch(e) {
|
||||||
|
Zotero.debug("No Authors listed.");
|
||||||
|
}
|
||||||
item.complete();
|
item.complete();
|
||||||
|
next();
|
||||||
});
|
});
|
||||||
}, function() {Zotero.done();});
|
};
|
||||||
|
var functioncallbacks = [first, second];
|
||||||
|
Zotero.Utilities.processAsync(sets, functioncallbacks, function() {Zotero.done()});
|
||||||
|
|
||||||
}
|
}
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user