Closes #516 for PubMed direct hits and refines Max Planck VL Library support

This commit is contained in:
Sean Takats 2007-02-15 22:42:36 +00:00
parent 58235c6bf6
commit 2e1fa819ab

View File

@ -1,4 +1,4 @@
-- 172 -- 173
-- ***** BEGIN LICENSE BLOCK ***** -- ***** BEGIN LICENSE BLOCK *****
-- --
@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date -- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-02-07 02:10:00')); REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-02-15 22:50:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon', REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
@ -2747,21 +2747,22 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
} }
}'); }');
REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2006-12-14 17:53:00', 1, 100, 12, 'NCBI PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?.*db=PubMed', REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2007-02-15 22:50:00', '1', '100', '4', 'NCBI PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?.*db=PubMed',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI; var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) { var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null; if (prefix == ''x'') return namespace; else return null;
} : null; } : null;
if(doc.location.href.indexOf("list_uids=") >= 0) { var uids = doc.evaluate(''//input[@name="uid"]'', doc,
nsResolver, XPathResult.ANY_TYPE, null);
if(uids.iterateNext()) {
if (uids.iterateNext()){
return "multiple";
}
return "journalArticle"; return "journalArticle";
} else if(doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
} }
} }
function getPMID(co) { function getPMID(co) {
var coParts = co.split("&"); var coParts = co.split("&");
for each(part in coParts) { for each(part in coParts) {
@ -2781,34 +2782,35 @@ function detectSearch(item) {
} }
} }
return false; return false;
}', }
',
'function lookupPMIDs(ids, doc) { 'function lookupPMIDs(ids, doc) {
Zotero.wait(); Zotero.wait();
var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(","); var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
Zotero.Utilities.HTTP.doGet(newUri, function(text) { Zotero.Utilities.HTTP.doGet(newUri, function(text) {
// Remove xml parse instruction and doctype // Remove xml parse instruction and doctype
text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, ""); text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
var xml = new XML(text); var xml = new XML(text);
for(var i=0; i<xml.PubmedArticle.length(); i++) { for(var i=0; i<xml.PubmedArticle.length(); i++) {
var newItem = new Zotero.Item("journalArticle"); var newItem = new Zotero.Item("journalArticle");
var citation = xml.PubmedArticle[i].MedlineCitation; var citation = xml.PubmedArticle[i].MedlineCitation;
var PMID = citation.PMID.text().toString(); var PMID = citation.PMID.text().toString();
newItem.accessionNumber = "PMID "+PMID; newItem.accessionNumber = "PMID "+PMID;
// add attachments // add attachments
if(doc) { if(doc) {
newItem.attachments.push({document:doc, title:"PubMed Snapshot"}); newItem.attachments.push({document:doc, title:"PubMed Snapshot"});
} else { } else {
var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID; var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID;
newItem.attachments.push({url:url, title:"PubMed Snapshot", newItem.attachments.push({url:url, title:"PubMed Snapshot",
mimeType:"text/html"}); mimeType:"text/html"});
} }
var article = citation.Article; var article = citation.Article;
if(article.ArticleTitle.length()) { if(article.ArticleTitle.length()) {
var title = article.ArticleTitle.text().toString(); var title = article.ArticleTitle.text().toString();
@ -2817,24 +2819,24 @@ function detectSearch(item) {
} }
newItem.title = title; newItem.title = title;
} }
if (article.Pagination.MedlinePgn.length()){ if (article.Pagination.MedlinePgn.length()){
newItem.pages = article.Pagination.MedlinePgn.text().toString(); newItem.pages = article.Pagination.MedlinePgn.text().toString();
} }
if(article.Journal.length()) { if(article.Journal.length()) {
var issn = article.Journal.ISSN.text().toString(); var issn = article.Journal.ISSN.text().toString();
if(issn) { if(issn) {
newItem.ISSN = issn.replace(/[^0-9]/g, ""); newItem.ISSN = issn.replace(/[^0-9]/g, "");
} }
newItem.journalAbbreviation = Zotero.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString()); newItem.journalAbbreviation = Zotero.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
if(article.Journal.Title.length()) { if(article.Journal.Title.length()) {
newItem.publicationTitle = Zotero.Utilities.superCleanString(article.Journal.Title.text().toString()); newItem.publicationTitle = Zotero.Utilities.superCleanString(article.Journal.Title.text().toString());
} else if(citation.MedlineJournalInfo.MedlineTA.length()) { } else if(citation.MedlineJournalInfo.MedlineTA.length()) {
newItem.publicationTitle = newItem.journalAbbreviation; newItem.publicationTitle = newItem.journalAbbreviation;
} }
if(article.Journal.JournalIssue.length()) { if(article.Journal.JournalIssue.length()) {
newItem.volume = article.Journal.JournalIssue.Volume.text().toString(); newItem.volume = article.Journal.JournalIssue.Volume.text().toString();
newItem.issue = article.Journal.JournalIssue.Issue.text().toString(); newItem.issue = article.Journal.JournalIssue.Issue.text().toString();
@ -2849,7 +2851,7 @@ function detectSearch(item) {
} }
} }
} }
if(article.AuthorList.length() && article.AuthorList.Author.length()) { if(article.AuthorList.length() && article.AuthorList.Author.length()) {
var authors = article.AuthorList.Author; var authors = article.AuthorList.Author;
for(var j=0; j<authors.length(); j++) { for(var j=0; j<authors.length(); j++) {
@ -2863,55 +2865,54 @@ function detectSearch(item) {
} }
} }
} }
newItem.abstractNote = article.Abstract.AbstractText.toString() newItem.abstractNote = article.Abstract.AbstractText.toString()
newItem.complete(); newItem.complete();
} }
Zotero.done(); Zotero.done();
}); });
} }
function doWeb(doc, url) { function doWeb(doc, url) {
var uri = doc.location.href; var namespace = doc.documentElement.namespaceURI;
var ids = new Array(); var nsResolver = namespace ? function(prefix) {
var idRegexp = /[\?\&]list_uids=([0-9\,]+)/; if (prefix == ''x'') return namespace; else return null;
var m = idRegexp.exec(uri);
if(m) {
ids.push(m[1]);
lookupPMIDs(ids, doc);
} else {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null; } : null;
var ids = new Array();
var items = new Array(); var uids = doc.evaluate(''//input[@name="uid"]'', doc,
var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc, nsResolver, XPathResult.ANY_TYPE, null);
nsResolver, XPathResult.ANY_TYPE, null); var uid = uids.iterateNext();
var tableRow; if(uid) {
// Go through table rows if (uids.iterateNext()){
while(tableRow = tableRows.iterateNext()) { var items = new Array();
var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); nsResolver, XPathResult.ANY_TYPE, null);
items[link.href] = article.nodeValue; var tableRow;
// Go through table rows
while(tableRow = tableRows.iterateNext()) {
var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
uid = doc.evaluate(''.//input[@name="uid"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
items[uid.value] = article.nodeValue;
}
items = Zotero.selectItems(items);
if(!items) {
return true;
}
for(var i in items) {
ids.push(i);
}
lookupPMIDs(ids);
} else {
ids.push(uid.value);
lookupPMIDs(ids, doc);
} }
items = Zotero.selectItems(items);
if(!items) {
return true;
}
for(var i in items) {
var m = idRegexp.exec(i);
ids.push(m[1]);
}
lookupPMIDs(ids);
} }
} }
@ -2920,6 +2921,7 @@ function doSearch(item) {
lookupPMIDs([getPMID(item.contextObject)]); lookupPMIDs([getPMID(item.contextObject)]);
}'); }');
REPLACE INTO translators VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '1.0.0b3.r1', '', '2006-12-12 23:41:00', 1, 100, 4, 'Embedded RDF', 'Simon Kornblith', NULL, REPLACE INTO translators VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '1.0.0b3.r1', '', '2006-12-12 23:41:00', 1, 100, 4, 'Embedded RDF', 'Simon Kornblith', NULL,
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
var metaTags = doc.getElementsByTagName("meta"); var metaTags = doc.getElementsByTagName("meta");
@ -6468,7 +6470,7 @@ function doWeb(doc, url) {
}'); }');
REPLACE INTO translators VALUES ('66928fe3-1e93-45a7-8e11-9df6de0a11b3', '1.0.0b3r1', '', '2007-02-06 02:10:00', '0', '100', '4', 'Max Planck VL Library', 'Sean Takats', 'http://vlp.mpiwg-berlin.mpg.de/library/', REPLACE INTO translators VALUES ('66928fe3-1e93-45a7-8e11-9df6de0a11b3', '1.0.0b3r1', '', '2007-02-15 22:50:00', '0', '100', '4', 'Max Planck Institute for the History of Science: Virtual Laboratory Library', 'Sean Takats', 'http://vlp.mpiwg-berlin.mpg.de/library/',
'function detectWeb(doc, url){ 'function detectWeb(doc, url){
var namespace = doc.documentElement.namespaceURI; var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) { var nsResolver = namespace ? function(prefix) {
@ -6478,7 +6480,7 @@ REPLACE INTO translators VALUES ('66928fe3-1e93-45a7-8e11-9df6de0a11b3', '1.0.0b
if (elmt){ if (elmt){
return "book"; return "book";
} }
elmt = doc.evaluate(''//span[starts-with(@title, "lit")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); elmt = doc.evaluate(''//span[starts-with(@title, "lit")] | //a[starts-with(@title, "lit")] | //p[starts-with(@title, "lit")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (elmt){ if (elmt){
return "multiple"; return "multiple";
} }
@ -6493,17 +6495,17 @@ REPLACE INTO translators VALUES ('66928fe3-1e93-45a7-8e11-9df6de0a11b3', '1.0.0b
var baseElmt = doc.evaluate(''//base[contains(@href, "/library/data/lit")]/@href'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); var baseElmt = doc.evaluate(''//base[contains(@href, "/library/data/lit")]/@href'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (baseElmt){ if (baseElmt){
var docID = baseElmt.nodeValue; var docID = baseElmt.nodeValue;
var idRe = /lit[^\/]+/; var idRe = /lit[0-9]+/;
var m = idRe.exec(docID); var m = idRe.exec(docID);
uris.push("http://vlp.mpiwg-berlin.mpg.de/library/meta?id=" + m[0]); uris.push("http://vlp.mpiwg-berlin.mpg.de/library/meta?id=" + m[0]);
} else { } else {
var searchElmts = doc.evaluate(''//a[starts-with(@title, "lit")]'', doc, nsResolver, XPathResult.ANY_TYPE, null); var searchElmts = doc.evaluate(''//span[starts-with(@title, "lit")] | //a[starts-with(@title, "lit")] | //p[starts-with(@title, "lit")]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
var searchElmt; var searchElmt;
var links = new Array(); var links = new Array();
var availableItems = new Array(); var availableItems = new Array();
var i = 0; var i = 0;
while (searchElmt = searchElmts.iterateNext()){ while (searchElmt = searchElmts.iterateNext()){
availableItems[i] = searchElmt.textContent; availableItems[i] = Zotero.Utilities.cleanString(searchElmt.textContent);
var docID = doc.evaluate(''./@title'', searchElmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue; var docID = doc.evaluate(''./@title'', searchElmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
links.push("http://vlp.mpiwg-berlin.mpg.de/library/meta?id=" + docID); links.push("http://vlp.mpiwg-berlin.mpg.de/library/meta?id=" + docID);
i++; i++;
@ -6523,7 +6525,6 @@ REPLACE INTO translators VALUES ('66928fe3-1e93-45a7-8e11-9df6de0a11b3', '1.0.0b
translator.setTranslator("881f60f2-0802-411a-9228-ce5f47b64c7d"); translator.setTranslator("881f60f2-0802-411a-9228-ce5f47b64c7d");
translator.setString(text); translator.setString(text);
translator.setHandler("itemDone", function(obj, item) { translator.setHandler("itemDone", function(obj, item) {
// TODO item.attachments.push({url:"http://www.arxiv.org/pdf/" + articleID, mimeType:"application/pdf", title:"VL Library PDF"}
item.type = undefined; item.type = undefined;
item.complete(); item.complete();
}); });