- Fixes the Cell Press translator so that it (a) grabs the right PDF and (b) saves results from full-text pages.

This commit is contained in:
Michael Berkowitz 2008-04-24 14:35:32 +00:00
parent 7c9b9a531b
commit 22e446dadf

View File

@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-23 18:00:00'));
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-24 15:00:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) {
@ -4873,7 +4873,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b4.r5', '', '2008-02-14 23:15:00', '0', '99', '4', 'Cell Press', 'Michael Berkowitz', 'http://www.(cancercell|cell|cellhostandmicrobe|cellmetabolism|cellstemcell|chembiol|current-biology|developmentalcell|immunity|molecule|neuron|structure).(org|com)',
REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b4.r5', '', '2008-04-24 15:00:00', '0', '99', '4', 'Cell Press', 'Michael Berkowitz', 'http://www.(cancercell|cell|cellhostandmicrobe|cellmetabolism|cellstemcell|chembiol|current-biology|developmentalcell|immunity|molecule|neuron|structure).(org|com)',
'function detectWeb(doc, url) {
if (url.indexOf("search/results?") != -1) {
return "multiple";
@ -4903,31 +4903,32 @@ REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b
Zotero.Utilities.processDocuments(articles, function(newDoc) {
var newItem = new Zotero.Item("journalArticle");
newItem.title = newDoc.evaluate(''//h1[@class="article_title"]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
var voliss = newDoc.evaluate(''//div[@class="article_citation"]/p[1]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(".")[2].split(",");
var voliss = newDoc.evaluate(''//div[contains(@class, "article_citation")]/p[1]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(".")[2].split(",");
newItem.publicationTitle = voliss[0];
newItem.volume = voliss[1].match(/\d+/)[0];
newItem.pages = voliss[2];
newItem.date = voliss[3];
newItem.pages = Zotero.Utilities.trimInternal(voliss[2]);
newItem.date = Zotero.Utilities.trimInternal(voliss[3]);
newItem.abstractNote = newDoc.evaluate(''//div[@class="panelcontent article_summary"]/p[contains(text(), " ")]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
var authors = newDoc.evaluate(''//p[@class="authors"]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(",");
for (var i in authors) {
var next_author = authors[i];
if (next_author.match(/[a-z]/)) {
next_author = Zotero.Utilities.trimInternal(next_author.match(/[\w\s\.\-]+/)[0].replace(/\d/g, ""));
next_author = Zotero.Utilities.trimInternal(next_author.replace(/\d/g, ""));
if (next_author.substr(0, 3) == "and") {
next_author = next_author.substr(4);
}
newItem.creators.push(Zotero.Utilities.cleanAuthor(next_author, "author"));
}
}
var pdfx = ''//a[contains(text(), "PDF")]'';
var pdfurl = newDoc.evaluate(pdfx, newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().href;
var newurl = newDoc.location.href;
if (newurl.indexOf("abstract") != -1) {
newurl = newurl.replace("abstract", "fulltext");
}
var uid = newurl.match(/uid=([^&]+)/)[1];
var pdfx = ''//a[contains(text(), "PDF")][contains(@href, "'' + uid + ''")]'';
var pdfurl = newDoc.evaluate(pdfx, newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().href;
newItem.attachments = [
{url:url, title:"Cell Press Snapshot", mimeType:"text/html"},
{url:newurl, title:"Cell Press Snapshot", mimeType:"text/html"},
{url:pdfurl, title:"Cell Press Full Text PDF", mimeType:"application/pdf"}
];
newItem.complete();