diff --git a/scrapers.sql b/scrapers.sql index e3b86674b..67056a1a2 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-23 18:00:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-24 15:00:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -4873,7 +4873,7 @@ function doWeb(doc, url) { }'); -REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b4.r5', '', '2008-02-14 23:15:00', '0', '99', '4', 'Cell Press', 'Michael Berkowitz', 'http://www.(cancercell|cell|cellhostandmicrobe|cellmetabolism|cellstemcell|chembiol|current-biology|developmentalcell|immunity|molecule|neuron|structure).(org|com)', +REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b4.r5', '', '2008-04-24 15:00:00', '0', '99', '4', 'Cell Press', 'Michael Berkowitz', 'http://www.(cancercell|cell|cellhostandmicrobe|cellmetabolism|cellstemcell|chembiol|current-biology|developmentalcell|immunity|molecule|neuron|structure).(org|com)', 'function detectWeb(doc, url) { if (url.indexOf("search/results?") != -1) { return "multiple"; @@ -4903,31 +4903,32 @@ REPLACE INTO translators VALUES ('f26cfb71-efd7-47ae-a28c-d4d8852096bd', '1.0.0b Zotero.Utilities.processDocuments(articles, function(newDoc) { var newItem = new Zotero.Item("journalArticle"); newItem.title = newDoc.evaluate(''//h1[@class="article_title"]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; - var voliss = newDoc.evaluate(''//div[@class="article_citation"]/p[1]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(".")[2].split(","); + var voliss = newDoc.evaluate(''//div[contains(@class, "article_citation")]/p[1]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(".")[2].split(","); newItem.publicationTitle = voliss[0]; newItem.volume = voliss[1].match(/\d+/)[0]; - newItem.pages = voliss[2]; - newItem.date = voliss[3]; + newItem.pages = Zotero.Utilities.trimInternal(voliss[2]); + newItem.date = Zotero.Utilities.trimInternal(voliss[3]); newItem.abstractNote = newDoc.evaluate(''//div[@class="panelcontent article_summary"]/p[contains(text(), " ")]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; var authors = newDoc.evaluate(''//p[@class="authors"]'', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(","); for (var i in authors) { var next_author = authors[i]; if (next_author.match(/[a-z]/)) { - next_author = Zotero.Utilities.trimInternal(next_author.match(/[\w\s\.\-]+/)[0].replace(/\d/g, "")); + next_author = Zotero.Utilities.trimInternal(next_author.replace(/\d/g, "")); if (next_author.substr(0, 3) == "and") { next_author = next_author.substr(4); } newItem.creators.push(Zotero.Utilities.cleanAuthor(next_author, "author")); } } - var pdfx = ''//a[contains(text(), "PDF")]''; - var pdfurl = newDoc.evaluate(pdfx, newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().href; var newurl = newDoc.location.href; if (newurl.indexOf("abstract") != -1) { newurl = newurl.replace("abstract", "fulltext"); } + var uid = newurl.match(/uid=([^&]+)/)[1]; + var pdfx = ''//a[contains(text(), "PDF")][contains(@href, "'' + uid + ''")]''; + var pdfurl = newDoc.evaluate(pdfx, newDoc, null, XPathResult.ANY_TYPE, null).iterateNext().href; newItem.attachments = [ - {url:url, title:"Cell Press Snapshot", mimeType:"text/html"}, + {url:newurl, title:"Cell Press Snapshot", mimeType:"text/html"}, {url:pdfurl, title:"Cell Press Full Text PDF", mimeType:"application/pdf"} ]; newItem.complete();