diff --git a/scrapers.sql b/scrapers.sql
index a5c1befb1..1c17ed538 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-01-22 19:00:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-01-23 04:30:00'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -11148,6 +11148,117 @@ REPLACE INTO translators VALUES ('cb48083-4d9-4ed-ac95-2e93dceea0ec', '1.0.0b3.r
 	Zotero.wait();
 }');
 
+-- University of Chicago journals (www.journals.uchicago.edu): handles
+-- table-of-contents pages and single article (abstract / full text) pages by
+-- posting the DOIs to the download-citation action and importing the RIS reply.
+REPLACE INTO translators VALUES ('df966c80-c199-4329-ab02-fa410c8eb6dc', '1.0.0b3.r1', '', '2008-01-23 04:30:00', '1', '100', '4', 'University of Chicago', 'Sean Takats', '^https?://www\.journals\.uchicago\.edu[^/]*/(?:doi/abs|doi/full|toc)',
+'function detectWeb(doc, url) {
+	// only a /toc path directly after the host is a table of contents; a plain
+	// substring test would also fire on article URLs whose DOI contains "toc"
+	if(/^https?:\/\/[^\/]+\/toc/.test(url)) {
+		return "multiple";
+	} else {
+		return "journalArticle";
+	}
+}',
+'function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	// form body for the citation download; one "doi=" pair is added per article
+	var post = "";
+
+	// DOIs (used as keys) whose snapshot can be taken from the full text page
+	var fulltext = new Object();
+
+	// same table-of-contents test as in detectWeb
+	if(/^https?:\/\/[^\/]+\/toc/.test(url)) {
+		var items = new Array();
+		var links = new Array();
+
+		// the input test must be relative (.//): an absolute //input is true for
+		// every row as soon as one DOI input exists anywhere in the document
+		var tableRows = doc.evaluate(''//li[div[@class="articleListing_col3"]/label][.//input[@name="doi"]]'', doc,
+			nsResolver, XPathResult.ANY_TYPE, null);
+		var tableRow;
+		// Go through table rows
+		while(tableRow = tableRows.iterateNext()) {
+			var id = doc.evaluate(''.//input[@name="doi"]'', tableRow, nsResolver, XPathResult.ANY_TYPE,
+				null).iterateNext().value;
+			items[id] = Zotero.Utilities.trimInternal(doc.evaluate(''.//label'', tableRow,
+				nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent);
+		}
+
+		items = Zotero.selectItems(items);
+		if(!items) return true;
+
+		// find all fulltext links so we can determine where we can scrape the fulltext article
+		var fulltextLinks = doc.evaluate(''//a[starts-with(text(), "Full Text")]'', doc,
+			nsResolver, XPathResult.ANY_TYPE, null);
+		var fulltextLink;
+		while(fulltextLink = fulltextLinks.iterateNext()) {
+			links.push(fulltextLink.href.toString());
+		}
+
+		for(var i in items) {
+			post += "doi="+encodeURIComponent(i)+"&";
+
+			// check for fulltext links
+			for(var j = 0; j < links.length; j++) {
+				if(links[j].indexOf(i) != -1) {
+					fulltext[i] = true;
+					break;
+				}
+			}
+		}
+	} else {
+		// the DOI is everything after the /doi/abs/ style prefix (or /links/doi/) up to the query string
+		var m = url.match(/https?:\/\/[^\/]+\/doi\/[^\/]+\/([^\?]+)(\?|$)/) ||
+			url.match(/https?:\/\/[^\/]+\/links\/doi\/([^\?]+)(\?|$)/);
+		if(!m) {
+			throw "University of Chicago: could not find a DOI in " + url;
+		}
+		var doi = m[1];
+		post += "doi="+encodeURIComponent(doi)+"&";
+
+		if(url.indexOf("doi/full") != -1 ||
+		   doc.evaluate(''//img[@alt="Full Text Article"]'', doc, nsResolver, XPathResult.ANY_TYPE,
+		   null).iterateNext()) {
+			fulltext[doi] = true;
+		}
+	}
+
+	post += "include=cit&downloadFileName=deadbeef&format=refman&direct=on&submit=Download+article+citation+data";
+
+	Zotero.Utilities.HTTP.doPost("http://www.journals.uchicago.edu/action/downloadCitation", post, function(text) {
+		// load translator for RIS
+		var translator = Zotero.loadTranslator("import");
+		translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
+		translator.setString(text);
+		translator.setHandler("itemDone", function(obj, item) {
+			item.attachments = [
+				{url:item.url, title:"University of Chicago Journals Snapshot", mimeType:"text/html"},
+				{url:item.url.replace("/doi/abs", "/doi/pdf"), title:"University of Chicago Full Text PDF", mimeType:"application/pdf"}
+			];
+			// use fulltext if possible; the first four characters of item.DOI are
+			// skipped before the lookup (presumably a "doi:" prefix - TODO confirm),
+			// and an item without a DOI simply keeps the abstract snapshot
+			if(item.DOI && fulltext[item.DOI.substr(4)]) {
+				item.attachments[0].url = item.attachments[0].url.replace("/doi/abs", "/doi/full");
+			}
+
+			item.complete();
+		});
+		translator.translate();
+
+		Zotero.done();
+	});
+
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('f8765470-5ace-4a31-b4bd-4327b960ccd', '1.0.0b3.r1', '', '2007-03-24 22:20:00', 1, 100, 4, 'SpringerLink', 'Simon Kornblith', '^https?://(?:www\.springerlink\.com|springerlink.metapress.com)[^/]*/content/',
 'function detectWeb(doc, url) {
 	var namespace = doc.documentElement.namespaceURI;