From 110bb1b685ab0b93d76ea58b56f6bd61c3f51d3a Mon Sep 17 00:00:00 2001 From: Sean Takats Date: Mon, 17 Sep 2007 21:01:08 +0000 Subject: [PATCH] Fixes PDF download for ACS. Maybe. --- scrapers.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapers.sql b/scrapers.sql index 3daf4b3ac..3a1c68830 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-15 21:00:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-17 23:00:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -11920,7 +11920,7 @@ REPLACE INTO translators VALUES ('a354331-981b-43de-a61-bc26dd1be3a9', '1.0.0b3. }); }'); -REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-b3a05d3de627', '1.0.0b3.r1', '', '2007-04-29 17:30:00', '1', '100', '4', 'ACS Publications', 'Sean Takats', '[^/]*/(?:wls/journals/query/subscriberResults\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample).cgi/[^/]+/[0-9]+/[0-9]+/i[0-9]+/(?:html|abs)/[^\.]+.html)', +REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-b3a05d3de627', '1.0.0b3.r1', '', '2007-09-17 23:00:00', '1', '100', '4', 'ACS Publications', 'Sean Takats', '[^/]*/(?:wls/journals/query/subscriberResults\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample).cgi/[^/]+/[0-9]+/[0-9]+/i[0-9]+/(?:html|abs)/[^\.]+.html)', 'function detectWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { @@ -12015,6 +12015,7 @@ function doWeb(doc, url) { jid = jid.substr(jid.indexOf("/")+1); var pdf = doc.evaluate(''/html/body/a[text()="[PDF version of this article]"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); if(pdf) pdf = pdf.href; + /* pdf stays null when the page has no PDF link; only rewrite a real href so a missing link does not throw */ if(pdf) pdf = pdf.replace("searchRedirect.cgi", "article.cgi"); var requests = [{jid:"jid=" + encodeURIComponent(jid)}]; pdfs.push(pdf+"?sessid=");