Pushed updated EBSCO and ACS Pubs translators to repo
Fixed a scraping error and an invalid HEAD request in the ACS Pubs translator on journals that use a different format for the PDF download string (and created #767 to support those journals). Removed Sean's custom debug line. Such vanity.
This commit is contained in:
parent
4ea517c088
commit
2167094e61
16
scrapers.sql
16
scrapers.sql
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-18 02:00:00'));
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-18 07:10:00'));
|
||||
|
||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||
'function detectWeb(doc, url) {
|
||||
|
@ -8917,7 +8917,7 @@ function doWeb(doc, url) {
|
|||
Zotero.wait();
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '1.0.0b3.r1', '', '2007-09-18 02:00:00', '1', '100', '4', 'EBSCOhost', 'Simon Kornblith', '^https?://[^/]+/(?:bsi|ehost)/(?:results|detail)',
|
||||
REPLACE INTO translators VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '1.0.0b3.r1', '', '2007-09-18 07:10:00', '1', '100', '4', 'EBSCOhost', 'Simon Kornblith', '^https?://[^/]+/(?:bsi|ehost)/(?:results|detail)',
|
||||
'function detectWeb(doc, url) {
|
||||
var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
|
@ -11925,7 +11925,7 @@ REPLACE INTO translators VALUES ('a354331-981b-43de-a61-bc26dd1be3a9', '1.0.0b3.
|
|||
});
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-b3a05d3de627', '1.0.0b3.r1', '', '2007-09-17 23:30:00', '1', '100', '4', 'ACS Publications', 'Sean Takats', '[^/]*/(?:wls/journals/query/subscriberResults\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample).cgi/[^/]+/[0-9]+/[0-9]+/i[0-9]+/(?:html|abs)/[^\.]+.html)',
|
||||
REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-b3a05d3de627', '1.0.0b3.r1', '', '2007-09-18 07:10:00', '1', '100', '4', 'ACS Publications', 'Sean Takats', '[^/]*/(?:wls/journals/query/subscriberResults\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample).cgi/[^/]+/[0-9]+/[0-9]+/i[0-9]+/(?:html|abs)/[^\.]+.html)',
|
||||
'function detectWeb(doc, url) {
|
||||
var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
|
@ -11956,7 +11956,6 @@ REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-b3a05d3de627', '1.0.0b
|
|||
translator.setString(text);
|
||||
translator.setHandler("itemDone", function(obj, item) {
|
||||
var pdf = pdfs.shift();
|
||||
Zotero.debug("takats PDF: "+pdf);
|
||||
if(pdf) {
|
||||
item.attachments.push({
|
||||
title:"ACS Full Text PDF",
|
||||
|
@ -12018,11 +12017,12 @@ function doWeb(doc, url) {
|
|||
var jid = doc.evaluate(''//jid'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
jid = jid.substr(jid.indexOf("/")+1);
|
||||
var pdf = doc.evaluate(''/html/body/a[text()="[PDF version of this article]"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
if(pdf) pdf = pdf.href;
|
||||
pdf = pdf.replace("searchRedirect.cgi", "article.cgi");
|
||||
|
||||
if (pdf) {
|
||||
pdf = pdf.href;
|
||||
pdf = pdf.replace("searchRedirect.cgi", "article.cgi");
|
||||
pdfs.push(pdf+"?sessid=");
|
||||
}
|
||||
var requests = [{jid:"jid=" + encodeURIComponent(jid)}];
|
||||
pdfs.push(pdf+"?sessid=");
|
||||
}
|
||||
|
||||
handleRequests(requests, pdfs);
|
||||
|
|
Loading…
Reference in New Issue
Block a user