Closes #622 by updating Google Scholar translator with cartesian's code.
This commit is contained in:
parent
a94f14870a
commit
0b8c2a2571
45
scrapers.sql
45
scrapers.sql
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-25 18:00:00'));
|
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-10-03 08:00:00'));
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
@ -8779,7 +8779,7 @@ REPLACE INTO translators VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '1.0.0b
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('57a00950-f0d1-4b41-b6ba-44ff0fc30289', '1.0.0b3.r1', '', '2007-03-22 17:40:00', 1, 100, 4, 'Google Scholar', 'Simon Kornblith', '^http://scholar\.google\.[a-z]+/scholar',
|
REPLACE INTO translators VALUES ('57a00950-f0d1-4b41-b6ba-44ff0fc30289', '1.0.0b3.r1', '', '2007-10-03 08:00:00', '1', '100', '4', 'Google Scholar', 'Simon Kornblith', '^http://scholar\.google\.[a-z]+/scholar',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
}',
|
}',
|
||||||
|
@ -8790,6 +8790,7 @@ function scrape(doc) {
|
||||||
|
|
||||||
var items = new Array();
|
var items = new Array();
|
||||||
var itemGrabLinks = new Array();
|
var itemGrabLinks = new Array();
|
||||||
|
var itemGrabLink;
|
||||||
var links = new Array();
|
var links = new Array();
|
||||||
var types = new Array();
|
var types = new Array();
|
||||||
|
|
||||||
|
@ -8800,21 +8801,23 @@ function scrape(doc) {
|
||||||
XPathResult.ANY_TYPE, null);
|
XPathResult.ANY_TYPE, null);
|
||||||
var elmt;
|
var elmt;
|
||||||
var i=0;
|
var i=0;
|
||||||
Zotero.debug("get elms");
|
Zotero.debug("get elmts");
|
||||||
while(elmt = elmts.iterateNext()) {
|
while(elmt = elmts.iterateNext()) {
|
||||||
var isCitation = doc.evaluate("./font[1]/b[1]/text()[1]", elmt, nsResolver,
|
var isCitation = doc.evaluate("./font[1]/b[1]/text()[1]", elmt, nsResolver,
|
||||||
XPathResult.ANY_TYPE, null).iterateNext();
|
XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
|
||||||
// use EndNote links if available
|
// use EndNote links if available
|
||||||
if(haveEndNoteLinks) {
|
if(haveEndNoteLinks) {
|
||||||
var itemGrabLink = doc.evaluate(''.//a[text() = "Import into EndNote"]'',
|
itemGrabLink = doc.evaluate(''.//a[text() = "Import into EndNote"]'',
|
||||||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
} else {
|
} else {
|
||||||
var itemGrabLink = doc.evaluate(''.//a[text() = "Related Articles"]'',
|
itemGrabLink = doc.evaluate(''.//a[text() = "Related Articles"]'',
|
||||||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
var noLinkRe = /^\[[^\]]+\]$/;
|
var noLinkRe = /^\[[^\]]+\]$/;
|
||||||
if(itemGrabLinks) {
|
|
||||||
|
if(itemGrabLink) {
|
||||||
itemGrabLinks[i] = itemGrabLink.href;
|
itemGrabLinks[i] = itemGrabLink.href;
|
||||||
if(isCitation && noLinkRe.test(isCitation.textContent)) {
|
if(isCitation && noLinkRe.test(isCitation.textContent)) {
|
||||||
// get titles for [BOOK] or [CITATION] entries
|
// get titles for [BOOK] or [CITATION] entries
|
||||||
|
@ -8830,12 +8833,13 @@ function scrape(doc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(items[i]) {
|
if(items[i]) {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
items = Zotero.selectItems(items);
|
items = Zotero.selectItems(items);
|
||||||
|
|
||||||
if(!items) {
|
if(!items) {
|
||||||
if(Zotero.done) Zotero.done(true);
|
if(Zotero.done) Zotero.done(true);
|
||||||
return true;
|
return true;
|
||||||
|
@ -8877,7 +8881,8 @@ function scrape(doc) {
|
||||||
function doWeb(doc, url) {
|
function doWeb(doc, url) {
|
||||||
var nsResolver = doc.createNSResolver(doc.documentElement);
|
var nsResolver = doc.createNSResolver(doc.documentElement);
|
||||||
|
|
||||||
doc.cookie = "GSP=ID=deadbeefdeadbeef:IN=ebe89f7e83a8fe75+7e6cc990821af63:CF=3; domain=.scholar.google.com";
|
//SR:Will use preference setting url instead of cookie to get EndNote links (works with ezproxy, doesn''t overwrite other prefs)
|
||||||
|
//doc.cookie = "GSP=ID=deadbeefdeadbeef:IN=ebe89f7e83a8fe75+7e6cc990821af63:CF=3; domain=.scholar.google.com";
|
||||||
|
|
||||||
// determine if we need to reload the page
|
// determine if we need to reload the page
|
||||||
|
|
||||||
|
@ -8886,18 +8891,24 @@ function doWeb(doc, url) {
|
||||||
haveEndNoteLinks = doc.evaluate(''//a[text() = "Import into EndNote"]'',
|
haveEndNoteLinks = doc.evaluate(''//a[text() = "Import into EndNote"]'',
|
||||||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
if(!haveEndNoteLinks) {
|
if(!haveEndNoteLinks) {
|
||||||
// next check if there are docs with no related articles
|
// SR:Commenting out this bit as code for retrieving citations from "Related" links is unreliable and unnecessary
|
||||||
if(doc.evaluate(''//p[@class="g"][not(descendant-or-self::text() = "Related Articles")]'',
|
//// next check if there are docs with no related articles
|
||||||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
//if(doc.evaluate(''''//p[@class="g"][not(descendant-or-self::text() = "Related Articles")]'''',
|
||||||
// now it''s reload time
|
// doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||||
haveEndNoteLinks = true;
|
|
||||||
Zotero.Utilities.loadDocument(url, scrape);
|
|
||||||
|
|
||||||
return;
|
// SR:Set preferences to show import links in English and do page reload
|
||||||
}
|
// (bit of a hack as it overwrites user prefs for language and import link type)
|
||||||
|
url = url.replace (/hl\=[^&]*&?/, "");
|
||||||
|
url = url.replace("scholar?", "scholar_setprefs?hl=en&scis=yes&scisf=3&submit=Save+Preferences&");
|
||||||
|
haveEndNoteLinks = true;
|
||||||
|
Zotero.Utilities.loadDocument(url, scrape);
|
||||||
|
Zotero.wait();
|
||||||
|
return;
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
scrape(doc, url);
|
scrape(doc, url);
|
||||||
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '1.0.0b3.r1', '', '2006-12-15 15:11:00', 1, 100, 4, 'Berkeley Library Catalog', 'Simon Kornblith', '^https?://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
|
REPLACE INTO translators VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '1.0.0b3.r1', '', '2006-12-15 15:11:00', 1, 100, 4, 'Berkeley Library Catalog', 'Simon Kornblith', '^https?://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user