- add DB load and save features to Scaffold
- add ECL notice to XUL files that were missing it
This commit is contained in:
parent
666418ae64
commit
511f7ec77d
|
@ -173,7 +173,6 @@ Zotero_Ingester_Interface.contentLoad = function(event) {
|
||||||
// if there's already a scrapable page in the browser window, and it's
|
// if there's already a scrapable page in the browser window, and it's
|
||||||
// still there, ensure it is actually part of the page, then return
|
// still there, ensure it is actually part of the page, then return
|
||||||
if(data.translators && data.translators.length && data.document.location) {
|
if(data.translators && data.translators.length && data.document.location) {
|
||||||
Zotero.debug("already scrapable?");
|
|
||||||
if(Zotero_Ingester_Interface.searchFrames(rootDoc, data.document)) {
|
if(Zotero_Ingester_Interface.searchFrames(rootDoc, data.document)) {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
|
@ -327,6 +326,7 @@ Zotero_Ingester_Interface._getData = function(browser) {
|
||||||
* Deletes the document object associated with a given browser window object
|
* Deletes the document object associated with a given browser window object
|
||||||
*/
|
*/
|
||||||
Zotero_Ingester_Interface._deleteData = function(browser) {
|
Zotero_Ingester_Interface._deleteData = function(browser) {
|
||||||
|
if(!browser) return false;
|
||||||
try {
|
try {
|
||||||
var key = browser.getAttribute("zotero-key");
|
var key = browser.getAttribute("zotero-key");
|
||||||
if(Zotero_Ingester_Interface.browserData[key]) {
|
if(Zotero_Ingester_Interface.browserData[key]) {
|
||||||
|
|
|
@ -1,4 +1,25 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
|
<!--
|
||||||
|
***** BEGIN LICENSE BLOCK *****
|
||||||
|
|
||||||
|
Copyright (c) 2006 Center for History and New Media
|
||||||
|
George Mason University, Fairfax, Virginia, USA
|
||||||
|
http://chnm.gmu.edu
|
||||||
|
|
||||||
|
Licensed under the Educational Community License, Version 1.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.opensource.org/licenses/ecl1.php
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
***** END LICENSE BLOCK *****
|
||||||
|
-->
|
||||||
<overlay id="zotero-ingester-overlay" xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul">
|
<overlay id="zotero-ingester-overlay" xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul">
|
||||||
|
|
||||||
<script src="../include.js"/>
|
<script src="../include.js"/>
|
||||||
|
|
|
@ -1,5 +1,25 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
|
<!--
|
||||||
|
***** BEGIN LICENSE BLOCK *****
|
||||||
|
|
||||||
|
Copyright (c) 2006 Center for History and New Media
|
||||||
|
George Mason University, Fairfax, Virginia, USA
|
||||||
|
http://chnm.gmu.edu
|
||||||
|
|
||||||
|
Licensed under the Educational Community License, Version 1.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.opensource.org/licenses/ecl1.php
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
***** END LICENSE BLOCK *****
|
||||||
|
-->
|
||||||
<?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
|
<?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
|
||||||
<?xml-stylesheet href="chrome://zotero/skin/zotero.css" type="text/css"?>
|
<?xml-stylesheet href="chrome://zotero/skin/zotero.css" type="text/css"?>
|
||||||
|
|
||||||
|
|
207
scrapers.sql
207
scrapers.sql
|
@ -1,4 +1,4 @@
|
||||||
-- 144
|
-- 145
|
||||||
|
|
||||||
-- ***** BEGIN LICENSE BLOCK *****
|
-- ***** BEGIN LICENSE BLOCK *****
|
||||||
--
|
--
|
||||||
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-12-17 21:49:18'));
|
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-12-18 01:28:04'));
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
@ -5410,8 +5410,20 @@ function doWeb(doc, url) {
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('21ad38-3830-4836-aed7-7b5c2dbfa740', '1.0.0b3r1', '', '2006-12-17 21:49:18', '1', '100', '4', 'ISI Web of Science', 'Simon Kornblith', '^http://[^/]+/WoS/CIW\.cgi',
|
REPLACE INTO translators VALUES ('21ad38-3830-4836-aed7-7b5c2dbfa740', '1.0.0b3r1', '', '2006-12-18 01:28:04', '1', '100', '4', 'ISI Web of Knowledge', 'Simon Kornblith', '^https?://[^/]+/(?:[^/]+/CIW\.cgi|portal\.cgi)',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
// require a link to Thomson at the bottom, to weed out other CGIs that
|
||||||
|
// happen to be called CIW.cgi
|
||||||
|
if(!doc.evaluate(''//p[@class="copyright"]/a[@href="http://www.thomson.com/scientific/scientific.jsp"]'',
|
||||||
|
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if(doc.title.substr(0, 11) == "Full Record") {
|
if(doc.title.substr(0, 11) == "Full Record") {
|
||||||
return "journalArticle";
|
return "journalArticle";
|
||||||
} else if(doc.title.substr(0, 14) == "Search Results") {
|
} else if(doc.title.substr(0, 14) == "Search Results") {
|
||||||
|
@ -5420,51 +5432,7 @@ REPLACE INTO translators VALUES ('21ad38-3830-4836-aed7-7b5c2dbfa740', '1.0.0b3r
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}',
|
}',
|
||||||
'function doWeb(doc, url) {
|
'function query(formAction, post, docOrUrls, done) {
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
|
||||||
var nsResolver = namespace ? function(prefix) {
|
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
|
||||||
} : null;
|
|
||||||
|
|
||||||
var urls = null;
|
|
||||||
var post = "";
|
|
||||||
|
|
||||||
// get form action
|
|
||||||
var formAction = doc.getElementsByTagName("form")[0].action;
|
|
||||||
|
|
||||||
// get hidden fields to add to post string
|
|
||||||
var hiddenFields = doc.evaluate(''//input[@type="hidden"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
||||||
var hiddenField;
|
|
||||||
while(hiddenField = hiddenFields.iterateNext()) {
|
|
||||||
post += "&"+hiddenField.name+"="+encodeURIComponent(hiddenField.value);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(doc.title.substr(0, 14) == "Search Results") {
|
|
||||||
var items = new Array();
|
|
||||||
var links = new Array();
|
|
||||||
|
|
||||||
var tableRows = doc.evaluate(''//tr[substring(@id, 1, 7) = "RECORD_"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
||||||
var tableRow;
|
|
||||||
while(tableRow = tableRows.iterateNext()) {
|
|
||||||
var id = tableRow.getElementsByTagName("input")[0].value;
|
|
||||||
var link = tableRow.getElementsByTagName("a")[0];
|
|
||||||
items[id] = link.textContent;
|
|
||||||
links[id] = link.href;
|
|
||||||
}
|
|
||||||
|
|
||||||
items = Zotero.selectItems(items);
|
|
||||||
if(!items) return true;
|
|
||||||
|
|
||||||
var urls = new Array();
|
|
||||||
for(var code in items) {
|
|
||||||
post += "&marked_list_candidates="+encodeURIComponent(code);
|
|
||||||
urls.push(links[id]);
|
|
||||||
}
|
|
||||||
post += "&mark_selection=selected_records&Export.x=10&Export.y=10";
|
|
||||||
} else {
|
|
||||||
post += "&ExportOne.x=10&ExportOne.y=10"
|
|
||||||
}
|
|
||||||
|
|
||||||
post = post.substr(1)+"&fields=FullNoCitRef";
|
post = post.substr(1)+"&fields=FullNoCitRef";
|
||||||
|
|
||||||
Zotero.Utilities.HTTP.doPost(formAction, post, function(text) {
|
Zotero.Utilities.HTTP.doPost(formAction, post, function(text) {
|
||||||
|
@ -5537,10 +5505,10 @@ REPLACE INTO translators VALUES ('21ad38-3830-4836-aed7-7b5c2dbfa740', '1.0.0b3r
|
||||||
// theoretically, there could be book types, but I don''t know what the codes
|
// theoretically, there could be book types, but I don''t know what the codes
|
||||||
// are and Thomson is unlikely to help me figure that out
|
// are and Thomson is unlikely to help me figure that out
|
||||||
item = new Zotero.Item("journalArticle");
|
item = new Zotero.Item("journalArticle");
|
||||||
if(urls) {
|
if(docOrUrls.location) {
|
||||||
item.attachments = [{title:"ISI Web of Science Snapshot", url:urls.shift(), mimeType:"text/html"}];
|
item.attachments = [{title:"ISI Web of Science Snapshot", document:docOrUrls}];
|
||||||
} else {
|
} else {
|
||||||
item.attachments = [{title:"ISI Web of Science Snapshot", document:doc}];
|
item.attachments = [{title:"ISI Web of Science Snapshot", url:docOrUrls.shift(), mimeType:"text/html"}];
|
||||||
}
|
}
|
||||||
field = content = undefined;
|
field = content = undefined;
|
||||||
} else if(field == "ER") {
|
} else if(field == "ER") {
|
||||||
|
@ -5550,11 +5518,146 @@ REPLACE INTO translators VALUES ('21ad38-3830-4836-aed7-7b5c2dbfa740', '1.0.0b3r
|
||||||
} else {
|
} else {
|
||||||
content += "\n"+Zotero.Utilities.cleanString(line);
|
content += "\n"+Zotero.Utilities.cleanString(line);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(done) {
|
||||||
|
done();
|
||||||
|
} else {
|
||||||
Zotero.done();
|
Zotero.done();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Exports the selected CrossSearch records one service at a time.
 *
 * Removes the first entry from services, posts its record IDs to that
 * service through query(), and hands query() a callback that calls this
 * function again for the remaining entries. Calls Zotero.done() once the
 * list is empty.
 *
 * @param {Array} services  service objects; the URL, items and itemURLs
 *                          properties are read here. The array is consumed
 *                          (shifted) as the export proceeds.
 * @param {String} SID      value sent as the SID form field
 */
function crossSearchFetch(services, SID) {
	// nothing left to fetch, so the whole translation is finished
	if(!services.length) {
		Zotero.done();
		return;
	}
	var service = services.shift();
	Zotero.debug(service);

	// execute requests
	var post = "&SID="+SID+"&all_summary_UTs="+service.items.join("%3B");

	// add marked_list_candidates, each followed by "/" (%2F) and a counter.
	// The counter previously stayed at 1 for every record because it was
	// never advanced; it now runs 1, 2, 3, ...
	// NOTE(review): assumes the server expects the 1-based position of the
	// record within this request - confirm against a live export form.
	for(var i = 0; i < service.items.length; i++) {
		post += "&marked_list_candidates="+service.items[i]+"%2F"+(i+1);
	}
	post += "&mark_selection=selected_records&Export.x=10&Export.y=10";

	// do query; the callback continues with the remaining services
	query(service.URL, post, service.itemURLs, function() { crossSearchFetch(services, SID) });
}
|
||||||
|
|
||||||
|
function doWeb(doc, url) {
|
||||||
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
var post = "";
|
||||||
|
|
||||||
|
// get hidden fields to add to post string
|
||||||
|
var hiddenFields = doc.evaluate(''//input[@type="hidden"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||||
|
var hiddenField;
|
||||||
|
while(hiddenField = hiddenFields.iterateNext()) {
|
||||||
|
post += "&"+hiddenField.name+"="+encodeURIComponent(hiddenField.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(doc.title.substr(0, 14) == "Search Results") {
|
||||||
|
var items = new Array();
|
||||||
|
var links = new Array();
|
||||||
|
var tableRow;
|
||||||
|
|
||||||
|
if(url.indexOf("/portal.cgi") != -1 || url.indexOf("/XS/CIW.cgi") != -1) {
|
||||||
|
// CrossSearch
|
||||||
|
var tableRows = doc.evaluate(''//tr[td/span/input[@name="marked_list_candidates"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||||
|
while(tableRow = tableRows.iterateNext()) {
|
||||||
|
var id = tableRow.getElementsByTagName("input")[0].value;
|
||||||
|
|
||||||
|
items[id] = tableRow.getElementsByTagName("b")[0].textContent;
|
||||||
|
|
||||||
|
var linkList = tableRow.getElementsByTagName("a");
|
||||||
|
for each(var link in linkList) {
|
||||||
|
if(link.href && link.href.indexOf("&Func=TransferToPublisher&") != -1) {
|
||||||
|
links[id] = link.href;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
items = Zotero.selectItems(items);
|
||||||
|
if(!items) return true;
|
||||||
|
|
||||||
|
var serviceRe = /^(https?:\/\/[^\/]+\/).*%26SrcAuth%3D([^%]+)%26/;
|
||||||
|
var queries = new Object();
|
||||||
|
var urls = new Object();
|
||||||
|
|
||||||
|
// contains an array of service objects with service, URL, itemURLs, and items properties
|
||||||
|
var services = new Array();
|
||||||
|
|
||||||
|
// build up object of request URL => [marked_list_candidates]
|
||||||
|
for(var id in items) {
|
||||||
|
var foundService = null;
|
||||||
|
|
||||||
|
var m = serviceRe.exec(links[id]);
|
||||||
|
for each(var service in services) {
|
||||||
|
if(service.service == m[2]) {
|
||||||
|
foundService = service;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!foundService) {
|
||||||
|
foundService = new Object();
|
||||||
|
foundService.service = m[2];
|
||||||
|
foundService.URL = m[1]+m[2]+"/CIW.cgi";
|
||||||
|
foundService.itemURLs = new Array();
|
||||||
|
foundService.items = new Array();
|
||||||
|
services.push(foundService);
|
||||||
|
}
|
||||||
|
|
||||||
|
foundService.items.push(id.substr(id.indexOf(":")+1));
|
||||||
|
foundService.itemURLs.push(links[id]);
|
||||||
|
}
|
||||||
|
|
||||||
|
var SID = doc.getElementsByName("SID")[0].value;
|
||||||
|
crossSearchFetch(services, SID);
|
||||||
|
} else {
|
||||||
|
var tableRows = doc.evaluate(''//tr[td/input[@name="marked_list_candidates"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||||
|
while(tableRow = tableRows.iterateNext()) {
|
||||||
|
var id = tableRow.getElementsByTagName("input")[0].value;
|
||||||
|
var link = tableRow.getElementsByTagName("a")[0];
|
||||||
|
items[id] = link.textContent;
|
||||||
|
links[id] = link.href;
|
||||||
|
}
|
||||||
|
|
||||||
|
items = Zotero.selectItems(items);
|
||||||
|
if(!items) return true;
|
||||||
|
|
||||||
|
var urls = new Array();
|
||||||
|
for(var code in items) {
|
||||||
|
post += "&marked_list_candidates="+encodeURIComponent(code);
|
||||||
|
urls.push(links[id]);
|
||||||
|
}
|
||||||
|
post += "&mark_selection=selected_records&Export.x=10&Export.y=10";
|
||||||
|
|
||||||
|
// get form action
|
||||||
|
var formAction = doc.getElementsByTagName("form")[0].action;
|
||||||
|
// run query
|
||||||
|
query(formAction, post, urls);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
post += "&ExportOne.x=10&ExportOne.y=10"
|
||||||
|
|
||||||
|
// get form action
|
||||||
|
var formAction = doc.getElementsByTagName("form")[0].action;
|
||||||
|
// run query
|
||||||
|
query(formAction, post, doc);
|
||||||
|
}
|
||||||
|
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
Loading…
Reference in New Issue
Block a user