closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata. OpenURL COinS lookup is now implemented using a real search architecture system. At the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.
This commit is contained in:
parent
9e5c15423a
commit
216f0c7581
|
@ -148,7 +148,6 @@ Scholar.OpenURL = new function() {
|
||||||
this.discoverResolvers = discoverResolvers;
|
this.discoverResolvers = discoverResolvers;
|
||||||
this.createContextObject = createContextObject;
|
this.createContextObject = createContextObject;
|
||||||
this.parseContextObject = parseContextObject;
|
this.parseContextObject = parseContextObject;
|
||||||
this.lookupContextObject = lookupContextObject;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns a URL to look up an item in the OpenURL resolver
|
* Returns a URL to look up an item in the OpenURL resolver
|
||||||
|
@ -305,12 +304,16 @@ Scholar.OpenURL = new function() {
|
||||||
/*
|
/*
|
||||||
* Generates an item in the format returned by item.fromArray() given an
|
* Generates an item in the format returned by item.fromArray() given an
|
||||||
* OpenURL version 1.0 contextObject
|
* OpenURL version 1.0 contextObject
|
||||||
|
*
|
||||||
|
* accepts an item array to fill, or creates and returns a new item array
|
||||||
*/
|
*/
|
||||||
function parseContextObject(co) {
|
function parseContextObject(co, item) {
|
||||||
var coParts = co.split("&");
|
var coParts = co.split("&");
|
||||||
|
|
||||||
|
if(!item) {
|
||||||
var item = new Array();
|
var item = new Array();
|
||||||
item.creators = new Array();
|
item.creators = new Array();
|
||||||
|
}
|
||||||
|
|
||||||
// get type
|
// get type
|
||||||
item.itemType = _determineResourceType(coParts);
|
item.itemType = _determineResourceType(coParts);
|
||||||
|
@ -416,157 +419,6 @@ Scholar.OpenURL = new function() {
|
||||||
return item;
|
return item;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Looks up additional information on an item in the format returned by
|
|
||||||
* item.fromArray() in CrossRef or Open WorldCat given an OpenURL version
|
|
||||||
* 1.0 contextObject
|
|
||||||
*/
|
|
||||||
function lookupContextObject(co, done, error) {
|
|
||||||
// CrossRef requires a url_ver to work right
|
|
||||||
if(co.indexOf("url_ver=Z39.88-2004") == -1) {
|
|
||||||
co = "url_ver=Z39.88-2004&"+co;
|
|
||||||
}
|
|
||||||
|
|
||||||
var type = _determineResourceType(co.split("&"));
|
|
||||||
if(!type) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(type == "journal") {
|
|
||||||
// look up journals in CrossRef
|
|
||||||
Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(req) {
|
|
||||||
var items = _processCrossRef(req.responseText);
|
|
||||||
done(items);
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// look up books in Open WorldCat
|
|
||||||
Scholar.Utilities.HTTP.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
|
|
||||||
var doc = browser.contentDocument;
|
|
||||||
// find new COinS in the Open WorldCat page
|
|
||||||
items = _processOWC(doc);
|
|
||||||
|
|
||||||
if(items) { // we got a single item page; return the item
|
|
||||||
done(items);
|
|
||||||
} else { // assume we have a search results page
|
|
||||||
var items = new Array();
|
|
||||||
|
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
|
||||||
var nsResolver = namespace ? function(prefix) {
|
|
||||||
if (prefix == 'x') return namespace; else return null;
|
|
||||||
} : null;
|
|
||||||
|
|
||||||
// first try to get only books
|
|
||||||
var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
|
||||||
var elmt = elmts.iterateNext();
|
|
||||||
if(!elmt) { // if that fails, look for other options
|
|
||||||
var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
|
||||||
elmt = elmts.iterateNext()
|
|
||||||
}
|
|
||||||
|
|
||||||
var urlsToProcess = new Array();
|
|
||||||
do {
|
|
||||||
urlsToProcess.push(elmt.href);
|
|
||||||
} while(elmt = elmts.iterateNext());
|
|
||||||
|
|
||||||
Scholar.Utilities.HTTP.processDocuments(null, urlsToProcess, function(browser) {
|
|
||||||
// per URL
|
|
||||||
var newItems = _processOWC(browser.contentDocument);
|
|
||||||
if(newItems) {
|
|
||||||
items = items.concat(newItems);
|
|
||||||
}
|
|
||||||
}, function() { // done
|
|
||||||
done(items);
|
|
||||||
}, function() { // error
|
|
||||||
error();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}, null, function() {
|
|
||||||
error();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Processes the XML format returned by CrossRef
|
|
||||||
*/
|
|
||||||
function _processCrossRef(xmlOutput) {
|
|
||||||
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
|
||||||
|
|
||||||
// parse XML with E4X
|
|
||||||
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
|
||||||
try {
|
|
||||||
var xml = new XML(xmlOutput);
|
|
||||||
} catch(e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ensure status is valid
|
|
||||||
var status = xml.qr::body.qr::query.@status.toString();
|
|
||||||
if(status != "resolved" && status != "multiresolved") {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
var query = xml.qr::body.qr::query;
|
|
||||||
var item = new Array();
|
|
||||||
item.creators = new Array();
|
|
||||||
|
|
||||||
// try to get a DOI
|
|
||||||
item.DOI = query.qr::doi.(@type=="journal_article").toString();
|
|
||||||
if(!item.DOI) {
|
|
||||||
item.DOI = query.qr::doi.(@type=="book_title").toString();
|
|
||||||
}
|
|
||||||
if(!item.DOI) {
|
|
||||||
item.DOI = query.qr::doi.(@type=="book_content").toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
// try to get an ISSN (no print/electronic preferences)
|
|
||||||
item.ISSN = query.qr::issn.toString();
|
|
||||||
// get title
|
|
||||||
item.title = query.qr::article_title.toString();
|
|
||||||
// get publicationTitle
|
|
||||||
item.publicationTitle = query.qr::journal_title.toString();
|
|
||||||
// get author
|
|
||||||
item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.toString(), "author", true));
|
|
||||||
// get volume
|
|
||||||
item.volume = query.qr::volume.toString();
|
|
||||||
// get issue
|
|
||||||
item.issue = query.qr::issue.toString();
|
|
||||||
// get year
|
|
||||||
item.date = query.qr::year.toString();
|
|
||||||
// get edition
|
|
||||||
item.edition = query.qr::edition_number.toString();
|
|
||||||
// get first page
|
|
||||||
item.pages = query.qr::first_page.toString();
|
|
||||||
|
|
||||||
return [item];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Parses a document object referring to an Open WorldCat entry for its
|
|
||||||
* OpenURL contextObject, then returns an item generated from this
|
|
||||||
* contextObject
|
|
||||||
*/
|
|
||||||
function _processOWC(doc) {
|
|
||||||
var spanTags = doc.getElementsByTagName("span");
|
|
||||||
for(var i=0; i<spanTags.length; i++) {
|
|
||||||
var spanClass = spanTags[i].getAttribute("class");
|
|
||||||
if(spanClass) {
|
|
||||||
var spanClasses = spanClass.split(" ");
|
|
||||||
if(Scholar.inArray("Z3988", spanClasses)) {
|
|
||||||
var spanTitle = spanTags[i].getAttribute("title");
|
|
||||||
var item = parseContextObject(spanTitle);
|
|
||||||
if(item) {
|
|
||||||
return [item];
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determines the type of an OpenURL contextObject
|
* Determines the type of an OpenURL contextObject
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
* export
|
* export
|
||||||
* import
|
* import
|
||||||
* web
|
* web
|
||||||
|
* search
|
||||||
*
|
*
|
||||||
* a typical export process:
|
* a typical export process:
|
||||||
* var translatorObj = new Scholar.Translate();
|
* var translatorObj = new Scholar.Translate();
|
||||||
|
@ -35,7 +36,10 @@
|
||||||
* location - the location of the target (read-only; set with setLocation)
|
* location - the location of the target (read-only; set with setLocation)
|
||||||
* for import/export - this is an instance of nsILocalFile
|
* for import/export - this is an instance of nsILocalFile
|
||||||
* for web - this is a URL
|
* for web - this is a URL
|
||||||
|
* item - item to be used for searching (read-only; set with setItem)
|
||||||
* path - the path to the target; for web, this is the same as location
|
* path - the path to the target; for web, this is the same as location
|
||||||
|
* saveItem - whether new items should be saved to the database. defaults to
|
||||||
|
* true; set using second argument of constructor.
|
||||||
*
|
*
|
||||||
* PRIVATE PROPERTIES:
|
* PRIVATE PROPERTIES:
|
||||||
*
|
*
|
||||||
|
@ -49,6 +53,10 @@
|
||||||
* _sandbox - sandbox in which translators will be executed
|
* _sandbox - sandbox in which translators will be executed
|
||||||
* _streams - streams that need to be closed when execution is complete
|
* _streams - streams that need to be closed when execution is complete
|
||||||
* _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
|
* _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
|
||||||
|
* _parentTranslator - set when a translator is called from another translator.
|
||||||
|
* among other things, disables passing of the translate
|
||||||
|
* object to handlers and modifies complete() function on
|
||||||
|
* returned items
|
||||||
*
|
*
|
||||||
* WEB-ONLY PRIVATE PROPERTIES:
|
* WEB-ONLY PRIVATE PROPERTIES:
|
||||||
*
|
*
|
||||||
|
@ -56,24 +64,42 @@
|
||||||
* an EZProxy
|
* an EZProxy
|
||||||
*/
|
*/
|
||||||
|
|
||||||
Scholar.Translate = function(type) {
|
Scholar.Translate = function(type, saveItem) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
|
|
||||||
// import = 001 = 1
|
// import = 0001 = 1
|
||||||
// export = 010 = 2
|
// export = 0010 = 2
|
||||||
// web = 100 = 4
|
// web = 0100 = 4
|
||||||
|
// search = 1000 = 8
|
||||||
|
|
||||||
// combination types determined by addition or bitwise AND
|
// combination types determined by addition or bitwise AND
|
||||||
// i.e., import+export = 1+2 = 3
|
// i.e., import+export = 1+2 = 3
|
||||||
|
this._numericTypes = "";
|
||||||
|
for(var i=0; i<=1; i++) {
|
||||||
|
for(var j=0; j<=1; j++) {
|
||||||
|
for(var k=0; k<=1; k++) {
|
||||||
if(type == "import") {
|
if(type == "import") {
|
||||||
this._numericTypes = "1,3,5,7";
|
this._numericTypes += ","+parseInt(i.toString()+j.toString()+k.toString()+"1", 2);
|
||||||
} else if(type == "export") {
|
} else if(type == "export") {
|
||||||
this._numericTypes = "2,3,6,7";
|
this._numericTypes += ","+parseInt(i.toString()+j.toString()+"1"+k.toString(), 2);
|
||||||
} else if(type == "web") {
|
} else if(type == "web") {
|
||||||
this._numericTypes = "4,5,6,7";
|
this._numericTypes += ","+parseInt(i.toString()+"1"+j.toString()+k.toString(), 2);
|
||||||
|
} else if(type == "search") {
|
||||||
|
this._numericTypes += ","+parseInt("1"+i.toString()+j.toString()+k.toString(), 2);
|
||||||
} else {
|
} else {
|
||||||
throw("invalid import type");
|
throw("invalid import type");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this._numericTypes = this._numericTypes.substr(1);
|
||||||
|
|
||||||
|
if(saveItem === false) { // three equals signs means if it's left
|
||||||
|
// undefined, this.saveItem will still be true
|
||||||
|
this.saveItem = false;
|
||||||
|
} else {
|
||||||
|
this.saveItem = true;
|
||||||
|
}
|
||||||
|
|
||||||
this._handlers = new Array();
|
this._handlers = new Array();
|
||||||
this._streams = new Array();
|
this._streams = new Array();
|
||||||
|
@ -87,6 +113,13 @@ Scholar.Translate.prototype.setBrowser = function(browser) {
|
||||||
this.setLocation(browser.contentDocument.location.href);
|
this.setLocation(browser.contentDocument.location.href);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sets the item to be used for searching
|
||||||
|
*/
|
||||||
|
Scholar.Translate.prototype.setItem = function(item) {
|
||||||
|
this.item = item;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* sets the location to operate upon (file should be an nsILocalFile object or
|
* sets the location to operate upon (file should be an nsILocalFile object or
|
||||||
* web address)
|
* web address)
|
||||||
|
@ -112,12 +145,41 @@ Scholar.Translate.prototype.setLocation = function(location) {
|
||||||
* accepts either the object from getTranslators() or an ID
|
* accepts either the object from getTranslators() or an ID
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype.setTranslator = function(translator) {
|
Scholar.Translate.prototype.setTranslator = function(translator) {
|
||||||
if(typeof(translator) == "object") { // passed an object and not an ID
|
if(!translator) {
|
||||||
translator = translator.translatorID;
|
throw("cannot set translator: invalid value");
|
||||||
}
|
}
|
||||||
|
|
||||||
var sql = "SELECT * FROM translators WHERE translatorID = ? AND type IN ("+this._numericTypes+")";
|
if(typeof(translator) == "object") { // passed an object and not an ID
|
||||||
this.translator = Scholar.DB.rowQuery(sql, [translator]);
|
if(translator.translatorID) {
|
||||||
|
translator = [translator.translatorID];
|
||||||
|
} else {
|
||||||
|
// we have an associative array of translators
|
||||||
|
if(this.type != "search") {
|
||||||
|
throw("cannot set translator: a single translator must be specified when doing "+this.type+" translation");
|
||||||
|
}
|
||||||
|
// accept a list of objects
|
||||||
|
for(var i in translator) {
|
||||||
|
if(typeof(translator[i]) == "object") {
|
||||||
|
if(translator[i].translatorID) {
|
||||||
|
translator[i] = translator[i].translatorID;
|
||||||
|
} else {
|
||||||
|
throw("cannot set translator: must specify a single translator or a list of translators");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
translator = [translator];
|
||||||
|
}
|
||||||
|
|
||||||
|
var where = "";
|
||||||
|
for(var i in translator) {
|
||||||
|
where += " OR translatorID = ?";
|
||||||
|
}
|
||||||
|
where = where.substr(4);
|
||||||
|
|
||||||
|
var sql = "SELECT * FROM translators WHERE "+where+" AND type IN ("+this._numericTypes+")";
|
||||||
|
this.translator = Scholar.DB.query(sql, translator);
|
||||||
if(!this.translator) {
|
if(!this.translator) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -145,13 +207,13 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
|
||||||
* returns: N/A
|
* returns: N/A
|
||||||
*
|
*
|
||||||
* itemDone
|
* itemDone
|
||||||
* valid: import, web
|
* valid: import, web, search
|
||||||
* called: when an item has been processed; may be called asynchronously
|
* called: when an item has been processed; may be called asynchronously
|
||||||
* passed: an item object (see Scholar.Item)
|
* passed: an item object (see Scholar.Item)
|
||||||
* returns: N/A
|
* returns: N/A
|
||||||
*
|
*
|
||||||
* collectionDone
|
* collectionDone
|
||||||
* valid: import, web
|
* valid: import
|
||||||
* called: when a collection has been processed, after all items have been
|
* called: when a collection has been processed, after all items have been
|
||||||
* added; may be called asynchronously
|
* added; may be called asynchronously
|
||||||
* passed: a collection object (see Scholar.Collection)
|
* passed: a collection object (see Scholar.Collection)
|
||||||
|
@ -187,7 +249,7 @@ Scholar.Translate.prototype.getTranslators = function() {
|
||||||
var sql = "SELECT translatorID, label, target, detectCode FROM translators WHERE type IN ("+this._numericTypes+") ORDER BY target IS NULL";
|
var sql = "SELECT translatorID, label, target, detectCode FROM translators WHERE type IN ("+this._numericTypes+") ORDER BY target IS NULL";
|
||||||
var translators = Scholar.DB.query(sql);
|
var translators = Scholar.DB.query(sql);
|
||||||
|
|
||||||
if(!this.location) {
|
if(!this.location && !this.item) {
|
||||||
return translators; // no need to see which can translate, because
|
return translators; // no need to see which can translate, because
|
||||||
// we don't have a location yet (for export or
|
// we don't have a location yet (for export or
|
||||||
// import dialog)
|
// import dialog)
|
||||||
|
@ -228,20 +290,21 @@ Scholar.Translate.prototype.displayOptions = function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.Translate.prototype._loadTranslator = function() {
|
Scholar.Translate.prototype._loadTranslator = function() {
|
||||||
if(!this._sandbox) {
|
if(!this._sandbox || this.type == "search") {
|
||||||
// create a new sandbox if none exists
|
// create a new sandbox if none exists, or for searching (so that it's
|
||||||
|
// bound to the correct url)
|
||||||
this._generateSandbox();
|
this._generateSandbox();
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse detect code for the translator
|
// parse detect code for the translator
|
||||||
this._parseDetectCode(this.translator);
|
this._parseDetectCode(this.translator[0]);
|
||||||
|
|
||||||
Scholar.debug("parsing code for "+this.translator.label);
|
Scholar.debug("parsing code for "+this.translator[0].label);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Components.utils.evalInSandbox(this.translator.code, this._sandbox);
|
Components.utils.evalInSandbox(this.translator[0].code, this._sandbox);
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in parsing code for '+this.translator.label);
|
Scholar.debug(e+' in parsing code for '+this.translator[0].label);
|
||||||
this._translationComplete(false);
|
this._translationComplete(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -254,17 +317,24 @@ Scholar.Translate.prototype._loadTranslator = function() {
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype.translate = function() {
|
Scholar.Translate.prototype.translate = function() {
|
||||||
this._IDMap = new Array();
|
this._IDMap = new Array();
|
||||||
|
this._complete = false;
|
||||||
|
|
||||||
if(!this.location) {
|
if(!this.translator || !this.translator.length) {
|
||||||
throw("cannot translate: no location specified");
|
throw("cannot translate: no translator specified");
|
||||||
}
|
}
|
||||||
|
|
||||||
this._complete = false;
|
if(!this.location && this.type != "search") {
|
||||||
|
// searches operate differently, because we could have an array of
|
||||||
|
// translators and have to go through each
|
||||||
|
throw("cannot translate: no location specified");
|
||||||
|
}
|
||||||
|
|
||||||
if(!this._loadTranslator()) {
|
if(!this._loadTranslator()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this._sandbox.Scholar.scraperName = this.translator[0].label;
|
||||||
|
|
||||||
var returnValue;
|
var returnValue;
|
||||||
if(this.type == "web") {
|
if(this.type == "web") {
|
||||||
returnValue = this._web();
|
returnValue = this._web();
|
||||||
|
@ -272,7 +342,10 @@ Scholar.Translate.prototype.translate = function() {
|
||||||
returnValue = this._import();
|
returnValue = this._import();
|
||||||
} else if(this.type == "export") {
|
} else if(this.type == "export") {
|
||||||
returnValue = this._export();
|
returnValue = this._export();
|
||||||
|
} else if(this.type == "search") {
|
||||||
|
returnValue = this._search();
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!returnValue) {
|
if(!returnValue) {
|
||||||
// failure
|
// failure
|
||||||
this._translationComplete(false);
|
this._translationComplete(false);
|
||||||
|
@ -285,12 +358,31 @@ Scholar.Translate.prototype.translate = function() {
|
||||||
/*
|
/*
|
||||||
* generates a sandbox for scraping/scraper detection
|
* generates a sandbox for scraping/scraper detection
|
||||||
*/
|
*/
|
||||||
|
Scholar.Translate._searchSandboxRegexp = new RegExp();
|
||||||
|
Scholar.Translate._searchSandboxRegexp.compile("^http://[\\w.]+/");
|
||||||
Scholar.Translate.prototype._generateSandbox = function() {
|
Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
var me = this;
|
var me = this;
|
||||||
|
|
||||||
|
if(this.type == "web" || this.type == "search") {
|
||||||
|
// get sandbox URL
|
||||||
|
var sandboxURL = "";
|
||||||
if(this.type == "web") {
|
if(this.type == "web") {
|
||||||
// use real URL, not proxied version, to create sandbox
|
// use real URL, not proxied version, to create sandbox
|
||||||
this._sandbox = new Components.utils.Sandbox(this.browser.contentDocument.location.href);
|
sandboxURL = this.browser.contentDocument.location.href;
|
||||||
|
} else {
|
||||||
|
// generate sandbox for search by extracting domain from translator
|
||||||
|
// target, if one exists
|
||||||
|
if(this.translator && this.translator[0] && this.translator[0].target) {
|
||||||
|
// so that web translators work too
|
||||||
|
var tempURL = this.translator[0].target.replace(/\\/g, "").replace(/\^/g, "");
|
||||||
|
var m = Scholar.Translate._searchSandboxRegexp.exec(tempURL);
|
||||||
|
if(m) {
|
||||||
|
sandboxURL = m[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Scholar.debug("binding sandbox to "+sandboxURL);
|
||||||
|
this._sandbox = new Components.utils.Sandbox(sandboxURL);
|
||||||
this._sandbox.Scholar = new Object();
|
this._sandbox.Scholar = new Object();
|
||||||
|
|
||||||
// add ingester utilities
|
// add ingester utilities
|
||||||
|
@ -300,27 +392,30 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
// set up selectItems handler
|
// set up selectItems handler
|
||||||
this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) };
|
this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) };
|
||||||
} else {
|
} else {
|
||||||
// use null URL to create sanbox
|
// use null URL to create sandbox
|
||||||
this._sandbox = new Components.utils.Sandbox("");
|
this._sandbox = new Components.utils.Sandbox("");
|
||||||
this._sandbox.Scholar = new Object();
|
this._sandbox.Scholar = new Object();
|
||||||
|
|
||||||
this._sandbox.Scholar.Utilities = new Scholar.Utilities();
|
this._sandbox.Scholar.Utilities = new Scholar.Utilities();
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.type == "web" || this.type == "import") {
|
|
||||||
|
if(this.type == "export") {
|
||||||
|
// add routines to retrieve items and collections
|
||||||
|
this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
|
||||||
|
this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() }
|
||||||
|
} else {
|
||||||
// add routines to add new items
|
// add routines to add new items
|
||||||
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
|
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
|
||||||
// attach the function to be run when an item is done
|
// attach the function to be run when an item is done
|
||||||
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
|
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
|
||||||
|
|
||||||
|
if(this.type == "import") {
|
||||||
// add routines to add new collections
|
// add routines to add new collections
|
||||||
this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
|
this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
|
||||||
// attach the function to be run when a collection is done
|
// attach the function to be run when a collection is done
|
||||||
this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
|
this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
|
||||||
} else if(this.type == "export") {
|
}
|
||||||
// add routines to retrieve items and collections
|
|
||||||
this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
|
|
||||||
this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this._sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult;
|
this._sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult;
|
||||||
|
@ -334,9 +429,9 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
this._sandbox.Scholar.addOption = function(option, value) {me._addOption(option, value) };
|
this._sandbox.Scholar.addOption = function(option, value) {me._addOption(option, value) };
|
||||||
|
|
||||||
// for loading other translators and accessing their methods
|
// for loading other translators and accessing their methods
|
||||||
var me = this;
|
|
||||||
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
|
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
|
||||||
var translation = new Scholar.Translate(type);
|
var translation = new Scholar.Translate(type, (translatorID ? true : false));
|
||||||
|
if(translatorID) {
|
||||||
// assign same handlers as for parent, because the done handler won't
|
// assign same handlers as for parent, because the done handler won't
|
||||||
// get called anyway, and the itemDone/selectItems handlers should be
|
// get called anyway, and the itemDone/selectItems handlers should be
|
||||||
// the same
|
// the same
|
||||||
|
@ -348,6 +443,24 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
// use internal io
|
// use internal io
|
||||||
translation._initializeInternalIO();
|
translation._initializeInternalIO();
|
||||||
return translation._sandbox;
|
return translation._sandbox;
|
||||||
|
} else {
|
||||||
|
// create a safe translator object, so that scrapers can't get
|
||||||
|
// access to potentially harmful methods.
|
||||||
|
if(type == "import" || type == "export") {
|
||||||
|
throw("you must specify a translatorID for "+type+" translation");
|
||||||
|
}
|
||||||
|
|
||||||
|
var safeTranslator = new Object();
|
||||||
|
safeTranslator.setItem = function(arg) { return translation.setItem(arg) };
|
||||||
|
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
|
||||||
|
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
|
||||||
|
safeTranslator.setTranslator = function(arg) { return translation.setTranslator(arg) };
|
||||||
|
safeTranslator.getTranslators = function() { return translation.getTranslators() };
|
||||||
|
safeTranslator.translate = function() { return translation.translate() };
|
||||||
|
translation._parentTranslator = me;
|
||||||
|
|
||||||
|
return safeTranslator;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,12 +468,11 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
* Check to see if _scraper_ can scrape this document
|
* Check to see if _scraper_ can scrape this document
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._canTranslate = function(translator) {
|
Scholar.Translate.prototype._canTranslate = function(translator) {
|
||||||
var canTranslate = false;
|
|
||||||
|
|
||||||
// Test location with regular expression
|
// Test location with regular expression
|
||||||
// If this is slow, we could preload all scrapers and compile regular
|
// If this is slow, we could preload all scrapers and compile regular
|
||||||
// expressions, so each check will be faster
|
// expressions, so each check will be faster
|
||||||
if(translator.target) {
|
if(translator.target && this.type != "search") {
|
||||||
|
var canTranslate = false;
|
||||||
if(this.type == "web") {
|
if(this.type == "web") {
|
||||||
var regularExpression = new RegExp(translator.target, "i");
|
var regularExpression = new RegExp(translator.target, "i");
|
||||||
} else {
|
} else {
|
||||||
|
@ -370,6 +482,8 @@ Scholar.Translate.prototype._canTranslate = function(translator) {
|
||||||
if(regularExpression.test(this.path)) {
|
if(regularExpression.test(this.path)) {
|
||||||
canTranslate = true;
|
canTranslate = true;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
var canTranslate = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test with JavaScript if available and didn't have a regular expression or
|
// Test with JavaScript if available and didn't have a regular expression or
|
||||||
|
@ -388,14 +502,21 @@ Scholar.Translate.prototype._canTranslate = function(translator) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this._sandbox.detect) {
|
if((this.type == "web" && this._sandbox.detectWeb) ||
|
||||||
|
(this.type == "search" && this._sandbox.detectSearch) ||
|
||||||
|
(this.type == "import" && this._sandbox.detectImport) ||
|
||||||
|
(this.type == "export" && this._sandbox.detectExport)) {
|
||||||
var returnValue;
|
var returnValue;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if(this.type == "web") {
|
if(this.type == "web") {
|
||||||
returnValue = this._sandbox.detect(this.browser.contentDocument, this.location);
|
returnValue = this._sandbox.detectWeb(this.browser.contentDocument, this.location);
|
||||||
|
} else if(this.type == "search") {
|
||||||
|
returnValue = this._sandbox.detectSearch(this.item);
|
||||||
} else if(this.type == "import") {
|
} else if(this.type == "import") {
|
||||||
returnValue = this._sandbox.detect();
|
returnValue = this._sandbox.detectImport();
|
||||||
|
} else if(this.type == "export") {
|
||||||
|
returnValue = this._sandbox.detectExport();
|
||||||
}
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in executing detectCode for '+translator.label);
|
Scholar.debug(e+' in executing detectCode for '+translator.label);
|
||||||
|
@ -476,7 +597,7 @@ Scholar.Translate.prototype._addOption = function(option, value) {
|
||||||
* called as wait() in translator code
|
* called as wait() in translator code
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._enableAsynchronous = function() {
|
Scholar.Translate.prototype._enableAsynchronous = function() {
|
||||||
me = this;
|
var me = this;
|
||||||
this._waitForCompletion = true;
|
this._waitForCompletion = true;
|
||||||
this._sandbox.Scholar.done = function() { me._translationComplete(true) };
|
this._sandbox.Scholar.done = function() { me._translationComplete(true) };
|
||||||
}
|
}
|
||||||
|
@ -505,6 +626,12 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
|
||||||
if(!this._complete) {
|
if(!this._complete) {
|
||||||
this._complete = true;
|
this._complete = true;
|
||||||
|
|
||||||
|
if(this.type == "search" && !this._itemsFound && this.translator.length > 1) {
|
||||||
|
// if we're performing a search and didn't get any results, go on
|
||||||
|
// to the next translator
|
||||||
|
this.translator.shift();
|
||||||
|
this.translate();
|
||||||
|
} else {
|
||||||
Scholar.debug("translation complete");
|
Scholar.debug("translation complete");
|
||||||
|
|
||||||
// call handler
|
// call handler
|
||||||
|
@ -513,6 +640,7 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
|
||||||
// close open streams
|
// close open streams
|
||||||
this._closeStreams();
|
this._closeStreams();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -547,13 +675,23 @@ Scholar.Translate.prototype._closeStreams = function() {
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._itemDone = function(item) {
|
Scholar.Translate.prototype._itemDone = function(item) {
|
||||||
Scholar.debug(item);
|
Scholar.debug(item);
|
||||||
|
if(!this.saveItem) { // if we're not supposed to save the item, just
|
||||||
|
// return the item array
|
||||||
|
|
||||||
|
// if a parent sandbox exists, use complete() function from that sandbox
|
||||||
|
if(this._parentTranslator) {
|
||||||
|
var pt = this._parentTranslator;
|
||||||
|
item.complete = function() { pt._itemDone(this) };
|
||||||
|
Scholar.debug("done from parent sandbox");
|
||||||
|
}
|
||||||
|
this._runHandler("itemDone", item);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Get typeID, defaulting to "website"
|
// Get typeID, defaulting to "website"
|
||||||
var type = (item.itemType ? item.itemType : "website");
|
var type = (item.itemType ? item.itemType : "website");
|
||||||
|
|
||||||
Scholar.debug("type is "+type);
|
|
||||||
if(type == "note") { // handle notes differently
|
if(type == "note") { // handle notes differently
|
||||||
Scholar.debug("handling a note");
|
|
||||||
var myID = Scholar.Notes.add(item.note);
|
var myID = Scholar.Notes.add(item.note);
|
||||||
// re-retrieve the item
|
// re-retrieve the item
|
||||||
var newItem = Scholar.Items.get(myID);
|
var newItem = Scholar.Items.get(myID);
|
||||||
|
@ -718,7 +856,11 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
|
||||||
for(var i in this._handlers[type]) {
|
for(var i in this._handlers[type]) {
|
||||||
Scholar.debug("running handler "+i+" for "+type);
|
Scholar.debug("running handler "+i+" for "+type);
|
||||||
try {
|
try {
|
||||||
|
if(this._parentTranslator) {
|
||||||
|
returnValue = this._handlers[type][i](null, argument);
|
||||||
|
} else {
|
||||||
returnValue = this._handlers[type][i](this, argument);
|
returnValue = this._handlers[type][i](this, argument);
|
||||||
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in handler '+i+' for '+type);
|
Scholar.debug(e+' in handler '+i+' for '+type);
|
||||||
}
|
}
|
||||||
|
@ -734,7 +876,21 @@ Scholar.Translate.prototype._web = function() {
|
||||||
try {
|
try {
|
||||||
this._sandbox.doWeb(this.browser.contentDocument, this.location);
|
this._sandbox.doWeb(this.browser.contentDocument, this.location);
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in executing code for '+this.translator.label);
|
Scholar.debug(e+' in executing code for '+this.translator[0].label);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* does the actual search translation
|
||||||
|
*/
|
||||||
|
Scholar.Translate.prototype._search = function() {
|
||||||
|
try {
|
||||||
|
this._sandbox.doSearch(this.item);
|
||||||
|
} catch(e) {
|
||||||
|
Scholar.debug(e+' in executing code for '+this.translator[0].label);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -750,7 +906,7 @@ Scholar.Translate.prototype._import = function() {
|
||||||
try {
|
try {
|
||||||
this._sandbox.doImport();
|
this._sandbox.doImport();
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in executing code for '+this.translator.label);
|
Scholar.debug(e+' in executing code for '+this.translator[0].label);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -830,7 +986,7 @@ Scholar.Translate.prototype._export = function() {
|
||||||
try {
|
try {
|
||||||
this._sandbox.doExport();
|
this._sandbox.doExport();
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
Scholar.debug(e+' in executing code for '+this.translator.label);
|
Scholar.debug(e+' in executing code for '+this.translator[0].label);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -321,8 +321,8 @@ Scholar.Utilities.Ingester.prototype.lookupContextObject = function(co, done, er
|
||||||
return Scholar.OpenURL.lookupContextObject(co, done, error);
|
return Scholar.OpenURL.lookupContextObject(co, done, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.Utilities.Ingester.prototype.parseContextObject = function(co) {
|
Scholar.Utilities.Ingester.prototype.parseContextObject = function(co, item) {
|
||||||
return Scholar.OpenURL.parseContextObject(co);
|
return Scholar.OpenURL.parseContextObject(co, item);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
355
scrapers.sql
355
scrapers.sql
|
@ -4,7 +4,7 @@
|
||||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00'));
|
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00'));
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
|
var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
|
||||||
if(searchRe.test(doc.location.href)) {
|
if(searchRe.test(doc.location.href)) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
|
@ -123,7 +123,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
|
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
|
if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
|
||||||
return "book";
|
return "book";
|
||||||
} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
|
} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
|
||||||
|
@ -288,7 +288,7 @@ REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
|
var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
|
||||||
for(var i in export_options) {
|
for(var i in export_options) {
|
||||||
if(export_options[i].text == ''Latin1 MARC''
|
if(export_options[i].text == ''Latin1 MARC''
|
||||||
|
@ -415,7 +415,7 @@ REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
|
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
var nsResolver = namespace ? function(prefix) {
|
var nsResolver = namespace ? function(prefix) {
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
@ -590,7 +590,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)',
|
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.title == "History Cooperative: Search Results") {
|
if(doc.title == "History Cooperative: Search Results") {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
} else {
|
} else {
|
||||||
|
@ -657,7 +657,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
|
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
|
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
|
||||||
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
||||||
if(matchRegexp.test(doc.location.href)) {
|
if(matchRegexp.test(doc.location.href)) {
|
||||||
|
@ -837,7 +837,7 @@ REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
var nsResolver = namespace ? function(prefix) {
|
var nsResolver = namespace ? function(prefix) {
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
@ -964,7 +964,7 @@ function doWeb(doc, url) {
|
||||||
');
|
');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
|
REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.title == "Results") {
|
if(doc.title == "Results") {
|
||||||
return "magazineArticle";
|
return "magazineArticle";
|
||||||
} else {
|
} else {
|
||||||
|
@ -1147,7 +1147,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.title.substring(0, 8) == "Article ") {
|
if(doc.title.substring(0, 8) == "Article ") {
|
||||||
return "magazineArticle";
|
return "magazineArticle";
|
||||||
} else doc.title.substring(0, 10) == "Citations ") {
|
} else doc.title.substring(0, 10) == "Citations ") {
|
||||||
|
@ -1273,7 +1273,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)',
|
REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||||||
if(detailRe.test(doc.location.href)) {
|
if(detailRe.test(doc.location.href)) {
|
||||||
return "newspaperArticle";
|
return "newspaperArticle";
|
||||||
|
@ -1377,7 +1377,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
|
REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
|
var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
|
||||||
|
|
||||||
if(singleRe.test(doc.location.href)) {
|
if(singleRe.test(doc.location.href)) {
|
||||||
|
@ -1468,7 +1468,7 @@ REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
|
REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
|
var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
|
||||||
if(detailsRe.test(doc.location.href)) {
|
if(detailsRe.test(doc.location.href)) {
|
||||||
return "book";
|
return "book";
|
||||||
|
@ -1556,7 +1556,7 @@ REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)',
|
REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var node = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null);
|
var node = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null);
|
||||||
if(node) {
|
if(node) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
|
@ -1660,7 +1660,7 @@ REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
|
REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.location.href.indexOf("/authority_hits") > 0) {
|
if(doc.location.href.indexOf("/authority_hits") > 0) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
} else {
|
} else {
|
||||||
|
@ -1730,7 +1730,7 @@ REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006
|
||||||
|
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
|
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.location.href.indexOf("/GeacQUERY") > 0) {
|
if(doc.location.href.indexOf("/GeacQUERY") > 0) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
} else {
|
} else {
|
||||||
|
@ -1818,7 +1818,7 @@ REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
var nsResolver = namespace ? function(prefix) {
|
var nsResolver = namespace ? function(prefix) {
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
@ -1954,7 +1954,7 @@ REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
|
REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
|
var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
|
||||||
if(detailRe.test(doc.location.href)) {
|
if(detailRe.test(doc.location.href)) {
|
||||||
return "book";
|
return "book";
|
||||||
|
@ -2052,7 +2052,7 @@ REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
|
REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
|
var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
|
||||||
if(searchRe.test(url)) {
|
if(searchRe.test(url)) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
|
@ -2163,48 +2163,37 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006
|
||||||
}
|
}
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 4, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
|
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.location.href.indexOf("list_uids=") >= 0) {
|
if(doc.location.href.indexOf("list_uids=") >= 0) {
|
||||||
return "journalArticle";
|
return "journalArticle";
|
||||||
} else {
|
} else {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getPMID(co) {
|
||||||
|
var coParts = co.split("&");
|
||||||
|
for each(part in coParts) {
|
||||||
|
if(part.substr(0, 7) == "rft_id=") {
|
||||||
|
var value = unescape(part.substr(7));
|
||||||
|
if(value.substr(0, 10) == "info:pmid/") {
|
||||||
|
return value.substr(10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function detectSearch(item) {
|
||||||
|
if(item.contextObject) {
|
||||||
|
if(getPMID(item.contextObject)) {
|
||||||
|
return "journalArticle";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}',
|
}',
|
||||||
'function doWeb(doc, url) {
|
'function lookupPMIDs(ids) {
|
||||||
var uri = doc.location.href;
|
Scholar.wait();
|
||||||
var ids = new Array();
|
|
||||||
var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
|
|
||||||
|
|
||||||
var m = idRegexp.exec(uri);
|
|
||||||
if(m) {
|
|
||||||
ids.push(m[1]);
|
|
||||||
} else {
|
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
|
||||||
var nsResolver = namespace ? function(prefix) {
|
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
|
||||||
} : null;
|
|
||||||
|
|
||||||
var items = new Array();
|
|
||||||
var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
|
|
||||||
// Go through table rows
|
|
||||||
for(var i=0; i<tableRows.length; i++) {
|
|
||||||
var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
|
|
||||||
var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
|
|
||||||
items[link.href] = article.nodeValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
items = Scholar.selectItems(items);
|
|
||||||
|
|
||||||
if(!items) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(var i in items) {
|
|
||||||
var m = idRegexp.exec(i);
|
|
||||||
ids.push(m[1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
|
var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
|
||||||
Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
|
Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
|
||||||
|
@ -2283,13 +2272,54 @@ REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.done();
|
Scholar.done();
|
||||||
})
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Scholar.wait();
|
function doWeb(doc, url) {
|
||||||
|
var uri = doc.location.href;
|
||||||
|
var ids = new Array();
|
||||||
|
var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
|
||||||
|
|
||||||
|
var m = idRegexp.exec(uri);
|
||||||
|
if(m) {
|
||||||
|
ids.push(m[1]);
|
||||||
|
} else {
|
||||||
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
var items = new Array();
|
||||||
|
var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
|
||||||
|
// Go through table rows
|
||||||
|
for(var i=0; i<tableRows.length; i++) {
|
||||||
|
var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
|
||||||
|
var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
|
||||||
|
items[link.href] = article.nodeValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
items = Scholar.selectItems(items);
|
||||||
|
|
||||||
|
if(!items) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(var i in items) {
|
||||||
|
var m = idRegexp.exec(i);
|
||||||
|
ids.push(m[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lookupPMIDs(ids);
|
||||||
|
}
|
||||||
|
|
||||||
|
function doSearch(item) {
|
||||||
|
// pmid was defined earlier in detectSearch
|
||||||
|
lookupPMIDs([getPMID(item.contextObject)]);
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL,
|
REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL,
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var metaTags = doc.getElementsByTagName("meta");
|
var metaTags = doc.getElementsByTagName("meta");
|
||||||
|
|
||||||
for(var i=0; i<metaTags.length; i++) {
|
for(var i=0; i<metaTags.length; i++) {
|
||||||
|
@ -2333,7 +2363,7 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
|
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var spanTags = doc.getElementsByTagName("span");
|
var spanTags = doc.getElementsByTagName("span");
|
||||||
|
|
||||||
var encounteredType = false;
|
var encounteredType = false;
|
||||||
|
@ -2348,11 +2378,11 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006
|
||||||
// determine if it''s a valid type
|
// determine if it''s a valid type
|
||||||
var coParts = spanTitle.split("&");
|
var coParts = spanTitle.split("&");
|
||||||
var type = null
|
var type = null
|
||||||
for(var i in coParts) {
|
for(var j in coParts) {
|
||||||
if(coParts[i].substr(0, 12) == "rft_val_fmt=") {
|
if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
|
||||||
var format = unescape(coParts[i].substr(12));
|
var format = unescape(coParts[j].substr(12));
|
||||||
if(format == "info:ofi/fmt:kev:mtx:journal") {
|
if(format == "info:ofi/fmt:kev:mtx:journal") {
|
||||||
var type = "journal";
|
var type = "journalArticle";
|
||||||
} else if(format == "info:ofi/fmt:kev:mtx:book") {
|
} else if(format == "info:ofi/fmt:kev:mtx:book") {
|
||||||
if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
|
if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
|
||||||
var type = "bookSection";
|
var type = "bookSection";
|
||||||
|
@ -2384,45 +2414,43 @@ function retrieveNextCOinS(needFullItems, newItems) {
|
||||||
var item = needFullItems.shift();
|
var item = needFullItems.shift();
|
||||||
|
|
||||||
Scholar.Utilities.debugPrint("looking up contextObject");
|
Scholar.Utilities.debugPrint("looking up contextObject");
|
||||||
Scholar.Utilities.lookupContextObject(item.contextObject, function(items) {
|
var search = Scholar.loadTranslator("search");
|
||||||
Scholar.Utilities.debugPrint(items);
|
search.setHandler("itemDone", function(obj, item) {
|
||||||
if(items) {
|
newItems.push(item);
|
||||||
newItems = newItems.concat(items);
|
|
||||||
}
|
|
||||||
retrieveNextCOinS(needFullItems, newItems);
|
|
||||||
}, function() {
|
|
||||||
Scholar.done(false);
|
|
||||||
});
|
});
|
||||||
|
search.setHandler("done", function() {
|
||||||
|
retrieveNextCOinS(needFullItems, newItems);
|
||||||
|
});
|
||||||
|
search.setItem(item);
|
||||||
|
|
||||||
|
// look for translators
|
||||||
|
var translators = search.getTranslators();
|
||||||
|
if(translators) {
|
||||||
|
search.setTranslator(translators);
|
||||||
|
search.translate();
|
||||||
|
} else {
|
||||||
|
retrieveNextCOinS(needFullItems, newItems);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
completeCOinS(newItems);
|
completeCOinS(newItems);
|
||||||
Scholar.done(true);
|
Scholar.done(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// attaches item data to a new Scholar.Item instance (because data returned from
|
|
||||||
// Scholar.OpenURL.processContextObject does not have a complete() method)
|
|
||||||
function addAsItem(itemArray) {
|
|
||||||
var newItem = new Scholar.Item();
|
|
||||||
for(var i in itemArray) {
|
|
||||||
newItem[i] = itemArray[i];
|
|
||||||
}
|
|
||||||
newItem.complete();
|
|
||||||
}
|
|
||||||
|
|
||||||
// saves all COinS objects
|
// saves all COinS objects
|
||||||
function completeCOinS(newItems) {
|
function completeCOinS(newItems) {
|
||||||
if(newItems.length > 1) {
|
if(newItems.length > 1) {
|
||||||
var selectArray = new Array();
|
var selectArray = new Array();
|
||||||
|
|
||||||
for(var i in newItems) {
|
for(var i in newItems) {
|
||||||
selectArray[i] = newItems.title;
|
selectArray[i] = newItems[i].title;
|
||||||
}
|
}
|
||||||
selectArray = Scholar.selectItems(selectArray);
|
selectArray = Scholar.selectItems(selectArray);
|
||||||
for(var i in selectArray) {
|
for(var i in selectArray) {
|
||||||
addAsItem(newItems[i]);
|
newItems[i].complete();
|
||||||
}
|
}
|
||||||
} else if(newItems.length) {
|
} else if(newItems.length) {
|
||||||
addAsItem(newItems[0]);
|
newItems[0].complete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2438,8 +2466,8 @@ function doWeb(doc, url) {
|
||||||
var spanClasses = spanClass.split(" ");
|
var spanClasses = spanClass.split(" ");
|
||||||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||||||
var spanTitle = spanTags[i].getAttribute("title");
|
var spanTitle = spanTags[i].getAttribute("title");
|
||||||
var newItem = Scholar.Utilities.parseContextObject(spanTitle);
|
var newItem = new Scholar.Item();
|
||||||
if(newItem) {
|
if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
|
||||||
if(newItem.title && newItem.creators.length) {
|
if(newItem.title && newItem.creators.length) {
|
||||||
// title and creators are minimum data to avoid looking up
|
// title and creators are minimum data to avoid looking up
|
||||||
newItems.push(newItem);
|
newItems.push(newItem);
|
||||||
|
@ -2463,7 +2491,7 @@ function doWeb(doc, url) {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
|
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
|
||||||
'function detect(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||||||
if(re.test(doc.location.href)) {
|
if(re.test(doc.location.href)) {
|
||||||
return "book";
|
return "book";
|
||||||
|
@ -2553,6 +2581,161 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006
|
||||||
Scholar.wait();
|
Scholar.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
|
REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||||||
|
'function detectSearch(item) {
|
||||||
|
if(item.itemType == "book" || item.itemType == "bookSection") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}',
|
||||||
|
'// creates an item from an Open WorldCat document
|
||||||
|
function processOWC(doc) {
	// Looks for the first span in doc whose class list contains "Z3988" (a
	// COinS span) and tries to turn its title attribute into a saved item.
	//
	// returns true if that span parsed and the item was completed, false if
	// it failed to parse or if the document holds no such span
	var spans = doc.getElementsByTagName("span");
	
	for(var idx=0; idx<spans.length; idx++) {
		var classAttr = spans[idx].getAttribute("class");
		if(!classAttr) {
			continue;
		}
		
		if(!Scholar.Utilities.inArray("Z3988", classAttr.split(" "))) {
			continue;
		}
		
		// only the first COinS span is ever examined; its outcome decides
		// the return value
		var contextObject = spans[idx].getAttribute("title");
		var owcItem = new Scholar.Item();
		if(!Scholar.Utilities.parseContextObject(contextObject, owcItem)) {
			return false;
		}
		
		owcItem.complete();
		return true;
	}
	
	return false;
}
|
||||||
|
|
||||||
|
function doSearch(item) {
|
||||||
|
if(item.contextObject) {
|
||||||
|
var co = item.contextObject;
|
||||||
|
} else {
|
||||||
|
var co = Scholar.Utilities.createContextObject(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
Scholar.Utilities.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
|
||||||
|
var doc = browser.contentDocument;
|
||||||
|
// find new COinS in the Open WorldCat page
|
||||||
|
if(processOWC(doc)) { // we got a single item page
|
||||||
|
Scholar.done();
|
||||||
|
} else { // assume we have a search results page
|
||||||
|
var items = new Array();
|
||||||
|
|
||||||
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
// first try to get only books
|
||||||
|
var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
||||||
|
var elmt = elmts.iterateNext();
|
||||||
|
if(!elmt) { // if that fails, look for other options
|
||||||
|
var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
||||||
|
elmt = elmts.iterateNext()
|
||||||
|
}
|
||||||
|
|
||||||
|
var urlsToProcess = new Array();
|
||||||
|
do {
|
||||||
|
urlsToProcess.push(elmt.href);
|
||||||
|
} while(elmt = elmts.iterateNext());
|
||||||
|
|
||||||
|
Scholar.Utilities.processDocuments(null, urlsToProcess, function(browser) {
|
||||||
|
// per URL
|
||||||
|
processOWC(browser.contentDocument);
|
||||||
|
}, function() { // done
|
||||||
|
Scholar.done();
|
||||||
|
}, function() { // error
|
||||||
|
Scholar.done(false);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}, null, function() {
|
||||||
|
error();
|
||||||
|
});
|
||||||
|
|
||||||
|
Scholar.wait();
|
||||||
|
}');
|
||||||
|
|
||||||
|
REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||||||
|
'function detectSearch(item) {
|
||||||
|
if(item.itemType == "journal") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}',
|
||||||
|
'function processCrossRef(xmlOutput) {
|
||||||
|
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
||||||
|
|
||||||
|
// parse XML with E4X
|
||||||
|
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
||||||
|
try {
|
||||||
|
var xml = new XML(xmlOutput);
|
||||||
|
} catch(e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ensure status is valid
|
||||||
|
var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
|
||||||
|
if(status != "resolved" && status != "multiresolved") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
var query = xml.qr::query_result.qr::body.qr::query;
|
||||||
|
var item = new Scholar.Item("journalArticle");
|
||||||
|
|
||||||
|
// try to get a DOI
|
||||||
|
item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
|
||||||
|
if(!item.DOI) {
|
||||||
|
item.DOI = query.qr::doi.(@type=="book_title").text().toString();
|
||||||
|
}
|
||||||
|
if(!item.DOI) {
|
||||||
|
item.DOI = query.qr::doi.(@type=="book_content").text().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// try to get an ISSN (no print/electronic preferences)
|
||||||
|
item.ISSN = query.qr::issn[0].text().toString();
|
||||||
|
// get title
|
||||||
|
item.title = query.qr::article_title.text().toString();
|
||||||
|
// get publicationTitle
|
||||||
|
item.publicationTitle = query.qr::journal_title.text().toString();
|
||||||
|
// get author
|
||||||
|
item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
|
||||||
|
// get volume
|
||||||
|
item.volume = query.qr::volume.text().toString();
|
||||||
|
// get issue
|
||||||
|
item.issue = query.qr::issue.text().toString();
|
||||||
|
// get year
|
||||||
|
item.date = query.qr::year.text().toString();
|
||||||
|
// get edition
|
||||||
|
item.edition = query.qr::edition_number.text().toString();
|
||||||
|
// get first page
|
||||||
|
item.pages = query.qr::first_page.text().toString();
|
||||||
|
item.complete();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function doSearch(item) {
|
||||||
|
if(item.contextObject) {
|
||||||
|
var co = item.contextObject;
|
||||||
|
if(co.indexOf("url_ver=") == -1) {
|
||||||
|
co = "url_ver=Z39.88-2004"+co;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
var co = Scholar.Utilities.createContextObject(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
Scholar.Utilities.HTTPUtilities.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(responseText) {
|
||||||
|
processCrossRef(responseText);
|
||||||
|
Scholar.done();
|
||||||
|
});
|
||||||
|
|
||||||
|
Scholar.wait();
|
||||||
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
|
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
|
||||||
'Scholar.addOption("exportNotes", true);
|
'Scholar.addOption("exportNotes", true);
|
||||||
Scholar.addOption("exportFileData", true);',
|
Scholar.addOption("exportFileData", true);',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user