closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field
- add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
- add links to permanent URLs
- download associated files from full text sources (if the extensions.scholar.downloadAssociatedFiles preference is enabled)
- fix WorldCat translator
- improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
- tag items from SIRSI and WorldCat
- return to putting the full lengths of books into "pages," because some citation styles require it
- fix COinS (broken a few revisions ago)
This commit is contained in:
parent
410e090ecd
commit
10ba568ee8
|
@ -12,8 +12,6 @@ var Scholar_File_Interface = new function() {
|
|||
* Creates Scholar.Translate instance and shows file picker for file export
|
||||
*/
|
||||
function exportFile(items) {
|
||||
Scholar.debug(items);
|
||||
|
||||
var translation = new Scholar.Translate("export");
|
||||
var translators = translation.getTranslators();
|
||||
|
||||
|
|
|
@ -97,14 +97,12 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
|||
var rootDoc = doc;
|
||||
|
||||
// get the appropriate root document to check which browser we're on
|
||||
Scholar.debug("getting root document");
|
||||
while(rootDoc.defaultView.frameElement) {
|
||||
rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
|
||||
}
|
||||
|
||||
// Figure out what browser this contentDocument is associated with
|
||||
var browser;
|
||||
Scholar.debug("getting browser");
|
||||
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
|
||||
if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
|
||||
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
|
||||
|
@ -115,7 +113,6 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
|||
return;
|
||||
}
|
||||
|
||||
Scholar.debug("getting data");
|
||||
// get data object
|
||||
var data = Scholar_Ingester_Interface._getData(browser);
|
||||
|
||||
|
@ -125,13 +122,14 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
|||
return;
|
||||
}
|
||||
|
||||
Scholar.debug("translating");
|
||||
// get translators
|
||||
var translate = new Scholar.Translate("web");
|
||||
translate.setDocument(doc);
|
||||
data.translators = translate.getTranslators();
|
||||
// update status
|
||||
Scholar_Ingester_Interface._updateStatus(data);
|
||||
if(Scholar_Ingester_Interface.tabBrowser.selectedBrowser == browser) {
|
||||
Scholar_Ingester_Interface._updateStatus(data);
|
||||
}
|
||||
// add document
|
||||
if(data.translators && data.translators.length) {
|
||||
data.document = doc;
|
||||
|
@ -412,6 +410,7 @@ Scholar_Ingester_Interface.Progress = new function() {
|
|||
|
||||
function kill() {
|
||||
_windowLoaded = false;
|
||||
_windowLoading = false;
|
||||
try {
|
||||
_progressWindow.close();
|
||||
} catch(ex) {}
|
||||
|
|
|
@ -71,6 +71,8 @@
|
|||
*
|
||||
* _locationIsProxied - whether the URL being scraped is going through
|
||||
* an EZProxy
|
||||
* _downloadAssociatedFiles - whether to download content, according to
|
||||
* preferences
|
||||
*/
|
||||
|
||||
Scholar.Translate = function(type, saveItem) {
|
||||
|
@ -166,7 +168,6 @@ Scholar.Translate.prototype.setString = function(string) {
|
|||
this.string = string;
|
||||
this._createStorageStream();
|
||||
|
||||
Scholar.debug(string);
|
||||
this._storageStreamLength = string.length;
|
||||
|
||||
// write string
|
||||
|
@ -497,6 +498,8 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
|||
// for loading other translators and accessing their methods
|
||||
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
|
||||
var translation = new Scholar.Translate(type, (translatorID ? true : false));
|
||||
translation._parentTranslator = me;
|
||||
|
||||
if(translatorID) {
|
||||
// assign same handlers as for parent, because the done handler won't
|
||||
// get called anyway, and the itemDone/selectItems handlers should be
|
||||
|
@ -521,7 +524,7 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
|||
}
|
||||
|
||||
var safeTranslator = new Object();
|
||||
safeTranslator.setItem = function(arg) { return translation.setItem(arg) };
|
||||
safeTranslator.setSearch = function(arg) { return translation.setSearch(arg) };
|
||||
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
|
||||
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
|
||||
safeTranslator.setString = function(arg) { translation.setString(arg) };
|
||||
|
@ -797,7 +800,6 @@ Scholar.Translate.prototype._closeStreams = function() {
|
|||
* executed when an item is done and ready to be loaded into the database
|
||||
*/
|
||||
Scholar.Translate.prototype._itemDone = function(item) {
|
||||
Scholar.debug(item);
|
||||
if(!this.saveItem) { // if we're not supposed to save the item, just
|
||||
// return the item array
|
||||
|
||||
|
@ -809,6 +811,14 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
|||
}
|
||||
this._runHandler("itemDone", item);
|
||||
return;
|
||||
} else if(this._parentTranslator) {
|
||||
// run done on parent
|
||||
this._parentTranslator._itemDone(item);
|
||||
return;
|
||||
}
|
||||
|
||||
if(!item.title) {
|
||||
throw("item has no title");
|
||||
}
|
||||
|
||||
var notifierStatus = Scholar.Notifier.isEnabled();
|
||||
|
@ -897,6 +907,48 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle attachments
|
||||
if(item.attachments) {
|
||||
for each(var attachment in item.attachments) {
|
||||
if(!attachment.url && (this.type != "web" || !attachment.document)) {
|
||||
Scholar.debug("not adding attachment: no URL specified");
|
||||
} else if(this.type == "web") {
|
||||
if(attachment.downloadable && this._downloadAssociatedFiles) {
|
||||
if(attachment.document) {
|
||||
var attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID);
|
||||
|
||||
// change title, if a different one was specified
|
||||
if(attachment.title && (!attachment.document.title
|
||||
|| attachment.title != attachment.document.title)) {
|
||||
var attachmentItem = Scholar.Items.get(attachmentID);
|
||||
attachmentItem.setField("title", attachment.title);
|
||||
}
|
||||
} else {
|
||||
Scholar.Attachments.importFromURL(attachment.url, myID,
|
||||
(attachment.mimeType ? attachment.mimeType : undefined),
|
||||
(attachment.title ? attachment.title : undefined));
|
||||
}
|
||||
} else {
|
||||
if(attachment.document) {
|
||||
Scholar.Attachments.linkFromURL(attachment.document.location.href, myID,
|
||||
(attachment.mimeType ? attachment.mimeType : attachment.document.contentType),
|
||||
(attachment.title ? attachment.title : attachment.document.title));
|
||||
} else {
|
||||
if(!attachment.mimeType || attachment.title) {
|
||||
Scholar.debug("notice: either mimeType or title is missing; attaching file will be slower");
|
||||
}
|
||||
|
||||
Scholar.Attachments.linkFromURL(attachment.url, myID,
|
||||
(attachment.mimeType ? attachment.mimeType : undefined),
|
||||
(attachment.title ? attachment.title : undefined));
|
||||
}
|
||||
}
|
||||
} else if(this.type == "import") {
|
||||
// TODO
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(item.itemID) {
|
||||
|
@ -926,7 +978,6 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
|||
* executed when a collection is done and ready to be loaded into the database
|
||||
*/
|
||||
Scholar.Translate.prototype._collectionDone = function(collection) {
|
||||
Scholar.debug(collection);
|
||||
var newCollection = this._processCollection(collection, null);
|
||||
|
||||
this._runHandler("collectionDone", newCollection);
|
||||
|
@ -985,6 +1036,8 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
|
|||
* does the actual web translation
|
||||
*/
|
||||
Scholar.Translate.prototype._web = function() {
|
||||
this._downloadAssociatedFiles = Scholar.Prefs.get("downloadAssociatedFiles");
|
||||
|
||||
try {
|
||||
this._sandbox.doWeb(this.document, this.location);
|
||||
} catch(e) {
|
||||
|
@ -1049,7 +1102,7 @@ Scholar.Translate.prototype._importConfigureIO = function() {
|
|||
|
||||
// get URI and parse
|
||||
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
|
||||
parser.parseString(dataSource, baseURI, str);
|
||||
parser.parseString(this._rdf.dataSource, baseURI, str);
|
||||
|
||||
// make an instance of the RDF handler
|
||||
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
|
||||
|
@ -1182,7 +1235,6 @@ Scholar.Translate.prototype._exportConfigureIO = function() {
|
|||
Scholar.Translate.prototype._exportGetItem = function() {
|
||||
if(this._itemsLeft.length != 0) {
|
||||
var returnItem = this._itemsLeft.shift();
|
||||
Scholar.debug("getting info on "+returnItem.getID());
|
||||
this._runHandler("itemDone", returnItem);
|
||||
return returnItem.toArray();
|
||||
}
|
||||
|
@ -1328,6 +1380,8 @@ Scholar.Translate.ScholarItem = function(itemType) {
|
|||
this.tags = new Array();
|
||||
// generate see also array
|
||||
this.seeAlso = new Array();
|
||||
// generate file array
|
||||
this.attachments = new Array();
|
||||
}
|
||||
|
||||
/* Scholar.Translate.Collection: a class for generating a new top-level
|
||||
|
|
|
@ -71,7 +71,7 @@ Scholar.Utilities.prototype.cleanAuthor = function(author, type, useComma) {
|
|||
* Cleans whitespace off a string and replaces multiple spaces with one
|
||||
*/
|
||||
Scholar.Utilities.prototype.cleanString = function(s) {
|
||||
s = s.replace(/[ \xA0]+/g, " ");
|
||||
s = s.replace(/[ \xA0\r\n]+/g, " ");
|
||||
s = s.replace(/^\s+/, "");
|
||||
return s.replace(/\s+$/, "");
|
||||
}
|
||||
|
|
|
@ -5,4 +5,5 @@ pref("extensions.scholar.automaticScraperUpdates",true);
|
|||
pref("extensions.scholar.scholarPaneOnTop",false);
|
||||
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
|
||||
pref("extensions.scholar.openURL.version","0.1");
|
||||
pref("extensions.scholar.parseEndNoteMIMETypes",true);
|
||||
pref("extensions.scholar.parseEndNoteMIMETypes",true);
|
||||
pref("extensions.scholar.downloadAssociatedFiles",false);
|
1010
scrapers.sql
1010
scrapers.sql
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user