diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
index d06fae166..6ae2839ed 100644
--- a/chrome/chromeFiles/content/scholar/ingester/browser.js
+++ b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -27,6 +27,7 @@ Scholar_Ingester_Interface.init = function() {
 	Scholar_Ingester_Interface.browserData = new Object();
 	Scholar_Ingester_Interface._scrapePopupShowing = false;
 	Scholar.Ingester.ProxyMonitor.init();
+	Scholar.Ingester.MIMEHandler.init();
 
 	window.addEventListener("load", Scholar_Ingester_Interface.chromeLoad, false);
 	window.addEventListener("unload", Scholar_Ingester_Interface.chromeUnload, false);
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
index ffd1bc81d..c7ad6412e 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/ingester.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -457,4 +457,155 @@ Scholar.OpenURL = new function() {
 			return "";
 		}
 	}
+}
+
+Scholar.Ingester.MIMEHandler = new function() {
+	var on = false;
+
+	this.init = init;
+
+	/*
+	 * registers URIContentListener to handle MIME types
+	 */
+	function init() {
+		if(!on && Scholar.Prefs.get("parseEndNoteMIMETypes")) {
+			var uriLoader = Components.classes["@mozilla.org/uriloader;1"].
+				getService(Components.interfaces.nsIURILoader);
+			uriLoader.registerContentListener(Scholar.Ingester.MIMEHandler.URIContentListener);
+			on = true;
+		}
+	}
+}
+
+/*
+ * Scholar.Ingester.MIMEHandler.URIContentListener: implements
+ * nsIURIContentListener interface to grab MIME types
+ */
+Scholar.Ingester.MIMEHandler.URIContentListener = new function() {
+	var _desiredContentTypes = ["application/x-endnote-refer", "application/x-research-info-systems"];
+
+	this.QueryInterface = QueryInterface;
+	this.canHandleContent = canHandleContent;
+	this.doContent = doContent;
+	this.isPreferred = isPreferred;
+	this.onStartURIOpen = onStartURIOpen;
+
+	function QueryInterface(iid) {
+		if(iid.equals(Components.interfaces.nsISupports)
+		   || iid.equals(Components.interfaces.nsISupportsWeakReference)
+		   || iid.equals(Components.interfaces.nsIURIContentListener)) {
+			return this;
+		}
+		throw Components.results.NS_ERROR_NO_INTERFACE;
+	}
+
+	function canHandleContent(contentType, isContentPreferred, desiredContentType) {
+		if(Scholar.inArray(contentType, _desiredContentTypes)) {
+			return true;
+		}
+		return false;
+	}
+
+	function doContent(contentType, isContentPreferred, request, contentHandler) {
+		Scholar.debug("doing content for "+request.name);
+		contentHandler.value = new Scholar.Ingester.MIMEHandler.StreamListener(request, contentType);
+		return false;
+	}
+
+	function isPreferred(contentType, desiredContentType) {
+		if(Scholar.inArray(contentType, _desiredContentTypes)) {
+			return true;
+		}
+		return false;
+	}
+
+	function onStartURIOpen(URI) {
+		return true;
+	}
+}
+
+/*
+ * Scholar.Ingester.MIMEHandler.StreamListener: implements nsIStreamListener and
+ * nsIRequestObserver interfaces to download MIME types we've grabbed
+ */
+Scholar.Ingester.MIMEHandler.StreamListener = function(request, contentType) {
+	this._request = request;
+	this._contentType = contentType;
+	this._readString = "";
+	this._scriptableStream = null;
+	this._scriptableStreamInput = null;
+
+	// get front window
+	var windowWatcher = Components.classes["@mozilla.org/embedcomp/window-watcher;1"].
+		getService(Components.interfaces.nsIWindowWatcher);
+	this._frontWindow = windowWatcher.activeWindow;
+	this._frontWindow.Scholar_Ingester_Interface.Progress.show();
+}
+
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.QueryInterface = function(iid) {
+	if(iid.equals(Components.interfaces.nsISupports)
+	   || iid.equals(Components.interfaces.nsIRequestObserver)
+	   || iid.equals(Components.interfaces.nsIStreamListener)) {
+		return this;
+	}
+	throw Components.results.NS_ERROR_NO_INTERFACE;
+}
+
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStartRequest = function(channel, context) {}
+
+/*
+ * called when there's data available; basically, we just want to collect this data
+ */
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onDataAvailable = function(request, context, inputStream, offset, count) {
+	Scholar.debug(count+" bytes available");
+
+	if(inputStream != this._scriptableStreamInput) {	// get storage stream
+		// if there's not one
+		this._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+			createInstance(Components.interfaces.nsIScriptableInputStream);
+		this._scriptableStream.init(inputStream);
+		this._scriptableStreamInput = inputStream;
+	}
+	this._readString += this._scriptableStream.read(count);
+}
+
+/*
+ * called when the request is done
+ */
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStopRequest = function(channel, context, status) {
+	Scholar.debug("request finished");
+	var externalHelperAppService = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"].
+		getService(Components.interfaces.nsIExternalHelperAppService);
+
+	// attempt to import through Scholar.Translate
+	var translation = new Scholar.Translate("import");
+	translation.setLocation(this._request.name);
+	translation.setString(this._readString);
+	translation.setHandler("itemDone", this._frontWindow.Scholar_Ingester_Interface._itemDone);
+	translation.setHandler("done", this._frontWindow.Scholar_Ingester_Interface._finishScraping);
+
+	// attempt to retrieve translators
+	var translators = translation.getTranslators();
+	if(!translators.length) {
+		// we lied. we can't really translate this file. call
+		// nsIExternalHelperAppService with the data
+		this._frontWindow.Scholar_Ingester_Interface.Progress.kill();
+
+		var streamListener;
+		if(streamListener = externalHelperAppService.doContent(this._contentType, this._request, this._frontWindow)) {
+			// create a string input stream
+			var inputStream = Components.classes["@mozilla.org/io/string-input-stream;1"].
+				createInstance(Components.interfaces.nsIStringInputStream);
+			inputStream.setData(this._readString, this._readString.length);
+
+			streamListener.onStartRequest(channel, context);
+			streamListener.onDataAvailable(this._request, context, inputStream, 0, this._readString.length);
+			streamListener.onStopRequest(channel, context, status);
+		}
+		return false;
+	}
+
+	// translate using first available
+	translation.setTranslator(translators[0]);
+	translation.translate();
 }
\ No newline at end of file
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
index c5df9fd1e..6184cf6f2 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/translate.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -38,6 +38,7 @@
  *            for web - this is a URL
  * item - item to be used for searching (read-only; set with setItem)
  * path - the path to the target; for web, this is the same as location
+ * string - the string content to be used as a file.
  * saveItem - whether new items should be saved to the database. defaults to
  *            true; set using second argument of constructor.
  *
@@ -57,6 +58,8 @@
  *                among other things, disables passing of the translate
  *                object to handlers and modifies complete() function on
  *                returned items
+ * _storageStream - the storage stream to be used, if one is configured
+ * _storageStreamLength - the length of the storage stream
  *
  * WEB-ONLY PRIVATE PROPERTIES:
  *
@@ -135,10 +138,30 @@ Scholar.Translate.prototype.setLocation = function(location) {
 		this.path = this.location;
 	} else {
 		this.location = location;
-		this.path = location.path;
+		if(this.location instanceof Components.interfaces.nsIFile) { // if a file
+			this.path = location.path;
+		} else { // if a url
+			this.path = location;
+		}
 	}
 }
 
+/*
+ * sets the string to be used as a file
+ */
+Scholar.Translate.prototype.setString = function(string) {
+	this.string = string;
+	this._createStorageStream();
+
+	Scholar.debug(string);
+	this._storageStreamLength = string.length;
+
+	// write string
+	var fStream = this._storageStream.getOutputStream(0);
+	fStream.write(string, this._storageStreamLength);
+	fStream.close();
+}
+
 /*
  * sets the translator to be used for import/export
  *
@@ -672,7 +695,10 @@ Scholar.Translate.prototype._closeStreams = function() {
 			try {
 				stream.QueryInterface(Components.interfaces.nsIFileInputStream);
 			} catch(e) {
-				stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
+				try {
+					stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
+				} catch(e) { // presumably not a file stream at all; ignore so the close below is still attempted
+				}
 			}
 
 			// encase close in try block, because it's possible it's already
@@ -934,52 +960,85 @@ Scholar.Translate.prototype._import = function() {
  * sets up import for IO
  */
 Scholar.Translate.prototype._importConfigureIO = function() {
-	if(this._configOptions.dataMode == "rdf") {
-		var IOService = Components.classes['@mozilla.org/network/io-service;1']
-						.getService(Components.interfaces.nsIIOService);
-		var fileHandler = IOService.getProtocolHandler("file")
-						  .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
-		var URL = fileHandler.getURLSpecFromFile(this.location);
-		delete fileHandler, IOService;
-
-		var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
-						 .getService(Components.interfaces.nsIRDFService);
-		var dataSource = RDFService.GetDataSourceBlocking(URL);
-
-		// make an instance of the RDF handler
-		this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
-	} else {
-		// open file
-		var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
-								 .createInstance(Components.interfaces.nsIFileInputStream);
-		fStream.init(this.location, 0x01, 0664, 0);
-		this._streams.push(fStream);
-
-		if(this._configOptions.dataMode == "line") {	// line by line reading
-			var notEof = true;
-			var lineData = new Object();
+	if(this._storageStream) {
+		if(this._configOptions.dataMode == "rdf") {
+			// read string out of storage stream
+			var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+				createInstance(Components.interfaces.nsIScriptableInputStream);
+			sStream.init(this._storageStream.newInputStream(0));
+			var str = sStream.read(this._storageStreamLength);
+			sStream.close();
 
-			fStream.QueryInterface(Components.interfaces.nsILineInputStream);
+			var IOService = Components.classes['@mozilla.org/network/io-service;1']
+							.getService(Components.interfaces.nsIIOService);
+			var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
+				createInstance(Components.interfaces.nsIRDFDataSource);
+			var parser = Components.classes["@mozilla.org/rdf/xml-parser;1"].
+				createInstance(Components.interfaces.nsIRDFXMLParser);
 
-			this._sandbox.Scholar.read = function() {
-				if(notEof) {
-					notEof = fStream.readLine(lineData);
-					return lineData.value;
-				} else {
-					return false;
+			// get URI and parse
+			var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
+			parser.parseString(dataSource, baseURI, str);
+
+			// make an instance of the RDF handler
+			this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
+		} else {
+			this._storageStreamFunctions(true);
+
+			if(this._scriptableStream) {
+				// close scriptable stream so functions will be forced to get a
+				// new one
+				this._scriptableStream.close();
+				this._scriptableStream = undefined;
+			}
+		}
+	} else {
+		if(this._configOptions.dataMode == "rdf") {
+			var IOService = Components.classes['@mozilla.org/network/io-service;1']
+							.getService(Components.interfaces.nsIIOService);
+			var fileHandler = IOService.getProtocolHandler("file")
+							  .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
+			var URL = fileHandler.getURLSpecFromFile(this.location);
+
+			var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
+							 .getService(Components.interfaces.nsIRDFService);
+			var dataSource = RDFService.GetDataSourceBlocking(URL);
+
+			// make an instance of the RDF handler
+			this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
+		} else {
+			// open file and set read methods
+			var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
+									 .createInstance(Components.interfaces.nsIFileInputStream);
+			fStream.init(this.location, 0x01, 0664, 0);
+			this._streams.push(fStream);
+
+			if(this._configOptions.dataMode == "line") {	// line by line reading
+				var notEof = true;
+				var lineData = new Object();
+
+				fStream.QueryInterface(Components.interfaces.nsILineInputStream);
+
+				this._sandbox.Scholar.read = function() {
+					if(notEof) {
+						notEof = fStream.readLine(lineData);
+						return lineData.value;
+					} else {
+						return false;
+					}
 				}
+			} else {	// block reading
+				var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
+										 .createInstance(Components.interfaces.nsIScriptableInputStream);
+				sStream.init(fStream);
+
+				this._sandbox.Scholar.read = function(amount) {
+					return sStream.read(amount);
+				}
+
+				// attach sStream to stack of streams to close
+				this._streams.push(sStream);
 			}
-		} else {	// block reading
-			var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
-									 .createInstance(Components.interfaces.nsIScriptableInputStream);
-			sStream.init(fStream);
-
-			this._sandbox.Scholar.read = function(amount) {
-				return sStream.read(amount);
-			}
-
-			// attach sStream to stack of streams to close
-			this._streams.push(sStream);
 		}
 	}
 }
@@ -1087,73 +1146,90 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
 			// make an instance of the RDF handler
 			this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
 		} else {
-			// create a storage stream
-			var storageStream = Components.classes["@mozilla.org/storagestream;1"].
-				createInstance(Components.interfaces.nsIStorageStream);
-			storageStream.init(4096, 4294967295, null); // virtually no size limit
+			this._createStorageStream();
+			this._storageStreamFunctions(true, true);
+		}
+	}
+}
+
+/*
+ * creates a storage stream and stores it in this._storageStream
+ */
+Scholar.Translate.prototype._createStorageStream = function() {
+	// create a storage stream
+	this._storageStream = Components.classes["@mozilla.org/storagestream;1"].
+		createInstance(Components.interfaces.nsIStorageStream);
+	this._storageStream.init(4096, 4294967295, null); // virtually no size limit
+}
+
+/*
+ * sets up sandbox Scholar.read() (if read) and Scholar.write()/Scholar.eof() (if write) on the storage stream
+ */
+Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
+	var me = this;
+	if(write) {
+		// set up write() method
+		var fStream = this._storageStream.getOutputStream(0);
+		this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
+
+		// set Scholar.eof() to close the storage stream
+		this._sandbox.Scholar.eof = function() {
+			me._storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
+			me._storageStream.close();
+		}
+	}
+
+	if(read) {
+		// set up read methods
+		if(this._configOptions.dataMode == "line") {	// line by line reading
+			var lastCharacter;
 
-			// set up write() method
-			var fStream = storageStream.getOutputStream(0);
-			this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
+			this._sandbox.Scholar.read = function() {
+				if(!me._scriptableStream) {	// allocate an fStream and sStream on the fly
+					// otherwise with no data we get an error
+					me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+						createInstance(Components.interfaces.nsIScriptableInputStream);
+					me._scriptableStream.init(me._storageStream.newInputStream(0));
 
-			// set up read methods
-			var sStream;
-			var me = this;
-			if(this._configOptions.dataMode == "line") {	// line by line reading
-				var lastCharacter;
-
-				this._sandbox.Scholar.read = function() {
-					if(!sStream) {	// allocate an fStream and sStream on the fly
-						// otherwise with no data we get an error
-						sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
-									 .createInstance(Components.interfaces.nsIScriptableInputStream);
-						sStream.init(fStream.newInputStream(0));
-
-						// attach sStream to stack of streams to close
-						me._streams.push(sStream);
-					}
-
-					var character = sStream.read(1);
-					if(!character) {
-						return false;
-					}
-					var string = "";
-
-					if(lastCharacter == "\r" && character == "\n") {
-						// if the last read got a cr, and this first char was
-						// an lf, ignore the lf
-						character = "";
-					}
-
-					while(character != "\n" && character != "\r" && character) {
-						string += character;
-						character = sStream.read(1);
-					}
-
-					lastCharacter = character;
-
-					return string;
+					// attach sStream to stack of streams to close
+					me._streams.push(me._scriptableStream);
 				}
-			} else {	// block reading
-				this._sandbox.Scholar.read = function(amount) {
-					if(!sStream) {	// allocate an fStream and sStream on the fly
-						// otherwise with no data we get an error
-						sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
-									 .createInstance(Components.interfaces.nsIScriptableInputStream);
-						sStream.init(fStream.newInputStream(0));
-
-						// attach sStream to stack of streams to close
-						me._streams.push(sStream);
-					}
-
-					return sStream.read(amount);
+
+				var character = me._scriptableStream.read(1);
+				if(!character) {
+					return false;
 				}
+				var string = "";
+
+				if(lastCharacter == "\r" && character == "\n") {
+					// if the last read got a cr, and this first char was
+					// an lf, ignore the lf
+					character = "";
+				}
+
+				while(character != "\n" && character != "\r" && character) {
+					string += character;
+					character = me._scriptableStream.read(1);
+				}
+
+				lastCharacter = character;
+
+				return string;
 			}
+		} else {	// block reading
+			this._sandbox.Scholar.read = function(amount) {
+				if(!me._scriptableStream) {	// allocate an fStream and
+											// sStream on the fly; otherwise
+											// with no data we get an error
+					me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+						createInstance(Components.interfaces.nsIScriptableInputStream);
+					me._scriptableStream.init(me._storageStream.newInputStream(0));
 
-			// set Scholar.eof() to close the storage stream
-			this._sandbox.Scholar.eof = function() {
-				storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
-				storageStream.close();
+					// attach sStream to stack of streams to close
+					me._streams.push(me._scriptableStream);
+				}
+
+				return me._scriptableStream.read(amount);
 			}
 		}
 	}
diff --git a/defaults/preferences/scholar.js b/defaults/preferences/scholar.js
index d11ed2bb2..673856fde 100644
--- a/defaults/preferences/scholar.js
+++ b/defaults/preferences/scholar.js
@@ -4,4 +4,5 @@
 pref("extensions.scholar.automaticScraperUpdates",true);
 pref("extensions.scholar.scholarPaneOnTop",false);
 pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
-pref("extensions.scholar.openURL.version","0.1");
\ No newline at end of file
+pref("extensions.scholar.openURL.version","0.1");
+pref("extensions.scholar.parseEndNoteMIMETypes",true);
\ No newline at end of file
diff --git a/scrapers.sql b/scrapers.sql
index 14b67fed2..33c9aa80a 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,7 +1,7 @@
--- 39
+-- 40
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 21:55:00'));
+REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-08 17:12:00'));
 
 REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
 'function detectWeb(doc, url) {
@@ -4068,12 +4068,12 @@ function doImport() {
 	}
 }');
 
-REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',
+REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
 'Scholar.configure("dataMode", "line");
 Scholar.addOption("exportNotes", true);
 
 function detectImport() {
-	var line
+	var line;
 	while(line = Scholar.read()) {
 		if(line.replace(/\s/g, "") != "") {
 			if(line.substr(0, 6) == "TY - ") {
@@ -4141,6 +4141,8 @@ var inputTypeMap = {
 function processTag(item, tag, value) {
 	if(fieldMap[tag]) {
 		item[fieldMap[tag]] = value;
+	} else if(inputFieldMap[tag]) {
+		item[inputFieldMap[tag]] = value;
 	} else if(tag == "TY") {
 		// look for type