diff --git a/chrome/chromeFiles/content/scholar/fileInterface.js b/chrome/chromeFiles/content/scholar/fileInterface.js index f647d2554..6afc26fba 100644 --- a/chrome/chromeFiles/content/scholar/fileInterface.js +++ b/chrome/chromeFiles/content/scholar/fileInterface.js @@ -19,7 +19,7 @@ Scholar_File_Interface = new function() { if (rv == nsIFilePicker.returnOK || rv == nsIFilePicker.returnReplace) { translation.setLocation(fp.file); translation.setTranslator(translators[fp.filterIndex]); - translation.setHandler("done", Scholar_Ingester_Interface.exportDone); + //translation.setHandler("done", _exportDone); translation.translate(); } } diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js index a5edd1948..80b9cc8cb 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/translate.js +++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -22,8 +22,29 @@ * translatorObj.setFile(myNsILocalFile); * translatorObj.setTranslator(possibleTranslators[x]); // also accepts only an ID * translatorObj.setHandler("done", _translationDone); - * translatorObj.translate() + * translatorObj.translate(); + * + * + * PUBLIC PROPERTIES: + * + * type - the text type of translator (set by constructor) + * numericType - the numeric type of translator (set by constructor) + * location - the location of the target (set by setLocation) + * for import/export - this is an instance of nsILocalFile + * for web - this is a browser object + * translator - the translator currently in use (set by setTranslator) + * + * PRIVATE PROPERTIES: + * + * _handlers - handlers for various events (see setHandler) + * _configOptions - options set by translator modifying behavior of + * Scholar.Translate + * _displayOptions - options available to user for this specific translator + * _waitForCompletion - whether to wait for asynchronous completion, or return + * immediately when script has finished executing + * _sandbox - sandbox in 
which translators will be executed */ + Scholar.Translate = function(type) { this.type = type; @@ -55,7 +76,7 @@ Scholar.Translate.prototype.getTranslators = function() { } /* - * sets the file to be used file should be an nsILocalFile object + * sets the location to operate upon (for import/export, file should be an nsILocalFile object; for web, a browser object) */ Scholar.Translate.prototype.setLocation = function(file) { this.location = file; @@ -73,11 +94,18 @@ Scholar.Translate.prototype.setTranslator = function(translator) { var sql = 'SELECT * FROM translators WHERE translatorID = ? AND type = ?'; this.translator = Scholar.DB.rowQuery(sql, [translator, this.numericType]); - if(this.translator) { - Scholar.debug("got translator "+translator); - return true; + if(!this.translator) { + return false; } - return false; + + if(this.type == "export") { + // for export, we need to execute the translator detectCode to get + // options; for other types, this has already been done + this._executeDetectCode(this.translator); + } + + Scholar.debug("got translator "+translator); + return true; } /* @@ -106,7 +134,10 @@ Scholar.Translate.prototype.setTranslator = function(translator) { * returns: N/A */ Scholar.Translate.prototype.setHandler = function(type, handler) { - this._handlers[type] = handler; + if(!this._handlers[type]) { + this._handlers[type] = new Array(); + } + this._handlers[type].push(handler); } /* @@ -114,15 +145,7 @@ Scholar.Translate.prototype.setHandler = function(type, handler) { * * NOT IMPLEMENTED */ -Scholar.Translate.prototype.getOptions = function() { -} - -/* - * sets translator options to be displayed in a dialog - * - * NOT IMPLEMENTED - */ -Scholar.Translate.prototype.setOptions = function() { +Scholar.Translate.prototype.displayOptions = function() { } /* @@ -145,7 +168,7 @@ Scholar.Translate.prototype.translate = function() { } // If synchronous, call _translationComplete(); - if(!this._waitForCompletion && returnValue) { + if(!this._waitForCompletion) { 
this._translationComplete(returnValue); } } @@ -173,14 +196,67 @@ Scholar.Translate.prototype._generateSandbox = function() { var me = this; this._sandbox.wait = function() {me._enableAsynchronous() }; - if(this.type == "export") { - this._sandbox.write = function(data) { me._exportWrite(data); }; + this._sandbox.configure = function(option, value) {me._configure(option, value) }; + this._sandbox.addOption = function(option, value) {me._addOption(option, value) }; +} + +/* + * executes translator detectCode, sandboxed; resets config and display options first + */ +Scholar.Translate.prototype._executeDetectCode = function(translator) { + this._configOptions = new Object(); + this._displayOptions = new Object(); + Scholar.debug("executing detect code"); + + try { + return Components.utils.evalInSandbox(translator.detectCode, this._sandbox); + } catch(e) { + Scholar.debug(e+' in executing detectCode for '+translator.label); + return; } } +/* + * sets an option that modifies the way the translator is executed + * + * called as configure() in translator detectCode + * + * current options: + * + * dataMode + * valid: import, export + * options: rdf, text + * purpose: selects whether write/read behave as standard text functions or + * using Mozilla's built-in support for RDF data sources + * + * getCollections + * valid: export + * options: true, false + * purpose: selects whether export translator will receive an array of + * collections and children in addition to the array of items and + * children + */ +Scholar.Translate.prototype._configure = function(option, value) { + this._configOptions[option] = value; + Scholar.debug("setting configure option "+option+" to "+value); +} + +/* + * adds translator options to be displayed in a dialog + * + * called as addOption() in detect code + * + */ +Scholar.Translate.prototype._addOption = function(option, value) { + this._displayOptions[option] = value; + Scholar.debug("setting display option "+option+" to "+value); +} + /* * makes translation API wait until done() has 
been called from the translator - * before executing _translationComplete; called as wait() + * before executing _translationComplete + * + * called as wait() in translator code */ Scholar.Translate.prototype._enableAsynchronous = function() { this._waitForCompletion = true; @@ -198,13 +274,21 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) { if(!this._complete) { this._complete = true; - if(this.type == "export" || this.type == "import") { - this.foStream.close(); - } + Scholar.debug("translation complete"); // call handler - if(this._handlers.done) { - this._handlers.done(this, returnValue); + this._runHandler("done", returnValue); + } +} + +/* + * calls every handler registered for an event, in registration order (see setHandler above) + */ +Scholar.Translate.prototype._runHandler = function(type, argument) { + if(this._handlers[type]) { + for(var i=0; i<this._handlers[type].length; i++) { + Scholar.debug("running handler "+i+" for "+type); + this._handlers[type][i](this, argument); } } } @@ -213,6 +297,8 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) { * does the actual export, after code has been loaded and parsed */ Scholar.Translate.prototype._export = function() { + this._exportConfigureIO(); + // get items var itemObjects = Scholar.getItems(); var itemArrays = new Array(); @@ -221,35 +307,88 @@ Scholar.Translate.prototype._export = function() { } delete itemObjects; // free memory - // get collections - var collectionObjects = Scholar.getCollections(); - var collectionArrays = new Array(); - for(var i in collectionObjects) { - var collection = new Object(); - collection.id = collectionObjects[i].getID(); - collection.name = collectionObjects[i].getName(); - collection.type = "collection"; - collection.children = collectionObjects[i].toArray(); - - collectionArrays.push(collection); + // get collections, if requested + var collectionArrays; + if(this._configOptions.getCollections) { + var collectionObjects = Scholar.getCollections(); + collectionArrays = new 
Array(); + for(var i in collectionObjects) { + var collection = new Object(); + collection.id = collectionObjects[i].getID(); + collection.name = collectionObjects[i].getName(); + collection.type = "collection"; + collection.children = collectionObjects[i].toArray(); + + collectionArrays.push(collection); + } + delete collectionObjects; // free memory } - delete collectionObjects; // free memory - - // open file - this.foStream = Components.classes["@mozilla.org/network/file-output-stream;1"] - .createInstance(Components.interfaces.nsIFileOutputStream); - this.foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate - try { - return this._sandbox.doExport(itemArrays, collectionArrays); + return this._sandbox.translate(itemArrays, collectionArrays); } catch(e) { Scholar.debug(e+' in executing code for '+this.translator.label); this._translationComplete(false); } } -// TODO - allow writing in different character sets -Scholar.Translate.prototype._exportWrite = function(data) { - this.foStream.write(data, data.length); +/* + * configures IO for export: opens the output file, exposes model.* (dataMode "rdf") or write() to the sandbox, and registers "done" handlers that serialize (rdf only) and close the stream + */ +Scholar.Translate.prototype._exportConfigureIO = function() { + // open file + var foStream = Components.classes["@mozilla.org/network/file-output-stream;1"] + .createInstance(Components.interfaces.nsIFileOutputStream); + foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate + + if(this._configOptions.dataMode == "rdf") { + /*** INITIALIZATION ***/ + var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1'].getService(Components.interfaces.nsIRDFService); + var IOService = Components.classes['@mozilla.org/network/io-service;1'].getService(Components.interfaces.nsIIOService); + var AtomService = Components.classes["@mozilla.org/atom-service;1"].getService(Components.interfaces.nsIAtomService); + + // create data source + var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=xml-datasource"]. 
+ createInstance(Components.interfaces.nsIRDFDataSource); + // create serializer + var serializer = Components.classes["@mozilla.org/rdf/xml-serializer;1"]. + createInstance(Components.interfaces.nsIRDFXMLSerializer); + serializer.init(dataSource); + + /*** FUNCTIONS ***/ + this._sandbox.model = new Object(); + + // writes an RDF triple + this._sandbox.model.addStatement = function(about, relation, value, literal) { + if(!(about instanceof Components.interfaces.nsIRDFResource)) { + about = RDFService.GetResource(about); + } + dataSource.Assert(about, RDFService.GetResource(relation), + (literal ? RDFService.GetLiteral(value) : RDFService.GetResource(value)), true); + } + + // creates an anonymous resource + this._sandbox.model.newResource = function() { return RDFService.GetAnonymousResource() }; + + // sets a namespace + this._sandbox.model.addNamespace = function(prefix, uri) { + serializer.addNameSpace(AtomService.getAtom(prefix), uri); + } + + this.setHandler("done", function() { + serializer.QueryInterface(Components.interfaces.nsIRDFXMLSource); + serializer.Serialize(foStream); + + dataSource = RDFService = IOService = AtomService = null; // release references; delete cannot remove var bindings + }); + } else { + /*** FUNCTIONS ***/ + // write just writes to the file + this._sandbox.write = function(data) { foStream.write(data, data.length) }; + } + + this.setHandler("done", function() { + foStream.close(); + foStream = null; // release reference; delete cannot remove var bindings + }); } \ No newline at end of file diff --git a/scrapers.sql b/scrapers.sql index aaf62faf9..328d02313 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -2465,7 +2465,8 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) { wait();'); REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 2, 'MODS (XML)', 'Simon Kornblith', 'xml', -'addOption("exportNotes", true); +'configure("getCollections", true); +addOption("exportNotes", true); addOption("exportFileData", true);', 'var partialItemTypes = ["bookSection", "journalArticle", 
"magazineArticle", "newspaperArticle"]; var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); @@ -2513,7 +2514,7 @@ function generateSeeAlso(id, seeAlso, rdfDoc) { rdfDoc.rdf::description += description; } -function doExport(items, collections) { +function translate(items, collections) { var rdfDoc = ; var modsCollection = ; @@ -2778,7 +2779,7 @@ function doExport(items, collections) { if(item.note) { // Add note tag - var note = {item.note}; + var note = {item.note}; note.@ID = "item:"+item.itemID; mods.note += note; } @@ -2808,15 +2809,15 @@ function doExport(items, collections) { write(modsCollection.toXMLString()); }'); -REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml', '', -'function doExport(items) { - var addSubclass = new Object(); +REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml', +'configure("dataMode", "rdf");', +'function translate(items) { var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"]; - var rdfDoc = ; - var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); - var dcterms = new Namespace("dcterms", "http://purl.org/dc/terms/"); - var dc = new Namespace("dc", "http://purl.org/dc/elements/1.1/"); + var dcterms = "http://purl.org/dc/terms/"; + var dc = "http://purl.org/dc/elements/1.1/"; + model.addNamespace("dcterms", dcterms); + model.addNamespace("dc", dc); for(var i in items) { var item = items[i]; @@ -2830,22 +2831,20 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 isPartialItem = true; } - var description = ; + var resource; if(item.ISBN) { - description.@rdf::about = "urn:isbn:"+item.ISBN; - } else if(item.ISSN) { - description.@rdf::about = "urn:issn:"+item.ISSN; + resource = "urn:isbn:"+item.ISBN; } else 
if(item.url) { - description.@rdf::about = item.url; + resource = item.url; } else { // just specify a node ID - description.@rdf::nodeID = item.itemID; + resource = model.newResource(); } /** CORE FIELDS **/ // XML tag titleInfo; object field title - description.dc::title = item.title; + model.addStatement(resource, dc+"title", item.title, true); // XML tag typeOfResource/genre; object field type var type; @@ -2856,7 +2855,7 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 } else { type = "Text"; } - description.dc::type.@rdf::resource = "http://purl.org/dc/dcmitype/"+type; + model.addStatement(resource, dc+"type", "http://purl.org/dc/dcmitype/"+type, false); // XML tag name; object field creators for(var j in item.creators) { @@ -2867,9 +2866,9 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 } if(item.creators[j].creatorType == "author") { - description.dc::creator += {creator}; + model.addStatement(resource, dc+"creator", creator, true); } else { - description.dc::contributor.* += {creator}; + model.addStatement(resource, dc+"contributor", creator, true); } } @@ -2877,86 +2876,72 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 // source if(item.source) { - description.dc::source = item.source; + model.addStatement(resource, dc+"source", item.source, true); } // accessionNumber as generic ID if(item.accessionNumber) { - description.dc::identifier = item.accessionNumber; + model.addStatement(resource, dc+"identifier", item.accessionNumber, true); } // rights if(item.rights) { - description.dc::rights = item.rights; + model.addStatement(resource, dc+"rights", item.rights, true); } /** SUPPLEMENTAL FIELDS **/ // publication/series -> isPartOf if(item.publication) { - description.dcterms::isPartOf = item.publication; - addSubclass.isPartOf = true; + model.addStatement(resource, dcterms+"isPartOf", item.publication, true); } else if(item.series) { - 
description.dcterms::isPartOf = item.series; - addSubclass.isPartOf = true; + model.addStatement(resource, dcterms+"isPartOf", item.series, true); } // TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place // edition if(item.edition) { - description.dcterms::hasVersion = item.edition; + model.addStatement(resource, dcterms+"hasVersion", item.edition, true); } // publisher/distributor if(item.publisher) { - description.dc::publisher = item.publisher; + model.addStatement(resource, dc+"publisher", item.publisher, true); } else if(item.distributor) { - description.dc::publisher = item.distributor; + model.addStatement(resource, dc+"publisher", item.distributor, true); } // date/year if(item.date) { - description.dc::date = item.date; + model.addStatement(resource, dc+"date", item.date, true); } else if(item.year) { - description.dc::date = item.year; + model.addStatement(resource, dc+"date", item.year, true); } // ISBN/ISSN - var resource = false; + var identifier = false; if(item.ISBN) { - resource = "urn:isbn:"+item.ISBN; + identifier = "urn:isbn:"+item.ISBN; } else if(item.ISSN) { - resource = "urn:issn:"+item.ISSN; + identifier = "urn:issn:"+item.ISSN; } - if(resource) { + if(identifier) { if(isPartialItem) { - description.dcterms::isPartOf.@rdf::resource = resource; - addSubclass.isPartOf = true; + model.addStatement(resource, dcterms+"isPartOf", identifier, false); } else { - description.dc::identifier.@rdf::resource = resource; + model.addStatement(resource, dc+"identifier", identifier, false); } } // callNumber if(item.callNumber) { - description.dc::identifier += item.callNumber; + model.addStatement(resource, dc+"identifier", item.callNumber, true); } // archiveLocation if(item.archiveLocation) { - description.dc::coverage = item.archiveLocation; + model.addStatement(resource, dc+"coverage", item.archiveLocation, true); } - - rdfDoc.rdf::Description += description; } - - if(addSubclass.isPartOf) { - rdfDoc.rdf::Description 
+= - - ; - } - - write(''''+"\n"); - write(rdfDoc.toXMLString()); }'); @@ -2969,7 +2954,7 @@ addOption("exportFileData", true);', } } -function doExport(items) { +function translate(items) { for(var i in items) { var item = items[i];