diff --git a/chrome/chromeFiles/content/scholar/fileInterface.js b/chrome/chromeFiles/content/scholar/fileInterface.js
index f647d2554..6afc26fba 100644
--- a/chrome/chromeFiles/content/scholar/fileInterface.js
+++ b/chrome/chromeFiles/content/scholar/fileInterface.js
@@ -19,7 +19,7 @@ Scholar_File_Interface = new function() {
if (rv == nsIFilePicker.returnOK || rv == nsIFilePicker.returnReplace) {
translation.setLocation(fp.file);
translation.setTranslator(translators[fp.filterIndex]);
- translation.setHandler("done", Scholar_Ingester_Interface.exportDone);
+ //translation.setHandler("done", _exportDone);
translation.translate();
}
}
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
index a5edd1948..80b9cc8cb 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/translate.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -22,8 +22,29 @@
* translatorObj.setFile(myNsILocalFile);
* translatorObj.setTranslator(possibleTranslators[x]); // also accepts only an ID
* translatorObj.setHandler("done", _translationDone);
- * translatorObj.translate()
+ * translatorObj.translate();
+ *
+ *
+ * PUBLIC PROPERTIES:
+ *
+ * type - the text type of translator (set by constructor)
+ * numericType - the numeric type of translator (set by constructor)
+ * location - the location of the target (set by setLocation)
+ * for import/export - this is an instance of nsILocalFile
+ * for web - this is a browser object
+ * translator - the translator currently in use (set by setTranslator)
+ *
+ * PRIVATE PROPERTIES:
+ *
+ * _handlers - handlers for various events (see setHandler)
+ * _configOptions - options set by translator modifying behavior of
+ * Scholar.Translate
+ * _displayOptions - options available to user for this specific translator
+ * _waitForCompletion - whether to wait for asynchronous completion, or return
+ * immediately when script has finished executing
+ * _sandbox - sandbox in which translators will be executed
*/
+
Scholar.Translate = function(type) {
this.type = type;
@@ -55,7 +76,7 @@ Scholar.Translate.prototype.getTranslators = function() {
}
/*
- * sets the file to be used file should be an nsILocalFile object
+ * sets the location to operate upon (file is an nsILocalFile for import/export, or a browser object for web)
*/
Scholar.Translate.prototype.setLocation = function(file) {
this.location = file;
@@ -73,11 +94,18 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
var sql = 'SELECT * FROM translators WHERE translatorID = ? AND type = ?';
this.translator = Scholar.DB.rowQuery(sql, [translator, this.numericType]);
- if(this.translator) {
- Scholar.debug("got translator "+translator);
- return true;
+ if(!this.translator) {
+ return false;
}
- return false;
+
+ if(this.type == "export") {
+ // for export, we need to execute the translator detectCode to get
+ // options; for other types, this has already been done
+ this._executeDetectCode(this.translator);
+ }
+
+ Scholar.debug("got translator "+translator);
+ return true;
}
/*
@@ -106,7 +134,10 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
* returns: N/A
*/
Scholar.Translate.prototype.setHandler = function(type, handler) {
- this._handlers[type] = handler;
+	// handlers are kept as one list per event type, created on first use
+	var registered = this._handlers[type];
+	if(!registered) registered = this._handlers[type] = new Array();
+	registered.push(handler);
}
/*
@@ -114,15 +145,7 @@ Scholar.Translate.prototype.setHandler = function(type, handler) {
*
* NOT IMPLEMENTED
*/
-Scholar.Translate.prototype.getOptions = function() {
-}
-
-/*
- * sets translator options to be displayed in a dialog
- *
- * NOT IMPLEMENTED
- */
-Scholar.Translate.prototype.setOptions = function() {
+Scholar.Translate.prototype.displayOptions = function() { // empty placeholder, no behavior yet
}
/*
@@ -145,7 +168,7 @@ Scholar.Translate.prototype.translate = function() {
}
// If synchronous, call _translationComplete();
- if(!this._waitForCompletion && returnValue) {
+ if(!this._waitForCompletion) {
this._translationComplete(returnValue);
}
}
@@ -173,14 +196,67 @@ Scholar.Translate.prototype._generateSandbox = function() {
var me = this;
this._sandbox.wait = function() {me._enableAsynchronous() };
- if(this.type == "export") {
- this._sandbox.write = function(data) { me._exportWrite(data); };
+ this._sandbox.configure = function(option, value) {me._configure(option, value) };
+ this._sandbox.addOption = function(option, value) {me._addOption(option, value) };
+}
+
+/*
+ * executes translator detectCode, sandboxed, after resetting the option maps;
+ * returns its result, or undefined (after logging) if detectCode throws
+ */
+Scholar.Translate.prototype._executeDetectCode = function(translator) {
+	this._configOptions = new Object(); // keyed by option name, so not an Array
+	this._displayOptions = new Object();
+	Scholar.debug("executing detect code");
+	try {
+		return Components.utils.evalInSandbox(translator.detectCode, this._sandbox);
+	} catch(e) {
+		Scholar.debug(e+' in executing detectCode for '+translator.label);
+		return;
}
}
+/*
+ * records an option that changes how the translator is executed
+ *
+ * exposed to translator detectCode as configure(option, value)
+ *
+ * recognized options:
+ *
+ * dataMode
+ *   valid: import, export
+ *   options: rdf, text
+ *   purpose: chooses between standard text read/write functions and
+ *            Mozilla's built-in support for RDF data sources
+ *
+ * getCollections
+ *   valid: export
+ *   options: true, false
+ *   purpose: chooses whether an export translator is given an array of
+ *            collections and children as well as the array of items
+ */
+Scholar.Translate.prototype._configure = function(option, value) {
+	var configOptions = this._configOptions;
+	configOptions[option] = value;
+	Scholar.debug("setting configure option "+option+" to "+value);
+}
+
+/*
+ * registers a translator option to be shown to the user in a dialog
+ *
+ * exposed to translator detectCode as addOption(option, value)
+ */
+Scholar.Translate.prototype._addOption = function(option, value) {
+	var displayOptions = this._displayOptions;
+	displayOptions[option] = value;
+	Scholar.debug("setting display option "+option+" to "+value);
+}
+
/*
* makes translation API wait until done() has been called from the translator
- * before executing _translationComplete; called as wait()
+ * before executing _translationComplete
+ *
+ * called as wait() in translator code
*/
Scholar.Translate.prototype._enableAsynchronous = function() {
this._waitForCompletion = true;
@@ -198,13 +274,21 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
if(!this._complete) {
this._complete = true;
- if(this.type == "export" || this.type == "import") {
- this.foStream.close();
- }
+ Scholar.debug("translation complete");
// call handler
- if(this._handlers.done) {
- this._handlers.done(this, returnValue);
+ this._runHandler("done", returnValue);
+ }
+}
+
+/*
+ * calls every handler registered for type, in order (see setHandler above)
+ */
+Scholar.Translate.prototype._runHandler = function(type, argument) {
+	if(this._handlers[type]) {
+		for(var i=0; i<this._handlers[type].length; i++) {
+			Scholar.debug("running handler "+i+" for "+type);
+			this._handlers[type][i](this, argument);
}
}
}
@@ -213,6 +297,8 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
* does the actual export, after code has been loaded and parsed
*/
Scholar.Translate.prototype._export = function() {
+ this._exportConfigureIO();
+
// get items
var itemObjects = Scholar.getItems();
var itemArrays = new Array();
@@ -221,35 +307,88 @@ Scholar.Translate.prototype._export = function() {
}
delete itemObjects; // free memory
- // get collections
- var collectionObjects = Scholar.getCollections();
- var collectionArrays = new Array();
- for(var i in collectionObjects) {
- var collection = new Object();
- collection.id = collectionObjects[i].getID();
- collection.name = collectionObjects[i].getName();
- collection.type = "collection";
- collection.children = collectionObjects[i].toArray();
-
- collectionArrays.push(collection);
+ // get collections, if requested
+ var collectionArrays;
+ if(this._configOptions.getCollections) {
+ var collectionObjects = Scholar.getCollections();
+ collectionArrays = new Array();
+ for(var i in collectionObjects) {
+ var collection = new Object();
+ collection.id = collectionObjects[i].getID();
+ collection.name = collectionObjects[i].getName();
+ collection.type = "collection";
+ collection.children = collectionObjects[i].toArray();
+
+ collectionArrays.push(collection);
+ }
+ delete collectionObjects; // free memory
}
- delete collectionObjects; // free memory
-
- // open file
- this.foStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
- .createInstance(Components.interfaces.nsIFileOutputStream);
- this.foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate
-
try {
- return this._sandbox.doExport(itemArrays, collectionArrays);
+ return this._sandbox.translate(itemArrays, collectionArrays);
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator.label);
this._translationComplete(false);
}
}
-// TODO - allow writing in different character sets
-Scholar.Translate.prototype._exportWrite = function(data) {
- this.foStream.write(data, data.length);
+/*
+ * configures IO for export: opens this.location for writing and gives the
+ * sandbox output functions chosen by the dataMode config option ("rdf" gets
+ * model.*, with statements serialized on "done"; otherwise write(data))
+ */
+Scholar.Translate.prototype._exportConfigureIO = function() {
+	// open file
+	var foStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
+		.createInstance(Components.interfaces.nsIFileOutputStream);
+	foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate
+
+	try {
+		if(this._configOptions.dataMode == "rdf") {
+			/*** INITIALIZATION ***/
+			var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1'].getService(Components.interfaces.nsIRDFService);
+			var AtomService = Components.classes["@mozilla.org/atom-service;1"].getService(Components.interfaces.nsIAtomService);
+			// create data source
+			var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=xml-datasource"].
+				createInstance(Components.interfaces.nsIRDFDataSource);
+			// create serializer
+			var serializer = Components.classes["@mozilla.org/rdf/xml-serializer;1"].
+				createInstance(Components.interfaces.nsIRDFXMLSerializer);
+			serializer.init(dataSource);
+			/*** FUNCTIONS ***/
+			this._sandbox.model = new Object();
+			// writes an RDF triple (value is a literal if literal is true, else a resource)
+			this._sandbox.model.addStatement = function(about, relation, value, literal) {
+				if(!(about instanceof Components.interfaces.nsIRDFResource)) {
+					about = RDFService.GetResource(about);
+				}
+				dataSource.Assert(about, RDFService.GetResource(relation),
+					(literal ? RDFService.GetLiteral(value) : RDFService.GetResource(value)), true);
+			}
+
+			// creates an anonymous resource
+			this._sandbox.model.newResource = function() { return RDFService.GetAnonymousResource() };
+
+			// sets a namespace
+			this._sandbox.model.addNamespace = function(prefix, uri) {
+				serializer.addNameSpace(AtomService.getAtom(prefix), uri);
+			}
+
+			// serialize the collected statements once translation is done
+			this.setHandler("done", function() {
+				serializer.QueryInterface(Components.interfaces.nsIRDFXMLSource);
+				serializer.Serialize(foStream);
+			});
+		} else {
+			// write just writes to the file
+			this._sandbox.write = function(data) { foStream.write(data, data.length) };
+		}
+	} catch(e) {
+		// setup failed: close the stream rather than leak it, then re-throw
+		foStream.close();
+		throw e;
+	}
+
+	// registered after the serializer handler, so it is later in the handler list
+	this.setHandler("done", function() { foStream.close(); });
+}
\ No newline at end of file
diff --git a/scrapers.sql b/scrapers.sql
index aaf62faf9..328d02313 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -2465,7 +2465,8 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) {
wait();');
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 2, 'MODS (XML)', 'Simon Kornblith', 'xml',
-'addOption("exportNotes", true);
+'configure("getCollections", true);
+addOption("exportNotes", true);
addOption("exportFileData", true);',
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
@@ -2513,7 +2514,7 @@ function generateSeeAlso(id, seeAlso, rdfDoc) {
rdfDoc.rdf::description += description;
}
-function doExport(items, collections) {
+function translate(items, collections) {
var rdfDoc = ;
var modsCollection = ;
@@ -2778,7 +2779,7 @@ function doExport(items, collections) {
if(item.note) {
// Add note tag
- var note = {item.note};
+ var note = {item.note};
note.@ID = "item:"+item.itemID;
mods.note += note;
}
@@ -2808,15 +2809,15 @@ function doExport(items, collections) {
write(modsCollection.toXMLString());
}');
-REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml', '',
-'function doExport(items) {
- var addSubclass = new Object();
+REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml',
+'configure("dataMode", "rdf");',
+'function translate(items) {
var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
- var rdfDoc = ;
- var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
- var dcterms = new Namespace("dcterms", "http://purl.org/dc/terms/");
- var dc = new Namespace("dc", "http://purl.org/dc/elements/1.1/");
+ var dcterms = "http://purl.org/dc/terms/";
+ var dc = "http://purl.org/dc/elements/1.1/";
+ model.addNamespace("dcterms", dcterms);
+ model.addNamespace("dc", dc);
for(var i in items) {
var item = items[i];
@@ -2830,22 +2831,20 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
isPartialItem = true;
}
- var description = ;
+ var resource;
if(item.ISBN) {
- description.@rdf::about = "urn:isbn:"+item.ISBN;
- } else if(item.ISSN) {
- description.@rdf::about = "urn:issn:"+item.ISSN;
+ resource = "urn:isbn:"+item.ISBN;
} else if(item.url) {
- description.@rdf::about = item.url;
+ resource = item.url;
} else {
// just specify a node ID
- description.@rdf::nodeID = item.itemID;
+ resource = model.newResource();
}
/** CORE FIELDS **/
// XML tag titleInfo; object field title
- description.dc::title = item.title;
+ model.addStatement(resource, dc+"title", item.title, true);
// XML tag typeOfResource/genre; object field type
var type;
@@ -2856,7 +2855,7 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
} else {
type = "Text";
}
- description.dc::type.@rdf::resource = "http://purl.org/dc/dcmitype/"+type;
+ model.addStatement(resource, dc+"type", "http://purl.org/dc/dcmitype/"+type, false);
// XML tag name; object field creators
for(var j in item.creators) {
@@ -2867,9 +2866,9 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
}
if(item.creators[j].creatorType == "author") {
- description.dc::creator += {creator};
+ model.addStatement(resource, dc+"creator", creator, true);
} else {
- description.dc::contributor.* += {creator};
+ model.addStatement(resource, dc+"contributor", creator, true);
}
}
@@ -2877,86 +2876,72 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
// source
if(item.source) {
- description.dc::source = item.source;
+ model.addStatement(resource, dc+"source", item.source, true);
}
// accessionNumber as generic ID
if(item.accessionNumber) {
- description.dc::identifier = item.accessionNumber;
+ model.addStatement(resource, dc+"identifier", item.accessionNumber, true);
}
// rights
if(item.rights) {
- description.dc::rights = item.rights;
+ model.addStatement(resource, dc+"rights", item.rights, true);
}
/** SUPPLEMENTAL FIELDS **/
// publication/series -> isPartOf
if(item.publication) {
- description.dcterms::isPartOf = item.publication;
- addSubclass.isPartOf = true;
+ model.addStatement(resource, dcterms+"isPartOf", item.publication, true);
} else if(item.series) {
- description.dcterms::isPartOf = item.series;
- addSubclass.isPartOf = true;
+ model.addStatement(resource, dcterms+"isPartOf", item.series, true);
}
// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
// edition
if(item.edition) {
- description.dcterms::hasVersion = item.edition;
+ model.addStatement(resource, dcterms+"hasVersion", item.edition, true);
}
// publisher/distributor
if(item.publisher) {
- description.dc::publisher = item.publisher;
+ model.addStatement(resource, dc+"publisher", item.publisher, true);
} else if(item.distributor) {
- description.dc::publisher = item.distributor;
+ model.addStatement(resource, dc+"publisher", item.distributor, true);
}
// date/year
if(item.date) {
- description.dc::date = item.date;
+ model.addStatement(resource, dc+"date", item.date, true);
} else if(item.year) {
- description.dc::date = item.year;
+ model.addStatement(resource, dc+"date", item.year, true);
}
// ISBN/ISSN
- var resource = false;
+ var identifier = false;
if(item.ISBN) {
- resource = "urn:isbn:"+item.ISBN;
+ identifier = "urn:isbn:"+item.ISBN;
} else if(item.ISSN) {
- resource = "urn:issn:"+item.ISSN;
+ identifier = "urn:issn:"+item.ISSN;
}
- if(resource) {
+ if(identifier) {
if(isPartialItem) {
- description.dcterms::isPartOf.@rdf::resource = resource;
- addSubclass.isPartOf = true;
+ model.addStatement(resource, dcterms+"isPartOf", identifier, false);
} else {
- description.dc::identifier.@rdf::resource = resource;
+ model.addStatement(resource, dc+"identifier", identifier, false);
}
}
// callNumber
if(item.callNumber) {
- description.dc::identifier += item.callNumber;
+ model.addStatement(resource, dc+"identifier", item.callNumber, true);
}
// archiveLocation
if(item.archiveLocation) {
- description.dc::coverage = item.archiveLocation;
+ model.addStatement(resource, dc+"coverage", item.archiveLocation, true);
}
-
- rdfDoc.rdf::Description += description;
}
-
- if(addSubclass.isPartOf) {
- rdfDoc.rdf::Description +=
-
- ;
- }
-
- write(''''+"\n");
- write(rdfDoc.toXMLString());
}');
@@ -2969,7 +2954,7 @@ addOption("exportFileData", true);',
}
}
-function doExport(items) {
+function translate(items) {
for(var i in items) {
var item = items[i];