diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
index 1341886ed..40df4e30c 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
@@ -138,6 +138,23 @@ Scholar.Utilities.prototype.getVersion = function() {
return Scholar.version;
}
+/*
+ * Get a page range, given a user-entered set of pages
+ *
+ * pages - the pages value as entered by the user, e.g. "12-34" or "12"
+ *
+ * Returns a two-element array [start, end]. When the input is two runs of
+ * digits joined by a hyphen or an en dash (whitespace is tolerated around
+ * the numbers and around the separator), start and end are the two digit
+ * strings. Any other input is treated as a single page and is returned
+ * unchanged in both slots.
+ */
+Scholar.Utilities.prototype._pageRangeRegexp = /^\s*([0-9]+)\s*[-\u2013]\s*([0-9]+)\s*$/;
+Scholar.Utilities.prototype.getPageRange = function(pages) {
+    var m = this._pageRangeRegexp.exec(pages);
+    if(m) {
+        // A page range
+        return [m[1], m[2]];
+    }
+    // Assume start and end are the same
+    return [pages, pages];
+}
+
Scholar.Utilities.prototype.inArray = Scholar.inArray;
/*
diff --git a/scrapers.sql b/scrapers.sql
index 59596981f..be63c2b47 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -2464,9 +2464,9 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) {
wait();');
-REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-06-28 16:00:00', 2, 'MODS', 'Simon Kornblith', 'xml',
-'options.add("Export project structure", "checkbox", "true");
-options.add("Export notes", "checkbox", "true");',
+REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-06-28 16:00:00', 2, 'MODS (XML)', 'Simon Kornblith', 'xml',
+'addOption("exportNotes", true);
+addOption("exportFileData", true);',
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
function doExport(items) {
@@ -2619,19 +2619,8 @@ function doExport(items) {
// XML tag detail; object field pages
if(item.pages) {
- var start, end;
-
- if(typeof(item.pages) == "string" && item.pages.indexOf("-")) {
- // A page range
- var pageNumbers = item.pages.split("-");
- start = pageNumbers[0];
- end = pageNumbers[1];
- } else {
- // Assume start and end are the same
- start = item.pages;
- end = item.pages;
- }
- part += {start}{end};
+ var range = utilities.getPageRange(item.pages);
+ part += {range[0]}{range[1]};
}
// Assign part if something was assigned
@@ -2681,7 +2670,7 @@ function doExport(items) {
}
// XML tag identifier; object fields ISBN, ISSN
- var identifier = null;
+ var identifier = false;
if(item.ISBN) {
identifier = {item.ISBN};
} else if(item.ISSN) {
@@ -2728,6 +2717,264 @@ function doExport(items) {
modsCollection.mods += mods;
}
- write(modsCollection.toString());
+ write(''''+"\n");
+ write(modsCollection.toXMLString());
}');
+REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-06-28 18:04:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml', '',
+'function doExport(items) {
+ // Builds one description element per non-note item, appends each to rdfDoc, then writes rdfDoc out as XML.
+ // addSubclass collects flags during the loop; isPartOf triggers one extra rdf:Description after the loop.
+ var addSubclass = new Object();
+ var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
+
+ // NOTE(review): the XML literal initialisers (rdfDoc here, description below) look empty in this copy - confirm against the original
+ var rdfDoc = ;
+ var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+ var dcterms = new Namespace("dcterms", "http://purl.org/dc/terms/");
+ var dc = new Namespace("dc", "http://purl.org/dc/elements/1.1/");
+
+ for(var i in items) {
+ var item = items[i];
+
+ if(item.itemType == "note") {
+ continue;
+ }
+
+ var isPartialItem = false;
+ if(utilities.inArray(item.itemType, partialItemTypes)) {
+ isPartialItem = true;
+ }
+
+ var description = ;
+ if(item.ISBN) {
+ description.@rdf::about = "urn:isbn:"+item.ISBN;
+ } else if(item.ISSN) {
+ description.@rdf::about = "urn:issn:"+item.ISSN;
+ } else if(item.url) {
+ description.@rdf::about = item.url;
+ } else {
+ // no ISBN, ISSN or URL: fall back to a urn:uuid: identifier built from the item ID
+ description.@rdf::about = "urn:uuid:"+item.itemID;
+ }
+
+ /** CORE FIELDS **/
+
+ // dc:title; object field title
+ description.dc::title = item.title;
+
+ // dc:type (a purl.org/dc/dcmitype URI); object field itemType
+ var type;
+ if(item.itemType == "film") {
+ type = "MovingImage";
+ } else if(item.itemType == "artwork") {
+ type = "StillImage";
+ } else {
+ type = "Text";
+ }
+ description.dc::type.@rdf::resource = "http://purl.org/dc/dcmitype/"+type;
+
+ // dc:creator for authors, dc:contributor for every other creator type; object field creators
+ for(var j in item.creators) {
+ // put creators in lastName, firstName format (although DC doesn''t specify)
+ var creator = item.creators[j].lastName;
+ if(item.creators[j].firstName) {
+ creator += ", "+item.creators[j].firstName;
+ }
+
+ if(item.creators[j].creatorType == "author") {
+ description.dc::creator += {creator};
+ } else {
+ description.dc::contributor.* += {creator};
+ }
+ }
+
+ /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
+
+ // source
+ if(item.source) {
+ description.dc::source = item.source;
+ }
+
+ // accessionNumber as generic ID
+ if(item.accessionNumber) {
+ description.dc::identifier = item.accessionNumber;
+ }
+
+ // rights
+ if(item.rights) {
+ description.dc::rights = item.rights;
+ }
+
+ /** SUPPLEMENTAL FIELDS **/
+
+ // publication/series -> isPartOf
+ if(item.publication) {
+ description.dcterms::isPartOf = item.publication;
+ addSubclass.isPartOf = true;
+ } else if(item.series) {
+ description.dcterms::isPartOf = item.series;
+ addSubclass.isPartOf = true;
+ }
+
+ // TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
+
+ // edition
+ if(item.edition) {
+ description.dcterms::hasVersion = item.edition;
+ }
+ // publisher/distributor
+ if(item.publisher) {
+ description.dc::publisher = item.publisher;
+ } else if(item.distributor) {
+ description.dc::publisher = item.distributor;
+ }
+ // date/year
+ if(item.date) {
+ description.dc::date = item.date;
+ } else if(item.year) {
+ description.dc::date = item.year;
+ }
+
+ // ISBN/ISSN as a URN: partial item types reference it through dcterms:isPartOf, all other items through dc:identifier
+ var resource = false;
+ if(item.ISBN) {
+ resource = "urn:isbn:"+item.ISBN;
+ } else if(item.ISSN) {
+ resource = "urn:issn:"+item.ISSN;
+ }
+ if(resource) {
+ if(isPartialItem) {
+ description.dcterms::isPartOf.@rdf::resource = resource;
+ addSubclass.isPartOf = true;
+ } else {
+ description.dc::identifier.@rdf::resource = resource;
+ }
+ }
+
+ // callNumber
+ if(item.callNumber) {
+ description.dc::identifier += item.callNumber;
+ }
+
+ // archiveLocation
+ if(item.archiveLocation) {
+ description.dc::coverage = item.archiveLocation;
+ }
+
+ rdfDoc.rdf::Description += description;
+ }
+
+ if(addSubclass.isPartOf) {
+ rdfDoc.rdf::Description +=
+
+ ;
+ }
+
+ write(''''+"\n");
+ write(rdfDoc.toXMLString());
+}');
+
+
+REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 2, 'RIS', 'Simon Kornblith', 'ris',
+'addOption("exportNotes", true);
+addOption("exportFileData", true);',
+'/*
+ * Write a single RIS field line: the tag, two spaces, a hyphen, a space,
+ * the value, then CRLF (the tag/value separator layout required by RIS).
+ *
+ * tag   - two-character RIS tag, e.g. "TY"
+ * value - field value; nothing is written when it is falsy, so callers can
+ *         pass optional item fields straight through
+ */
+function addTag(tag, value) {
+    if(!value) {
+        return;
+    }
+    write(tag+"  - "+value+"\r\n");
+}
+
+/*
+ * Export items in RIS format, one record per item.
+ *
+ * items - collection of item objects; each is read for itemType, itemID,
+ *         title, series, creators, date/year, notes, publication, volume,
+ *         number, pages, place, publisher, ISBN, ISSN, url and source
+ *
+ * Every record starts with a TY line and is closed by an ER line followed
+ * by a blank line. Items whose itemType is "note" are skipped.
+ */
+function doExport(items) {
+    // RIS reference type for each supported item type
+    // TODO - figure out if these are the best types for letter, interview, website
+    var risTypes = {
+        "book":"BOOK",
+        "bookSection":"CHAP",
+        "journalArticle":"JOUR",
+        "magazineArticle":"MGZN",
+        "newspaperArticle":"NEWS",
+        "thesis":"THES",
+        "letter":"PCOMM",
+        "interview":"PCOMM",
+        "film":"MPCT",
+        "artwork":"ART",
+        "website":"ICOMM"
+    };
+    var isoDate = /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/;
+    var yearInDate = /^(.*?) *([0-9]{4})/;
+
+    for(var i in items) {
+        var item = items[i];
+
+        // standalone notes cannot be stored in RIS
+        if(item.itemType == "note") {
+            continue;
+        }
+
+        // type; looked up fresh for every item so that an unmapped item type
+        // falls back to the generic type instead of reusing the type of the
+        // previous item (or producing a record with no TY line)
+        var risType = risTypes.hasOwnProperty(item.itemType) ? risTypes[item.itemType] : "GEN";
+        addTag("TY", risType);
+        // ID
+        addTag("ID", item.itemID);
+        // primary title
+        addTag("T1", item.title);
+        // series title
+        addTag("T3", item.series);
+        // creators
+        for(var j in item.creators) {
+            var creator = item.creators[j];
+            // only two types, primary and secondary
+            var risTag = (creator.creatorType == "author") ? "A1" : "A2";
+
+            // lastName,firstName; the first name is optional and is left
+            // out (together with the comma) when the creator has none
+            var name = creator.lastName ? creator.lastName : "";
+            if(creator.firstName) {
+                name += ","+creator.firstName;
+            }
+            addTag(risTag, name);
+        }
+        // date
+        if(item.date) {
+            if(isoDate.test(item.date)) {
+                // ISO dates only need their separators changed; the global
+                // flag matters, a string pattern would replace just the
+                // first hyphen
+                addTag("Y1", item.date.replace(/-/g, "/")+"/");
+            } else {
+                // otherwise, extract year and attach other data
+                var m = yearInDate.exec(item.date);
+                if(m) {
+                    addTag("Y1", m[2]+"///"+m[1]);
+                }
+            }
+        } else if(item.year) {
+            addTag("Y1", item.year+"///");
+        }
+        // notes
+        for(var j in item.notes) {
+            addTag("N1", item.notes[j].note);
+        }
+        // publication
+        addTag("JF", item.publication);
+        // volume
+        addTag("VL", item.volume);
+        // number
+        addTag("IS", item.number);
+        // pages
+        if(item.pages) {
+            var range = utilities.getPageRange(item.pages);
+            addTag("SP", range[0]);
+            addTag("EP", range[1]);
+        }
+        // place
+        addTag("CP", item.place);
+        // publisher
+        addTag("PB", item.publisher);
+        // ISBN/ISSN
+        addTag("SN", item.ISBN);
+        addTag("SN", item.ISSN);
+        // URL
+        if(item.url) {
+            addTag("UR", item.url);
+        } else if(item.source && item.source.substr(0, 7) == "http://") {
+            addTag("UR", item.source);
+        }
+        // end of record; written directly because addTag skips empty values
+        write("ER  - \r\n\r\n");
+    }
+}');
\ No newline at end of file