diff --git a/chrome/chromeFiles/content/scholar/xpcom/scholar.js b/chrome/chromeFiles/content/scholar/xpcom/scholar.js
index d4c5d8c0d..0ff2d41d1 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/scholar.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/scholar.js
@@ -603,6 +603,7 @@ Scholar.Hash.prototype.has = function(in_key){
 Scholar.Date = new function(){
 	this.sqlToDate = sqlToDate;
 	this.strToDate = strToDate;
+	this.formatDate = formatDate;
 	this.getFileDateString = getFileDateString;
 	this.getFileTimeString = getFileTimeString;
 	
@@ -652,9 +653,6 @@ Scholar.Date = new function(){
 			return date;
 		}
 		
-		// get short month strings from CSL interpreter
-		var months = CSL.getMonthStrings("short");
-		
 		string = string.replace(/^\s+/, "").replace(/\s+$/, "").replace(/\s+/, " ");
 		
 		var dateRe = /^([0-9]{4})[\-\/]([0-9]{2})[\-\/]([0-9]{2})$/;
@@ -695,6 +693,9 @@ Scholar.Date = new function(){
 			date.part = m[1]+m[3];
 			Scholar.debug("DATE: got year ("+date.year+", "+date.part+")");
 			
+			// get short month strings from CSL interpreter
+			var months = CSL.getMonthStrings("short");
+			
 			// then, see if have anything resembling a month anywhere
 			var monthRe = new RegExp("^(.*)\\b("+months.join("|")+")[^ ]* (.*)$", "i");
 			var m = monthRe.exec(date.part);
@@ -721,6 +722,40 @@ Scholar.Date = new function(){
 		return date;
 	}
 	
+	/*
+	 * does pretty formatting of a date object returned by strToDate()
+	 *
+	 * date.month is used as an index into the CSL month strings, so a month
+	 * of 0 is valid and must not be treated as missing; a day is separated
+	 * from the year by a comma so the two do not run together
+	 */
+	function formatDate(date) {
+		var string = "";
+		
+		if(date.part) {
+			string += date.part+" ";
+		}
+		
+		if(date.month !== undefined && date.month !== null) {
+			// get long month strings from CSL interpreter
+			var months = CSL.getMonthStrings("long");
+			if(months[date.month]) {
+				string += months[date.month];
+				if(date.day) {
+					string += " "+date.day+(date.year ? ", " : "");
+				} else {
+					string += " ";
+				}
+			}
+		}
+		
+		if(date.year) {
+			string += date.year;
+		}
+		
+		return string;
+	}
+	
 	function getFileDateString(file){
 		var date = new Date();
 		date.setTime(file.lastModifiedTime);
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js 
b/chrome/chromeFiles/content/scholar/xpcom/translate.js
index 42967bc61..186e7fd3e 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/translate.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -1611,6 +1611,7 @@ Scholar.Translate.prototype._exportGetCollection = function() {
 Scholar.Translate.prototype._initializeInternalIO = function() {
 	if(this.type == "import" || this.type == "export") {
 		if(this._configOptions.dataMode == "rdf") {
+			this._rdf = new Object();
 			// use an in-memory data source for internal IO
 			this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
 			                       createInstance(Components.interfaces.nsIRDFDataSource);
diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
index ba868aa67..0f8bec441 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
@@ -15,28 +15,8 @@ Scholar.Utilities.prototype.debug = function(msg) {
 /*
- * Converts a JavaScript date object to an SQL-style date
+ * Pretty-formats a date object (as returned by Scholar.Date.strToDate())
  */
-Scholar.Utilities.prototype.dateToSQL = function(jsDate) {
-	var date = "";
-	var year = jsDate.getFullYear().toString();
-	var month = (jsDate.getMonth()+1).toString();
-	var day = jsDate.getDate().toString();
-	
-	for(var i = year.length; i<4; i++) {
-		date += "0";
-	}
-	date += year+"-";
-	
-	if(month.length == 1) {
-		date += "0";
-	}
-	date += month+"-";
-	
-	if(day.length == 1) {
-		date += "0";
-	}
-	date += day;
-	
-	return date;
+Scholar.Utilities.prototype.formatDate = function(date) {
+	return Scholar.Date.formatDate(date);
 }
 
 /*
diff --git a/scrapers.sql b/scrapers.sql
index ddec970d8..8040c807b 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,4 +1,4 @@
--- 66
+-- 67
 
 -- Set the following timestamp to the most recent scraper update date
 REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
@@ -46,12 +46,7 @@ REPLACE INTO "translators" VALUES 
('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006 if(attribute == "Publisher:") { if(value.lastIndexOf("(") != -1) { - var date = value.substring(value.lastIndexOf("(")+1, value.length-1); - jsDate = new Date(date); - if(!isNaN(jsDate.valueOf())) { - date = Scholar.Utilities.dateToSQL(jsDate); - } - newItem.date = date; + newItem.date = value.substring(value.lastIndexOf("(")+1, value.length-1); value = value.substring(0, value.lastIndexOf("(")-1); } @@ -638,12 +633,7 @@ function doWeb(doc, url) { } else if(fieldCode == "SE") { newItem.seriesTitle = fieldContent; } else if(fieldCode == "DA") { - var date = new Date(fieldContent.replace(".", "")); - if(isNaN(date.valueOf())) { - newItem.date = fieldContent; - } else { - newItem.date = Scholar.Utilities.dateToSQL(date); - } + newItem.date = fieldContent; } else if(fieldCode == "PP") { newItem.pages = fieldContent; } else if(fieldCode == "EI") { @@ -1131,8 +1121,6 @@ REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006 } }', 'function scrape(doc) { - Scholar.Utilities.debug(doc.getElementsByTagName("body")[0].innerHTML); - var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -1174,12 +1162,7 @@ REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006 var date = doc.evaluate(''./TD[2]/A[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); if(date.nodeValue) { - date = date.nodeValue; - var jsDate = new Date(Scholar.Utilities.superCleanString(date)); - if(!isNaN(jsDate.valueOf())) { - date = Scholar.Utilities.dateToSQL(jsDate); - } - newItem.date = date; + newItem.date = date.nodeValue; } var moreInfo = doc.evaluate(''./TD[2]/text()[2]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); @@ -1639,8 +1622,7 @@ REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006 var dateRegexp = /]*>(?:)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/; var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML); if(m) { - var jsDate = new Date(m[1]+" "+m[2]); - newItem.date = Scholar.Utilities.dateToSQL(jsDate); + newItem.date = m[1]+" "+m[2]; } else { var elementParts = centerElements[centerElements.length-1].innerHTML.split(/]*>/gi); newItem.date = elementParts[1]; @@ -2619,10 +2601,11 @@ function detectSearch(item) { newItem.ISSN = issn.replace(/[^0-9]/g, ""); } + newItem.journalAbbreviation = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString()); if(article.Journal.Title.length()) { newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString()); } else if(citation.MedlineJournalInfo.MedlineTA.length()) { - newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString()); + newItem.publicationTitle = newItem.journalAbbreviation; } if(article.Journal.JournalIssue.length()) { @@ -2630,19 +2613,11 @@ function detectSearch(item) { newItem.issue = article.Journal.JournalIssue.Issue.text(); if(article.Journal.JournalIssue.PubDate.length()) { // try to get 
the date if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") { - var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text(); - var jsDate = new Date(date); - if(!isNaN(jsDate.valueOf())) { - date = Scholar.Utilities.dateToSQL(jsDate); - } + newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text(); } else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") { - var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text(); + newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text(); } else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") { - var date = article.Journal.JournalIssue.PubDate.Year.text(); - } - - if(date) { - newItem.date = date; + newItem.date = article.Journal.JournalIssue.PubDate.Year.text(); } } } @@ -2733,7 +2708,9 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006 var dc = "http://purl.org/dc/elements/1.1/"; // load RDF translator - var translator = Scholar.loadTranslator("import", "5e3ad958-ac79-463d-812b-a86a9235c28f"); + var translator = Scholar.loadTranslator("import"); + translator.setTranslator("5e3ad958-ac79-463d-812b-a86a9235c28f"); + var rdf = translator.getTranslatorObject(); var metaTags = doc.getElementsByTagName("meta"); var foundTitle = false; // We can use the page title if necessary @@ -2744,20 +2721,20 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006 if(tag == "dc.title") { foundTitle = true; } - translator.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true); + rdf.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true); Scholar.Utilities.debug(tag.substr(3) + " = 
" + value); } else if(tag && value && (tag == "author" || tag == "author-personal")) { - translator.Scholar.RDF.addStatement(url, dc + "creator", value, true); + rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true); } else if(tag && value && tag == "author-corporate") { - translator.Scholar.RDF.addStatement(url, dc + "creator", value, true); + rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true); } } if(!foundTitle) { - translator.Scholar.RDF.addStatement(url, dc + "title", doc.title, true); + rdf.Scholar.RDF.addStatement(url, dc + "title", doc.title, true); } - translator.doImport(); + rdf.doImport(); }'); REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS', 'Simon Kornblith', NULL, @@ -2964,16 +2941,7 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006 } else if(field == "Publisher") { newItem.publisher = value; } else if(field == "Publication Date") { - var date = value; - - jsDate = new Date(value); - if(!isNaN(jsDate.valueOf())) { - date = Scholar.Utilities.dateToSQL(jsDate); - } - - newItem.date = date; - /*} else if(field == "Format") { - .addStatement(uri, prefixDC + ''medium'', value);*/ + newItem.date = value; } else if(field == "ISBN") { newItem.ISBN = value; } else if(field == "Pages") { @@ -3981,11 +3949,10 @@ function doExport() { var dateType = "dateCreated"; } var tag = <{dateType}>{item.date}; - tag.@encoding = "iso8601"; originInfo += tag; } if(item.accessDate) { - originInfo += {item.accessDate}; + originInfo += {item.accessDate}; } if(originInfo.length() != 1) { if(isPartialItem) { @@ -4629,6 +4596,11 @@ function doExport() { Scholar.RDF.addStatement((containerElement ? 
containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
 	}
 	
+	// extra
+	if(item.extra) {
+		Scholar.RDF.addStatement(resource, n.dc+"description", item.extra, true);
+	}
+	
 	/** NOTES **/
 	
 	if(Scholar.getOption("exportNotes")) {
@@ -5190,6 +5162,9 @@ function doImport() {
 		
 		// see also
 		processSeeAlso(node, newItem);
+		
+		// description
+		newItem.extra = getFirstResults(node, [n.dc+"description"], true);
 		
 		/** NOTES **/
 		
@@ -5371,13 +5346,19 @@ function processTag(item, tag, value) {
 		if(dateParts.length == 1) {
 			// technically, if there''s only one date part, the file isn''t valid
 			// RIS, but EndNote accepts this, so we have to too
-			item.date = value+"-00-00";
-		} else if(dateParts[1].length == 0 && dateParts[2].length == 0 && dateParts[3] && dateParts[3].length != 0) {
-			// in the case that we have a year and other data, format that way
-			item.date = dateParts[3]+(dateParts[0] ? " "+dateParts[0] : "");
+			item.date = value;
 		} else {
-			// standard YMD data
-			item.date = Scholar.Utilities.lpad(dateParts[0], "0", 4)+"-"+Scholar.Utilities.lpad(dateParts[1], "0", 2)+"-"+Scholar.Utilities.lpad(dateParts[2], "0", 2);
+			// in the case that we have a year and other data, format that way
+			// explicit radix: without it, legacy engines read "08"/"09" as octal and return 0
+			var month = parseInt(dateParts[1], 10);
+			if(month) {
+				month--;
+			}
+			
+			item.date = Scholar.Utilities.formatDate({year:dateParts[0], 
+				month:month,
+				day:dateParts[2],
+				part:dateParts[3]});
 		}
 	} else if(tag == "N1" || tag == "AB") {
 		// notes