// Scholar for Firefox Utilities
// Utilities based on code taken from Piggy Bank 2.1.1 (BSD-licensed)
// This code is licensed according to the GPL
/////////////////////////////////////////////////////////////////
//
// Scholar.Utilities
//
/////////////////////////////////////////////////////////////////
// Scholar.Utilities class: a collection of helper methods that assist with
// data extraction. Some of the code here was taken directly from the Piggy
// Bank project. The constructor takes no arguments and sets up no state.
Scholar.Utilities = function() {};
// Adapter for the Piggy Bank function that prints debug messages. The
// message is forwarded to Scholar.debug at a fixed log level of 4 (this
// could be made configurable).
Scholar.Utilities.prototype.debugPrint = function(msg) {
	var logLevel = 4;
	Scholar.debug(msg, logLevel);
};
// Trims a string: chops spaces, newlines, carriage returns, tabs and
// non-breaking spaces (character code 160) off both ends. No other
// characters are treated as whitespace, and interior whitespace is kept.
Scholar.Utilities.prototype.trimString = function(s) {
	var whitespace = " \n\r\t\xA0";
	var start = 0;
	var end = s.length;
	
	// advance past leading whitespace
	while(start < end && whitespace.indexOf(s.charAt(start)) >= 0) {
		start++;
	}
	// back up over trailing whitespace, never crossing the start index
	while(end > start && whitespace.indexOf(s.charAt(end - 1)) >= 0) {
		end--;
	}
	
	return s.substring(start, end);
};
/*
* BEGIN SCHOLAR FOR FIREFOX EXTENSIONS
* Functions below this point are extensions to the utilities provided by
* Piggy Bank. When used in external code, the repository will need to add
* a function definition when exporting in Piggy Bank format.
*/
/*
 * Converts a JavaScript date object to an ISO-style date (YYYY-MM-DD),
 * built from the object's getFullYear/getMonth/getDate values. The year is
 * zero-padded to four characters, the month and day to two.
 */
Scholar.Utilities.prototype.dateToISO = function(jsDate) {
	// left-pads text with zeros until it is at least width characters long
	var zeroPad = function(text, width) {
		while(text.length < width) {
			text = "0" + text;
		}
		return text;
	};
	
	var parts = [
		zeroPad(jsDate.getFullYear().toString(), 4),
		// getMonth() is zero-based, so shift it to 1-12
		zeroPad((jsDate.getMonth()+1).toString(), 2),
		zeroPad(jsDate.getDate().toString(), 2)
	];
	return parts.join("-");
};
/*
 * Cleans extraneous punctuation off an author name
 *
 * Strips whitespace and the characters . , / [ ] : from both ends, collapses
 * every run of spaces into a single space, appends a period when the name
 * ends in a lone character preceded by a space (an initial), and rewrites
 * "Last, First" as "First Last". Only the first two ", "-separated parts are
 * used when swapping.
 */
Scholar.Utilities.prototype.cleanAuthor = function(author) {
	author = author.replace(/^[\s\.\,\/\[\]\:]+/, '');
	author = author.replace(/[\s\,\/\[\]\:\.]+$/, '');
	// the g flag matters here: without it only the first run of spaces is
	// touched, so "John A.  Smith" would keep its double space
	author = author.replace(/ +/g, ' ');
	// Add period for initials
	if(author.substring(author.length-2, author.length-1) == " ") {
		author += ".";
	}
	var splitNames = author.split(', ');
	if(splitNames.length > 1) {
		author = splitNames[1]+' '+splitNames[0];
	}
	return author;
}
/*
 * Collapses each run of spaces and non-breaking spaces into a single space,
 * then trims whitespace off both ends with trimString
 */
Scholar.Utilities.prototype.cleanString = function(s) {
	var collapsed = s.replace(/[ \xA0]+/g, " ");
	return this.trimString(collapsed);
};
/*
 * Cleans any non-word non-parenthesis characters off the ends of a string:
 * everything before the first word character or "(" is dropped, as is
 * everything after the last word character or ")"
 */
Scholar.Utilities.prototype.superCleanString = function(x) {
	var leadingJunk = /^[^\w(]+/;
	var trailingJunk = /[^\w)]+$/;
	return x.replace(leadingJunk, "").replace(trailingJunk, "");
};
/*
 * Eliminates HTML tags, replacing <br>s with \ns
 */
Scholar.Utilities.prototype.cleanTags = function(x) {
	// turn line-break tags (<br>, <BR/>, <br clear="all">, ...) into newlines
	// first, so the breaks survive the generic tag removal below
	x = x.replace(/<br[^>]*>/gi, "\n");
	// drop every remaining tag, keeping the text between tags
	return x.replace(/<[^>]+>/g, "");
}
// These functions are for use by importMARCRecord. They're private, because,
// while they are useful, it's also nice if as many of our scrapers as possible
// are PiggyBank compatible, and if our scrapers used these functions, that
// would break compatibility
// Strips whitespace and the characters . , / [ ] : off both ends of a
// string, then collapses every run of spaces into a single space
Scholar.Utilities.prototype._MARCCleanString = function(author) {
	author = author.replace(/^[\s\.\,\/\[\]\:]+/, '');
	author = author.replace(/[\s\.\,\/\[\]\:]+$/, '');
	// the g flag is needed so that all runs of spaces are collapsed, not
	// just the first one found
	return author.replace(/ +/g, ' ');
}
// Strips whitespace and the characters . , / [ ] : off both ends of a
// string, then returns everything up to (but not including) the first space
Scholar.Utilities.prototype._MARCCleanNumber = function(author) {
	var trimmed = author.replace(/^[\s\.\,\/\[\]\:]+/, '');
	trimmed = trimmed.replace(/[\s\.\,\/\[\]\:]+$/, '');
	
	var firstToken = /^[^ ]*/.exec(trimmed);
	return firstToken ? firstToken[0] : undefined;
};
// Returns the first run of decimal digits found in text, or undefined when
// text contains no digits
Scholar.Utilities.prototype._MARCPullYear = function(text) {
	var digits = /[0-9]+/.exec(text);
	return digits ? digits[0] : undefined;
};
// Looks up the subfields of field fieldNo on record (via
// record.get_field_subfields), logs how many were found, and then iterates
// over them. The rest of the body is not recoverable from this text (see the
// note further down), so the roles of uri, model, rdfUri, execMe and prefix
// cannot be documented from here.
//
// part defaults to 'a' when it is omitted or otherwise falsy.
//
// NOTE(review): field.length is read in the debug call before the if(field)
// check, so a null/undefined result would throw before the check is reached.
// NOTE(review): the loop variable i is used without var and so is not local
// to this function.
Scholar.Utilities.prototype._MARCAssociateField = function(record, uri, model, fieldNo, rdfUri, execMe, prefix, part) {
if(!part) {
part = 'a';
}
var field = record.get_field_subfields(fieldNo);
Scholar.debug('Found '+field.length+' matches for '+fieldNo+part);
if(field) {
for(i in field) {
var value;
// NOTE(review): the next line is damaged. The inner loop header stops after
// "j" and is fused with the tail of what appears to be a different function:
// one that prepends each not-yet-seen href to urls, records it in addedURLs,
// advances through aElements with iterateNext() and returns urls. The text
// in between is missing and must be restored from the original file before
// this code can parse.
for(var j=0; j= 0 && !(addedURLs[href])) {
urls.unshift(href);
addedURLs[href] = true;
}
aElement = aElements.iterateNext();
}
return urls;
}
// For now, we're going to skip the getLLsFromAddresses function (which gets
// latitude and longitude pairs from a series of addresses, but requires the
// big mess of Java code that is the Piggy Bank server) and the geoHelper
// tools (which rely on getLLsFromAddresses) since these are probably not
// essential components for Scholar and would take a great deal of effort to
// implement. We can, however, always implement them later.
/*
* BEGIN SCHOLAR FOR FIREFOX EXTENSIONS
*/
/*
 * Gets a given node (assumes only one value): evaluates xpath against
 * contextNode and returns the first item yielded by the result iterator
 */
Scholar.Utilities.Ingester.prototype.getNode = function(doc, contextNode, xpath, nsResolver) {
	var resultType = Components.interfaces.nsIDOMXPathResult.ANY_TYPE;
	var matches = doc.evaluate(xpath, contextNode, nsResolver, resultType, null);
	return matches.iterateNext();
};
/*
* Gets a given node as a string containing all child nodes
*/
Scholar.Utilities.Ingester.prototype.getNodeString = function(doc, contextNode, xpath, nsResolver) {
var elmts = this.gatherElementsOnXPath(doc, contextNode, xpath, nsResolver);
var returnVar = "";
for(var i=0; i