// Scholar for Firefox Utilities // Utilities based on code taken from Piggy Bank 2.1.1 (BSD-licensed) // This code is licensed according to the GPL ///////////////////////////////////////////////////////////////// // // Scholar.Utilities // ///////////////////////////////////////////////////////////////// // Scholar.Utilities class, a set of methods to assist in data // extraction. Some of the code here was stolen directly from the Piggy Bank // project. Scholar.Utilities = function () {} // Adapter for Piggy Bank function to print debug messages; log level is // fixed at 4 (could change this) Scholar.Utilities.prototype.debugPrint = function(msg) { Scholar.debug(msg, 4); } // Appears to trim a string, chopping of newlines/spacing Scholar.Utilities.prototype.trimString = function(s) { var i = 0; var spaceChars = " \n\r\t" + String.fromCharCode(160) /* */; while (i < s.length) { var c = s.charAt(i); if (spaceChars.indexOf(c) < 0) { break; } i++; } s = s.substring(i); i = s.length; while (i > 0) { var c = s.charAt(i - 1); if (spaceChars.indexOf(c) < 0) { break; } i--; } return s.substring(0, i); } /* * BEGIN SCHOLAR FOR FIREFOX EXTENSIONS * Functions below this point are extensions to the utilities provided by * Piggy Bank. When used in external code, the repository will need to add * a function definition when exporting in Piggy Bank format. */ /* * Converts a JavaScript date object to an ISO-style date */ Scholar.Utilities.prototype.dateToISO = function(jsDate) { var date = ""; var year = jsDate.getFullYear().toString(); var month = (jsDate.getMonth()+1).toString(); var day = jsDate.getDate().toString(); for(var i = year.length; i<4; i++) { date += "0"; } date += year+"-"; if(month.length == 1) { date += "0"; } date += month+"-"; if(day.length == 1) { date += "0"; } date += day; return date; } /* * Cleans extraneous punctuation off an author name */ Scholar.Utilities.prototype.cleanAuthor = function(author) { author = author.replace(/^[\s\.\,\/\[\]\:]+/, ''); author = author.replace(/[\s\,\/\[\]\:\.]+$/, ''); author = author.replace(/ +/, ' '); // Add period for initials if(author.substring(author.length-2, author.length-1) == " ") { author += "."; } var splitNames = author.split(', '); if(splitNames.length > 1) { author = splitNames[1]+' '+splitNames[0]; } return author; } /* * Cleans whitespace off a string and replaces multiple spaces with one */ Scholar.Utilities.prototype.cleanString = function(s) { s = s.replace(/[ \xA0]+/g, " "); return this.trimString(s); } /* * Cleans any non-word non-parenthesis characters off the ends of a string */ Scholar.Utilities.prototype.superCleanString = function(x) { var x = x.replace(/^[^\w(]+/, ""); return x.replace(/[^\w)]+$/, ""); } /* * Eliminates HTML tags, replacing
s with /ns */ Scholar.Utilities.prototype.cleanTags = function(x) { x = x.replace(/]*>/gi, "\n"); return x.replace(/<[^>]+>/g, ""); } // These functions are for use by importMARCRecord. They're private, because, // while they are useful, it's also nice if as many of our scrapers as possible // are PiggyBank compatible, and if our scrapers used functions, that would // break compatibility Scholar.Utilities.prototype._MARCCleanString = function(author) { author = author.replace(/^[\s\.\,\/\[\]\:]+/, ''); author = author.replace(/[\s\.\,\/\[\]\:]+$/, ''); return author.replace(/ +/, ' '); } Scholar.Utilities.prototype._MARCCleanNumber = function(author) { author = author.replace(/^[\s\.\,\/\[\]\:]+/, ''); author = author.replace(/[\s\.\,\/\[\]\:]+$/, ''); var regexp = /^[^ ]*/; var m = regexp.exec(author); if(m) { return m[0]; } } Scholar.Utilities.prototype._MARCPullYear = function(text) { var pullRe = /[0-9]+/; var m = pullRe.exec(text); if(m) { return m[0]; } } Scholar.Utilities.prototype._MARCAssociateField = function(record, uri, model, fieldNo, rdfUri, execMe, prefix, part) { if(!part) { part = 'a'; } var field = record.get_field_subfields(fieldNo); Scholar.debug('Found '+field.length+' matches for '+fieldNo+part); if(field) { for(i in field) { var value; for(var j=0; j= 0 && !(addedURLs[href])) { urls.unshift(href); addedURLs[href] = true; } aElement = aElements.iterateNext(); } return urls; } // For now, we're going to skip the getLLsFromAddresses function (which gets // latitude and longitude pairs from a series of addresses, but requires the // big mess of Java code that is the Piggy Bank server) and the geoHelper // tools (which rely on getLLsFromAddresses) since these are probably not // essential components for Scholar and would take a great deal of effort to // implement. We can, however, always implement them later. /* * BEGIN SCHOLAR FOR FIREFOX EXTENSIONS */ /* * Gets a given node (assumes only one value) */ Scholar.Utilities.Ingester.prototype.getNode = function(doc, contextNode, xpath, nsResolver) { return doc.evaluate(xpath, contextNode, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE, null).iterateNext(); } /* * Gets a given node as a string containing all child nodes */ Scholar.Utilities.Ingester.prototype.getNodeString = function(doc, contextNode, xpath, nsResolver) { var elmts = this.gatherElementsOnXPath(doc, contextNode, xpath, nsResolver); var returnVar = ""; for(var i=0; i