diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
new file mode 100644
index 000000000..a88414d51
--- /dev/null
+++ b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -0,0 +1,140 @@
+// Firefox Scholar Ingester Browser Functions
+// Utilities based on code taken from Greasemonkey
+// This code is licensed according to the GPL
+
+// Prepare the browser and collector instrumentation caches --------------------
+Scholar.Ingester.Interface = function() {}
+
+Scholar.Ingester.Interface.init = function() {
+ Scholar.Ingester.Interface.browsers = new Array();
+
+ window.addEventListener("load", Scholar.Ingester.Interface.chromeLoad, false);
+ window.addEventListener("unload", Scholar.Ingester.Interface.chromeUnload, false);
+
+ Scholar.Ingester.Interface.browsers = new Array();
+ Scholar.Ingester.Interface.browserDocuments = new Object();
+}
+
+Scholar.Ingester.Interface.chromeLoad = function() {
+ Scholar.Ingester.Interface.tabBrowser = document.getElementById("content");
+ Scholar.Ingester.Interface.appContent = document.getElementById("appcontent");
+ Scholar.Ingester.Interface.statusImage = document.getElementById("scholar-status-image");
+
+ // this gives us onLocationChange
+ Scholar.Ingester.Interface.tabBrowser.addProgressListener(Scholar.Ingester.Interface.Listener,
+ Components.interfaces.nsIWebProgress.NOTIFY_LOCATION);
+ // this gives us DOMContentLoaded
+ Scholar.Ingester.Interface.appContent.addEventListener("DOMContentLoaded",
+ Scholar.Ingester.Interface.contentLoad, true);
+}
+
+Scholar.Ingester.Interface.chromeUnload = function() {
+ this.tabBrowser.removeProgressListener(this);
+}
+
+Scholar.Ingester.Interface.getDocument = function(browser) {
+ try {
+ var key = browser.getAttribute("scholar-key");
+ if(Scholar.Ingester.Interface.browserDocuments[key]) {
+ return Scholar.Ingester.Interface.browserDocuments[key];
+ }
+ } finally {}
+ return false;
+}
+
+Scholar.Ingester.Interface.setDocument = function(browser) {
+ try {
+ var key = browser.getAttribute("scholar-key");
+ } finally {
+ if(!key) {
+ var key = (new Date()).getTime();
+ browser.setAttribute("scholar-key", key);
+ }
+ }
+ Scholar.Ingester.Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser);
+ Scholar.Ingester.Interface.browserDocuments[key].retrieveScraper();
+}
+
+Scholar.Ingester.Interface.deleteDocument = function(browser) {
+ try {
+ var key = browser.getAttribute("scholar-key");
+ if(Scholar.Ingester.Interface.browserDocuments[key]) {
+ delete Scholar.Ingester.Interface.browserDocuments[key];
+ return true;
+ }
+ } finally {}
+ return false;
+}
+
+Scholar.Ingester.Interface.scrapeThisPage = function() {
+ var document = Scholar.Ingester.Interface.getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
+ if(document.scraper) {
+ document.scrapePage();
+ }
+}
+
+Scholar.Ingester.Interface.updateStatus = function(browser) {
+ var document = Scholar.Ingester.Interface.getDocument(browser);
+ if(document && document.scraper) {
+ this.statusImage.src = "chrome://scholar/skin/capture_colored.png";
+ } else {
+ this.statusImage.src = "chrome://scholar/skin/capture_gray.png";
+ }
+}
+
+Scholar.Ingester.Interface.contentLoad = function() {
+ Scholar.Ingester.Interface.setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
+ Scholar.Ingester.Interface.updateStatus(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
+}
+
+Scholar.Ingester.Interface.Listener = function() {}
+Scholar.Ingester.Interface.Listener.onStatusChange = function() {}
+Scholar.Ingester.Interface.Listener.onSecurityChange = function() {}
+Scholar.Ingester.Interface.Listener.onProgressChange = function() {}
+Scholar.Ingester.Interface.Listener.onStateChange = function() {}
+Scholar.Ingester.Interface.Listener.onLocationChange = function() {
+ var browsers = Scholar.Ingester.Interface.tabBrowser.browsers;
+
+ // Remove document object of any browser that no longer exists
+ for (var i = 0; i < Scholar.Ingester.Interface.browsers.length; i++) {
+ var browser = Scholar.Ingester.Interface.browsers[i];
+ var exists = false;
+
+ for (var j = 0; j < browsers.length; j++) {
+ if (browser == browsers[j]) {
+ exists = true;
+ break;
+ }
+ }
+
+ if (!exists) {
+ Scholar.Ingester.Interface.browsers.splice(i,1);
+
+ // To execute if document object does not exist
+ Scholar.Ingester.Interface.deleteDocument(browser);
+ }
+ }
+
+ /*// Add a collector to any new browser
+ for (var i = 0; i < browsers.length; i++) {
+ var browser = browsers[i];
+ var exists = false;
+
+ for (var j = 0; j < Scholar.Ingester.Interface.browsers.length; j++) {
+ if (browser == Scholar.Ingester.Interface.browsers[j]) {
+ exists = true;
+ break;
+ }
+ }
+
+ if (!exists) {
+ Scholar.Ingester.Interface.browsers.splice(i,0,browser);
+
+ // To execute if window is new
+ }
+ }*/
+
+ Scholar.Ingester.Interface.updateStatus(
+ Scholar.Ingester.Interface.tabBrowser.selectedBrowser
+ );
+}
\ No newline at end of file
diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.xul b/chrome/chromeFiles/content/scholar/ingester/browser.xul
new file mode 100755
index 000000000..150b3c550
--- /dev/null
+++ b/chrome/chromeFiles/content/scholar/ingester/browser.xul
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/chrome/chromeFiles/content/scholar/ingester/ingester.js b/chrome/chromeFiles/content/scholar/ingester/ingester.js
new file mode 100644
index 000000000..7295c68c1
--- /dev/null
+++ b/chrome/chromeFiles/content/scholar/ingester/ingester.js
@@ -0,0 +1,473 @@
+// Firefox Scholar Ingester
+// Utilities based on code taken from Piggy Bank 2.1.1 (BSD-licensed)
+// This code is licensed according to the GPL
+
+Scholar.Ingester = new function() {} // root ingester object; Model, Utilities and Document are attached to it below
+
+/////////////////////////////////////////////////////////////////
+//
+// Scholar.Ingester.Model
+//
+/////////////////////////////////////////////////////////////////
+
+// Scholar.Ingester.Model, an object representing an RDF data model with
+// methods to add to that model. In Piggy Bank, this was implemented in Java,
+// but we don't really want an enormous web server running with FS, and we
+// don't actually need one, so this implementation is much simpler.
+//
+// The Java version of this class can be viewed at
+// http://simile.mit.edu/repository/piggy-bank/trunk/src/java/edu/mit/simile/piggyBank/WorkingModel.java
+Scholar.Ingester.Model = function() {
+ this.data = new Object();
+}
+
+// Piggy Bank provides a fourth argument, one that determines if the third
+// argument is a literal or an RDF URI. Since our ontologies are
+// sufficiently restricted, we have no chance of confusing a literal and an
+// RDF URI and thus this is unnecessary.
+Scholar.Ingester.Model.prototype.addStatement = function(uri, rdfUri, literal) {
+ if(!this.data[uri]) this.data[uri] = new Object();
+ this.data[uri][rdfUri] = literal;
+ Scholar.debug(rdfUri+" for "+uri+" is "+literal);
+}
+
+// Compatibility stubs: these methods accept any arguments and do nothing.
+// It is not known whether any scraper actually calls them; they are defined
+// only so that such a call does not throw an exception.
+Scholar.Ingester.Model.prototype.addTag = function() {}
+Scholar.Ingester.Model.prototype.getRepository = function() {}
+Scholar.Ingester.Model.prototype.detachRepository = function() {}
+
+/////////////////////////////////////////////////////////////////
+//
+// Scholar.Ingester.Utilities
+//
+/////////////////////////////////////////////////////////////////
+// Scholar.Ingester.Utilities: a set of helper methods that assist with data
+// extraction. Most of the code here was adapted from the Piggy Bank project.
+Scholar.Ingester.Utilities = function() {} // empty constructor; the helpers are added to its prototype below
+
+// Adapter for Piggy Bank function to print debug messages; log level is
+// fixed at 4 (could change this)
+Scholar.Ingester.Utilities.prototype.debugPrint = function(msg) {
+ Scholar.debug(msg, 4);
+}
+
+// Appears to trim a string, chopping of newlines/spacing
+Scholar.Ingester.Utilities.prototype.trimString = function(s) {
+ var i = 0;
+ var spaceChars = " \n\r\t" + String.fromCharCode(160) /* */;
+ while (i < s.length) {
+ var c = s.charAt(i);
+ if (spaceChars.indexOf(c) < 0) {
+ break;
+ }
+ i++;
+ }
+
+ s = s.substring(i);
+
+ i = s.length;
+ while (i > 0) {
+ var c = s.charAt(i - 1);
+ if (spaceChars.indexOf(c) < 0) {
+ break;
+ }
+ i--;
+ }
+
+ return s.substring(0, i);
+}
+
+// Takes an XPath query and returns the results
+Scholar.Ingester.Utilities.prototype.gatherElementsOnXPath = function(doc, parentNode, xpath, nsResolver) {
+ var elmts = [];
+
+ var iterator = doc.evaluate(xpath, parentNode, nsResolver, XPathResult.ANY_TYPE,null);
+ var elmt = iterator.iterateNext();
+ var i = 0;
+ while (elmt) {
+ elmts[i++] = elmt;
+ elmt = iterator.iterateNext();
+ }
+ return elmts;
+}
+
+// Loads a single document for a scraper, running succeeded() on success or
+// failed() on failure
+Scholar.Ingester.Utilities.prototype.loadDocument = function(url, browser, succeeded, failed) {
+ this.processDocuments(browser, null, [ url ], succeeded, function() {}, failed);
+}
+
+// Downloads and processes documents with processor()
+// browser - a browser object
+// firstDoc - the first document to process with the processor (if null,
+// first document is processed without processor)
+// urls - an array of URLs to load
+// processor - a function to execute to process each document
+// done - a function to execute when all document processing is complete
+// exception - a function to execute if an exception occurs (exceptions are
+// also logged in the Firefox Scholar log)
+Scholar.Ingester.Utilities.prototype.processDocuments = function(browser, firstDoc, urls, processor, done, exception) {
+ try {
+ if (urls.length == 0) {
+ if (firstDoc) {
+ processor(firstDoc, done);
+ } else {
+ done();
+ }
+ return;
+ }
+
+ var urlIndex = -1;
+ var doLoad = function() {
+ urlIndex++;
+ if (urlIndex < urls.length) {
+ try {
+ var url = urls[urlIndex];
+ var b = Scholar.Ingester.progressDialog.document.getElementById("hidden-browser");
+ b.loadURI(url);
+ } catch (e) {
+ exception(e);
+ Scholar.debug("Scholar.Ingester.Utilities.processDocuments doLoad: " + e, 2);
+ }
+ } else {
+ window.setTimeout(done, 10);
+ }
+ };
+ var onLoad = function() {
+ try {
+ var b = Scholar.Ingester.progressDialog.document.getElementById("hidden-browser").selectedBrowser;
+ processor(b.contentDocument, doLoad);
+ } catch (e) {
+ exception(e);
+ Scholar.debug("Scholar.Ingester.Utilities.processDocuments onLoad: " + e, 2);
+ }
+ };
+ var init = function() {
+ var listener;
+ listener.onStateChange = function(webProgress, request, stateFlags, status) {
+ if ((stateFlags & Components.interfaces.nsIWebProgressListener.STATE_STOP) > 0 &&
+ request.name == urls[urlIndex]) {
+ try {
+ Scholar.Ingester.progressDialog.setTimeout(onLoad, 10);
+ } catch (e) {
+ exception(e);
+ Scholar.debug("Scholar.Ingester.Utilities.processDocuments onLocationChange: " + e, 2);
+ }
+ }
+ };
+
+ var tb = Scholar.Ingester.progressDialog.document.getElementById("hidden-browser");
+ tb.addProgressListener(listener, Components.interfaces.nsIWebProgress.NOTIFY_STATUS);
+
+ if (firstDoc) {
+ processor(firstDoc, doLoad);
+ } else {
+ doLoad();
+ }
+ }
+
+ w.addEventListener("load", init, false);
+ } catch (e) {
+ exception(e);
+ PB_Debug.print("processDocuments: " + e);
+ }
+}
+
+// Appears to look for links in a document containing a certain substring
+Scholar.Ingester.Utilities.prototype.collectURLsWithSubstring = function(doc, substring) {
+ var urls = [];
+ var addedURLs = [];
+
+ var aElements = doc.evaluate("//a", doc, null, XPathResult.ANY_TYPE,null);
+ var aElement = aElements.iterateNext();
+ while (aElement) {
+ var href = aElement.href;
+ if (href.indexOf(substring) >= 0 && !(addedURLs[href])) {
+ urls.unshift(href);
+ addedURLs[href] = true;
+ }
+ aElement = aElements.iterateNext();
+ }
+ return urls;
+}
+
+// For now, we're going to skip the getLLsFromAddresses function (which gets
+// latitude and longitude pairs from a series of addresses, but requires the
+// big mess of Java code that is the Piggy Bank server) and the geoHelper
+// tools (which rely on getLLsFromAddresses) since these are probably not
+// essential components for Scholar and would take a great deal of effort to
+// implement. We can, however, always implement them later.
+
+// HTTPUtilities: simple front-ends for XMLHttpRequest. They are a component
+// of the Piggy Bank API, so they are implemented here as well.
+Scholar.Ingester.Utilities.HTTPUtilities = function() {}
+
+Scholar.Ingester.Utilities.HTTPUtilities.prototype.doGet = function(url, onStatus, onDone) {
+ var xmlhttp = new XMLHttpRequest();
+
+ xmlhttp.open('GET', url, true);
+ xmlhttp.overrideMimeType("text/xml");
+ xmlhttp.onreadystatechange = function() {
+ Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone);
+ };
+ xmlhttp.send(null);
+}
+
+Scholar.Ingester.Utilities.HTTPUtilities.prototype.doPost = function(url, body, onStatus, onDone) {
+ var xmlhttp = new XMLHttpRequest();
+
+ xmlhttp.open('POST', url, true);
+ xmlhttp.overrideMimeType("text/xml");
+ xmlhttp.onreadystatechange = function() {
+ Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone);
+ };
+ xmlhttp.send(body);
+}
+
+Scholar.Ingester.Utilities.HTTPUtilities.prototype.doOptions = function(url, body, onStatus, onDone) {
+ var xmlhttp = new XMLHttpRequest();
+
+ xmlhttp.open('OPTIONS', url, true);
+ xmlhttp.overrideMimeType("text/xml");
+ xmlhttp.onreadystatechange = function() {
+ Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone);
+ };
+ xmlhttp.send(body);
+}
+
+// Possible point of failure; for some reason, this used to be a separate
+// class, so make sure it works
+Scholar.Ingester.Utilities.HTTPUtilities.prototype.stateChange = function(xmlhttp, onStatus, onDone) {
+ switch (xmlhttp.readyState) {
+
+ // Request not yet made
+ case 1:
+ break;
+
+ // Contact established with server but nothing downloaded yet
+ case 2:
+ try {
+ // Check for HTTP status 200
+ if (xmlhttp.status != 200) {
+ if (onStatus) {
+ onStatus(
+ xmlhttp.status,
+ xmlhttp.statusText,
+ xmlhttp
+ );
+ xmlhttp.abort();
+ }
+ }
+ } catch (e) {
+ Scholar.debug(e, 2);
+ }
+ break;
+
+ // Called multiple while downloading in progress
+ case 3:
+ break;
+
+ // Download complete
+ case 4:
+ try {
+ if (onDone) {
+ onDone(xmlhttp.responseText, xmlhttp);
+ }
+ } catch (e) {
+ Scholar.debug(e, 2);
+ }
+ break;
+ }
+}
+//////////////////////////////////////////////////////////////////////////////
+//
+// Scholar.Ingester.Document
+//
+//////////////////////////////////////////////////////////////////////////////
+
+/* Public properties:
+ * browser - browser window object of document
+ * model - data model for semantic scrapers
+ * scraper - best scraper to use to scrape page
+ *
+ * Private properties:
+ * _sandbox - sandbox for code execution
+ * _progressDialog - dialog showing scrape progress
+ */
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Public Scholar.Ingester.Document methods
+//
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Constructor for Document object
+ *
+ * browserWindow - stored as this.browser; retrieveScraper() reads the page
+ *                 through this.browser.contentDocument
+ *
+ * Starts with no scraper selected and an empty data model, then calls
+ * this._generateSandbox() (defined outside the part of the file shown here).
+ */
+Scholar.Ingester.Document = function(browserWindow){
+ this.browser = browserWindow;
+ this.scraper = null
+ this.model = new Scholar.Ingester.Model();
+ this._generateSandbox();
+}
+
+/*
+ * Retrieves the best scraper to scrape a given page
+ */
+Scholar.Ingester.Document.prototype.retrieveScraper = function() {
+ Scholar.debug("Retrieving scrapers for "+this.browser.contentDocument.location.href);
+ var sql = 'SELECT * FROM scrapers ORDER BY scraperDetectCode IS NULL DESC';
+ var scrapers = Scholar.DB.query(sql);
+ for(var i=0; i
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/chrome/chromeFiles/skin/default/scholar/capture_colored.png b/chrome/chromeFiles/skin/default/scholar/capture_colored.png
new file mode 100644
index 000000000..fedb86584
Binary files /dev/null and b/chrome/chromeFiles/skin/default/scholar/capture_colored.png differ
diff --git a/chrome/chromeFiles/skin/default/scholar/capture_gray.png b/chrome/chromeFiles/skin/default/scholar/capture_gray.png
new file mode 100644
index 000000000..a100833ed
Binary files /dev/null and b/chrome/chromeFiles/skin/default/scholar/capture_gray.png differ