From 3890e5f1228fd88cd2fb315e30232f88f2c84a32 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Thu, 22 Jun 2006 15:50:46 +0000 Subject: [PATCH] - Made ingester automatically create hidden browser objects, given a window object. This should make things much easier for both David and me. - Multiple item detection code is now a part of the scraperJavaScript, rather than the scrapeDetectCode, and code to choose which items to add is part of Scholar.Ingester.Utilities, accessible from inside scrapers. The alternative approach would result in one request (or, in the case of JSTOR, three requests) per new item, while in some cases (e.g. Voyager) only one request is necessary to get all of the items. --- .../content/scholar/ingester/browser.js | 27 ++--- .../content/scholar/ingester/browser.xul | 6 - .../content/scholar/ingester/selectitems.js | 12 +- .../content/scholar/xpcom/ingester.js | 108 ++++++++++++++---- scrapers.sql | 51 ++------- 5 files changed, 109 insertions(+), 95 deletions(-) diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js index 8813c3582..9f28a83b2 100644 --- a/chrome/chromeFiles/content/scholar/ingester/browser.js +++ b/chrome/chromeFiles/content/scholar/ingester/browser.js @@ -35,7 +35,6 @@ Scholar_Ingester_Interface.init = function() { */ Scholar_Ingester_Interface.chromeLoad = function() { Scholar_Ingester_Interface.tabBrowser = document.getElementById("content"); - Scholar_Ingester_Interface.hiddenBrowser = document.getElementById("scholar-hidden-browser"); Scholar_Ingester_Interface.appContent = document.getElementById("appcontent"); Scholar_Ingester_Interface.statusImage = document.getElementById("scholar-status-image"); @@ -61,21 +60,11 @@ Scholar_Ingester_Interface.chromeUnload = function() { Scholar_Ingester_Interface.scrapeThisPage = function() { var documentObject = Scholar_Ingester_Interface._getDocument(Scholar_Ingester_Interface.tabBrowser.selectedBrowser); if(documentObject.scraper) { - if(documentObject.scrapeURLList) { - // In the case that there are multiple scrapable URLs, make the user choose - Scholar_Ingester_Interface.chooseURL(documentObject); - } Scholar_Ingester_Interface.scrapeProgress = new Scholar_Ingester_Interface.Progress(window, Scholar_Ingester_Interface.tabBrowser.selectedBrowser.contentDocument, Scholar.getString("ingester.scraping")); documentObject.scrapePage(Scholar_Ingester_Interface._finishScraping); } } -Scholar_Ingester_Interface.chooseURL = function(documentObject) { - Scholar.debug("chooseURL called"); - var newDialog = window.openDialog("chrome://scholar/content/ingester/selectitems.xul", - "_blank","chrome,modal,centerscreen,resizable=yes", documentObject); -} - /* * Updates the status of the capture icon to reflect the scrapability or lack * thereof of the current page @@ -182,7 +171,7 @@ Scholar_Ingester_Interface._setDocument = function(browser) { browser.setAttribute("scholar-key", key); } } - Scholar_Ingester_Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser, Scholar_Ingester_Interface.hiddenBrowser); + Scholar_Ingester_Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser, window); Scholar_Ingester_Interface.browserDocuments[key].retrieveScraper(); } @@ -203,7 +192,7 @@ Scholar_Ingester_Interface._deleteDocument = function(browser) { /* * Callback to be executed when scraping is complete */ -Scholar_Ingester_Interface._finishScraping = function(obj) { +Scholar_Ingester_Interface._finishScraping = function(obj, returnValue) { if(obj.items.length) { try { // Encased in a try block to fix a as-of-yet unresolved issue var item1 = obj.items[0]; @@ -243,12 +232,14 @@ Scholar_Ingester_Interface._finishScraping = function(obj) { for(i in obj.items) { obj.items[i].save(); } + setTimeout(function() { Scholar_Ingester_Interface.scrapeProgress.fade() }, 2000); + } else if(returnValue) { + Scholar_Ingester_Interface.scrapeProgress.kill(); } else { Scholar_Ingester_Interface.scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeError")); Scholar_Ingester_Interface.scrapeProgress.addDescription(Scholar.getString("ingester.scrapeErrorDescription")); + setTimeout(function() { Scholar_Ingester_Interface.scrapeProgress.fade() }, 2000); } - - setTimeout(function() { Scholar_Ingester_Interface.scrapeProgress.fade() }, 2000); } ////////////////////////////////////////////////////////////////////////////// @@ -333,7 +324,6 @@ Scholar_Ingester_Interface.Progress.prototype.addDescription = function(descript this.table.appendChild(tr); } - Scholar_Ingester_Interface.Progress.prototype.fade = function() { // Icky, icky hack to keep objects var me = this; @@ -349,3 +339,8 @@ Scholar_Ingester_Interface.Progress.prototype.fade = function() { // Begin fade this._fader(); } + +Scholar_Ingester_Interface.Progress.prototype.kill = function() { + this.div.style.display = 'none'; +} + diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.xul b/chrome/chromeFiles/content/scholar/ingester/browser.xul index b9b230b1e..ab025cc5a 100755 --- a/chrome/chromeFiles/content/scholar/ingester/browser.xul +++ b/chrome/chromeFiles/content/scholar/ingester/browser.xul @@ -12,10 +12,4 @@ - - - - - - diff --git a/chrome/chromeFiles/content/scholar/ingester/selectitems.js b/chrome/chromeFiles/content/scholar/ingester/selectitems.js index 4f78d6389..e09f30b3e 100644 --- a/chrome/chromeFiles/content/scholar/ingester/selectitems.js +++ b/chrome/chromeFiles/content/scholar/ingester/selectitems.js @@ -19,26 +19,26 @@ Scholar_Ingester_Interface_SelectItems = function() {} * loading */ Scholar_Ingester_Interface_SelectItems.init = function() { - this.documentObject = window.arguments[0]; + this.io = window.arguments[0]; + this.Scholar_Ingester_Interface = window.arguments[1]; this.listbox = document.getElementById("scholar-selectitems-links"); - for(i in this.documentObject.scrapeURLList) { // we could use a tree for this if we wanted to + for(i in this.io.dataIn) { // we could use a tree for this if we wanted to var itemNode = document.createElement("listitem"); itemNode.setAttribute("type", "checkbox"); itemNode.setAttribute("value", i); - itemNode.setAttribute("label", this.documentObject.scrapeURLList[i]); + itemNode.setAttribute("label", this.io.dataIn[i]); itemNode.setAttribute("checked", false); this.listbox.appendChild(itemNode); } } Scholar_Ingester_Interface_SelectItems.acceptSelection = function() { - // clear scrapeURLList - this.documentObject.scrapeURLList = new Object(); + this.io.dataOut = new Object(); // collect scrapeURLList from listbox for(var i=0; i