From e5404f4938eaec2032a727cb9d19a25fa3f43ec4 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Mon, 4 Sep 2006 17:37:07 +0000 Subject: [PATCH] closes #269, For some COinS pages "could not save item" error --- .../content/scholar/xpcom/ingester.js | 2 +- scrapers.sql | 81 +++++++++++++++---- 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js index eb9fb270a..c90b78ade 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/ingester.js +++ b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -415,7 +415,7 @@ Scholar.OpenURL = new function() { item.creators.push({firstName:value}); } } else if(key == "rft.au") { - item.creators.push(Scholar.cleanAuthor(value, "author", true)); + item.creators.push(Scholar.Utilities.prototype.cleanAuthor(value, "author", true)); } else if(key == "rft.aucorp") { item.creators.push({lastName:value, institutional:true}); } else if(key == "rft.isbn" && !item.ISBN) { diff --git a/scrapers.sql b/scrapers.sql index c058cba1b..f8e4669ae 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 73 +-- 74 -- Set the following timestamp to the most recent scraper update date REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-31 22:44:00')); @@ -2792,7 +2792,7 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006 }', '// used to retrieve next COinS object when asynchronously parsing COinS objects // on a page -function retrieveNextCOinS(needFullItems, newItems, doc) { +function retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc) { if(needFullItems.length) { var item = needFullItems.shift(); @@ -2802,26 +2802,26 @@ function retrieveNextCOinS(needFullItems, newItems, doc) { newItems.push(item); }); search.setHandler("done", function() { - retrieveNextCOinS(needFullItems, newItems, doc); + retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc); }); search.setSearch(item); // look for translators var translators = search.getTranslators(); - if(translators) { + if(translators.length) { search.setTranslator(translators); search.translate(); } else { - retrieveNextCOinS(needFullItems, newItems, doc); + retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc); } } else { - completeCOinS(newItems, doc); + completeCOinS(newItems, couldUseFullItems, doc); Scholar.done(true); } } // saves all COinS objects -function completeCOinS(newItems, doc) { +function completeCOinS(newItems, couldUseFullItems, doc) { if(newItems.length > 1) { var selectArray = new Array(); @@ -2829,21 +2829,67 @@ function completeCOinS(newItems, doc) { selectArray[i] = newItems[i].title; } selectArray = Scholar.selectItems(selectArray); + + var useIndices = new Array(); for(var i in selectArray) { + useIndices.push(i); + } + completeItems(newItems, useIndices, couldUseFullItems); + } else if(newItems.length) { + completeItems(newItems, [0], couldUseFullItems); + } +} + +function completeItems(newItems, useIndices, couldUseFullItems, doc) { + if(!useIndices.length) { + return; + } + var i = useIndices.shift(); + + // grab full item if requested + if(couldUseFullItems[i]) { + Scholar.Utilities.debug("looking up contextObject"); + var search = Scholar.loadTranslator("search"); + + var firstItem = false; + search.setHandler("itemDone", function(obj, newItem) { + if(!firstItem) { + // add doc as attachment + newItem.attachments.push({document:doc}); + newItem.complete(); + firstItem = true; + } + }); + search.setHandler("done", function(obj) { + // call next + completeItems(newItems, useIndices, couldUseFullItems); + }); + + search.setSearch(newItems[i]); + var translators = search.getTranslators(); + if(translators.length) { + search.setTranslator(translators); + search.translate(); + } else { // add doc as attachment newItems[i].attachments.push({document:doc}); - newItems[i].complete(); + // call next + completeItems(newItems, useIndices, couldUseFullItems); } - } else if(newItems.length) { - newItems[0].attachments.push({document:doc}); - newItems[0].complete(); + } else { + // add doc as attachment + newItems[i].attachments.push({document:doc}); + newItems[i].complete(); + // call next + completeItems(newItems, useIndices, couldUseFullItems); } } function doWeb(doc, url) { var newItems = new Array(); var needFullItems = new Array(); + var couldUseFullItems = new Array(); var spanTags = doc.getElementsByTagName("span"); @@ -2855,7 +2901,13 @@ function doWeb(doc, url) { var spanTitle = spanTags[i].getAttribute("title"); var newItem = new Scholar.Item(); if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) { - if(newItem.title && newItem.creators.length) { + if(newItem.title) { + if(!newItem.creators.length) { + // if we have a title but little other identifying + // information, say we''ll get full item later + couldUseFullItems[newItems.length] = true; + } + // title and creators are minimum data to avoid looking up newItems.push(newItem); } else { @@ -2868,12 +2920,13 @@ function doWeb(doc, url) { } } + Scholar.Utilities.debug(needFullItems); if(needFullItems.length) { // retrieve full items asynchronously Scholar.wait(); - retrieveNextCOinS(needFullItems, newItems, doc); + retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc); } else { - completeCOinS(newItems, doc); + completeCOinS(newItems, couldUseFullItems, doc); } }');