From f07cb5a5bcfc9664fd29c309a565ee8d2011e852 Mon Sep 17 00:00:00 2001
From: Simon Kornblith
Date: Sat, 26 Aug 2006 03:50:15 +0000
Subject: [PATCH] adds an InfoTrac OneFile translator fixes a bug in ingester progress window handling

* browser.js: fade() only schedules the timeout when _windowLoaded or
  _windowLoading is set.
* scrapers.sql: bumps the version comment to 56; adds the InfoTrac OneFile
  translator; lets the pqdweb link regexp match Fmt=1/Fmt=2 at the end of
  the URL; the RIS importer now strips leading whitespace from each line
  and records UR/L1/L2/L4 values as attachments.
---
 .../content/scholar/ingester/browser.js       |   4 +-
 scrapers.sql                                  | 151 +++++++++++++++++-
 2 files changed, 146 insertions(+), 9 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
index 354a66bf1..5c5aca447 100644
--- a/chrome/chromeFiles/content/scholar/ingester/browser.js
+++ b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -421,7 +421,9 @@ Scholar_Ingester_Interface.Progress = new function() {
 	}
 
 	function fade() {
-		setTimeout(_timeout, 2500);
+		if(_windowLoaded || _windowLoading) {
+			setTimeout(_timeout, 2500);
+		}
 	}
 
 	function kill() {
diff --git a/scrapers.sql b/scrapers.sql
index 6d7a6085b..b0ece078a 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,4 +1,4 @@
--- 55
+-- 56
 
 -- Set the following timestamp to the most recent scraper update date
 REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
@@ -1285,7 +1285,7 @@ function doWeb(doc, url) {
 
 		// Require link to match this
 		var tagRegexp = new RegExp();
-		tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12][^0-9]|(?:.*&)Fmt=[12][^0-9].*&did=)'');
+		tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12](?:[^0-9]|$)|(?:.*&)Fmt=[12][^0-9].*&did=)'');
 
 		var tableRows = doc.evaluate(''//tr[@class="rowUnMarked"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
 
@@ -1476,6 +1476,125 @@ function doWeb(doc, url) {
 	}
 }');
 
+REPLACE INTO "translators" VALUES ('63c25c45-6257-4985-9169-35b785a2995e', '2006-08-24 14:11:00', 4, 'InfoTrac OneFile', 'Simon Kornblith', '^https?://[^/]+/itx/(?:[a-z]+Search|retrieve|paginate|tab)\.do',
+'function detectWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	if(doc.evaluate(''//img[@alt="Thomson Gale"]'', doc, nsResolver,
+			XPathResult.ANY_TYPE, null).iterateNext()) {
+		if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
+				doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+			return "multiple";
+		} else {
+			return "journalArticle";
+		}
+	}
+}',
+'function infoTracRIS(text) {
+	// load translator for RIS
+	var translator = Scholar.loadTranslator("import");
+	translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
+	translator.setString(text);
+	translator.setHandler("itemDone", function(obj, item) {
+		if(item.notes && item.notes[0]) {
+			item.extra = item.notes[0].note;
+
+			delete item.notes;
+			item.notes = undefined;
+		}
+
+		// get underscored terms (term headings?) out of tags
+		for(var i in item.tags) {
+			var index = item.tags[i].indexOf("_");
+			if(index != -1) {
+				item.tags[i] = item.tags[i].substr(0, index);
+			}
+		}
+
+		// add names to attachments
+		for(var i in item.attachments) {
+			if(!item.attachments[i].title) {
+				item.attachments[i] = undefined;
+			} else {
+				item.attachments[i].title = "InfoTrac OneFile "+item.attachments[i].title;
+			}
+		}
+
+		//item.attachments = newAttachments.shift();
+		//Scholar.Utilities.debug(item.attachments);
+		item.complete();
+	});
+	translator.translate();
+	Scholar.done();
+}
+
+function readEncoded(url) {
+	var newArray = new Array();
+
+	var parts = url.split(/[?&]/);
+	for each(var part in parts) {
+		var index = part.indexOf("=");
+		if(index !== -1) {
+			newArray[part.substr(0, index)] = part.substr(index+1);
+		}
+	}
+
+	return newArray;
+}
+
+function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var hostRe = new RegExp("^https?://[^/]+/");
+	var host = hostRe.exec(doc.location.href)[0];
+
+	if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
+			doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+		var items = Scholar.Utilities.getItemArray(doc, doc, ''^https?://[^/]+/itx/retrieve\\.do\\?.*docId='');
+		items = Scholar.selectItems(items);
+
+		if(!items) {
+			return true;
+		}
+
+		// parse things out of URLs
+		var time = new Date();
+		time = time.getTime();
+		var markedString = "";
+		for(var i in items) {
+			var postVal = readEncoded(i);
+			markedString += postVal.tabID+"_"+postVal.docId+"_1_0_"+postVal.contentSet+"_srcprod="+postVal.prodId+"|^";
+		}
+
+		var postData = "inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=UPDATE_MARK_LIST&userGroupName="+postVal.userGroupName+"&markedString="+markedString+"&a="+time;
+		Scholar.Utilities.HTTP.doGet(host+"itx/marklist.do?inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=CLEAR_MARK_LIST&userGroupName="+postVal.userGroupName,
+			function(text) {			// clear marked
+			Scholar.Utilities.HTTP.doPost(host+"itx/marklist.do", postData,
+				function(text) {		// mark
+				Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID=T-ALL&prodId="+postVal.prodId+"&docId=&actionString=FormatCitation&userGroupName="+postVal.userGroupName+"&citationFormat=ENDNOTE",
+					function(text) {	// get marked
+					infoTracRIS(text);
+				});
+			});
+		});
+	} else {
+		// just extract from single page
+		var postVal = readEncoded(url);
+		Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID="+postVal.tabID+"&prodId="+postVal.prodId+"&docId="+postVal.docId+"&actionString=FormatCitation&citationFormat=ENDNOTE",
+			function(text) {
+			infoTracRIS(text);
+		});
+	}
+
+	Scholar.wait();
+}');
+
 REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis', 'Simon Kornblith', '^http://web\.lexis-?nexis\.com/universe/(?:document|doclist)',
 'function detectWeb(doc, url) {
 	var detailRe = new RegExp("^http://[^/]+/universe/document");
@@ -1910,7 +2029,7 @@ REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006
 			uris.push(i);
 		}
 	} else {
-		var uris = new Array(doc.location.href);
+		var uris = new Array(doc.location.href);
 	}
 
 	for(var i in uris) {
@@ -4736,10 +4855,10 @@ Scholar.addOption("exportNotes", true);
 function detectImport() {
 	var line;
 	while(line = Scholar.read()) {
-		if(line.replace(/\s/g, "") != "") {
+		line = line.replace(/^\s+/, "");
+		if(line != "") {
 			if(line.substr(0, 6) == "TY  - ") {
 				return true;
-				Scholar.Utilities.debug("YES!");
 			} else {
 				return false;
 			}
@@ -4884,9 +5003,24 @@ function processTag(item, tag, value) {
 		if(!item.ISSN) {
 			item.ISSN = value;
 		}
-	} else if(tag == "UR") {
+	} else if(tag == "UR" || tag == "L1" || tag == "L2" || tag == "L4") {
 		// URL
-		item.url = value;
+		if(!item.url) {
+			item.url = value;
+		}
+
+		if(tag == "UR") {
+			item.attachments.push({url:value});
+		} else if(tag == "L1") {
+			item.attachments.push({url:value, mimeType:"application/pdf",
+				title:"Full Text (PDF)", downloadable:true});
+		} else if(tag == "L2") {
+			item.attachments.push({url:value, mimeType:"text/html",
+				title:"Full Text (HTML)", downloadable:true});
+		} else if(tag == "L4") {
+			item.attachments.push({url:value,
+				title:"Image", downloadable:true});
+		}
 	}
 }
 
@@ -4898,7 +5032,7 @@ function doImport(attachments) {
 	do {	// first valid line is type
 		Scholar.Utilities.debug("ignoring "+line);
 		line = Scholar.read();
-		Scholar.Utilities.debug(line);
+		if(line !== false) line = line.replace(/^\s+/, "");	// read() gives false at EOF; the loop test below handles it
 	} while(line !== false && line.substr(0, 6) != "TY  - ");
 
 	var item = new Scholar.Item();
@@ -4910,6 +5044,7 @@ function doImport(attachments) {
 	var tag = "TY";
 	var data = line.substr(6);
 	while((line = Scholar.read()) !== false) {	// until EOF
+		line = line.replace(/^\s+/, "");
 		if(line.substr(2, 4) == "  - ") {
 			// if this line is a tag, take a look at the previous line to map
 			// its tag