From d5989a3d1e367d6b960c3d6504543cd6c3beccd4 Mon Sep 17 00:00:00 2001 From: Matt Burton Date: Thu, 15 Jan 2009 03:34:39 +0000 Subject: [PATCH] Addresses #1242, incorporated santaworts code, still some issues with PDF download --- translators/ACS Publications.js | 173 +++++++++++++++----------------- 1 file changed, 82 insertions(+), 91 deletions(-) diff --git a/translators/ACS Publications.js b/translators/ACS Publications.js index e8b352c84..ac0eb56e0 100644 --- a/translators/ACS Publications.js +++ b/translators/ACS Publications.js @@ -2,13 +2,13 @@ "translatorID":"938ebe32-2b2e-4349-a5b3-b3a05d3de627", "translatorType":4, "label":"ACS Publications", - "creator":"Sean Takats and Michael Berkowitz", + "creator":"Sean Takats and Michael Berkowitz and Santawort", "target":"http://[^/]*pubs3?.acs.org[^/]*/(?:wls/journals/query/(?:subscriberResults|query)\\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample|asap).cgi)?", "minVersion":"1.0.0b3.r1", "maxVersion":"", "priority":100, "inRepository":true, - "lastUpdated":"2008-05-06 08:15:00" + "lastUpdated":"2009-01-14 10:15:00" } function detectWeb(doc, url) { @@ -17,110 +17,101 @@ function detectWeb(doc, url) { if (prefix == 'x') return namespace; else return null; } : null; - if(doc.evaluate('//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + if(doc.evaluate('//input[@id="articleListHeader_selectAllToc"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + Zotero.debug("multiple"); return "multiple"; - } else if (doc.evaluate('//jid', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + } else if (doc.evaluate('//div[@id="articleHead"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { return "journalArticle"; - } + } return false; } -function handleRequests(requests, pdfs) { - if(requests.length == 0) { - Zotero.done(); - return; - } - - var request = requests.shift(); - - Zotero.Utilities.HTTP.doGet("http://pubs.acs.org/wls/journals/citation2/Citation?"+request.jid, function() { - Zotero.Utilities.HTTP.doPost("http://pubs.acs.org/wls/journals/citation2/Citation", - "includeAbstract=citation-abstract&format=refmgr&submit=1&mode=GET", function(text) { - // load translator for RIS - var translator = Zotero.loadTranslator("import"); - translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7"); - translator.setString(text); - translator.setHandler("itemDone", function(obj, item) { - var pdf = pdfs.shift(); - if(pdf) { - item.attachments.push({ - title:"ACS Full Text PDF", - url:pdf, mimeType:"application/pdf" - }); - } - if (!item.attachments[0].title) - item.attachments[0].title = "ACS Snapshot"; - item.complete(); - }); - translator.translate(); - - handleRequests(requests); - }); - }); -} - -function doWeb(doc, url) { +function doWeb(doc, url){ var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == 'x') return namespace; else return null; } : null; - - var pdfs = new Array(); - var requests = new Array(); - - if (detectWeb(doc, url) == "multiple") { - // search page - var items = new Array(); - if (doc.evaluate('//form[@name="citationSelect"]//tbody/tr[1]//span[@class="textbold"][1]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - var titles = doc.evaluate('//form[@name="citationSelect"]//tbody/tr[1]//span[@class="textbold"][1]', doc, nsResolver, XPathResult.ANY_TYPE, null); - } else if (doc.evaluate('//form/div[@class="artBox"]/div[@class="artBody"]/div[@class="artTitle"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - var titles = doc.evaluate('//form/div[@class="artBox"]/div[@class="artBody"]/div[@class="artTitle"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - } - if (doc.evaluate('//form[@name="citationSelect"]//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - var jids = doc.evaluate('//form[@name="citationSelect"]//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - } else if (doc.evaluate('//div[@id="content"]/form/div[@class="artBox"]/div[@class="artHeadBox"]/div[@class="artHeader"]/input', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - var jids = doc.evaluate('//div[@id="content"]/form/div[@class="artBox"]/div[@class="artHeadBox"]/div[@class="artHeader"]/input', doc, nsResolver, XPathResult.ANY_TYPE, null); - } - var links = doc.evaluate('//form[@name="citationSelect"]//tbody/tr[2]//a[@class="link"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + var host = 'http://' + doc.location.host + "/"; + //Zotero.debug(host); + var m = url.match(/https?:\/\/[^\/]*\/doi\/(abs|full)\/(.+)/); + var dois = new Array(); + if(detectWeb(doc, url) == "multiple") { //search + var doi; var title; - var jid; - var id; - var link; - while ((title = titles.iterateNext()) && (jid = jids.iterateNext())){ - id = jid.value - items[id] = Zotero.Utilities.trimInternal(title.textContent); - - var link = doc.evaluate('../../..//a[contains(text(), "PDF")]', title, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if(link) { - links[id] = link.href.replace("searchRedirect.cgi", "article.cgi"); + var availableItems = new Array(); + var xpath = '//div[@class="articleBox"]'; + if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null); + var elmt = elmts.iterateNext(); + do { + title = doc.evaluate('./div[@class="articleBoxMeta"]/h2', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; + doi = doc.evaluate('./div[@class="articleBoxMeta"]/h2/a/@href', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("/doi/abs/",""); + if (doi.indexOf("prevSearch") != -1){ + doi = doi.substring(0,doi.indexOf("?")); } + availableItems[doi] = title; + } while (elmt = elmts.iterateNext()) + } + var items = Zotero.selectItems(availableItems); + if(!items) { + return true; } - - items = Zotero.selectItems(items); - if(!items) return true; - - var getstring = ""; for(var i in items) { - getstring = getstring + "jid=" + encodeURIComponent(i) + "&"; - pdfs.push(links[i]+"?sessid="); + dois.push(i); } - requests.push({jid:getstring}); - } else { - // single page - var jid = doc.evaluate('//jid', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - jid = jid.substr(jid.indexOf("/")+1); - var pdf = doc.evaluate('/html/body//a[contains(text(), "PDF")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if (!pdf) { - var pdf = doc.evaluate('/html/body//a[contains(@href, "/pdf/")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + } else if (m){ //single article + var doi = m[2]; + if (doi.match("prevSearch")) { + doi = doi.substring(0,doi.indexOf("?")); } - if (pdf) { - pdf = pdf.href; - pdf = pdf.replace("searchRedirect.cgi", "article.cgi"); - pdfs.push(pdf+"?sessid="); - } - var requests = [{jid:"jid=" + encodeURIComponent(jid)}]; + Zotero.debug("DOI= "+doi); + dois.push(doi); } - handleRequests(requests, pdfs); - + + var setupSets = []; + for each (doi in dois) { + var citUrl = host + 'action/showCitFormats?doi=' + doi; + setupSets.push({ doi: doi, citUrl: citUrl }); + } + + var setupCallback = function () { + //get citation export page's source code; + if (setupSets.length) { + var set = setupSets.shift(); + Zotero.Utilities.HTTP.doGet(set.citUrl, function(text){ + //get the exported RIS file name; + var downloadFileName = text.match(/name=\"downloadFileName\" value=\"([A-Za-z0-9_]+)\"/)[1]; + Zotero.debug("downloadfilename= "+downloadFileName); + processCallback(set.doi,downloadFileName); + }); + } + else { + Zotero.done(); + } + } + var processCallback = function (doi,downloadFileName) { + var baseurl = "http://pubs.acs.org/action/downloadCitation"; + var post = "doi=" + doi + "&downloadFileName=" + downloadFileName + "&include=abs&format=refman&direct=on&submit=Download+article+citation+data"; + Zotero.Utilities.HTTP.doPost(baseurl, post,function(text){ + // Fix the RIS doi mapping + text = text.replace("N1 - doi:","M3 - "); + Zotero.debug("ris= "+ text); + var translator = Zotero.loadTranslator("import"); + translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7"); + translator.setString(text); + translator.setHandler("itemDone", function(obj, item) { + var pdfUrl = host + 'doi/pdf/' + doi; + var fullTextUrl = host + 'doi/full/' + doi; + item.attachments.push( + {title:"ACS Full Text PDF",url:pdfUrl, mimeType:"application/pdf"}, + {title:"ACS Full Text Snapshot",url:fullTextUrl, mimeType:"text/html"} + ); + item.complete(); + }); + translator.translate(); + setupCallback(); + }); + } + setupCallback(); Zotero.wait(); } \ No newline at end of file