From 7b0d3c89ee19c7b03e866e09ae0e76f259823e9e Mon Sep 17 00:00:00 2001
From: Sean Takats
Date: Thu, 15 Mar 2007 18:49:46 +0000
Subject: [PATCH] ScientificCommons translator added

---
 scrapers.sql | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 2 deletions(-)

diff --git a/scrapers.sql b/scrapers.sql
index 6df01a454..69888fc48 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,4 +1,4 @@
--- 176
+-- 177
 
 -- ***** BEGIN LICENSE BLOCK *****
 -- 
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-03-08 22:10:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-03-15 22:10:00'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -4550,6 +4550,82 @@ function doWeb(doc, url) {
 	Zotero.wait();
 }');
 
+REPLACE INTO translators VALUES ('19643c25-a4b2-480d-91b7-4e0b761fb6ad', '1.0.0b3.r1', '', '2007-03-15 22:10:00', '1', '100', '4', 'ScientificCommons', 'Sean Takats', '^http://(?:en|de|www)\.scientificcommons\.org',
+'function detectWeb(doc, url) { // Classifies the page: "journalArticle" when the URL path starts with a numeric id, "multiple" when a content_element div exists, otherwise false.
+	var articleRe = /^http:\/\/(?:www|en|de)\.scientificcommons\.org\/([0-9]+)/;
+	var m = articleRe.exec(url);
+
+	if(m) {
+		return "journalArticle";
+	} else { // not a record URL: look for a result list instead
+		var namespace = doc.documentElement.namespaceURI;
+		var nsResolver = namespace ? function(prefix) {
+			if (prefix == ''x'') return namespace; else return null;
+		} : null;
+		var listElt = doc.evaluate(''//div[@class="content_element"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+		if (listElt) {
+			return "multiple";
+		}
+	}
+	return false;
+}',
+'function doWeb(doc, url) { // Builds RIS export URLs (one for a record page, or the user-selected entries of a result list) and hands the fetched text to the RIS import translator.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var hostRe = new RegExp("^(http://[^/]+)/"); // captures scheme and host of the current page
+	var m = hostRe.exec(url);
+	var host = m[1]; // prefix for every export URL built below
+
+	var articleRe = /^http:\/\/(?:www|en|de)\.scientificcommons\.org\/([0-9]+)/;
+	m = articleRe.exec(url);
+	var uris = new Array();
+
+	if(m) { // record page: the export id is read from the publication_id div
+		var idElt = doc.evaluate(''//div[@id="publication_id"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+		if (idElt) {
+			uris.push(host + "/export/ris/" + idElt.textContent);
+		} else {
+			return false;
+		}
+	} else { // result list: offer every content_element div for selection
+		var items = new Array();
+		var listElts = doc.evaluate(''//div[@class="content_element"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		// listElt is declared here so the loop variable stays local instead of leaking as an implicit global
+		var listElt;
+		// the id attribute of each content_element div is used as the RIS export id
+		while (listElt = listElts.iterateNext()) {
+			var id = doc.evaluate(''./@id'', listElt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
+			var link = host + "/export/ris/" + id;
+			var title = doc.evaluate(''.//p[@class="title"]'', listElt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+			items[link] = Zotero.Utilities.cleanString(title); // keyed by export URL, valued by cleaned title
+		}
+
+		items = Zotero.selectItems(items);
+		if(!items) return true; // selectItems returned a falsy value, so there is nothing to fetch
+
+		for(var uri in items) {
+			uris.push(uri);
+		}
+	}
+
+	Zotero.Utilities.HTTP.doGet(uris, function(text) {
+		// load translator for RIS
+		var translator = Zotero.loadTranslator("import");
+		translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
+		translator.setString(text);
+		translator.setHandler("itemDone", function(obj, item) {
+			// add attachment support?
+			item.complete();
+		});
+		translator.translate();
+		Zotero.done(); // NOTE(review): runs inside the response callback; confirm behaviour when uris holds several URLs
+	});
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('cde4428-5434-437f-9cd9-2281d14dbf9', '1.0.0b3.r1', '', '2006-12-15 22:19:00', 1, 100, 4, 'Ovid', 'Simon Kornblith', '/gw1/ovidweb\.cgi',
 'function detectWeb(doc, url) {
 	var namespace = doc.documentElement.namespaceURI;