diff --git a/scrapers.sql b/scrapers.sql index 0b0f900d7..dd7102dfc 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-14 14:30:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-14 15:00:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -4987,24 +4987,36 @@ REPLACE INTO translators VALUES ('636c8ea6-2af7-4488-8ccd-ea280e4a7a98', '1.0.0b Zotero.wait(); }'); -REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b4.r5', '', '2008-04-28 17:50:00', '0', '100', '4', 'SciELO', 'Michael Berkowitz', 'http://www.scielo.(org|br)/', +REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b4.r5', '', '2008-05-14 15:00:00', '1', '100', '4', 'SciELO', 'Michael Berkowitz', 'http://(www.)?scielo.(org|br)/', 'function detectWeb(doc, url) { if (url.indexOf("wxis.exe") != -1) { if (doc.evaluate(''//*[@class="isoref"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { return "multiple"; } + } else if (url.indexOf("issuetoc") != -1) { + return "multiple" } else if (url.indexOf("&pid=") != -1) { return "journalArticle"; } else { Zotero.debug("ok"); } }', -'function doWeb(doc, url) { +'function makeURL(host, str) { + return ''http://'' + host + ''/scieloOrg/php/articleXML.php?pid='' + str.match(/pid=([^&]+)/)[1] + ''&lang=en''; +} + +function doWeb(doc, url) { + var host = doc.location.host; var arts = new Array(); if (detectWeb(doc, url) == "multiple") { var items = new Object(); - var titlepath = ''//font[@class="isoref"]/font[@class="negrito"]''; - var linkpath = ''//font[@class="isoref"]/a[@class="isoref"]''; + if (url.indexOf(".exe") != -1) { + var titlepath = ''//font[@class="isoref"]/font[@class="negrito"]''; + var linkpath = ''//font[@class="isoref"]/a[@class="isoref"]''; + } else { + var titlepath = ''//font[@class="normal"]/b/b[1]''; + var linkpath = ''//tr/td/div/a[1]''; + } var titles = doc.evaluate(titlepath, doc, null, XPathResult.ANY_TYPE, null); var links = doc.evaluate(linkpath, doc, null, XPathResult.ANY_TYPE, null); var next_title; @@ -5014,69 +5026,59 @@ REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b } items = Zotero.selectItems(items); for (var i in items) { - arts.push(i); + arts.push(makeURL(host, i)); } } else { - arts = [url]; + arts = [makeURL(host, url)]; } - for each (var url in arts) { - Zotero.debug(url); - Zotero.Utilities.HTTP.doGet(url, function(text) { - var link = text.match(/\"([^"]+articleXML[^"]+)\"/)[1]; - Zotero.debug(link); - - Zotero.Utilities.HTTP.doGet(link, function(text) { - var item = new Zotero.Item("journalArticle"); - - text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, "").replace(//g, ""); - var journal = text.split("")[1].split("")[0]; - journal = "" + journal + ""; - journal = journal.replace(/\-([a-z])/g, "$1"); - var xml2 = new XML(journal); - var art = text.split("")[1].split("")[0]; - art = "
" + art + "
"; - art = art.replace(/\-([a-z])/g, "$1"); - var xml3 = new XML(art); - - item.publicationTitle = xml2..journaltitle.text().toString(); - item.journalAbbreviation = xml2..abbrevjournaltitle.text().toString(); - item.ISSN = xml2..issn.text().toString(); - item.publisher = xml2..publisher..publishername.text().toString(); - - item.title = xml3..titlegroup..articletitle.text().toString(); - for (var i = 0 ; i < xml3..contribgroup..contrib.length() ; i++) { - var name = xml3..contribgroup..contrib[i]..name; - item.creators.push({firstName:name..givennames.text().toString(), lastName:name..surname.text().toString(), creatorType:"author"}); - } - - var date = xml3..pubdate[0]; - var day = date..day.text().toString(); - var month = date..month.text().toString(); - var year = date..year.text().toString(); - - date = year; - if (month != "00") { - date = month + "/" + date; - } - if (day != "00") { - date = day + "/" + date; - } - item.date = date; - item.volume = xml3..volume.text().toString(); - item.pages = xml3..fpage.text().toString() + "-" + xml3..lpage.text().toString(); - - for (var i = 0 ; i < xml3..kwdgroup..kwd.length() ; i++) { - item.tags.push(xml3..kwdgroup..kwd[i].text().toString()); - } - - item.attachments = [ - {url:url, title:"SciELO Snapshot", mimeType:"text/html"} - ]; + Zotero.Utilities.HTTP.doGet(arts, function(text) { + var item = new Zotero.Item("journalArticle"); + text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, "").replace(//g, ""); + var journal = text.split("")[1].split("")[0]; + journal = "" + journal + ""; + journal = journal.replace(/\-([a-z])/g, "$1"); + var xml2 = new XML(journal); + var art = text.split("")[1].split("")[0]; + art = "
" + art + "
"; + art = art.replace(/\-([a-z])/g, "$1"); + var xml3 = new XML(art); - item.complete(); - }); - }); - } + item.publicationTitle = xml2..journaltitle.text().toString(); + item.journalAbbreviation = xml2..abbrevjournaltitle.text().toString(); + item.ISSN = xml2..issn.text().toString(); + item.publisher = xml2..publisher..publishername.text().toString(); + item.title = xml3..titlegroup..articletitle.text().toString(); + for (var i = 0 ; i < xml3..contribgroup..contrib.length() ; i++) { + var name = xml3..contribgroup..contrib[i]..name; + item.creators.push({firstName:name..givennames.text().toString(), lastName:name..surname.text().toString(), creatorType:"author"}); + } + + var date = xml3..pubdate[0]; + var day = date..day.text().toString(); + var month = date..month.text().toString(); + var year = date..year.text().toString(); + + date = year; + if (month != "00") { + date = month + "/" + date; + } + if (day != "00") { + date = day + "/" + date; + } + item.date = date; + item.volume = xml3..volume.text().toString(); + item.pages = xml3..fpage.text().toString() + "-" + xml3..lpage.text().toString(); + + for (var i = 0 ; i < xml3..kwdgroup..kwd.length() ; i++) { + item.tags.push(xml3..kwdgroup..kwd[i].text().toString()); + } + + item.attachments = [ + {url:url, title:"SciELO Snapshot", mimeType:"text/html"} + ]; + + item.complete(); + }); }'); REPLACE INTO translators VALUES ('0a84a653-79ea-4c6a-8a68-da933e3b504a', '1.0.0b4.r5', '', '2008-03-28 16:30:00', '0', '100', '4', 'Alexander Street Press', 'John West and Michael Berkowitz', 'http://(?:www\.)alexanderstreet',