diff --git a/scrapers.sql b/scrapers.sql index 372e07ab8..32225225b 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-31 20:15:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-31 21:30:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -1579,6 +1579,82 @@ function doWeb(doc, url) { } }'); +REPLACE INTO translators VALUES ('b0abb562-218c-4bf6-af66-c320fdb8ddd3', '1.0.0b4.r5', '', '2008-03-31 21:30:00', '0', '100', '4', 'Philosopher''s Imprint', 'Michael Berkowitz', 'http://quod.lib.umich.edu/cgi/t/', +'function detectWeb(doc, url) { + if (doc.evaluate(''//div/span[text() = "Search Results"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + return "multiple"; + } else if (url.match(/\d+\.\d+\.\d+/)) { + return "journalArticle"; + } +}', +'function getID(str) { + return str.match(/\d+\.\d+\.\d+/)[0]; +} +function doWeb(doc, url) { + var ids = new Array(); + if (detectWeb(doc, url) == "multiple") { + var items = new Object(); + var titles = doc.evaluate(''//div[@class="itemcitation"]//a'', doc, null, XPathResult.ANY_TYPE, null); + var title; + while (title = titles.iterateNext()) { + items[title.href] = title.textContent; + } + items = Zotero.selectItems(items); + for (var i in items) { + ids.push(''http://quod.lib.umich.edu/cgi/t/text/text-idx?c=phimp;view=text;rgn=main;idno='' + getID(i)); + } + } else { + ids = [''http://quod.lib.umich.edu/cgi/t/text/text-idx?c=phimp;view=text;rgn=main;idno='' + getID(url)]; + } + Zotero.Utilities.processDocuments(ids, function(newDoc) { + var rows = newDoc.evaluate(''//tr[td[@id="labelcell"]]'', newDoc, null, XPathResult.ANY_TYPE, null); + var row; + var data = new Object(); + while (row = rows.iterateNext()) { + var heading = newDoc.evaluate(''./td[1]'', row, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; + var value = newDoc.evaluate(''./td[2]'', row, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; + data[heading.replace(/[\s:]/g, "")] = value; + } + var item = new Zotero.Item("journalArticle"); + item.title = Zotero.Utilities.trimInternal(data[''Title'']); + if (data[''Author'']) { + item.creators.push(Zotero.Utilities.cleanAuthor(data[''Author''], "author")); + } else if (data[''Authors'']) { + var authors = data[''Authors''].split(","); + for each (var a in authors) { + item.creators.push(Zotero.Utilities.cleanAuthor(a, "author")); + } + } + if (data[''Keywords'']) { + var kws = data[''Keywords''].split(/\n/); + for each (var kw in kws) { + if (kw != "") item.tags.push(kw); + } + } + var voliss = data[''Source''].replace(item.title, ""); + if (item.creators.length > 1) { + voliss = voliss.replace(data[''Authors''], ""); + } else if (item.creators.length == 1) { + voliss = voliss.replace(data[''Author''], ""); + } + Zotero.debug(voliss); + item.volume = voliss.match(/vol\.\s+(\d+)/)[1]; + item.issue = voliss.match(/no\.\s+(\d+)/)[1]; + item.pages = voliss.match(/pp\.\s+([\d\-]+)/)[1]; + item.date = Zotero.Utilities.trimInternal(voliss.match(/[^,]+$/)[0]); + item.place = "Ann Arbor, MI"; + item.publisher = "University of Michigan"; + item.abstractNote = data[''Abstract'']; + item.url = data[''URL'']; + item.attachments = [ + {url:item.url, title:item.title + " Snapshot", mimeType:"text/html"}, + {url:''http://quod.lib.umich.edu/p/phimp/images/'' + getID(item.url) + ''.pdf'', title:"Philosopher''s Imprint Full Text PDF", mimeType:"application/pdf"} + ]; + item.complete(); + }, function() {Zotero.done;}); + Zotero.wait(); +}'); + REPLACE INTO translators VALUES ('2a5dc3ed-ee5e-4bfb-baad-36ae007e40ce', '1.0.0b4.r5', '', '2008-03-31 18:30:00', '0', '100', '4', 'Berkeley Electronic Press', 'Michael Berkowitz', 'http://www.bepress.com/', 'function detectWeb(doc, url) { if (url.match("cgi/query.cgi")) {