From c0812a8f0a3f84cfcbc8516e57c112d86df3999b Mon Sep 17 00:00:00 2001
From: Michael Berkowitz
Date: Thu, 29 May 2008 17:24:08 +0000
Subject: [PATCH] -Adds Tim Sherratt's Australian Dictionary of Biography translator.

---
 scrapers.sql | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 1 deletion(-)

diff --git a/scrapers.sql b/scrapers.sql
index 486a46db7..64839ff74 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-29 17:00:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-29 17:30:00'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -1089,6 +1089,93 @@ REPLACE INTO translators VALUES ('88915634-1af6-c134-0171-56fd198235ed', '1.0.0b
 	Zotero.wait();
 }');
 
+REPLACE INTO translators VALUES ('0aea3026-a246-4201-a4b5-265f75b9a6a7', '1.0.0b4.r5', '', '2008-05-29 17:30:00', '0', '100', '4', 'Australian Dictionary of Biography', 'Tim Sherratt and Michael Berkowitz', 'http://www.adb.online.anu.edu.au',
+'function detectWeb(doc, url) {
+	// Only the URL is examined; doc is unused.
+	// Search results and the people/author browse listings hold many entries.
+	if (url.match(/adbp-ent_search|browse_people|browse_authors/)) {
+		return "multiple";
+	}
+	// A single biography page: biogs/A, any number of S, digits, then b.htm.
+	if (url.match(/biogs\/AS*\d+b\.htm/)) {
+		return "bookSection";
+	}
+	// Any other URL falls through and returns undefined.
+}',
+'function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == "x") return namespace; else return null;
+	} : null;
+	// URLs of the biography pages to scrape.
+	var records = new Array();
+	if (detectWeb(doc, url) == "multiple") {
+		var items = new Object();
+		var titles;
+		var links;
+		// Each listing type marks up its result rows differently.
+		if (url.match(/browse_people/)) {
+			titles = doc.evaluate(''//ul[@class="pb-results"]/li'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+			links = doc.evaluate(''//ul[@class="pb-results"]/li/a[1]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		} else if (url.match(/browse_authors/)) {
+			titles = doc.evaluate(''//div[@id="content"]/dl/dd'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+			links = doc.evaluate(''//div[@id="content"]/dl/dd/a[1]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		} else if (url.match(/adbp-ent_search/)) {
+			titles = doc.evaluate(''//div[@id="content"]/ol/li'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+			links = doc.evaluate(''//div[@id="content"]/ol/li//a[1]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		}
+		// Walk rows and links in step: the link href is the key, the row text the label.
+		var title;
+		var link;
+		while ((link = links.iterateNext()) && (title = titles.iterateNext())) {
+			items[link.href] = Zotero.Utilities.trimInternal(title.textContent);
+		}
+
+		items = Zotero.selectItems(items);
+		// selectItems returned nothing (presumably the user cancelled): stop without scraping.
+		if (!items) {
+			return true;
+		}
+		for (var i in items) {
+			records.push(i);
+		}
+	} else {
+		records = [url];
+	}
+	Zotero.Utilities.processDocuments(records, function(doc) {
+		var item = new Zotero.Item("bookSection");
+		var author = Zotero.Utilities.cleanString(doc.evaluate(''//div[@id="content"]/p[strong="Author"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
+		item.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
+		item.title = Zotero.Utilities.cleanString(doc.evaluate(''//h1'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent);
+		var pubdetails = Zotero.Utilities.cleanString(doc.evaluate(''//div[@id="content"]/p[strong="Print Publication Details"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent);
+		// Read the captures from the match result itself: the static RegExp.$n
+		// properties would keep values from an earlier match when this pattern fails.
+		var pubmatch = pubdetails.match(/Volume (\d+), ([\w ]+), (\d{4}), p+\.*\s+([\d-]+)/);
+		if (pubmatch) {
+			item.volume = pubmatch[1];
+			item.publisher = pubmatch[2];
+			item.date = pubmatch[3];
+			item.pages = pubmatch[4];
+		}
+		item.url = doc.location.href;
+		item.bookTitle = "Australian Dictionary of Biography";
+		item.place = "Melbourne";
+		item.repository = "Australian Dictionary of Biography";
+		// Occupation links become tags.
+		var tags = doc.evaluate(''//li/a[starts-with(@title, "find people with the occupation")]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		var tag;
+		while (tag = tags.iterateNext()) {
+			item.tags.push(tag.textContent);
+		}
+		item.attachments = [
+			{url:item.url, title: "Snapshot - " + item.title, mimeType:"text/html"}
+		];
+		item.complete();
+	}, function() {Zotero.done();});
+	// Pages are processed via callbacks, so wait for Zotero.done() as the other translators here do.
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('83538f48-906f-40ef-bdb3-e94f63676307', '1.0.0b4.r5', '', '2008-05-29 17:00:00', '1', '100', '4', 'NAA RecordSearch', 'Tim Sherratt', 'http://naa12.naa.gov.au/scripts/',
 'function detectWeb(doc, url) {
 	if (url.match(/Items_listing.asp/i)) {