From c57143b3cb4f37113fdfe01c20d3e63e1aef98f7 Mon Sep 17 00:00:00 2001
From: Michael Berkowitz
Date: Wed, 11 Jun 2008 18:48:06 +0000
Subject: [PATCH] -Adds translator for Helsinki University of Technology Library.

---
 scrapers.sql | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 84 insertions(+), 1 deletion(-)

diff --git a/scrapers.sql b/scrapers.sql
index 3cbc292e6..acff9caab 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-06-11 05:00:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-06-11 20:00:00'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -1987,6 +1987,89 @@ function doImport() {
 	}
 }');
 
+REPLACE INTO translators VALUES ('2943d7fc-3ce8-401c-afd5-ee1f70b7aae0', '1.0.0b4.r5', '', '2008-06-11 20:00:00', '0', '100', '4', 'Helsinki University of Technology', 'Michael Berkowitz', '^https?://teemu\.linneanet\.fi/',
+'/**
+ * Classifies a catalogue URL by its query string.
+ * Returns "book" when the URL has a v<digits>=<digits> parameter, "multiple"
+ * when it contains Search_Arg, and undefined for anything else.
+ */
+function detectWeb(doc, url) {
+	if (url.match(/v\d+=\d+/)) {
+		return "book";
+	} else if (url.match(/Search_Arg/)) {
+		return "multiple";
+	}
+}',
+'/**
+ * Rewrites the first v<digits>=<value> parameter of a URL to v3=<value>.
+ * doWeb fetches the rewritten URL and reads the tag/value table from it.
+ */
+function MARCify(str) {
+	return str.replace(/v\d+=([^&]+)/, "v3=$1");
+}
+
+/**
+ * Gathers the record URL(s) for the current page, loads the v3 view of each
+ * one and passes its table rows to the import translator as record fields.
+ */
+function doWeb(doc, url) {
+	var n = doc.documentElement.namespaceURI;
+	var ns = n ? function(prefix) {
+		if (prefix == ''x'') return n; else return null;
+	} : null;
+
+	var books = [];
+	if (detectWeb(doc, url) == "multiple") {
+		var titles = doc.evaluate(''/html/body/form/table/tbody/tr/td[3]/a'', doc, ns, XPathResult.ANY_TYPE, null);
+		var title;
+		var items = {};
+		while (title = titles.iterateNext()) {
+			items[title.href] = Zotero.Utilities.trimInternal(title.textContent);
+		}
+		items = Zotero.selectItems(items);
+		for (var i in items) {
+			books.push(MARCify(i));
+		}
+	} else {
+		books = [MARCify(url)];
+	}
+	var translator = Zotero.loadTranslator("import");
+	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
+	var marc = translator.getTranslatorObject();
+	Zotero.Utilities.processDocuments(books, function(doc) {
+		var elmts = doc.evaluate(''/html/body/form/table/tbody/tr[th]'', doc, ns, XPathResult.ANY_TYPE, null);
+		var record = new marc.record();
+		var elmt;
+		while (elmt = elmts.iterateNext()) {
+			var field = Zotero.Utilities.superCleanString(doc.evaluate(''./th'', elmt, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent);
+			if (field) {
+				var value = doc.evaluate(''./td[1]'', elmt, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+				// If the cell text has a second line, that line alone is the value.
+				var lines = value.split(/\n/);
+				if (lines[1]) value = Zotero.Utilities.trimInternal(lines[1]);
+				if (field == "LDR") {
+					record.leader = value;
+				} else if (field != "FMT") {
+					// Swap "|a "-style markers for the MARC subfield delimiter.
+					value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
+					var code = field.substring(0, 3);
+					// Up to two characters after the 3-character code are the indicators.
+					var ind = field.substring(3, 5);
+					record.addField(code, ind, value);
+				}
+			}
+		}
+		var item = new Zotero.Item("book");
+		record.translate(item);
+		item.complete();
+	}, function() {
+		// Must be a call: a bare Zotero.done reference is a no-op, so the
+		// asynchronous translation announced by Zotero.wait() would never finish.
+		Zotero.done();
+	});
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('b662c6eb-e478-46bd- bad4-23cdfd0c9d67', '1.0.0b4.r5', '', '2008-06-10 22:30:00', '0', '100', '4', 'JurPC', 'Oliver Vivell and Michael Berkowitz', 
'http://www.jurpc.de/', 'function detectWeb(doc, url) { var doctype = doc.evaluate(''//meta/@doctype'', doc, null,XPathResult.ANY_TYPE, null).iterateNext().textContent;