From 38531da9fa47f0e9a209f71636e850667730cece Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Sat, 25 Nov 2006 03:41:13 +0000 Subject: [PATCH] closes #396, accents are lost when scraping multiple items (with InnoPAC) --- scrapers.sql | 150 +++++++++++++++++++-------------------------------- 1 file changed, 54 insertions(+), 96 deletions(-) diff --git a/scrapers.sql b/scrapers.sql index 4235d8047..e46c3b60b 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 109 +-- 110 -- ***** BEGIN LICENSE BLOCK ***** -- @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-11-24 19:13:00')); +REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-11-24 19:34:00')); REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-11-21 22:30:00', 1, 100, 4, 'Amazon', 'Sean Takats', '^http://(?:www\.)amazon', 'function detectWeb(doc, url) { @@ -801,7 +801,7 @@ function doWeb(doc, url) { } }'); -REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-10-02 17:00:00', 1, 100, 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)', +REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-11-24 19:34:00', 1, 100, 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)', 'function detectWeb(doc, url) { // First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$''); @@ -835,63 +835,68 @@ REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006 if (prefix == ''x'') return namespace; else return null; } : null; - var xpath = ''//pre/text()[1]''; - var text = newDoc.evaluate(xpath, newDoc, nsResolver, - XPathResult.ANY_TYPE, null).iterateNext().nodeValue; + var xpath = ''//pre/text()''; + var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, + XPathResult.ANY_TYPE, null); + var elmt; - var newItem = new Zotero.Item(); - var record = new marc.record(); - - var linee = text.split("\n"); - for (var i=0; i 1) { - postString += "save_func=save_marked"; - - - Zotero.Utilities.HTTP.doGet(clearUrl, function() { - Zotero.Utilities.HTTP.doPost(postUrl, postString, function() { - Zotero.Utilities.HTTP.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", function(text) { - var notSpace = /[^\s]/ - if(notSpace.test(text)) { - marc.setString(text); - marc.translate(); - - Zotero.done(); - } else { - pageByPage(marc, newUrls); - } - }); - }); - }); - } else { - pageByPage(marc, newUrls); - } + pageByPage(marc, newUrls); } Zotero.wait();