From 690b9417fca98886fa6d796308f38e9e2ba5a43f Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Wed, 12 Dec 2007 04:20:02 +0000 Subject: [PATCH] Fixes #864, OCLC FirstSearch WorldCat not importing accented characters properly Added responseCharset parameter to doPost() and doGet() to set xmlhttp.channel.contentCharset before attempting to read textual data -- fifth and fourth parameters, respectively --- chrome/content/zotero/xpcom/utilities.js | 24 ++++++++++++++---------- scrapers.sql | 6 +++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/chrome/content/zotero/xpcom/utilities.js b/chrome/content/zotero/xpcom/utilities.js index ddcbfdd15..345c57aee 100644 --- a/chrome/content/zotero/xpcom/utilities.js +++ b/chrome/content/zotero/xpcom/utilities.js @@ -488,7 +488,7 @@ Zotero.Utilities.Ingester.HTTP = function(translate) { this.translate = translate; } -Zotero.Utilities.Ingester.HTTP.prototype.doGet = function(urls, processor, done) { +Zotero.Utilities.Ingester.HTTP.prototype.doGet = function(urls, processor, done, responseCharset) { var callAgain = false; if(typeof(urls) == "string") { @@ -523,10 +523,10 @@ Zotero.Utilities.Ingester.HTTP.prototype.doGet = function(urls, processor, done) } catch(e) { me.translate.error(false, e); } - }); + }, responseCharset); } -Zotero.Utilities.Ingester.HTTP.prototype.doPost = function(url, body, onDone, contentType) { +Zotero.Utilities.Ingester.HTTP.prototype.doPost = function(url, body, onDone, requestContentType, responseCharset) { if(this.translate.locationIsProxied) { url = Zotero.Ingester.ProxyMonitor.properToProxy(url); } @@ -541,7 +541,7 @@ Zotero.Utilities.Ingester.HTTP.prototype.doPost = function(url, body, onDone, co } catch(e) { translate.error(false, e); } - }, contentType); + }, requestContentType, responseCharset); } // These are front ends for XMLHttpRequest. XMLHttpRequest can't actually be @@ -563,7 +563,7 @@ Zotero.Utilities.HTTP = new function() { * doGet can be called as: * Zotero.Utilities.HTTP.doGet(url, onDone) **/ - function doGet(url, onDone, onError) { + function doGet(url, onDone, onError, responseCharset) { Zotero.debug("HTTP GET "+url); if (this.browserIsOffline()){ return false; @@ -575,7 +575,7 @@ Zotero.Utilities.HTTP = new function() { var test = xmlhttp.open('GET', url, true); xmlhttp.onreadystatechange = function(){ - _stateChange(xmlhttp, onDone); + _stateChange(xmlhttp, onDone, responseCharset); }; xmlhttp.send(null); @@ -592,7 +592,7 @@ Zotero.Utilities.HTTP = new function() { * doPost can be called as: * Zotero.Utilities.HTTP.doPost(url, body, onDone) **/ - function doPost(url, body, onDone, contentType) { + function doPost(url, body, onDone, requestContentType, responseCharset) { Zotero.debug("HTTP POST "+body+" to "+url); if (this.browserIsOffline()){ return false; @@ -602,10 +602,10 @@ Zotero.Utilities.HTTP = new function() { .createInstance(); xmlhttp.open('POST', url, true); - xmlhttp.setRequestHeader("Content-Type", (contentType ? contentType : "application/x-www-form-urlencoded" )); + xmlhttp.setRequestHeader("Content-Type", (requestContentType ? requestContentType : "application/x-www-form-urlencoded" )); xmlhttp.onreadystatechange = function(){ - _stateChange(xmlhttp, onDone); + _stateChange(xmlhttp, onDone, responseCharset); }; xmlhttp.send(body); @@ -671,7 +671,7 @@ Zotero.Utilities.HTTP = new function() { } - function _stateChange(xmlhttp, onDone){ + function _stateChange(xmlhttp, onDone, responseCharset){ switch (xmlhttp.readyState){ // Request not yet made case 1: @@ -684,6 +684,10 @@ Zotero.Utilities.HTTP = new function() { // Download complete case 4: if(onDone){ + // Override the content charset + if (responseCharset) { + xmlhttp.channel.contentCharset = responseCharset; + } onDone(xmlhttp); } break; diff --git a/scrapers.sql b/scrapers.sql index 4a67f9f0b..4aee91368 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-12-05 17:00:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-12-12 05:00:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -577,7 +577,7 @@ function doWeb(doc, url) { } }'); -REPLACE INTO translators VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '1.0.0b3.r1', '', '2007-03-24 22:20:00', 1, 100, 4, 'OCLC WorldCat FirstSearch', 'Simon Kornblith', '^https?://(?:new)?firstsearch\.oclc\.org[^/]*/WebZ/', +REPLACE INTO translators VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '1.0.0b3.r1', '', '2007-12-12 05:00:00', 1, 100, 4, 'OCLC WorldCat FirstSearch', 'Simon Kornblith', '^https?://(?:new)?firstsearch\.oclc\.org[^/]*/WebZ/', 'function detectWeb(doc, url) { var detailRe = /FirstSearch: [\w ]+ Detailed Record/; var searchRe = /FirstSearch: [\w ]+ List of Records/; @@ -710,7 +710,7 @@ REPLACE INTO translators VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '1.0.0b newItem.complete(); processURLs(urls); - }); + }, false, ''iso-8859-1''); } function doWeb(doc, url) {