From 1374cc911aa727d88f3bbe0d39eddf219f3fc6dd Mon Sep 17 00:00:00 2001 From: Aurimas Vinckevicius Date: Thu, 11 Jun 2015 18:25:32 -0500 Subject: [PATCH 1/2] Always save snapshots in UTF-8 encoding Re https://forums.zotero.org/discussion/49897/thecreate-web-page-item-from-current-pageis-sometimes-not-working-correctly/ There is something wrong with the way nsIScriptableUnicodeConverter converts to gbk (maybe other formats too). The HTML of http://www.cmiw.cn/forum.php is truncated in the middle of an em tag. In general, ConvertFromUnicode is supposed to return an ACString (not AString), which is not something that JavaScript can handle well. --- chrome/content/zotero/webpagedump/domsaver.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chrome/content/zotero/webpagedump/domsaver.js b/chrome/content/zotero/webpagedump/domsaver.js index 5802557d1..e2ff7d791 100644 --- a/chrome/content/zotero/webpagedump/domsaver.js +++ b/chrome/content/zotero/webpagedump/domsaver.js @@ -199,7 +199,7 @@ var wpdDOMSaver = { // Changed by Dan for Zotero "script": true, // no scripts - "encodeUTF8": false, // write the DOM Tree as UTF-8 and change the charset entry of the document + "encodeUTF8": true, // write the DOM Tree as UTF-8 and change the charset entry of the document "metainfo": true, // include meta tags with URL and date/time information "metacharset": false // if the meta charset is defined inside html override document charset //"xtagging" : true // include a x tag around each word From 5817a2d6541293f46be1aa1f3e7cfa54868bc0fc Mon Sep 17 00:00:00 2001 From: Aurimas Vinckevicius Date: Thu, 11 Jun 2015 18:30:57 -0500 Subject: [PATCH 2/2] Store UTF-8 as charset for all snapshots --- chrome/content/zotero/xpcom/attachments.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index 5e5b26e04..76fe34e7b 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -540,7 +540,7 @@ Zotero.Attachments = new function(){ mimeType = "application/pdf"; } - var charsetID = Zotero.CharacterSets.getID(document.characterSet); + var charsetID = Zotero.CharacterSets.getID('utf-8'); // WPD will output UTF-8 if (!forceTitle) { // Remove e.g. " - Scaled (-17%)" from end of images saved from links,