Make toMathML.js handle non-BMP characters properly. Fix issue 361.

This commit is contained in:
Frédéric Wang 2012-12-20 17:41:40 +01:00
parent f8d4215519
commit 96dc4b2b2a

View File

@ -1,3 +1,5 @@
/* -*- Mode: Javascript; indent-tabs-mode:nil; js-indent-level: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/*************************************************************
*
* MathJax/extensions/toMathML.js
@ -106,11 +108,24 @@ MathJax.Hub.Register.LoadHook("[MathJax]/jax/element/mml/jax.js",function () {
string = String(string).split("");
for (var i = 0, m = string.length; i < m; i++) {
var n = string[i].charCodeAt(0);
if (n < 0x20 || n > 0x7E) {
string[i] = "&#x"+n.toString(16).toUpperCase()+";";
} else {
var c = {'&':'&amp;', '<':'&lt;', '>':'&gt;', '"':'&quot;'}[string[i]];
if (c) {string[i] = c}
if (n <= 0xD7FF || 0xE000 <= n) {
// Code points U+0000 to U+D7FF and U+E000 to U+FFFF.
// They are directly represented by n.
if (n < 0x20 || n > 0x7E) {
string[i] = "&#x"+n.toString(16).toUpperCase()+";";
} else {
var c =
{'&':'&amp;', '<':'&lt;', '>':'&gt;', '"':'&quot;'}[string[i]];
if (c) {string[i] = c}
}
} else if (i+1 < m) {
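// Code points U+10000 to U+10FFFF.
// n is in the surrogate range (U+D800 to U+DFFF) and another code unit
// follows. n is treated as the lead surrogate and the next code unit as
// the trail surrogate; neither is range-checked here.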
var trailSurrogate = string[i+1].charCodeAt(0);
var codePoint = (((n-0xD800)*0x400)+(trailSurrogate-0xDC00)+0x10000);
string[i] = "&#x"+codePoint.toString(16).toUpperCase()+";";
string[i+1] = "";
i++;
}
}
return string.join("");