diff --git a/src/Lexer.js b/src/Lexer.js index 648c705b9..5243bf280 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -63,7 +63,6 @@ Token.prototype.range = function(endToken, text) { /* The following tokenRegex * - matches typical whitespace (but not NBSP etc.) using its first group - * - matches symbol combinations which result in a single output character * - does not match any control character \x00-\x1f except whitespace * - does not match a bare backslash * - matches any ASCII character except those just mentioned @@ -78,9 +77,8 @@ Token.prototype.range = function(endToken, text) { * still reject the input. */ var tokenRegex = new RegExp( - "([ \r\n\t]+)|(" + // whitespace - "---?" + // special combinations - "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint + "([ \r\n\t]+)|" + // whitespace + "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name ")" diff --git a/src/Parser.js b/src/Parser.js index 64b1689b7..4bfde913b 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -545,7 +545,7 @@ Parser.prototype.parseArguments = function(func, funcData) { if (argType) { arg = this.parseGroupOfType(argType, true); } else { - arg = this.parseOptionalGroup(); + arg = this.parseGroup(true); } if (!arg) { args.push(null); @@ -623,12 +623,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) { } // By the time we get here, innerMode is one of "text" or "math". // We switch the mode of the parser, recurse, then restore the old mode. - var res; - if (optional) { - res = this.parseOptionalGroup(); - } else { - res = this.parseGroup(); - } + var res = this.parseGroup(optional); this.switchMode(outerMode); return res; }; @@ -638,7 +633,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) { * brace-enclosed tokens plus some position information. 
* * @param {string} modeName Used to describe the mode in error messages - * @param {boolean} optional Whether the group is optional or required + * @param {boolean=} optional Whether the group is optional or required */ Parser.prototype.parseStringGroup = function(modeName, optional) { if (optional && this.nextToken.text !== "[") { @@ -707,53 +702,71 @@ Parser.prototype.parseSizeGroup = function(optional) { }; /** - * Parses a group, which is either a single nucleus (like "x") or an expression - * in braces (like "{x+y}") + * If the argument is false or absent, this parses an ordinary group, + * which is either a single nucleus (like "x") or an expression + * in braces (like "{x+y}"). + * If the argument is true, it parses either a bracket-delimited expression + * (like "[x+y]") or returns null to indicate the absence of a + * bracket-enclosed group. * + * @param {boolean=} optional Whether the group is optional or required * @return {?ParseFuncOrArgument} */ -Parser.prototype.parseGroup = function() { +Parser.prototype.parseGroup = function(optional) { var firstToken = this.nextToken; // Try to parse an open brace - if (this.nextToken.text === "{") { + if (this.nextToken.text === (optional ? "[" : "{")) { // If we get a brace, parse an expression this.consume(); - var expression = this.parseExpression(false); + var expression = this.parseExpression(false, optional ? "]" : null); var lastToken = this.nextToken; // Make sure we get a close brace - this.expect("}"); + this.expect(optional ? "]" : "}"); + if (this.mode === "text") { + this.formLigatures(expression); + } return new ParseFuncOrArgument( new ParseNode("ordgroup", expression, this.mode, firstToken, lastToken), false); } else { - // Otherwise, just return a nucleus - return this.parseSymbol(); + // Otherwise, just return a nucleus, or nothing for an optional group + return optional ? 
null : this.parseSymbol(); } }; /** - * Parses a group, which is an expression in brackets (like "[x+y]") + * Form ligature-like combinations of characters for text mode. + * This includes inputs like "--", "---", "``" and "''". + * The result will simply replace multiple textord nodes with a single + * character in each value by a single textord node having multiple + * characters in its value. The representation is still ASCII source. * - * @return {?ParseFuncOrArgument} + * @param {Array.<ParseNode>} group the nodes of this group, + * list will be modified in place */ -Parser.prototype.parseOptionalGroup = function() { - var firstToken = this.nextToken; - // Try to parse an open bracket - if (this.nextToken.text === "[") { - // If we get a brace, parse an expression - this.consume(); - var expression = this.parseExpression(false, "]"); - var lastToken = this.nextToken; - // Make sure we get a close bracket - this.expect("]"); - return new ParseFuncOrArgument( - new ParseNode("ordgroup", expression, this.mode, - firstToken, lastToken), - false); - } else { - // Otherwise, return null, - return null; +Parser.prototype.formLigatures = function(group) { + var i; + var n = group.length - 1; + for (i = 0; i < n; ++i) { + var a = group[i]; + var v = a.value; + if (v === "-" && group[i + 1].value === "-") { + if (i + 1 < n && group[i + 2].value === "-") { + group.splice(i, 3, new ParseNode( + "textord", "---", "text", a, group[i + 2])); + n -= 2; + } else { + group.splice(i, 2, new ParseNode( + "textord", "--", "text", a, group[i + 1])); + n -= 1; + } + } + if ((v === "'" || v === "`") && group[i + 1].value === v) { + group.splice(i, 2, new ParseNode( + "textord", v + v, "text", a, group[i + 1])); + n -= 1; + } } }; diff --git a/src/buildCommon.js b/src/buildCommon.js index b60e1860a..d793b5b38 100644 --- a/src/buildCommon.js +++ b/src/buildCommon.js @@ -23,9 +23,11 @@ var greekCapitals = [ "\\Omega", ]; -var dotlessLetters = [ +// The following have to be loaded from Main-Italic 
font, using class mainit +var mainitLetters = [ "\u0131", // dotless i, \imath "\u0237", // dotless j, \jmath + "\u00a3", // \pounds ]; /** @@ -101,7 +103,7 @@ var mathit = function(value, mode, color, classes) { if (/[0-9]/.test(value.charAt(0)) || // glyphs for \imath and \jmath do not exist in Math-Italic so we // need to use Main-Italic instead - utils.contains(dotlessLetters, value) || + utils.contains(mainitLetters, value) || utils.contains(greekCapitals, value)) { return makeSymbol( value, "Main-Italic", mode, color, classes.concat(["mainit"])); @@ -126,7 +128,7 @@ var makeOrd = function(group, options, type) { var font = options.font; if (font) { - if (font === "mathit" || utils.contains(dotlessLetters, value)) { + if (font === "mathit" || utils.contains(mainitLetters, value)) { return mathit(value, mode, color, classes); } else { var fontName = fontMap[font].fontName; diff --git a/src/symbols.js b/src/symbols.js index 6170cb5e7..aef7c8748 100644 --- a/src/symbols.js +++ b/src/symbols.js @@ -589,6 +589,18 @@ defineSymbol(math, main, accent, "\u02d9", "\\dot"); defineSymbol(math, main, mathord, "\u0131", "\\imath"); defineSymbol(math, main, mathord, "\u0237", "\\jmath"); +defineSymbol(text, main, textord, "\u2013", "--"); +defineSymbol(text, main, textord, "\u2014", "---"); +defineSymbol(text, main, textord, "\u2018", "`"); +defineSymbol(text, main, textord, "\u2019", "'"); +defineSymbol(text, main, textord, "\u201c", "``"); +defineSymbol(text, main, textord, "\u201d", "''"); +defineSymbol(math, main, textord, "\u00b0", "\\degree"); +defineSymbol(text, main, textord, "\u00b0", "\\degree"); +defineSymbol(math, main, mathord, "\u00a3", "\\pounds"); +defineSymbol(math, ams, textord, "\u2720", "\\maltese"); +defineSymbol(text, ams, textord, "\u2720", "\\maltese"); + defineSymbol(text, main, spacing, "\u00a0", "\\ "); defineSymbol(text, main, spacing, "\u00a0", " "); defineSymbol(text, main, spacing, "\u00a0", "~"); @@ -605,7 +617,7 @@ for (i = 0; i < 
mathTextSymbols.length; i++) { } // All of these are textords in text mode -var textSymbols = "0123456789`!@*()-=+[]'\";:?/.,"; +var textSymbols = "0123456789!@*()-=+[]\";:?/.,"; for (i = 0; i < textSymbols.length; i++) { ch = textSymbols.charAt(i); defineSymbol(text, main, textord, ch, ch); diff --git a/test/screenshotter/images/DashesAndQuotes-chrome.png b/test/screenshotter/images/DashesAndQuotes-chrome.png new file mode 100644 index 000000000..14fdba805 Binary files /dev/null and b/test/screenshotter/images/DashesAndQuotes-chrome.png differ diff --git a/test/screenshotter/images/DashesAndQuotes-firefox.png b/test/screenshotter/images/DashesAndQuotes-firefox.png new file mode 100644 index 000000000..bf62c8511 Binary files /dev/null and b/test/screenshotter/images/DashesAndQuotes-firefox.png differ diff --git a/test/screenshotter/images/Symbols1-chrome.png b/test/screenshotter/images/Symbols1-chrome.png new file mode 100644 index 000000000..d2adc7c85 Binary files /dev/null and b/test/screenshotter/images/Symbols1-chrome.png differ diff --git a/test/screenshotter/images/Symbols1-firefox.png b/test/screenshotter/images/Symbols1-firefox.png new file mode 100644 index 000000000..c6865972b Binary files /dev/null and b/test/screenshotter/images/Symbols1-firefox.png differ diff --git a/test/screenshotter/ss_data.yaml b/test/screenshotter/ss_data.yaml index a2ae511d7..4f2ce6d6c 100644 --- a/test/screenshotter/ss_data.yaml +++ b/test/screenshotter/ss_data.yaml @@ -36,6 +36,7 @@ Cases: | Colors: tex: \blue{a}\color{#0f0}{b}\color{red}{c} nolatex: different syntax and different scope +DashesAndQuotes: \text{``a'' b---c -- d----`e'-{-}-f}--``x'' DeepFontSizing: tex: | a^{\big| x^{\big(}}_{\Big\uparrow} + @@ -109,6 +110,9 @@ SupSubHorizSpacing: | SupSubLeftAlignReset: | \omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}} SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f +Symbols1: | + \maltese\degree\pounds\$ + 
\text{\maltese\degree} Text: \frac{a}{b}\text{c~ {ab} \ e}+fg UnsupportedCmds: tex: \err\,\frac\fracerr3\,2^\superr_\suberr\,\sqrt\sqrterr diff --git a/test/screenshotter/test.tex b/test/screenshotter/test.tex index 2945e8b8c..85c5d2032 100644 --- a/test/screenshotter/test.tex +++ b/test/screenshotter/test.tex @@ -1,6 +1,6 @@ \documentclass[10pt]{article} -\usepackage{amsmath,amssymb} +\usepackage{amsmath,amssymb,textcomp,gensymb} \usepackage[mathscr]{eucal} \usepackage{eufrak} \usepackage[papersize={133pt,100pt},margin=0.5pt]{geometry}