diff --git a/src/Lexer.js b/src/Lexer.js index 4d6697c6a..648c705b9 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -17,16 +17,50 @@ var ParseError = require("./ParseError"); // The main lexer class function Lexer(input) { - this._input = input; + this.input = input; + this.pos = 0; } -// The resulting token returned from `lex`. -function Token(text, data, position) { +/** + * The resulting token returned from `lex`. + * + * It consists of the token text plus some position information. + * The position information is essentially a range in an input string, + * but instead of referencing the bare input string, we refer to the lexer. + * That way it is possible to attach extra metadata to the input string, + * like for example a file name or similar. + * + * The position information (all three parameters) is optional, + * so it is OK to construct synthetic tokens if appropriate. + * Not providing available position information may lead to + * degraded error reporting, though. + * + * @param {string} text the text of this token + * @param {number=} start the start offset, zero-based inclusive + * @param {number=} end the end offset, zero-based exclusive + * @param {Lexer=} lexer the lexer which in turn holds the input string + */ +function Token(text, start, end, lexer) { this.text = text; - this.data = data; - this.position = position; + this.start = start; + this.end = end; + this.lexer = lexer; } +/** + * Given a pair of tokens (this and endToken), compute a “Token” encompassing + * the whole input range enclosed by these two. 
+ * + * @param {Token} endToken last token of the range, inclusive + * @param {string} text the text of the newly constructed token + */ +Token.prototype.range = function(endToken, text) { + if (endToken.lexer !== this.lexer) { + return new Token(text); // sorry, no position information available + } + return new Token(text, this.start, endToken.end, this.lexer); +}; + /* The following tokenRegex * - matches typical whitespace (but not NBSP etc.) using its first group * - matches symbol combinations which result in a single output character @@ -52,111 +86,26 @@ var tokenRegex = new RegExp( ")" ); -var whitespaceRegex = /\s*/; - /** - * This function lexes a single normal token. It takes a position and - * whether it should completely ignore whitespace or not. + * This function lexes a single token. */ -Lexer.prototype._innerLex = function(pos, ignoreWhitespace) { - var input = this._input; +Lexer.prototype.lex = function() { + var input = this.input; + var pos = this.pos; if (pos === input.length) { - return new Token("EOF", null, pos); + return new Token("EOF", pos, pos, this); } var match = matchAt(tokenRegex, input, pos); if (match === null) { throw new ParseError( "Unexpected character: '" + input[pos] + "'", - this, pos); - } else if (match[2]) { // matched non-whitespace - return new Token(match[2], null, pos + match[2].length); - } else if (ignoreWhitespace) { - return this._innerLex(pos + match[1].length, true); - } else { // concatenate whitespace to a single space - return new Token(" ", null, pos + match[1].length); - } -}; - -// A regex to match a CSS color (like #ffffff or BlueViolet) -var cssColor = /#[a-z0-9]+|[a-z]+/i; - -/** - * This function lexes a CSS color. 
- */ -Lexer.prototype._innerLexColor = function(pos) { - var input = this._input; - - // Ignore whitespace - var whitespace = matchAt(whitespaceRegex, input, pos)[0]; - pos += whitespace.length; - - var match; - if ((match = matchAt(cssColor, input, pos))) { - // If we look like a color, return a color - return new Token(match[0], null, pos + match[0].length); - } else { - throw new ParseError("Invalid color", this, pos); - } -}; - -// A regex to match a dimension. Dimensions look like -// "1.2em" or ".4pt" or "1 ex" -var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/; - -/** - * This function lexes a dimension. - */ -Lexer.prototype._innerLexSize = function(pos) { - var input = this._input; - - // Ignore whitespace - var whitespace = matchAt(whitespaceRegex, input, pos)[0]; - pos += whitespace.length; - - var match; - if ((match = matchAt(sizeRegex, input, pos))) { - var unit = match[3]; - // We only currently handle "em" and "ex" units - if (unit !== "em" && unit !== "ex") { - throw new ParseError("Invalid unit: '" + unit + "'", this, pos); - } - return new Token(match[0], { - number: +(match[1] + match[2]), - unit: unit, - }, pos + match[0].length); - } - - throw new ParseError("Invalid size", this, pos); -}; - -/** - * This function lexes a string of whitespace. - */ -Lexer.prototype._innerLexWhitespace = function(pos) { - var input = this._input; - - var whitespace = matchAt(whitespaceRegex, input, pos)[0]; - pos += whitespace.length; - - return new Token(whitespace[0], null, pos); -}; - -/** - * This function lexes a single token starting at `pos` and of the given mode. - * Based on the mode, we defer to one of the `_innerLex` functions. 
- */ -Lexer.prototype.lex = function(pos, mode) { - if (mode === "math") { - return this._innerLex(pos, true); - } else if (mode === "text") { - return this._innerLex(pos, false); - } else if (mode === "color") { - return this._innerLexColor(pos); - } else if (mode === "size") { - return this._innerLexSize(pos); - } else if (mode === "whitespace") { - return this._innerLexWhitespace(pos); + new Token(input[pos], pos, pos + 1, this)); } + var text = match[2] || " "; + var start = this.pos; + this.pos += match[0].length; + var end = this.pos; + return new Token(text, start, end, this); }; module.exports = Lexer; diff --git a/src/MacroExpander.js b/src/MacroExpander.js new file mode 100644 index 000000000..c8dde4aea --- /dev/null +++ b/src/MacroExpander.js @@ -0,0 +1,70 @@ +/** + * This file contains the “gullet” where macros are expanded + * until only non-macro tokens remain. + */ + +var Lexer = require("./Lexer"); + +function MacroExpander(input, macros) { + this.lexer = new Lexer(input); + this.macros = macros; + this.stack = []; // contains tokens in REVERSE order + this.discardedWhiteSpace = []; +} + +/** + * Recursively expand first token, then return first non-expandable token. 
+ */ +MacroExpander.prototype.nextToken = function() { + for (;;) { + if (this.stack.length === 0) { + this.stack.push(this.lexer.lex()); + } + var topToken = this.stack.pop(); + var name = topToken.text; + if (!(name.charAt(0) === "\\" && this.macros.hasOwnProperty(name))) { + return topToken; + } + var expansion = this.macros[name]; + if (typeof expansion === "string") { + var bodyLexer = new Lexer(expansion); + expansion = []; + var tok = bodyLexer.lex(); + while (tok.text !== "EOF") { + expansion.push(tok); + tok = bodyLexer.lex(); + } + expansion.reverse(); // to fit in with stack using push and pop + this.macros[name] = expansion; + } + this.stack = this.stack.concat(expansion); + } +}; + +MacroExpander.prototype.get = function(ignoreSpace) { + this.discardedWhiteSpace = []; + var token = this.nextToken(); + if (ignoreSpace) { + while (token.text === " ") { + this.discardedWhiteSpace.push(token); + token = this.nextToken(); + } + } + return token; +}; + +/** + * Undo the effect of the preceding call to the get method. + * A call to this method MUST be immediately preceded and immediately followed + * by a call to get. Only used during mode switching, i.e. after one token + * was got in the old mode but should get got again in a new mode + * with possibly different whitespace handling. + */ +MacroExpander.prototype.unget = function(token) { + this.stack.push(token); + while (this.discardedWhiteSpace.length !== 0) { + this.stack.push(this.discardedWhiteSpace.pop()); + } +}; + +module.exports = MacroExpander; diff --git a/src/ParseError.js b/src/ParseError.js index 320f0bd69..20bea4af9 100644 --- a/src/ParseError.js +++ b/src/ParseError.js @@ -2,26 +2,50 @@ * This is the ParseError class, which is the main error thrown by KaTeX * functions when something has gone wrong. This is used to distinguish internal * errors from errors in the expression that the user provided. 
+ * + * If possible, a caller should provide a Token or ParseNode with information + * about where in the source string the problem occurred. + * + * @param {string} message The error message + * @param {(Token|ParseNode)=} token An object providing position information */ -function ParseError(message, lexer, position) { +function ParseError(message, token) { var error = "KaTeX parse error: " + message; + var start; + var end; - if (lexer !== undefined && position !== undefined) { + if (token && token.lexer && token.start <= token.end) { // If we have the input and a position, make the error a bit fancier - // Prepend some information - error += " at position " + position + ": "; - // Get the input - var input = lexer._input; - // Insert a combining underscore at the correct position - input = input.slice(0, position) + "\u0332" + - input.slice(position); + var input = token.lexer.input; + + // Prepend some information + start = token.start; + end = token.end; + if (start === input.length) { + error += " at end of input: "; + } else { + error += " at position " + (start + 1) + ": "; + } + + // Underline token in question using combining underscores + var underlined = input.slice(start, end).replace(/[^]/g, "$&\u0332"); // Extract some context from the input and add it to the error - var begin = Math.max(0, position - 15); - var end = position + 15; - error += input.slice(begin, end); + var left; + if (start > 15) { + left = "…" + input.slice(start - 15, start); + } else { + left = input.slice(0, start); + } + var right; + if (end + 15 < input.length) { + right = input.slice(end, end + 15) + "…"; + } else { + right = input.slice(end); + } + error += left + underlined + right; } // Some hackery to make ParseError a prototype of Error @@ -30,7 +54,7 @@ function ParseError(message, lexer, position) { self.name = "ParseError"; self.__proto__ = ParseError.prototype; - self.position = position; + self.position = start; return self; } diff --git a/src/Parser.js 
b/src/Parser.js index 15b808e17..64b1689b7 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -1,7 +1,7 @@ /* eslint no-constant-condition:0 */ var functions = require("./functions"); var environments = require("./environments"); -var Lexer = require("./Lexer"); +var MacroExpander = require("./MacroExpander"); var symbols = require("./symbols"); var utils = require("./utils"); @@ -46,8 +46,9 @@ var ParseError = require("./ParseError"); * Main Parser class */ function Parser(input, settings) { - // Make a new lexer - this.lexer = new Lexer(input); + // Create a new macro expander (gullet) and (indirectly via that) also a + // new lexer (mouth) for this parser (stomach, in the language of TeX) + this.gullet = new MacroExpander(input, settings.macros); // Store the settings for use in parsing this.settings = settings; } @@ -58,10 +59,11 @@ var ParseNode = parseData.ParseNode; * An initial function (without its arguments), or an argument to a function. * The `result` argument should be a ParseNode. */ -function ParseFuncOrArgument(result, isFunction) { +function ParseFuncOrArgument(result, isFunction, token) { this.result = result; // Is this a function (i.e. is it something defined in functions.js)? this.isFunction = isFunction; + this.token = token; } /** @@ -75,7 +77,7 @@ Parser.prototype.expect = function(text, consume) { if (this.nextToken.text !== text) { throw new ParseError( "Expected '" + text + "', got '" + this.nextToken.text + "'", - this.lexer, this.nextToken.position + this.nextToken ); } if (consume !== false) { @@ -88,8 +90,13 @@ Parser.prototype.expect = function(text, consume) { * and fetches the one after that as the new look ahead. 
*/ Parser.prototype.consume = function() { - this.pos = this.nextToken.position; - this.nextToken = this.lexer.lex(this.pos, this.mode); + this.nextToken = this.gullet.get(this.mode === "math"); +}; + +Parser.prototype.switchMode = function(newMode) { + this.gullet.unget(this.nextToken); + this.mode = newMode; + this.consume(); }; /** @@ -100,8 +107,7 @@ Parser.prototype.consume = function() { Parser.prototype.parse = function() { // Try to parse the input this.mode = "math"; - this.pos = 0; - this.nextToken = this.lexer.lex(this.pos, this.mode); + this.consume(); var parse = this.parseInput(); return parse; }; @@ -122,26 +128,29 @@ var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"]; /** * Parses an "expression", which is a list of atoms. * - * @param {boolean} breakOnInfix Should the parsing stop when we hit infix + * @param {boolean} breakOnInfix Should the parsing stop when we hit infix * nodes? This happens when functions have higher precendence * than infix nodes in implicit parses. * - * @param {?string} breakOnToken The token that the expression should end with, - * or `null` if something else should end the expression. + * @param {?string} breakOnTokenText The text of the token that the expression + * should end with, or `null` if something else should end the + * expression. 
* * @return {ParseNode} */ -Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) { +Parser.prototype.parseExpression = function(breakOnInfix, breakOnTokenText) { var body = []; // Keep adding atoms to the body until we can't parse any more atoms (either // we reached the end, a }, or a \right) while (true) { var lex = this.nextToken; - var pos = this.pos; if (endOfExpression.indexOf(lex.text) !== -1) { break; } - if (breakOnToken && lex.text === breakOnToken) { + if (breakOnTokenText && lex.text === breakOnTokenText) { + break; + } + if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) { break; } var atom = this.parseAtom(); @@ -149,19 +158,11 @@ Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) { if (!this.settings.throwOnError && lex.text[0] === "\\") { var errorNode = this.handleUnsupportedCmd(); body.push(errorNode); - - pos = lex.position; continue; } break; } - if (breakOnInfix && atom.type === "infix") { - // rewind so we can parse the infix atom again - this.pos = pos; - this.nextToken = lex; - break; - } body.push(atom); } return this.handleInfixNodes(body); @@ -184,8 +185,9 @@ Parser.prototype.handleInfixNodes = function(body) { var node = body[i]; if (node.type === "infix") { if (overIndex !== -1) { - throw new ParseError("only one infix operator per group", - this.lexer, -1); + throw new ParseError( + "only one infix operator per group", + node.value.token); } overIndex = i; funcName = node.value.replaceWith; @@ -226,8 +228,8 @@ var SUPSUB_GREEDINESS = 1; * Handle a subscript or superscript with nice errors. 
*/ Parser.prototype.handleSupSubscript = function(name) { - var symbol = this.nextToken.text; - var symPos = this.pos; + var symbolToken = this.nextToken; + var symbol = symbolToken.text; this.consume(); var group = this.parseGroup(); @@ -237,8 +239,7 @@ Parser.prototype.handleSupSubscript = function(name) { } else { throw new ParseError( "Expected group after '" + symbol + "'", - this.lexer, - symPos + 1 + symbolToken ); } } else if (group.isFunction) { @@ -250,8 +251,7 @@ Parser.prototype.handleSupSubscript = function(name) { } else { throw new ParseError( "Got function '" + group.result + "' with no arguments " + - "as " + name, - this.lexer, symPos + 1); + "as " + name, symbolToken); } } else { return group.result; @@ -319,7 +319,7 @@ Parser.prototype.parseAtom = function() { if (!base || base.type !== "op") { throw new ParseError( "Limit controls must follow a math operator", - this.lexer, this.pos); + lex); } else { var limits = lex.text === "\\limits"; base.value.limits = limits; @@ -329,15 +329,13 @@ Parser.prototype.parseAtom = function() { } else if (lex.text === "^") { // We got a superscript start if (superscript) { - throw new ParseError( - "Double superscript", this.lexer, this.pos); + throw new ParseError("Double superscript", lex); } superscript = this.handleSupSubscript("superscript"); } else if (lex.text === "_") { // We got a subscript start if (subscript) { - throw new ParseError( - "Double subscript", this.lexer, this.pos); + throw new ParseError("Double subscript", lex); } subscript = this.handleSupSubscript("subscript"); } else if (lex.text === "'") { @@ -427,8 +425,7 @@ Parser.prototype.parseImplicitGroup = function() { var envName = begin.value.name; if (!environments.hasOwnProperty(envName)) { throw new ParseError( - "No such environment: " + envName, - this.lexer, begin.value.namepos); + "No such environment: " + envName, begin.value.nameGroup); } // Build the environment object. 
Arguments and other information will // be made available to the begin and end methods using properties. @@ -438,19 +435,17 @@ Parser.prototype.parseImplicitGroup = function() { mode: this.mode, envName: envName, parser: this, - lexer: this.lexer, positions: args.pop(), }; var result = env.handler(context, args); this.expect("\\end", false); + var endNameToken = this.nextToken; var end = this.parseFunction(); if (end.value.name !== envName) { throw new ParseError( "Mismatch: \\begin{" + envName + "} matched " + "by \\end{" + end.value.name + "}", - this.lexer /* , end.value.namepos */); - // TODO: Add position to the above line and adjust test case, - // requires #385 to get merged first + endNameToken); } result.position = end.position; return result; @@ -497,11 +492,12 @@ Parser.prototype.parseFunction = function(baseGroup) { if (this.mode === "text" && !funcData.allowedInText) { throw new ParseError( "Can't use function '" + func + "' in text mode", - this.lexer, baseGroup.position); + baseGroup.token); } var args = this.parseArguments(func, funcData); - var result = this.callFunction(func, args, args.pop()); + var token = baseGroup.token; + var result = this.callFunction(func, args, args.pop(), token); return new ParseNode(result.type, result, this.mode); } else { return baseGroup.result; @@ -514,12 +510,12 @@ Parser.prototype.parseFunction = function(baseGroup) { /** * Call a function handler with a suitable context and arguments. 
*/ -Parser.prototype.callFunction = function(name, args, positions) { +Parser.prototype.callFunction = function(name, args, positions, token) { var context = { funcName: name, parser: this, - lexer: this.lexer, positions: positions, + token: token, }; return functions[name].handler(context, args); }; @@ -542,11 +538,12 @@ Parser.prototype.parseArguments = function(func, funcData) { var args = []; for (var i = 0; i < totalArgs; i++) { + var nextToken = this.nextToken; var argType = funcData.argTypes && funcData.argTypes[i]; var arg; if (i < funcData.numOptionalArgs) { if (argType) { - arg = this.parseSpecialGroup(argType, true); + arg = this.parseGroupOfType(argType, true); } else { arg = this.parseOptionalGroup(); } @@ -557,7 +554,7 @@ Parser.prototype.parseArguments = function(func, funcData) { } } else { if (argType) { - arg = this.parseSpecialGroup(argType); + arg = this.parseGroupOfType(argType); } else { arg = this.parseGroup(); } @@ -569,8 +566,7 @@ Parser.prototype.parseArguments = function(func, funcData) { false); } else { throw new ParseError( - "Expected group after '" + func + "'", - this.lexer, this.pos); + "Expected group after '" + func + "'", nextToken); } } } @@ -583,8 +579,7 @@ Parser.prototype.parseArguments = function(func, funcData) { } else { throw new ParseError( "Got function '" + arg.result + "' as " + - "argument to '" + func + "'", - this.lexer, this.pos - 1); + "argument to '" + func + "'", nextToken); } } else { argNode = arg.result; @@ -600,64 +595,117 @@ Parser.prototype.parseArguments = function(func, funcData) { /** - * Parses a group when the mode is changing. Takes a position, a new mode, and - * an outer mode that is used to parse the outside. + * Parses a group when the mode is changing. 
* * @return {?ParseFuncOrArgument} */ -Parser.prototype.parseSpecialGroup = function(innerMode, optional) { +Parser.prototype.parseGroupOfType = function(innerMode, optional) { var outerMode = this.mode; // Handle `original` argTypes if (innerMode === "original") { innerMode = outerMode; } - if (innerMode === "color" || innerMode === "size") { - // color and size modes are special because they should have braces and - // should only lex a single symbol inside - var openBrace = this.nextToken; - if (optional && openBrace.text !== "[") { - // optional arguments should return null if they don't exist - return null; - } - // The call to expect will lex the token after the '{' in inner mode - this.mode = innerMode; - this.expect(optional ? "[" : "{"); - var inner = this.nextToken; - this.mode = outerMode; - var data; - if (innerMode === "color") { - data = inner.text; - } else { - data = inner.data; - } - this.consume(); // consume the token stored in inner - this.expect(optional ? "]" : "}"); - return new ParseFuncOrArgument( - new ParseNode(innerMode, data, outerMode), - false); - } else if (innerMode === "text") { - // text mode is special because it should ignore the whitespace before - // it - var whitespace = this.lexer.lex(this.pos, "whitespace"); - this.pos = whitespace.position; + if (innerMode === "color") { + return this.parseColorGroup(optional); + } + if (innerMode === "size") { + return this.parseSizeGroup(optional); } + this.switchMode(innerMode); + if (innerMode === "text") { + // text mode is special because it should ignore the whitespace before + // it + while (this.nextToken.text === " ") { + this.consume(); + } + } // By the time we get here, innerMode is one of "text" or "math". // We switch the mode of the parser, recurse, then restore the old mode. 
- this.mode = innerMode; - this.nextToken = this.lexer.lex(this.pos, innerMode); var res; if (optional) { res = this.parseOptionalGroup(); } else { res = this.parseGroup(); } - this.mode = outerMode; - this.nextToken = this.lexer.lex(this.pos, outerMode); + this.switchMode(outerMode); return res; }; +/** + * Parses a group, essentially returning the string formed by the + * brace-enclosed tokens plus some position information. + * + * @param {string} modeName Used to describe the mode in error messages + * @param {boolean} optional Whether the group is optional or required + */ +Parser.prototype.parseStringGroup = function(modeName, optional) { + if (optional && this.nextToken.text !== "[") { + return null; + } + var outerMode = this.mode; + this.mode = "text"; + this.expect(optional ? "[" : "{"); + var str = ""; + var firstToken = this.nextToken; + var lastToken = firstToken; + while (this.nextToken.text !== (optional ? "]" : "}")) { + if (this.nextToken.text === "EOF") { + throw new ParseError( + "Unexpected end of input in " + modeName, + firstToken.range(this.nextToken, str)); + } + lastToken = this.nextToken; + str += lastToken.text; + this.consume(); + } + this.mode = outerMode; + this.expect(optional ? "]" : "}"); + return firstToken.range(lastToken, str); +}; + +/** + * Parses a color description. + */ +Parser.prototype.parseColorGroup = function(optional) { + var res = this.parseStringGroup("color", optional); + if (!res) { + return null; + } + var match = (/^(#[a-z0-9]+|[a-z]+)$/i).exec(res.text); + if (!match) { + throw new ParseError("Invalid color: '" + res.text + "'", res); + } + return new ParseFuncOrArgument( + new ParseNode("color", match[0], this.mode), + false); +}; + +/** + * Parses a size specification, consisting of magnitude and unit. + */ +Parser.prototype.parseSizeGroup = function(optional) { + var res = this.parseStringGroup("size", optional); + if (!res) { + return null; + } + var match = (/(-?) 
*(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/).exec(res.text); + if (!match) { + throw new ParseError("Invalid size: '" + res.text + "'", res); + } + var data = { + number: +(match[1] + match[2]), // sign + magnitude, cast to number + unit: match[3], + }; + if (data.unit !== "em" && data.unit !== "ex") { + throw new ParseError("Invalid unit: '" + data.unit + "'", res); + } + return new ParseFuncOrArgument( + new ParseNode("size", data, this.mode), + false); +}; + /** * Parses a group, which is either a single nucleus (like "x") or an expression * in braces (like "{x+y}") @@ -665,15 +713,18 @@ Parser.prototype.parseSpecialGroup = function(innerMode, optional) { * @return {?ParseFuncOrArgument} */ Parser.prototype.parseGroup = function() { + var firstToken = this.nextToken; // Try to parse an open brace if (this.nextToken.text === "{") { // If we get a brace, parse an expression this.consume(); var expression = this.parseExpression(false); + var lastToken = this.nextToken; // Make sure we get a close brace this.expect("}"); return new ParseFuncOrArgument( - new ParseNode("ordgroup", expression, this.mode), + new ParseNode("ordgroup", expression, this.mode, + firstToken, lastToken), false); } else { // Otherwise, just return a nucleus @@ -687,15 +738,18 @@ Parser.prototype.parseGroup = function() { * @return {?ParseFuncOrArgument} */ Parser.prototype.parseOptionalGroup = function() { + var firstToken = this.nextToken; // Try to parse an open bracket if (this.nextToken.text === "[") { // If we get a brace, parse an expression this.consume(); var expression = this.parseExpression(false, "]"); + var lastToken = this.nextToken; // Make sure we get a close bracket this.expect("]"); return new ParseFuncOrArgument( - new ParseNode("ordgroup", expression, this.mode), + new ParseNode("ordgroup", expression, this.mode, + firstToken, lastToken), false); } else { // Otherwise, return null, @@ -718,15 +772,15 @@ Parser.prototype.parseSymbol = function() { // say that it is a function.
return new ParseFuncOrArgument( nucleus.text, - true); + true, nucleus); } else if (symbols[this.mode][nucleus.text]) { this.consume(); // Otherwise if this is a no-argument function, find the type it // corresponds to in the symbols map return new ParseFuncOrArgument( new ParseNode(symbols[this.mode][nucleus.text].group, - nucleus.text, this.mode), - false); + nucleus.text, this.mode, nucleus), + false, nucleus); } else { return null; } diff --git a/src/Settings.js b/src/Settings.js index 644014504..b6b96595b 100644 --- a/src/Settings.js +++ b/src/Settings.js @@ -23,6 +23,7 @@ function Settings(options) { this.displayMode = get(options.displayMode, false); this.throwOnError = get(options.throwOnError, true); this.errorColor = get(options.errorColor, "#cc0000"); + this.macros = options.macros || {}; } module.exports = Settings; diff --git a/src/environments.js b/src/environments.js index f0fff42a5..8a23a3dd2 100644 --- a/src/environments.js +++ b/src/environments.js @@ -28,10 +28,8 @@ function parseArray(parser, result) { row = []; body.push(row); } else { - // TODO: Clean up the following hack once #385 got merged - var pos = Math.min(parser.pos + 1, parser.lexer._input.length); throw new ParseError("Expected & or \\\\ or \\end", - parser.lexer, pos); + parser.nextToken); } } result.body = body; @@ -106,7 +104,7 @@ defineEnvironment("array", { } throw new ParseError( "Unknown column alignment: " + node.value, - context.lexer, context.positions[1]); + node); }); var res = { type: "array", diff --git a/src/functions.js b/src/functions.js index c236738e8..788824243 100644 --- a/src/functions.js +++ b/src/functions.js @@ -55,6 +55,7 @@ var ParseError = require("./ParseError"); * should parse. If the optional arguments aren't found, * `null` will be passed to the handler in their place. * (default 0) + * - infix: (optional) Must be true if the function is an infix operator. * * The last argument is that implementation, the handler for the function(s). 
* It is called to handle these functions and their arguments. @@ -91,6 +92,7 @@ function defineFunction(names, props, handler) { greediness: (props.greediness === undefined) ? 1 : props.greediness, allowedInText: !!props.allowedInText, numOptionalArgs: props.numOptionalArgs || 0, + infix: !!props.infix, handler: handler, }; for (var i = 0; i < names.length; ++i) { @@ -456,8 +458,7 @@ defineFunction([ if (!utils.contains(delimiters, delim.value)) { throw new ParseError( "Invalid delimiter: '" + delim.value + "' after '" + - context.funcName + "'", - context.lexer, context.positions[1]); + context.funcName + "'", delim); } // \left and \right are caught somewhere in Parser.js, which is @@ -536,6 +537,7 @@ defineFunction([ // Infix generalized fractions defineFunction(["\\over", "\\choose"], { numArgs: 0, + infix: true, }, function(context) { var replaceWith; switch (context.funcName) { @@ -551,6 +553,7 @@ defineFunction(["\\over", "\\choose"], { return { type: "infix", replaceWith: replaceWith, + token: context.token, }; }); @@ -574,9 +577,7 @@ defineFunction(["\\begin", "\\end"], { }, function(context, args) { var nameGroup = args[0]; if (nameGroup.type !== "ordgroup") { - throw new ParseError( - "Invalid environment name", - context.lexer, context.positions[1]); + throw new ParseError("Invalid environment name", nameGroup); } var name = ""; for (var i = 0; i < nameGroup.value.length; ++i) { @@ -585,6 +586,6 @@ defineFunction(["\\begin", "\\end"], { return { type: "environment", name: name, - namepos: context.positions[1], + nameGroup: nameGroup, }; }); diff --git a/src/parseData.js b/src/parseData.js index be8dd67f4..61ebd069a 100644 --- a/src/parseData.js +++ b/src/parseData.js @@ -1,10 +1,29 @@ /** * The resulting parse tree nodes of the parse tree. + * + * It is possible to provide position information, so that a ParseNode can + * fulfil a role similar to a Token in error reporting. + * For details on the corresponding properties see Token constructor. 
+ * Providing such information can lead to better error reporting. + * + * @param {string} type type of node, like e.g. "ordgroup" + * @param {?object} value type-specific representation of the node + * @param {string} mode parse mode in action for this node, + * "math" or "text" + * @param {Token=} firstToken first token of the input for this node, + * will omit position information if unset + * @param {Token=} lastToken last token of the input for this node, + * will default to firstToken if unset */ -function ParseNode(type, value, mode) { +function ParseNode(type, value, mode, firstToken, lastToken) { this.type = type; this.value = value; this.mode = mode; + if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) { + this.lexer = firstToken.lexer; + this.start = firstToken.start; + this.end = (lastToken || firstToken).end; + } } module.exports = { diff --git a/static/main.js b/static/main.js index 49dbcc5fd..7df2b327c 100644 --- a/static/main.js +++ b/static/main.js @@ -10,13 +10,9 @@ function init() { } if ("addEventListener" in permalink) { - permalink.addEventListener("click", function() { - window.location.search = "?text=" + encodeURIComponent(input.value); - }); + permalink.addEventListener("click", setSearch); } else { - permalink.attachEvent("click", function() { - window.location.search = "?text=" + encodeURIComponent(input.value); - }); + permalink.attachEvent("onclick", setSearch); } var match = (/(?:^\?|&)text=([^&]*)/).exec(window.location.search); @@ -24,11 +20,26 @@ function init() { input.value = decodeURIComponent(match[1]); } + var macros = {}; + var options = {}; + var macroRegex = /(?:^\?|&)(?:\\|%5[Cc])([A-Za-z]+)=([^&]*)/g; + var macroString = ""; + while ((match = macroRegex.exec(window.location.search)) !== null) { + options.macros = macros; + macros["\\" + match[1]] = decodeURIComponent(match[2]); + macroString += "&" + match[0].substr(1); + } + reprocess(); + function setSearch() { + window.location.search = + "?text=" +
encodeURIComponent(input.value) + macroString; + } + function reprocess() { try { - katex.render(input.value, math); + katex.render(input.value, math, options); } catch (e) { if (e.__proto__ == katex.ParseError.prototype) { console.error(e); diff --git a/test/errors-spec.js b/test/errors-spec.js index a583b2411..e20f3705a 100644 --- a/test/errors-spec.js +++ b/test/errors-spec.js @@ -61,16 +61,15 @@ beforeEach(function() { describe("Parser:", function() { describe("#handleInfixNodes", function() { - // TODO: The position information here is broken, should be fixed. it("rejects repeated infix operators", function() { expect("1\\over 2\\over 3").toFailWithParseError( - "only one infix operator per group at position -1: " + - "1\\over 2\\over "); + "only one infix operator per group at position 9: " + + "1\\over 2\\̲o̲v̲e̲r̲ 3"); }); it("rejects conflicting infix operators", function() { expect("1\\over 2\\choose 3").toFailWithParseError( - "only one infix operator per group at position -1: " + - "1\\over 2\\choos"); + "only one infix operator per group at position 9: " + + "1\\over 2\\̲c̲h̲o̲o̲s̲e̲ 3"); }); }); @@ -91,84 +90,85 @@ describe("Parser:", function() { }); describe("#parseAtom", function() { - // TODO: The positions in the following error messages appear to be - // off by one, i.e. they should be one character later. 
it("rejects \\limits without operator", function() { expect("\\alpha\\limits\\omega").toFailWithParseError( "Limit controls must follow a math operator" + - " at position 6: \\alpha̲\\limits\\omega"); + " at position 7: \\alpha\\̲l̲i̲m̲i̲t̲s̲\\omega"); }); it("rejects \\limits at the beginning of the input", function() { expect("\\limits\\omega").toFailWithParseError( "Limit controls must follow a math operator" + - " at position 0: ̲\\limits\\omega"); + " at position 1: \\̲l̲i̲m̲i̲t̲s̲\\omega"); }); it("rejects double superscripts", function() { expect("1^2^3").toFailWithParseError( - "Double superscript at position 3: 1^2̲^3"); + "Double superscript at position 4: 1^2^̲3"); expect("1^{2+3}_4^5").toFailWithParseError( - "Double superscript at position 9: 1^{2+3}_4̲^5"); + "Double superscript at position 10: 1^{2+3}_4^̲5"); }); it("rejects double subscripts", function() { expect("1_2_3").toFailWithParseError( - "Double subscript at position 3: 1_2̲_3"); + "Double subscript at position 4: 1_2_̲3"); expect("1_{2+3}^4_5").toFailWithParseError( - "Double subscript at position 9: 1_{2+3}^4̲_5"); + "Double subscript at position 10: 1_{2+3}^4_̲5"); }); }); describe("#parseImplicitGroup", function() { it("reports unknown environments", function() { expect("\\begin{foo}bar\\end{foo}").toFailWithParseError( - "No such environment: foo at position 11:" + - " \\begin{foo}̲bar\\end{foo}"); + "No such environment: foo at position 7:" + + " \\begin{̲f̲o̲o̲}̲bar\\end{foo}"); }); it("reports mismatched environments", function() { expect("\\begin{pmatrix}1&2\\\\3&4\\end{bmatrix}+5") .toFailWithParseError( - "Mismatch: \\begin{pmatrix} matched by \\end{bmatrix}"); + "Mismatch: \\begin{pmatrix} matched by \\end{bmatrix}" + + " at position 24: …matrix}1&2\\\\3&4\\̲e̲n̲d̲{bmatrix}+5"); }); }); describe("#parseFunction", function() { it("rejects math-mode functions in text mode", function() { - // TODO: The position info is missing here expect("\\text{\\sqrt2 is 
irrational}").toFailWithParseError( - "Can't use function '\\sqrt' in text mode"); + "Can't use function '\\sqrt' in text mode" + + " at position 7: \\text{\\̲s̲q̲r̲t̲2 is irrational…"); }); }); describe("#parseArguments", function() { it("complains about missing argument at end of input", function() { expect("2\\sqrt").toFailWithParseError( - "Expected group after '\\sqrt' at position 6: 2\\sqrt̲"); + "Expected group after '\\sqrt' at end of input: 2\\sqrt"); }); it("complains about missing argument at end of group", function() { expect("1^{2\\sqrt}").toFailWithParseError( - "Expected group after '\\sqrt' at position 9: 1^{2\\sqrt̲}"); + "Expected group after '\\sqrt'" + + " at position 10: 1^{2\\sqrt}̲"); }); it("complains about functions as arguments to others", function() { // TODO: The position looks pretty wrong here expect("\\sqrt\\over2").toFailWithParseError( "Got function '\\over' as argument to '\\sqrt'" + - " at position 9: \\sqrt\\ove̲r2"); + " at position 6: \\sqrt\\̲o̲v̲e̲r̲2"); }); }); describe("#parseArguments", function() { it("complains about missing argument at end of input", function() { expect("2\\sqrt").toFailWithParseError( - "Expected group after '\\sqrt' at position 6: 2\\sqrt̲"); + "Expected group after '\\sqrt' at end of input: 2\\sqrt"); }); it("complains about missing argument at end of group", function() { expect("1^{2\\sqrt}").toFailWithParseError( - "Expected group after '\\sqrt' at position 9: 1^{2\\sqrt̲}"); + "Expected group after '\\sqrt'" + + " at position 10: 1^{2\\sqrt}̲"); }); it("complains about functions as arguments to others", function() { // TODO: The position looks pretty wrong here expect("\\sqrt\\over2").toFailWithParseError( "Got function '\\over' as argument to '\\sqrt'" + - " at position 9: \\sqrt\\ove̲r2"); + " at position 6: \\sqrt\\̲o̲v̲e̲r̲2"); }); }); @@ -183,12 +183,12 @@ describe("Parser.expect calls:", function() { }); it("complains about extra \\end", function() { 
expect("x\\end{matrix}").toFailWithParseError( - "Expected 'EOF', got '\\end' at position 5:" + - " x\\end̲{matrix}"); + "Expected 'EOF', got '\\end' at position 2:" + + " x\\̲e̲n̲d̲{matrix}"); }); it("complains about top-level \\\\", function() { expect("1\\\\2").toFailWithParseError( - "Expected 'EOF', got '\\\\' at position 3: 1\\\\̲2"); + "Expected 'EOF', got '\\\\' at position 2: 1\\̲\\̲2"); }); it("complains about top-level &", function() { expect("1&2").toFailWithParseError( @@ -199,8 +199,8 @@ describe("Parser.expect calls:", function() { describe("#parseImplicitGroup expecting \\right", function() { it("rejects missing \\right", function() { expect("\\left(1+2)").toFailWithParseError( - "Expected '\\right', got 'EOF' at position 10:" + - " \\left(1+2)̲"); + "Expected '\\right', got 'EOF' at end of input:" + + " \\left(1+2)"); }); it("rejects incorrectly scoped \\right", function() { expect("{\\left(1+2}\\right)").toFailWithParseError( @@ -224,32 +224,42 @@ describe("Parser.expect calls:", function() { }); // Can't test for the [ of an optional group since it's optional it("complains about missing } for color", function() { - expect("\\color{#ffffff {text}").toFailWithParseError( - "Expected '}', got '{' at position 16:" + - " color{#ffffff {̲text}"); + expect("\\color{#ffffff{text}").toFailWithParseError( + "Invalid color: '#ffffff{text' at position 8:" + + " \\color{#̲f̲f̲f̲f̲f̲f̲{̲t̲e̲x̲t̲}"); }); it("complains about missing ] for size", function() { expect("\\rule[1em{2em}{3em}").toFailWithParseError( - "Expected ']', got '{' at position 10:" + - " \\rule[1em{̲2em}{3em}"); + "Unexpected end of input in size" + + " at position 7: \\rule[1̲e̲m̲{̲2̲e̲m̲}̲{̲3̲e̲m̲}̲"); + }); + it("complains about missing ] for size at end of input", function() { + expect("\\rule[1em").toFailWithParseError( + "Unexpected end of input in size" + + " at position 7: \\rule[1̲e̲m̲"); + }); + it("complains about missing } for color at end of input", function() { + 
expect("\\color{#123456").toFailWithParseError( + "Unexpected end of input in color" + + " at position 8: \\color{#̲1̲2̲3̲4̲5̲6̲"); }); }); describe("#parseGroup expecting }", function() { it("at end of file", function() { expect("\\sqrt{2").toFailWithParseError( - "Expected '}', got 'EOF' at position 7: \\sqrt{2̲"); + "Expected '}', got 'EOF' at end of input: \\sqrt{2"); }); }); describe("#parseOptionalGroup expecting ]", function() { it("at end of file", function() { expect("\\sqrt[3").toFailWithParseError( - "Expected ']', got 'EOF' at position 7: \\sqrt[3̲"); + "Expected ']', got 'EOF' at end of input: \\sqrt[3"); }); it("before group", function() { expect("\\sqrt[3{2}").toFailWithParseError( - "Expected ']', got 'EOF' at position 10: \\sqrt[3{2}̲"); + "Expected ']', got 'EOF' at end of input: \\sqrt[3{2}"); }); }); @@ -260,13 +270,13 @@ describe("environments.js:", function() { describe("parseArray", function() { it("rejects missing \\end", function() { expect("\\begin{matrix}1").toFailWithParseError( - "Expected & or \\\\ or \\end at position 15:" + - " \\begin{matrix}1̲"); + "Expected & or \\\\ or \\end at end of input:" + + " \\begin{matrix}1"); }); it("rejects incorrectly scoped \\end", function() { expect("{\\begin{matrix}1}\\end{matrix}").toFailWithParseError( "Expected & or \\\\\ or \\end at position 17:" + - " begin{matrix}1}̲\\end{matrix}"); + " …\\begin{matrix}1}̲\\end{matrix}"); }); }); @@ -274,8 +284,8 @@ describe("environments.js:", function() { it("rejects unknown column types", function() { // TODO: The error position here looks strange expect("\\begin{array}{cba}\\end{array}").toFailWithParseError( - "Unknown column alignment: b at position 18:" + - " gin{array}{cba}̲\\end{array}"); + "Unknown column alignment: b at position 16:" + + " \\begin{array}{cb̲a}\\end{array}"); }); }); @@ -298,9 +308,8 @@ describe("functions.js:", function() { describe("\\begin and \\end", function() { it("reject invalid environment names", function() { - 
expect("\\begin{foobar}\\end{foobar}").toFailWithParseError( - "No such environment: foobar at position 14:" + - " \\begin{foobar}̲\\end{foobar}"); + expect("\\begin x\\end y").toFailWithParseError( + "Invalid environment name at position 8: \\begin x̲\\end y"); }); }); @@ -311,34 +320,35 @@ describe("Lexer:", function() { describe("#_innerLex", function() { it("rejects lone surrogate char", function() { expect("\udcba").toFailWithParseError( - "Unexpected character: '\udcba' at position 0:" + - " \u0332\udcba"); + "Unexpected character: '\udcba' at position 1:" + + " \udcba\u0332"); }); it("rejects lone backslash at end of input", function() { expect("\\").toFailWithParseError( - "Unexpected character: '\\' at position 0: ̲\\"); + "Unexpected character: '\\' at position 1: \\̲"); }); }); describe("#_innerLexColor", function() { it("reject hex notation without #", function() { expect("\\color{1a2b3c}{foo}").toFailWithParseError( - "Invalid color at position 7: \\color{̲1a2b3c}{foo}"); + "Invalid color: '1a2b3c'" + + " at position 8: \\color{1̲a̲2̲b̲3̲c̲}{foo}"); }); }); describe("#_innerLexSize", function() { it("reject size without unit", function() { expect("\\rule{0}{2em}").toFailWithParseError( - "Invalid size at position 6: \\rule{̲0}{2em}"); + "Invalid size: '0' at position 7: \\rule{0̲}{2em}"); }); it("reject size with bogus unit", function() { expect("\\rule{1au}{2em}").toFailWithParseError( - "Invalid unit: 'au' at position 6: \\rule{̲1au}{2em}"); + "Invalid unit: 'au' at position 7: \\rule{1̲a̲u̲}{2em}"); }); it("reject size without number", function() { expect("\\rule{em}{2em}").toFailWithParseError( - "Invalid size at position 6: \\rule{̲em}{2em}"); + "Invalid size: 'em' at position 7: \\rule{e̲m̲}{2em}"); }); }); diff --git a/test/katex-spec.js b/test/katex-spec.js index 07b7d7519..af9688d9e 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -57,8 +57,39 @@ var getParsed = function(expr, settings) { return parseTree(expr, usedSettings); }; 
+var stripPositions = function(expr) { + if (typeof expr !== "object" || expr === null) { + return expr; + } + if (expr.lexer && typeof expr.start === "number") { + delete expr.lexer; + delete expr.start; + delete expr.end; + } + Object.keys(expr).forEach(function(key) { + stripPositions(expr[key]); + }); + return expr; +}; + +var parseAndSetResult = function(expr, result, settings) { + try { + return parseTree(expr, settings || defaultSettings); + } catch (e) { + result.pass = false; + if (e instanceof ParseError) { + result.message = "'" + expr + "' failed " + + "parsing with error: " + e.message; + } else { + result.message = "'" + expr + "' failed " + + "parsing with unknown error: " + e.message; + } + } +}; + beforeEach(function() { jasmine.addMatchers({ + toParse: function() { return { compare: function(actual, settings) { @@ -68,20 +99,7 @@ beforeEach(function() { pass: true, message: "'" + actual + "' succeeded parsing", }; - - try { - parseTree(actual, usedSettings); - } catch (e) { - result.pass = false; - if (e instanceof ParseError) { - result.message = "'" + actual + "' failed " + - "parsing with error: " + e.message; - } else { - result.message = "'" + actual + "' failed " + - "parsing with unknown error: " + e.message; - } - } - + parseAndSetResult(actual, result, usedSettings); return result; }, }; @@ -145,6 +163,36 @@ beforeEach(function() { }, }; }, + + toParseLike: function(util, baton) { + return { + compare: function(actual, expected) { + var result = { + pass: true, + message: "Parse trees of '" + actual + + "' and '" + expected + "' are equivalent", + }; + + var actualTree = parseAndSetResult(actual, result); + if (!actualTree) { + return result; + } + var expectedTree = parseAndSetResult(expected, result); + if (!expectedTree) { + return result; + } + stripPositions(actualTree); + stripPositions(expectedTree); + if (!util.equals(actualTree, expectedTree, baton)) { + result.pass = false; + result.message = "Parse trees of '" + actual + + "' 
and '" + expected + "' are not equivalent"; + } + return result; + }, + }; + }, + }); }); @@ -154,8 +202,8 @@ describe("A parser", function() { }); it("should ignore whitespace", function() { - var parseA = getParsed(" x y "); - var parseB = getParsed("xy"); + var parseA = stripPositions(getParsed(" x y ")); + var parseB = stripPositions(getParsed("xy")); expect(parseA).toEqual(parseB); }); }); @@ -340,8 +388,8 @@ describe("A subscript and superscript parser", function() { }); it("should produce the same thing regardless of order", function() { - var parseA = getParsed("x^2_3"); - var parseB = getParsed("x_3^2"); + var parseA = stripPositions(getParsed("x^2_3")); + var parseB = stripPositions(getParsed("x_3^2")); expect(parseA).toEqual(parseB); }); @@ -623,6 +671,13 @@ describe("An over parser", function() { expect(parse.value.denom).toBeDefined(); }); + it("should handle \\textstyle correctly", function() { + expect("\\textstyle 1 \\over 2") + .toParseLike("\\frac{\\textstyle 1}{2}"); + expect("{\\textstyle 1} \\over 2") + .toParseLike("\\frac{\\textstyle 1}{2}"); + }); + it("should handle nested factions", function() { var nestedOverExpression = "{1 \\over 2} \\over 3"; var parse = getParsed(nestedOverExpression)[0]; @@ -1523,7 +1578,7 @@ describe("A markup generator", function() { describe("A parse tree generator", function() { it("generates a tree", function() { - var tree = katex.__parse("\\sigma^2"); + var tree = stripPositions(katex.__parse("\\sigma^2")); expect(JSON.stringify(tree)).toEqual(JSON.stringify([ { "type": "supsub", @@ -1802,3 +1857,24 @@ describe("The symbol table integraty", function() { .toEqual(getBuilt("\\left\\lt\\frac{1}{x}\\right\\gt")); }); }); + +describe("A macro expander", function() { + + var compareParseTree = function(actual, expected, macros) { + var settings = new Settings({macros: macros}); + actual = stripPositions(parseTree(actual, settings)); + expected = stripPositions(parseTree(expected, defaultSettings)); + 
expect(actual).toEqual(expected); + }; + + it("should produce individual tokens", function() { + compareParseTree("e^\\foo", "e^1 23", {"\\foo": "123"}); + }); + + it("should allow for multiple expansion", function() { + compareParseTree("1\\foo2", "1aa2", { + "\\foo": "\\bar\\bar", + "\\bar": "a", + }); + }); +});