Allow macro definitions in settings (#493)

* Introduce MacroExpander

The job of the MacroExpander is turning a stream of possibly expandable
tokens, as obtained from the Lexer, into a stream of non-expandable tokens
(in KaTeX, even though they may well be expandable in TeX) which can be
processed by the Parser.  The challenge here is that we don't have
mode-specific lexer implementations any more, so we need to do everything on
the token level, including reassembly of sizes and colors.

* Make macros available in development server

Now one can specify macro definitions like \foo=bar as part of the query
string and use these macros in the formula being typeset.

* Add tests for macro expansions

* Handle end of input in special groups

This avoids an infinite loop if input ends prematurely.

* Simplify parseSpecialGroup

The parseSpecialGroup method now returns a single token spanning the whole
special group, and leaves matching that string against a suitable regular
expression to whoever is calling the method.  Suggested by @cbreeden.

* Incorporate review suggestions

Add improvements suggested by Kevin Barabash during review.

* Input range sanity checks

Ensure that both tokens of a token range come from the same lexer,
and that the range has a non-negative length.

* Improved wording of two comments
This commit is contained in:
Martin von Gagern 2016-07-08 21:24:31 +02:00 committed by Kevin Barabash
parent b49eee4de7
commit 8c55aed39a
11 changed files with 509 additions and 296 deletions

View File

@ -17,16 +17,50 @@ var ParseError = require("./ParseError");
// The main lexer class
function Lexer(input) {
this._input = input;
this.input = input;
this.pos = 0;
}
// The resulting token returned from `lex`.
function Token(text, data, position) {
/**
* The resulting token returned from `lex`.
*
* It consists of the token text plus some position information.
* The position information is essentially a range in an input string,
* but instead of referencing the bare input string, we refer to the lexer.
* That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar.
*
* The position information (all three parameters) is optional,
* so it is OK to construct synthetic tokens if appropriate.
* Not providing available position information may lead to
* degraded error reporting, though.
*
* @param {string} text the text of this token
* @param {number=} start the start offset, zero-based inclusive
* @param {number=} end the end offset, zero-based exclusive
* @param {Lexer=} lexer the lexer which in turn holds the input string
*/
function Token(text, start, end, lexer) {
this.text = text;
this.data = data;
this.position = position;
this.start = start;
this.end = end;
this.lexer = lexer;
}
/**
 * Build a new Token that covers the input from the start of this token up to
 * the end of `endToken`.
 *
 * Position information is only carried over when both tokens were produced
 * by the same lexer; otherwise the returned token carries the text alone.
 *
 * @param {Token} endToken last token of the range, inclusive
 * @param {string} text the text of the newly constructed token
 * @return {Token} the token spanning the whole range
 */
Token.prototype.range = function(endToken, text) {
    var sameLexer = endToken.lexer === this.lexer;
    if (sameLexer) {
        return new Token(text, this.start, endToken.end, this.lexer);
    }
    // Offsets into different inputs cannot be combined into a single range,
    // so fall back to a token without position information.
    return new Token(text);
};
/* The following tokenRegex
* - matches typical whitespace (but not NBSP etc.) using its first group
* - matches symbol combinations which result in a single output character
@ -52,111 +86,26 @@ var tokenRegex = new RegExp(
")"
);
var whitespaceRegex = /\s*/;
/**
* This function lexes a single normal token. It takes a position and
* whether it should completely ignore whitespace or not.
* This function lexes a single token.
*/
Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
var input = this._input;
Lexer.prototype.lex = function() {
var input = this.input;
var pos = this.pos;
if (pos === input.length) {
return new Token("EOF", null, pos);
return new Token("EOF", pos, pos, this);
}
var match = matchAt(tokenRegex, input, pos);
if (match === null) {
throw new ParseError(
"Unexpected character: '" + input[pos] + "'",
this, pos);
} else if (match[2]) { // matched non-whitespace
return new Token(match[2], null, pos + match[2].length);
} else if (ignoreWhitespace) {
return this._innerLex(pos + match[1].length, true);
} else { // concatenate whitespace to a single space
return new Token(" ", null, pos + match[1].length);
}
};
// A regex to match a CSS color (like #ffffff or BlueViolet)
var cssColor = /#[a-z0-9]+|[a-z]+/i;
/**
* This function lexes a CSS color.
*/
Lexer.prototype._innerLexColor = function(pos) {
var input = this._input;
// Ignore whitespace
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
pos += whitespace.length;
var match;
if ((match = matchAt(cssColor, input, pos))) {
// If we look like a color, return a color
return new Token(match[0], null, pos + match[0].length);
} else {
throw new ParseError("Invalid color", this, pos);
}
};
// A regex to match a dimension. Dimensions look like
// "1.2em" or ".4pt" or "1 ex"
var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
/**
* This function lexes a dimension.
*/
Lexer.prototype._innerLexSize = function(pos) {
var input = this._input;
// Ignore whitespace
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
pos += whitespace.length;
var match;
if ((match = matchAt(sizeRegex, input, pos))) {
var unit = match[3];
// We only currently handle "em" and "ex" units
if (unit !== "em" && unit !== "ex") {
throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
}
return new Token(match[0], {
number: +(match[1] + match[2]),
unit: unit,
}, pos + match[0].length);
}
throw new ParseError("Invalid size", this, pos);
};
/**
* This function lexes a string of whitespace.
*/
Lexer.prototype._innerLexWhitespace = function(pos) {
var input = this._input;
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
pos += whitespace.length;
return new Token(whitespace[0], null, pos);
};
/**
* This function lexes a single token starting at `pos` and of the given mode.
* Based on the mode, we defer to one of the `_innerLex` functions.
*/
Lexer.prototype.lex = function(pos, mode) {
if (mode === "math") {
return this._innerLex(pos, true);
} else if (mode === "text") {
return this._innerLex(pos, false);
} else if (mode === "color") {
return this._innerLexColor(pos);
} else if (mode === "size") {
return this._innerLexSize(pos);
} else if (mode === "whitespace") {
return this._innerLexWhitespace(pos);
new Token(input[pos], pos, pos + 1, this));
}
var text = match[2] || " ";
var start = this.pos;
this.pos += match[0].length;
var end = this.pos;
return new Token(text, start, end, this);
};
module.exports = Lexer;

70
src/MacroExpander.js Normal file
View File

@ -0,0 +1,70 @@
/**
* This file contains the gullet where macros are expanded
* until only non-macro tokens remain.
*/
var Lexer = require("./Lexer");
/**
 * The gullet: sits between the Lexer and the Parser and replaces macro tokens
 * by the tokens of their definition until only non-macro tokens remain.
 *
 * @param {string} input the source text handed to the underlying Lexer
 * @param {Object=} macros map from macro name (including the leading
 *        backslash) to its expansion, given as a string; an empty map is
 *        used when omitted
 */
function MacroExpander(input, macros) {
    this.lexer = new Lexer(input);
    // Fall back to an empty map so that a missing argument does not crash
    // the first time a backslash token is looked up.
    this.macros = macros || {};
    this.stack = []; // contains tokens in REVERSE order
    this.discardedWhiteSpace = [];
}

/**
 * Recursively expand first token, then return first non-expandable token.
 *
 * NOTE: there is no expansion limit here, so a macro whose expansion leads
 * back to itself keeps this loop running forever.
 *
 * @return {Token} the next token which is not the name of a macro
 */
MacroExpander.prototype.nextToken = function() {
    for (;;) {
        if (this.stack.length === 0) {
            this.stack.push(this.lexer.lex());
        }
        var topToken = this.stack.pop();
        var name = topToken.text;
        // Go through Object.prototype so that the lookup also works for
        // maps created without a prototype, and cannot be shadowed by an
        // own "hasOwnProperty" entry.
        var isMacro = name.charAt(0) === "\\" &&
            Object.prototype.hasOwnProperty.call(this.macros, name);
        if (!isMacro) {
            return topToken;
        }
        var expansion = this.macros[name];
        if (typeof expansion === "string") {
            // First use of this macro: tokenize its body once and store the
            // token list back into the macros map in place of the string.
            var bodyLexer = new Lexer(expansion);
            expansion = [];
            var tok = bodyLexer.lex();
            while (tok.text !== "EOF") {
                expansion.push(tok);
                tok = bodyLexer.lex();
            }
            expansion.reverse(); // to fit in with stack using push and pop
            this.macros[name] = expansion;
        }
        // concat copies, so the cached token list itself is never modified
        this.stack = this.stack.concat(expansion);
    }
};

/**
 * Fetch the next non-expandable token.
 *
 * @param {boolean} ignoreSpace if truthy, tokens whose text is a single
 *        space are skipped; they are remembered so that unget can restore
 *        them
 * @return {Token} the next token
 */
MacroExpander.prototype.get = function(ignoreSpace) {
    this.discardedWhiteSpace = [];
    var token = this.nextToken();
    if (ignoreSpace) {
        while (token.text === " ") {
            this.discardedWhiteSpace.push(token);
            token = this.nextToken();
        }
    }
    return token;
};

/**
 * Undo the effect of the preceding call to the get method.
 * A call to this method MUST be immediately preceded and immediately followed
 * by a call to get.  Only used during mode switching, i.e. after one token
 * was got in the old mode but should get got again in a new mode
 * with possibly different whitespace handling.
 *
 * @param {Token} token the token returned by the preceding call to get
 */
MacroExpander.prototype.unget = function(token) {
    this.stack.push(token);
    // The stack is in reverse order, so pushing the discarded whitespace
    // last-to-first makes it come out again in its original order.
    while (this.discardedWhiteSpace.length !== 0) {
        this.stack.push(this.discardedWhiteSpace.pop());
    }
};
module.exports = MacroExpander;

View File

@ -2,26 +2,50 @@
* This is the ParseError class, which is the main error thrown by KaTeX
* functions when something has gone wrong. This is used to distinguish internal
* errors from errors in the expression that the user provided.
*
* If possible, a caller should provide a Token or ParseNode with information
* about where in the source string the problem occurred.
*
* @param {string} message The error message
* @param {(Token|ParseNode)=} token An object providing position information
*/
function ParseError(message, lexer, position) {
function ParseError(message, token) {
var error = "KaTeX parse error: " + message;
var start;
var end;
if (lexer !== undefined && position !== undefined) {
if (token && token.lexer && token.start <= token.end) {
// If we have the input and a position, make the error a bit fancier
// Prepend some information
error += " at position " + position + ": ";
// Get the input
var input = lexer._input;
// Insert a combining underscore at the correct position
input = input.slice(0, position) + "\u0332" +
input.slice(position);
var input = token.lexer.input;
// Prepend some information
start = token.start;
end = token.end;
if (start === input.length) {
error += " at end of input: ";
} else {
error += " at position " + (start + 1) + ": ";
}
// Underline token in question using combining underscores
var underlined = input.slice(start, end).replace(/[^]/g, "$&\u0332");
// Extract some context from the input and add it to the error
var begin = Math.max(0, position - 15);
var end = position + 15;
error += input.slice(begin, end);
var left;
if (start > 15) {
left = "…" + input.slice(start - 15, start);
} else {
left = input.slice(0, start);
}
var right;
if (end + 15 < input.length) {
right = input.slice(end, end + 15) + "…";
} else {
right = input.slice(end);
}
error += left + underlined + right;
}
// Some hackery to make ParseError a prototype of Error
@ -30,7 +54,7 @@ function ParseError(message, lexer, position) {
self.name = "ParseError";
self.__proto__ = ParseError.prototype;
self.position = position;
self.position = start;
return self;
}

View File

@ -1,7 +1,7 @@
/* eslint no-constant-condition:0 */
var functions = require("./functions");
var environments = require("./environments");
var Lexer = require("./Lexer");
var MacroExpander = require("./MacroExpander");
var symbols = require("./symbols");
var utils = require("./utils");
@ -46,8 +46,9 @@ var ParseError = require("./ParseError");
* Main Parser class
*/
function Parser(input, settings) {
// Make a new lexer
this.lexer = new Lexer(input);
// Create a new macro expander (gullet) and (indirectly via that) also a
// new lexer (mouth) for this parser (stomach, in the language of TeX)
this.gullet = new MacroExpander(input, settings.macros);
// Store the settings for use in parsing
this.settings = settings;
}
@ -58,10 +59,11 @@ var ParseNode = parseData.ParseNode;
* An initial function (without its arguments), or an argument to a function.
* The `result` argument should be a ParseNode.
*/
function ParseFuncOrArgument(result, isFunction) {
function ParseFuncOrArgument(result, isFunction, token) {
this.result = result;
// Is this a function (i.e. is it something defined in functions.js)?
this.isFunction = isFunction;
this.token = token;
}
/**
@ -75,7 +77,7 @@ Parser.prototype.expect = function(text, consume) {
if (this.nextToken.text !== text) {
throw new ParseError(
"Expected '" + text + "', got '" + this.nextToken.text + "'",
this.lexer, this.nextToken.position
this.nextToken
);
}
if (consume !== false) {
@ -88,8 +90,13 @@ Parser.prototype.expect = function(text, consume) {
* and fetches the one after that as the new look ahead.
*/
Parser.prototype.consume = function() {
this.pos = this.nextToken.position;
this.nextToken = this.lexer.lex(this.pos, this.mode);
this.nextToken = this.gullet.get(this.mode === "math");
};
/**
 * Switch the parser to a different mode and fetch the look-ahead token again.
 *
 * The current look-ahead token is handed back to the gullet, the mode is
 * changed, and the look-ahead is then re-read via consume.
 *
 * @param {string} newMode the mode to switch to
 */
Parser.prototype.switchMode = function(newMode) {
// Hand the look-ahead back to the gullet before the mode changes.
this.gullet.unget(this.nextToken);
this.mode = newMode;
// Read the look-ahead again, this time with newMode in effect.
this.consume();
};
/**
@ -100,8 +107,7 @@ Parser.prototype.consume = function() {
Parser.prototype.parse = function() {
// Try to parse the input
this.mode = "math";
this.pos = 0;
this.nextToken = this.lexer.lex(this.pos, this.mode);
this.consume();
var parse = this.parseInput();
return parse;
};
@ -122,26 +128,29 @@ var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"];
/**
* Parses an "expression", which is a list of atoms.
*
* @param {boolean} breakOnInfix Should the parsing stop when we hit infix
* @param {boolean} breakOnInfix Should the parsing stop when we hit infix
* nodes? This happens when functions have higher precedence
* than infix nodes in implicit parses.
*
* @param {?string} breakOnToken The token that the expression should end with,
* or `null` if something else should end the expression.
* @param {?string} breakOnTokenText The text of the token that the expression
* should end with, or `null` if something else should end the
* expression.
*
* @return {ParseNode}
*/
Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) {
Parser.prototype.parseExpression = function(breakOnInfix, breakOnTokenText) {
var body = [];
// Keep adding atoms to the body until we can't parse any more atoms (either
// we reached the end, a }, or a \right)
while (true) {
var lex = this.nextToken;
var pos = this.pos;
if (endOfExpression.indexOf(lex.text) !== -1) {
break;
}
if (breakOnToken && lex.text === breakOnToken) {
if (breakOnTokenText && lex.text === breakOnTokenText) {
break;
}
if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
break;
}
var atom = this.parseAtom();
@ -149,19 +158,11 @@ Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) {
if (!this.settings.throwOnError && lex.text[0] === "\\") {
var errorNode = this.handleUnsupportedCmd();
body.push(errorNode);
pos = lex.position;
continue;
}
break;
}
if (breakOnInfix && atom.type === "infix") {
// rewind so we can parse the infix atom again
this.pos = pos;
this.nextToken = lex;
break;
}
body.push(atom);
}
return this.handleInfixNodes(body);
@ -184,8 +185,9 @@ Parser.prototype.handleInfixNodes = function(body) {
var node = body[i];
if (node.type === "infix") {
if (overIndex !== -1) {
throw new ParseError("only one infix operator per group",
this.lexer, -1);
throw new ParseError(
"only one infix operator per group",
node.value.token);
}
overIndex = i;
funcName = node.value.replaceWith;
@ -226,8 +228,8 @@ var SUPSUB_GREEDINESS = 1;
* Handle a subscript or superscript with nice errors.
*/
Parser.prototype.handleSupSubscript = function(name) {
var symbol = this.nextToken.text;
var symPos = this.pos;
var symbolToken = this.nextToken;
var symbol = symbolToken.text;
this.consume();
var group = this.parseGroup();
@ -237,8 +239,7 @@ Parser.prototype.handleSupSubscript = function(name) {
} else {
throw new ParseError(
"Expected group after '" + symbol + "'",
this.lexer,
symPos + 1
symbolToken
);
}
} else if (group.isFunction) {
@ -250,8 +251,7 @@ Parser.prototype.handleSupSubscript = function(name) {
} else {
throw new ParseError(
"Got function '" + group.result + "' with no arguments " +
"as " + name,
this.lexer, symPos + 1);
"as " + name, symbolToken);
}
} else {
return group.result;
@ -319,7 +319,7 @@ Parser.prototype.parseAtom = function() {
if (!base || base.type !== "op") {
throw new ParseError(
"Limit controls must follow a math operator",
this.lexer, this.pos);
lex);
} else {
var limits = lex.text === "\\limits";
base.value.limits = limits;
@ -329,15 +329,13 @@ Parser.prototype.parseAtom = function() {
} else if (lex.text === "^") {
// We got a superscript start
if (superscript) {
throw new ParseError(
"Double superscript", this.lexer, this.pos);
throw new ParseError("Double superscript", lex);
}
superscript = this.handleSupSubscript("superscript");
} else if (lex.text === "_") {
// We got a subscript start
if (subscript) {
throw new ParseError(
"Double subscript", this.lexer, this.pos);
throw new ParseError("Double subscript", lex);
}
subscript = this.handleSupSubscript("subscript");
} else if (lex.text === "'") {
@ -427,8 +425,7 @@ Parser.prototype.parseImplicitGroup = function() {
var envName = begin.value.name;
if (!environments.hasOwnProperty(envName)) {
throw new ParseError(
"No such environment: " + envName,
this.lexer, begin.value.namepos);
"No such environment: " + envName, begin.value.nameGroup);
}
// Build the environment object. Arguments and other information will
// be made available to the begin and end methods using properties.
@ -438,19 +435,17 @@ Parser.prototype.parseImplicitGroup = function() {
mode: this.mode,
envName: envName,
parser: this,
lexer: this.lexer,
positions: args.pop(),
};
var result = env.handler(context, args);
this.expect("\\end", false);
var endNameToken = this.nextToken;
var end = this.parseFunction();
if (end.value.name !== envName) {
throw new ParseError(
"Mismatch: \\begin{" + envName + "} matched " +
"by \\end{" + end.value.name + "}",
this.lexer /* , end.value.namepos */);
// TODO: Add position to the above line and adjust test case,
// requires #385 to get merged first
endNameToken);
}
result.position = end.position;
return result;
@ -497,11 +492,12 @@ Parser.prototype.parseFunction = function(baseGroup) {
if (this.mode === "text" && !funcData.allowedInText) {
throw new ParseError(
"Can't use function '" + func + "' in text mode",
this.lexer, baseGroup.position);
baseGroup.token);
}
var args = this.parseArguments(func, funcData);
var result = this.callFunction(func, args, args.pop());
var token = baseGroup.token;
var result = this.callFunction(func, args, args.pop(), token);
return new ParseNode(result.type, result, this.mode);
} else {
return baseGroup.result;
@ -514,12 +510,12 @@ Parser.prototype.parseFunction = function(baseGroup) {
/**
* Call a function handler with a suitable context and arguments.
*/
Parser.prototype.callFunction = function(name, args, positions) {
Parser.prototype.callFunction = function(name, args, positions, token) {
var context = {
funcName: name,
parser: this,
lexer: this.lexer,
positions: positions,
token: token,
};
return functions[name].handler(context, args);
};
@ -542,11 +538,12 @@ Parser.prototype.parseArguments = function(func, funcData) {
var args = [];
for (var i = 0; i < totalArgs; i++) {
var nextToken = this.nextToken;
var argType = funcData.argTypes && funcData.argTypes[i];
var arg;
if (i < funcData.numOptionalArgs) {
if (argType) {
arg = this.parseSpecialGroup(argType, true);
arg = this.parseGroupOfType(argType, true);
} else {
arg = this.parseOptionalGroup();
}
@ -557,7 +554,7 @@ Parser.prototype.parseArguments = function(func, funcData) {
}
} else {
if (argType) {
arg = this.parseSpecialGroup(argType);
arg = this.parseGroupOfType(argType);
} else {
arg = this.parseGroup();
}
@ -569,8 +566,7 @@ Parser.prototype.parseArguments = function(func, funcData) {
false);
} else {
throw new ParseError(
"Expected group after '" + func + "'",
this.lexer, this.pos);
"Expected group after '" + func + "'", nextToken);
}
}
}
@ -583,8 +579,7 @@ Parser.prototype.parseArguments = function(func, funcData) {
} else {
throw new ParseError(
"Got function '" + arg.result + "' as " +
"argument to '" + func + "'",
this.lexer, this.pos - 1);
"argument to '" + func + "'", nextToken);
}
} else {
argNode = arg.result;
@ -600,64 +595,117 @@ Parser.prototype.parseArguments = function(func, funcData) {
/**
* Parses a group when the mode is changing. Takes a position, a new mode, and
* an outer mode that is used to parse the outside.
* Parses a group when the mode is changing.
*
* @return {?ParseFuncOrArgument}
*/
Parser.prototype.parseSpecialGroup = function(innerMode, optional) {
Parser.prototype.parseGroupOfType = function(innerMode, optional) {
var outerMode = this.mode;
// Handle `original` argTypes
if (innerMode === "original") {
innerMode = outerMode;
}
if (innerMode === "color" || innerMode === "size") {
// color and size modes are special because they should have braces and
// should only lex a single symbol inside
var openBrace = this.nextToken;
if (optional && openBrace.text !== "[") {
// optional arguments should return null if they don't exist
return null;
}
// The call to expect will lex the token after the '{' in inner mode
this.mode = innerMode;
this.expect(optional ? "[" : "{");
var inner = this.nextToken;
this.mode = outerMode;
var data;
if (innerMode === "color") {
data = inner.text;
} else {
data = inner.data;
}
this.consume(); // consume the token stored in inner
this.expect(optional ? "]" : "}");
return new ParseFuncOrArgument(
new ParseNode(innerMode, data, outerMode),
false);
} else if (innerMode === "text") {
// text mode is special because it should ignore the whitespace before
// it
var whitespace = this.lexer.lex(this.pos, "whitespace");
this.pos = whitespace.position;
if (innerMode === "color") {
return this.parseColorGroup(optional);
}
if (innerMode === "size") {
return this.parseSizeGroup(optional);
}
this.switchMode(innerMode);
if (innerMode === "text") {
// text mode is special because it should ignore the whitespace before
// it
while (this.nextToken.text === " ") {
this.consume();
}
}
// By the time we get here, innerMode is one of "text" or "math".
// We switch the mode of the parser, recurse, then restore the old mode.
this.mode = innerMode;
this.nextToken = this.lexer.lex(this.pos, innerMode);
var res;
if (optional) {
res = this.parseOptionalGroup();
} else {
res = this.parseGroup();
}
this.mode = outerMode;
this.nextToken = this.lexer.lex(this.pos, outerMode);
this.switchMode(outerMode);
return res;
};
/**
 * Reads a delimited group as raw text: the texts of all tokens between the
 * opening and the closing delimiter are concatenated, and the result is
 * returned as a single token spanning those tokens.
 *
 * Required groups are delimited by braces, optional ones by brackets.
 * The content is read in "text" mode; the previous mode is restored before
 * the closing delimiter is consumed.
 *
 * @param {string} modeName Used to describe the mode in error messages
 * @param {boolean} optional Whether the group is optional or required
 * @return {?Token} the spanning token, or null if an optional group is absent
 */
Parser.prototype.parseStringGroup = function(modeName, optional) {
    var opener = optional ? "[" : "{";
    var closer = optional ? "]" : "}";
    if (optional && this.nextToken.text !== opener) {
        return null;
    }

    var savedMode = this.mode;
    this.mode = "text";
    this.expect(opener);

    var collected = "";
    var first = this.nextToken;
    var last = first;
    for (var current = first; current.text !== closer;
            current = this.nextToken) {
        if (current.text === "EOF") {
            // Input ended before the closing delimiter showed up.
            throw new ParseError(
                "Unexpected end of input in " + modeName,
                first.range(current, collected));
        }
        last = current;
        collected += current.text;
        this.consume();
    }

    this.mode = savedMode;
    this.expect(closer);
    return first.range(last, collected);
};
/**
 * Parses a color description: either a hash sign followed by letters and
 * digits, or a purely alphabetic color name.
 *
 * @param {boolean} optional Whether the group is optional or required
 * @return {?ParseFuncOrArgument} the color argument, or null if an optional
 *         group is absent
 */
Parser.prototype.parseColorGroup = function(optional) {
    var group = this.parseStringGroup("color", optional);
    if (!group) {
        return null;
    }

    var colorMatch = (/^(#[a-z0-9]+|[a-z]+)$/i).exec(group.text);
    if (!colorMatch) {
        throw new ParseError("Invalid color: '" + group.text + "'", group);
    }

    var colorNode = new ParseNode("color", colorMatch[0], this.mode);
    return new ParseFuncOrArgument(colorNode, false);
};
/**
 * Parses a size specification, consisting of magnitude and unit.
 *
 * The resulting node has type "size" and a value of the form
 * `{number: <signed magnitude>, unit: "em" | "ex"}`.
 *
 * @param {boolean} optional Whether the group is optional or required
 * @return {?ParseFuncOrArgument} the size argument, or null if an optional
 *         group is absent
 * @throws {ParseError} if the text contains no size, or if the unit is
 *         neither "em" nor "ex"
 */
Parser.prototype.parseSizeGroup = function(optional) {
    var res = this.parseStringGroup("size", optional);
    if (!res) {
        return null;
    }
    // NOTE: the pattern is not anchored, so it picks up the first size-like
    // substring of the group text.
    var match = (/(-?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/).exec(res.text);
    if (!match) {
        throw new ParseError("Invalid size: '" + res.text + "'", res);
    }
    var data = {
        number: +(match[1] + match[2]), // sign + magnitude, cast to number
        unit: match[3],
    };
    if (data.unit !== "em" && data.unit !== "ex") {
        throw new ParseError("Invalid unit: '" + data.unit + "'", res);
    }
    // The node describes a size, so it must be typed "size" rather than
    // "color".
    return new ParseFuncOrArgument(
        new ParseNode("size", data, this.mode),
        false);
};
/**
* Parses a group, which is either a single nucleus (like "x") or an expression
* in braces (like "{x+y}")
@ -665,15 +713,18 @@ Parser.prototype.parseSpecialGroup = function(innerMode, optional) {
* @return {?ParseFuncOrArgument}
*/
Parser.prototype.parseGroup = function() {
var firstToken = this.nextToken;
// Try to parse an open brace
if (this.nextToken.text === "{") {
// If we get a brace, parse an expression
this.consume();
var expression = this.parseExpression(false);
var lastToken = this.nextToken;
// Make sure we get a close brace
this.expect("}");
return new ParseFuncOrArgument(
new ParseNode("ordgroup", expression, this.mode),
new ParseNode("ordgroup", expression, this.mode,
firstToken, lastToken),
false);
} else {
// Otherwise, just return a nucleus
@ -687,15 +738,18 @@ Parser.prototype.parseGroup = function() {
* @return {?ParseFuncOrArgument}
*/
Parser.prototype.parseOptionalGroup = function() {
var firstToken = this.nextToken;
// Try to parse an open bracket
if (this.nextToken.text === "[") {
// If we get a brace, parse an expression
this.consume();
var expression = this.parseExpression(false, "]");
var lastToken = this.nextToken;
// Make sure we get a close bracket
this.expect("]");
return new ParseFuncOrArgument(
new ParseNode("ordgroup", expression, this.mode),
new ParseNode("ordgroup", expression, this.mode,
firstToken, lastToken),
false);
} else {
// Otherwise, return null,
@ -718,15 +772,15 @@ Parser.prototype.parseSymbol = function() {
// say that it is a function.
return new ParseFuncOrArgument(
nucleus.text,
true);
true, nucleus);
} else if (symbols[this.mode][nucleus.text]) {
this.consume();
// Otherwise if this is a no-argument function, find the type it
// corresponds to in the symbols map
return new ParseFuncOrArgument(
new ParseNode(symbols[this.mode][nucleus.text].group,
nucleus.text, this.mode),
false);
nucleus.text, this.mode, nucleus),
false, nucleus);
} else {
return null;
}

View File

@ -23,6 +23,7 @@ function Settings(options) {
this.displayMode = get(options.displayMode, false);
this.throwOnError = get(options.throwOnError, true);
this.errorColor = get(options.errorColor, "#cc0000");
this.macros = options.macros || {};
}
module.exports = Settings;

View File

@ -28,10 +28,8 @@ function parseArray(parser, result) {
row = [];
body.push(row);
} else {
// TODO: Clean up the following hack once #385 got merged
var pos = Math.min(parser.pos + 1, parser.lexer._input.length);
throw new ParseError("Expected & or \\\\ or \\end",
parser.lexer, pos);
parser.nextToken);
}
}
result.body = body;
@ -106,7 +104,7 @@ defineEnvironment("array", {
}
throw new ParseError(
"Unknown column alignment: " + node.value,
context.lexer, context.positions[1]);
node);
});
var res = {
type: "array",

View File

@ -55,6 +55,7 @@ var ParseError = require("./ParseError");
* should parse. If the optional arguments aren't found,
* `null` will be passed to the handler in their place.
* (default 0)
* - infix: (optional) Must be true if the function is an infix operator.
*
* The last argument is that implementation, the handler for the function(s).
* It is called to handle these functions and their arguments.
@ -91,6 +92,7 @@ function defineFunction(names, props, handler) {
greediness: (props.greediness === undefined) ? 1 : props.greediness,
allowedInText: !!props.allowedInText,
numOptionalArgs: props.numOptionalArgs || 0,
infix: !!props.infix,
handler: handler,
};
for (var i = 0; i < names.length; ++i) {
@ -456,8 +458,7 @@ defineFunction([
if (!utils.contains(delimiters, delim.value)) {
throw new ParseError(
"Invalid delimiter: '" + delim.value + "' after '" +
context.funcName + "'",
context.lexer, context.positions[1]);
context.funcName + "'", delim);
}
// \left and \right are caught somewhere in Parser.js, which is
@ -536,6 +537,7 @@ defineFunction([
// Infix generalized fractions
defineFunction(["\\over", "\\choose"], {
numArgs: 0,
infix: true,
}, function(context) {
var replaceWith;
switch (context.funcName) {
@ -551,6 +553,7 @@ defineFunction(["\\over", "\\choose"], {
return {
type: "infix",
replaceWith: replaceWith,
token: context.token,
};
});
@ -574,9 +577,7 @@ defineFunction(["\\begin", "\\end"], {
}, function(context, args) {
var nameGroup = args[0];
if (nameGroup.type !== "ordgroup") {
throw new ParseError(
"Invalid environment name",
context.lexer, context.positions[1]);
throw new ParseError("Invalid environment name", nameGroup);
}
var name = "";
for (var i = 0; i < nameGroup.value.length; ++i) {
@ -585,6 +586,6 @@ defineFunction(["\\begin", "\\end"], {
return {
type: "environment",
name: name,
namepos: context.positions[1],
nameGroup: nameGroup,
};
});

View File

@ -1,10 +1,29 @@
/**
* The resulting parse tree nodes of the parse tree.
*
* It is possible to provide position information, so that a ParseNode can
* fulfil a role similar to a Token in error reporting.
* For details on the corresponding properties see Token constructor.
* Providing such information can lead to better error reporting.
*
* @param {string} type type of node, like e.g. "ordgroup"
* @param {?object} value type-specific representation of the node
* @param {string} mode parse mode in action for this node,
* "math" or "text"
* @param {Token=} firstToken first token of the input for this node,
* will omit position information if unset
* @param {Token=} lastToken last token of the input for this node,
* will default to firstToken if unset
*/
function ParseNode(type, value, mode) {
function ParseNode(type, value, mode, firstToken, lastToken) {
this.type = type;
this.value = value;
this.mode = mode;
if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) {
this.lexer = firstToken.lexer;
this.start = firstToken.start;
this.end = (lastToken || firstToken).end;
}
}
module.exports = {

View File

@ -10,13 +10,9 @@ function init() {
}
if ("addEventListener" in permalink) {
permalink.addEventListener("click", function() {
window.location.search = "?text=" + encodeURIComponent(input.value);
});
permalink.addEventListener("click", setSearch);
} else {
permalink.attachEvent("click", function() {
window.location.search = "?text=" + encodeURIComponent(input.value);
});
permalink.attachEvent("click", setSearch);
}
var match = (/(?:^\?|&)text=([^&]*)/).exec(window.location.search);
@ -24,11 +20,26 @@ function init() {
input.value = decodeURIComponent(match[1]);
}
var macros = {};
var options = {};
var macroRegex = /(?:^\?|&)(?:\\|%5[Cc])([A-Za-z]+)=([^&]*)/g;
var macroString = "";
while ((match = macroRegex.exec(window.location.search)) !== null) {
options.macros = macros;
macros["\\" + match[1]] = decodeURIComponent(match[2]);
macroString += "&" + match[0].substr(1);
}
reprocess();
function setSearch() {
window.location.search =
"?text=" + encodeURIComponent(input.value) + macroString;
}
function reprocess() {
try {
katex.render(input.value, math);
katex.render(input.value, math, options);
} catch (e) {
if (e.__proto__ == katex.ParseError.prototype) {
console.error(e);

View File

@ -61,16 +61,15 @@ beforeEach(function() {
describe("Parser:", function() {
describe("#handleInfixNodes", function() {
// TODO: The position information here is broken, should be fixed.
it("rejects repeated infix operators", function() {
expect("1\\over 2\\over 3").toFailWithParseError(
"only one infix operator per group at position -1: " +
"1\\over 2\\over ");
"only one infix operator per group at position 9: " +
"1\\over 2\\̲o̲v̲e̲r̲ 3");
});
it("rejects conflicting infix operators", function() {
expect("1\\over 2\\choose 3").toFailWithParseError(
"only one infix operator per group at position -1: " +
"1\\over 2\\choos");
"only one infix operator per group at position 9: " +
"1\\over 2\\̲c̲h̲o̲o̲s̲e̲ 3");
});
});
@ -91,84 +90,85 @@ describe("Parser:", function() {
});
describe("#parseAtom", function() {
// TODO: The positions in the following error messages appear to be
// off by one, i.e. they should be one character later.
it("rejects \\limits without operator", function() {
expect("\\alpha\\limits\\omega").toFailWithParseError(
"Limit controls must follow a math operator" +
" at position 6: \\alpha̲\\limits\\omega");
" at position 7: \\alpha\\̲l̲i̲m̲i̲t̲s̲\\omega");
});
it("rejects \\limits at the beginning of the input", function() {
expect("\\limits\\omega").toFailWithParseError(
"Limit controls must follow a math operator" +
" at position 0: ̲\\limits\\omega");
" at position 1: \\̲l̲i̲m̲i̲t̲s̲\\omega");
});
it("rejects double superscripts", function() {
expect("1^2^3").toFailWithParseError(
"Double superscript at position 3: 1^2̲^3");
"Double superscript at position 4: 1^2^̲3");
expect("1^{2+3}_4^5").toFailWithParseError(
"Double superscript at position 9: 1^{2+3}_4̲^5");
"Double superscript at position 10: 1^{2+3}_4^̲5");
});
it("rejects double subscripts", function() {
expect("1_2_3").toFailWithParseError(
"Double subscript at position 3: 1_2̲_3");
"Double subscript at position 4: 1_2_̲3");
expect("1_{2+3}^4_5").toFailWithParseError(
"Double subscript at position 9: 1_{2+3}^4̲_5");
"Double subscript at position 10: 1_{2+3}^4_̲5");
});
});
describe("#parseImplicitGroup", function() {
it("reports unknown environments", function() {
expect("\\begin{foo}bar\\end{foo}").toFailWithParseError(
"No such environment: foo at position 11:" +
" \\begin{foo}̲bar\\end{foo}");
"No such environment: foo at position 7:" +
" \\begin{̲f̲o̲o̲}̲bar\\end{foo}");
});
it("reports mismatched environments", function() {
expect("\\begin{pmatrix}1&2\\\\3&4\\end{bmatrix}+5")
.toFailWithParseError(
"Mismatch: \\begin{pmatrix} matched by \\end{bmatrix}");
"Mismatch: \\begin{pmatrix} matched by \\end{bmatrix}" +
" at position 24: …matrix}1&2\\\\3&4\\̲e̲n̲d̲{bmatrix}+5");
});
});
describe("#parseFunction", function() {
it("rejects math-mode functions in text mode", function() {
// TODO: The position info is missing here
expect("\\text{\\sqrt2 is irrational}").toFailWithParseError(
"Can't use function '\\sqrt' in text mode");
"Can't use function '\\sqrt' in text mode" +
" at position 7: \\text{\\̲s̲q̲r̲t̲2 is irrational…");
});
});
describe("#parseArguments", function() {
it("complains about missing argument at end of input", function() {
expect("2\\sqrt").toFailWithParseError(
"Expected group after '\\sqrt' at position 6: 2\\sqrt̲");
"Expected group after '\\sqrt' at end of input: 2\\sqrt");
});
it("complains about missing argument at end of group", function() {
expect("1^{2\\sqrt}").toFailWithParseError(
"Expected group after '\\sqrt' at position 9: 1^{2\\sqrt̲}");
"Expected group after '\\sqrt'" +
" at position 10: 1^{2\\sqrt}̲");
});
it("complains about functions as arguments to others", function() {
// TODO: The position looks pretty wrong here
expect("\\sqrt\\over2").toFailWithParseError(
"Got function '\\over' as argument to '\\sqrt'" +
" at position 9: \\sqrt\\ove̲r2");
" at position 6: \\sqrt\\̲o̲v̲e̲r̲2");
});
});
describe("#parseArguments", function() {
it("complains about missing argument at end of input", function() {
expect("2\\sqrt").toFailWithParseError(
"Expected group after '\\sqrt' at position 6: 2\\sqrt̲");
"Expected group after '\\sqrt' at end of input: 2\\sqrt");
});
it("complains about missing argument at end of group", function() {
expect("1^{2\\sqrt}").toFailWithParseError(
"Expected group after '\\sqrt' at position 9: 1^{2\\sqrt̲}");
"Expected group after '\\sqrt'" +
" at position 10: 1^{2\\sqrt}̲");
});
it("complains about functions as arguments to others", function() {
// TODO: The position looks pretty wrong here
expect("\\sqrt\\over2").toFailWithParseError(
"Got function '\\over' as argument to '\\sqrt'" +
" at position 9: \\sqrt\\ove̲r2");
" at position 6: \\sqrt\\̲o̲v̲e̲r̲2");
});
});
@ -183,12 +183,12 @@ describe("Parser.expect calls:", function() {
});
it("complains about extra \\end", function() {
expect("x\\end{matrix}").toFailWithParseError(
"Expected 'EOF', got '\\end' at position 5:" +
" x\\end̲{matrix}");
"Expected 'EOF', got '\\end' at position 2:" +
" x\\̲e̲n̲d̲{matrix}");
});
it("complains about top-level \\\\", function() {
expect("1\\\\2").toFailWithParseError(
"Expected 'EOF', got '\\\\' at position 3: 1\\\\̲2");
"Expected 'EOF', got '\\\\' at position 2: 1\\̲\\̲2");
});
it("complains about top-level &", function() {
expect("1&2").toFailWithParseError(
@ -199,8 +199,8 @@ describe("Parser.expect calls:", function() {
describe("#parseImplicitGroup expecting \\right", function() {
it("rejects missing \\right", function() {
expect("\\left(1+2)").toFailWithParseError(
"Expected '\\right', got 'EOF' at position 10:" +
" \\left(1+2)̲");
"Expected '\\right', got 'EOF' at end of input:" +
" \\left(1+2)");
});
it("rejects incorrectly scoped \\right", function() {
expect("{\\left(1+2}\\right)").toFailWithParseError(
@ -224,32 +224,42 @@ describe("Parser.expect calls:", function() {
});
// Can't test for the [ of an optional group since it's optional
it("complains about missing } for color", function() {
expect("\\color{#ffffff {text}").toFailWithParseError(
"Expected '}', got '{' at position 16:" +
" color{#ffffff {̲text}");
expect("\\color{#ffffff{text}").toFailWithParseError(
"Invalid color: '#ffffff{text' at position 8:" +
" \\color{#̲f̲f̲f̲f̲f̲f̲{̲t̲e̲x̲t̲}");
});
it("complains about missing ] for size", function() {
expect("\\rule[1em{2em}{3em}").toFailWithParseError(
"Expected ']', got '{' at position 10:" +
" \\rule[1em{̲2em}{3em}");
"Unexpected end of input in size" +
" at position 7: \\rule[1̲e̲m̲{̲2̲e̲m̲}̲{̲3̲e̲m̲}̲");
});
it("complains about missing ] for size at end of input", function() {
expect("\\rule[1em").toFailWithParseError(
"Unexpected end of input in size" +
" at position 7: \\rule[1̲e̲m̲");
});
it("complains about missing } for color at end of input", function() {
expect("\\color{#123456").toFailWithParseError(
"Unexpected end of input in color" +
" at position 8: \\color{#̲1̲2̲3̲4̲5̲6̲");
});
});
describe("#parseGroup expecting }", function() {
it("at end of file", function() {
expect("\\sqrt{2").toFailWithParseError(
"Expected '}', got 'EOF' at position 7: \\sqrt{2̲");
"Expected '}', got 'EOF' at end of input: \\sqrt{2");
});
});
describe("#parseOptionalGroup expecting ]", function() {
it("at end of file", function() {
expect("\\sqrt[3").toFailWithParseError(
"Expected ']', got 'EOF' at position 7: \\sqrt[3̲");
"Expected ']', got 'EOF' at end of input: \\sqrt[3");
});
it("before group", function() {
expect("\\sqrt[3{2}").toFailWithParseError(
"Expected ']', got 'EOF' at position 10: \\sqrt[3{2}̲");
"Expected ']', got 'EOF' at end of input: \\sqrt[3{2}");
});
});
@ -260,13 +270,13 @@ describe("environments.js:", function() {
describe("parseArray", function() {
it("rejects missing \\end", function() {
expect("\\begin{matrix}1").toFailWithParseError(
"Expected & or \\\\ or \\end at position 15:" +
" \\begin{matrix}1̲");
"Expected & or \\\\ or \\end at end of input:" +
" \\begin{matrix}1");
});
it("rejects incorrectly scoped \\end", function() {
expect("{\\begin{matrix}1}\\end{matrix}").toFailWithParseError(
"Expected & or \\\\\ or \\end at position 17:" +
" begin{matrix}1}̲\\end{matrix}");
" …\\begin{matrix}1}̲\\end{matrix}");
});
});
@ -274,8 +284,8 @@ describe("environments.js:", function() {
it("rejects unknown column types", function() {
// TODO: The error position here looks strange
expect("\\begin{array}{cba}\\end{array}").toFailWithParseError(
"Unknown column alignment: b at position 18:" +
" gin{array}{cba}̲\\end{array}");
"Unknown column alignment: b at position 16:" +
" \\begin{array}{cb̲a}\\end{array}");
});
});
@ -298,9 +308,8 @@ describe("functions.js:", function() {
describe("\\begin and \\end", function() {
it("reject invalid environment names", function() {
expect("\\begin{foobar}\\end{foobar}").toFailWithParseError(
"No such environment: foobar at position 14:" +
" \\begin{foobar}̲\\end{foobar}");
expect("\\begin x\\end y").toFailWithParseError(
"Invalid environment name at position 8: \\begin x̲\\end y");
});
});
@ -311,34 +320,35 @@ describe("Lexer:", function() {
describe("#_innerLex", function() {
it("rejects lone surrogate char", function() {
expect("\udcba").toFailWithParseError(
"Unexpected character: '\udcba' at position 0:" +
" \u0332\udcba");
"Unexpected character: '\udcba' at position 1:" +
" \udcba\u0332");
});
it("rejects lone backslash at end of input", function() {
expect("\\").toFailWithParseError(
"Unexpected character: '\\' at position 0: ̲\\");
"Unexpected character: '\\' at position 1: \\̲");
});
});
describe("#_innerLexColor", function() {
it("reject hex notation without #", function() {
expect("\\color{1a2b3c}{foo}").toFailWithParseError(
"Invalid color at position 7: \\color{̲1a2b3c}{foo}");
"Invalid color: '1a2b3c'" +
" at position 8: \\color{1̲a̲2̲b̲3̲c̲}{foo}");
});
});
describe("#_innerLexSize", function() {
it("reject size without unit", function() {
expect("\\rule{0}{2em}").toFailWithParseError(
"Invalid size at position 6: \\rule{̲0}{2em}");
"Invalid size: '0' at position 7: \\rule{0̲}{2em}");
});
it("reject size with bogus unit", function() {
expect("\\rule{1au}{2em}").toFailWithParseError(
"Invalid unit: 'au' at position 6: \\rule{̲1au}{2em}");
"Invalid unit: 'au' at position 7: \\rule{1̲a̲u̲}{2em}");
});
it("reject size without number", function() {
expect("\\rule{em}{2em}").toFailWithParseError(
"Invalid size at position 6: \\rule{̲em}{2em}");
"Invalid size: 'em' at position 7: \\rule{e̲m̲}{2em}");
});
});

View File

@ -57,8 +57,39 @@ var getParsed = function(expr, settings) {
return parseTree(expr, usedSettings);
};
/**
 * Recursively removes position information from a parse tree, in place.
 *
 * Any object carrying a truthy `lexer` and a numeric `start` has its
 * `lexer`, `start` and `end` properties deleted; all remaining own
 * enumerable properties are then visited. Non-objects and `null` are
 * returned untouched.
 *
 * @param {*} expr The tree (or leaf value) to clean up.
 * @returns {*} The same `expr` that was passed in.
 */
var stripPositions = function(expr) {
    var isTraversable = typeof expr === "object" && expr !== null;
    if (!isTraversable) {
        return expr;
    }

    var hasPosition = expr.lexer && typeof expr.start === "number";
    if (hasPosition) {
        delete expr.lexer;
        delete expr.start;
        delete expr.end;
    }

    // Keys are collected after the deletion so removed fields are not visited.
    var names = Object.keys(expr);
    for (var i = 0; i < names.length; i++) {
        stripPositions(expr[names[i]]);
    }
    return expr;
};
/**
 * Parses `expr` and records a failure on a matcher result object.
 *
 * @param {string} expr The expression to parse.
 * @param {{pass: boolean, message: string}} result Matcher result; on a
 *     parse failure `pass` is set to false and `message` describes the error.
 * @param {Object=} settings Settings handed to parseTree; falls back to
 *     `defaultSettings` when omitted or falsy.
 * @returns {*} The value returned by parseTree, or undefined if it threw.
 */
var parseAndSetResult = function(expr, result, settings) {
    var tree;
    try {
        tree = parseTree(expr, settings || defaultSettings);
    } catch (err) {
        result.pass = false;
        // ParseErrors are reported as regular errors, anything else as unknown.
        var label = (err instanceof ParseError) ?
            "parsing with error: " : "parsing with unknown error: ";
        result.message = "'" + expr + "' failed " + label + err.message;
    }
    return tree;
};
beforeEach(function() {
jasmine.addMatchers({
toParse: function() {
return {
compare: function(actual, settings) {
@ -68,20 +99,7 @@ beforeEach(function() {
pass: true,
message: "'" + actual + "' succeeded parsing",
};
try {
parseTree(actual, usedSettings);
} catch (e) {
result.pass = false;
if (e instanceof ParseError) {
result.message = "'" + actual + "' failed " +
"parsing with error: " + e.message;
} else {
result.message = "'" + actual + "' failed " +
"parsing with unknown error: " + e.message;
}
}
parseAndSetResult(actual, result, usedSettings);
return result;
},
};
@ -145,6 +163,36 @@ beforeEach(function() {
},
};
},
toParseLike: function(util, baton) {
return {
compare: function(actual, expected) {
var result = {
pass: true,
message: "Parse trees of '" + actual +
"' and '" + expected + "' are equivalent",
};
var actualTree = parseAndSetResult(actual, result);
if (!actualTree) {
return result;
}
var expectedTree = parseAndSetResult(expected, result);
if (!expectedTree) {
return result;
}
stripPositions(actualTree);
stripPositions(expectedTree);
if (!util.equals(actualTree, expectedTree, baton)) {
result.pass = false;
result.message = "Parse trees of '" + actual +
"' and '" + expected + "' are not equivalent";
}
return result;
},
};
},
});
});
@ -154,8 +202,8 @@ describe("A parser", function() {
});
it("should ignore whitespace", function() {
var parseA = getParsed(" x y ");
var parseB = getParsed("xy");
var parseA = stripPositions(getParsed(" x y "));
var parseB = stripPositions(getParsed("xy"));
expect(parseA).toEqual(parseB);
});
});
@ -340,8 +388,8 @@ describe("A subscript and superscript parser", function() {
});
it("should produce the same thing regardless of order", function() {
var parseA = getParsed("x^2_3");
var parseB = getParsed("x_3^2");
var parseA = stripPositions(getParsed("x^2_3"));
var parseB = stripPositions(getParsed("x_3^2"));
expect(parseA).toEqual(parseB);
});
@ -623,6 +671,13 @@ describe("An over parser", function() {
expect(parse.value.denom).toBeDefined();
});
it("should handle \\textstyle correctly", function() {
expect("\\textstyle 1 \\over 2")
.toParseLike("\\frac{\\textstyle 1}{2}");
expect("{\\textstyle 1} \\over 2")
.toParseLike("\\frac{\\textstyle 1}{2}");
});
it("should handle nested factions", function() {
var nestedOverExpression = "{1 \\over 2} \\over 3";
var parse = getParsed(nestedOverExpression)[0];
@ -1523,7 +1578,7 @@ describe("A markup generator", function() {
describe("A parse tree generator", function() {
it("generates a tree", function() {
var tree = katex.__parse("\\sigma^2");
var tree = stripPositions(katex.__parse("\\sigma^2"));
expect(JSON.stringify(tree)).toEqual(JSON.stringify([
{
"type": "supsub",
@ -1802,3 +1857,24 @@ describe("The symbol table integraty", function() {
.toEqual(getBuilt("\\left\\lt\\frac{1}{x}\\right\\gt"));
});
});
describe("A macro expander", function() {
    /**
     * Parses `actual` using settings that carry `macros` and `expected`
     * using `defaultSettings`, then expects the two parse trees to be
     * equal once position information has been stripped.
     */
    function compareParseTree(actual, expected, macros) {
        var macroSettings = new Settings({macros: macros});
        var expandedTree = stripPositions(parseTree(actual, macroSettings));
        var referenceTree =
            stripPositions(parseTree(expected, defaultSettings));
        expect(expandedTree).toEqual(referenceTree);
    }

    it("should produce individual tokens", function() {
        var macros = {"\\foo": "123"};
        compareParseTree("e^\\foo", "e^1 23", macros);
    });

    it("should allow for multiple expansion", function() {
        var macros = {
            "\\foo": "\\bar\\bar",
            "\\bar": "a",
        };
        compareParseTree("1\\foo2", "1aa2", macros);
    });
});