Add optional arguments

Summary:
Add correct parsing of optional arguments. Now, things like `\rule` can shift
based on its argument, and parsing of `\sqrt[3]{x}` fails (correctly) because we
don't support that yet.

Also, cleaned up the lexing code a bit. There was a vestige of the old types in
the lexer (they have now been completely moved to symbols.js). As a byproduct,
this made it hard to call `expect("]")`, because it would look at the type of
the Token and the type for "]" was "close". Now, all functions just look at the
text of the parsed token, and on special occasions (like in the dimension lexer)
the lexer can return some data along with the token.

Test Plan:
 - Make sure tests still work, and new tests work
 - Make sure no huxley screenshots changed
 - Make EXTRA SURE `\sqrt[3]{x}` fails.

Reviewers: alpert

Reviewed By: alpert

Differential Revision: http://phabricator.khanacademy.org/D13505
This commit is contained in:
Emily Eisenberg 2014-10-01 14:20:47 -07:00
parent cba70b51fe
commit def1a47935
6 changed files with 233 additions and 99 deletions

View File

@ -19,37 +19,31 @@ function Lexer(input) {
};
// The resulting token returned from `lex`.
function LexResult(type, text, position) {
this.type = type;
function Token(text, data, position) {
this.text = text;
this.data = data;
this.position = position;
}
// "normal" types of tokens. These are tokens which can be matched by a simple
// regex, and have a type which is listed.
// regex
var mathNormals = [
[/^[/|@."`0-9]/, "textord"],
[/^[a-zA-Z]/, "mathord"],
[/^[*+-]/, "bin"],
[/^[=<>:]/, "rel"],
[/^[,;]/, "punct"],
[/^'/, "'"],
[/^\^/, "^"],
[/^_/, "_"],
[/^{/, "{"],
[/^}/, "}"],
[/^[(\[]/, "open"],
[/^[)\]?!]/, "close"],
[/^~/, "spacing"]
/^[/|@.""`0-9a-zA-Z]/, // ords
/^[*+-]/, // bins
/^[=<>:]/, // rels
/^[,;]/, // punctuation
/^['\^_{}]/, // misc
/^[(\[]/, // opens
/^[)\]?!]/, // closes
/^~/, // spacing
];
// These are "normal" tokens like above, but should instead be parsed in text
// mode.
var textNormals = [
[/^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, "textord"],
[/^{/, "{"],
[/^}/, "}"],
[/^~/, "spacing"]
/^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords
/^[{}]/, // grouping
/^~/, // spacing
];
// Regexes for matching whitespace
@ -77,29 +71,29 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
// Do the funky concatenation of whitespace that happens in text mode.
var whitespace = input.match(whitespaceConcatRegex);
if (whitespace !== null) {
return new LexResult(" ", " ", pos + whitespace[0].length);
return new Token(" ", null, pos + whitespace[0].length);
}
}
// If there's no more input to parse, return an EOF token
if (input.length === 0) {
return new LexResult("EOF", null, pos);
return new Token("EOF", null, pos);
}
var match;
if ((match = input.match(anyFunc))) {
// If we match a function token, return it
return new LexResult(match[0], match[0], pos + match[0].length);
return new Token(match[0], null, pos + match[0].length);
} else {
// Otherwise, we look through the normal token regexes and see if it's
// one of them.
for (var i = 0; i < normals.length; i++) {
var normal = normals[i];
if ((match = input.match(normal[0]))) {
if ((match = input.match(normal))) {
// If it is, return it
return new LexResult(
normal[1], match[0], pos + match[0].length);
return new Token(
match[0], null, pos + match[0].length);
}
}
}
@ -125,7 +119,7 @@ Lexer.prototype._innerLexColor = function(pos) {
var match;
if ((match = input.match(cssColor))) {
// If we look like a color, return a color
return new LexResult("color", match[0], pos + match[0].length);
return new Token(match[0], null, pos + match[0].length);
} else {
throw new ParseError("Invalid color", this, pos);
}
@ -133,7 +127,7 @@ Lexer.prototype._innerLexColor = function(pos) {
// A regex to match a dimension. Dimensions look like
// "1.2em" or ".4pt" or "1 ex"
var sizeRegex = /^(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
var sizeRegex = /^(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
/**
* This function lexes a dimension.
@ -148,13 +142,13 @@ Lexer.prototype._innerLexSize = function(pos) {
var match;
if ((match = input.match(sizeRegex))) {
var unit = match[2];
var unit = match[3];
// We only currently handle "em" and "ex" units
if (unit !== "em" && unit !== "ex") {
throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
}
return new LexResult("size", {
number: +match[1],
return new Token(match[0], {
number: +(match[1] + match[2]),
unit: unit
}, pos + match[0].length);
}
@ -171,7 +165,7 @@ Lexer.prototype._innerLexWhitespace = function(pos) {
var whitespace = input.match(whitespaceRegex)[0];
pos += whitespace.length;
return new LexResult("whitespace", whitespace, pos);
return new Token(whitespace, null, pos);
};
/**

View File

@ -69,7 +69,7 @@ function ParseResult(result, newPosition) {
* An initial function (without its arguments), or an argument to a function.
* The `result` argument should be a ParseResult.
*/
function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argTypes) {
function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, numOptionalArgs, argTypes) {
this.result = result;
// Is this a function (i.e. is it something defined in functions.js)?
this.isFunction = isFunction;
@ -77,6 +77,8 @@ function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argType
this.allowedInText = allowedInText;
// How many arguments?
this.numArgs = numArgs;
// How many optional arguments?
this.numOptionalArgs = numOptionalArgs;
// What types of arguments?
this.argTypes = argTypes;
}
@ -85,10 +87,10 @@ function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argType
* Checks a result to make sure it has the right type, and throws an
* appropriate error otherwise.
*/
Parser.prototype.expect = function(result, type) {
if (result.type !== type) {
Parser.prototype.expect = function(result, text) {
if (result.text !== text) {
throw new ParseError(
"Expected '" + type + "', got '" + result.type + "'",
"Expected '" + text + "', got '" + result.text + "'",
this.lexer, result.position
);
}
@ -110,7 +112,7 @@ Parser.prototype.parse = function(input) {
*/
Parser.prototype.parseInput = function(pos, mode) {
// Parse an expression
var expression = this.parseExpression(pos, mode);
var expression = this.parseExpression(pos, mode, false, null);
// If we succeeded, make sure there's an EOF at the end
var EOF = this.lexer.lex(expression.position, mode);
this.expect(EOF, "EOF");
@ -118,25 +120,37 @@ Parser.prototype.parseInput = function(pos, mode) {
};
/**
* Handles a body of an expression.
* Parses an "expression", which is a list of atoms.
*
* @param {boolean} breakOnInfix Should the parsing stop when we hit infix
* nodes? This happens when functions have higher precedence
* than infix nodes in implicit parses.
*
* @param {?string} breakOnToken The token that the expression should end with,
* or `null` if something else should end the expression.
*
* @return {ParseResult}
*/
Parser.prototype.handleExpressionBody = function(pos, mode, breakOnInfix) {
Parser.prototype.parseExpression = function(pos, mode, breakOnInfix, breakOnToken) {
var body = [];
var atom;
// Keep adding atoms to the body until we can't parse any more atoms (either
// we reached the end, a }, or a \right)
while ((atom = this.parseAtom(pos, mode))) {
while (true) {
var lex = this.lexer.lex(pos, mode);
if (breakOnToken != null && lex.text === breakOnToken) {
break;
}
var atom = this.parseAtom(pos, mode);
if (!atom) {
break;
}
if (breakOnInfix && atom.result.type === "infix") {
break;
} else {
body.push(atom.result);
pos = atom.position;
}
body.push(atom.result);
pos = atom.position;
}
return {
body: this.handleInfixNodes(body, mode),
position: pos
};
return new ParseResult(this.handleInfixNodes(body, mode), pos);
};
/**
@ -191,16 +205,6 @@ Parser.prototype.handleInfixNodes = function (body, mode) {
}
};
/**
* Parses an "expression", which is a list of atoms.
*
* @return {ParseResult}
*/
Parser.prototype.parseExpression = function(pos, mode) {
var body = this.handleExpressionBody(pos, mode);
return new ParseResult(body.body, body.position);
};
// The greediness of a superscript or subscript
var SUPSUB_GREEDINESS = 1;
@ -261,27 +265,27 @@ Parser.prototype.parseAtom = function(pos, mode) {
var lex = this.lexer.lex(currPos, mode);
var group;
if (lex.type === "^") {
if (lex.text === "^") {
// We got a superscript start
if (superscript) {
throw new ParseError(
"Double superscript", this.lexer, currPos);
}
var result = this.handleSupSubscript(
lex.position, mode, lex.type, "superscript");
lex.position, mode, lex.text, "superscript");
currPos = result.position;
superscript = result.result;
} else if (lex.type === "_") {
} else if (lex.text === "_") {
// We got a subscript start
if (subscript) {
throw new ParseError(
"Double subscript", this.lexer, currPos);
}
var result = this.handleSupSubscript(
lex.position, mode, lex.type, "subscript");
lex.position, mode, lex.text, "subscript");
currPos = result.position;
subscript = result.result;
} else if (lex.type === "'") {
} else if (lex.text === "'") {
// We got a prime
var prime = new ParseNode("textord", "\\prime", mode);
@ -289,7 +293,7 @@ Parser.prototype.parseAtom = function(pos, mode) {
var primes = [prime];
currPos = lex.position;
// Keep lexing tokens until we get something that's not a prime
while ((lex = this.lexer.lex(currPos, mode)).type === "'") {
while ((lex = this.lexer.lex(currPos, mode)).text === "'") {
// For each one, add another prime to the list
primes.push(prime);
currPos = lex.position;
@ -354,7 +358,7 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) {
// Parse the entire left function (including the delimiter)
var left = this.parseFunction(pos, mode);
// Parse out the implicit body
var body = this.handleExpressionBody(left.position, mode);
var body = this.parseExpression(left.position, mode, false, "}");
// Check the next token
var rightLex = this.parseSymbol(body.position, mode);
@ -364,7 +368,7 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) {
return new ParseResult(
new ParseNode("leftright", {
body: body.body,
body: body.result,
left: left.result.value.value,
right: right.result.value.value
}, mode),
@ -378,23 +382,23 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) {
return null;
} else if (utils.contains(sizeFuncs, func)) {
// If we see a sizing function, parse out the implicit body
var body = this.handleExpressionBody(start.result.position, mode);
var body = this.parseExpression(start.result.position, mode, false, "}");
return new ParseResult(
new ParseNode("sizing", {
// Figure out what size to use based on the list of functions above
size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
value: body.body
value: body.result
}, mode),
body.position);
} else if (utils.contains(styleFuncs, func)) {
// If we see a styling function, parse out the implicit body
var body = this.handleExpressionBody(start.result.position, mode, true);
var body = this.parseExpression(start.result.position, mode, true, "}");
return new ParseResult(
new ParseNode("styling", {
// Figure out what style to use by pulling out the style from
// the function name
style: func.slice(1, func.length - 5),
value: body.body
value: body.result
}, mode),
body.position);
} else {
@ -422,22 +426,40 @@ Parser.prototype.parseFunction = function(pos, mode) {
var newPos = baseGroup.result.position;
var result;
if (baseGroup.numArgs > 0) {
var totalArgs = baseGroup.numArgs + baseGroup.numOptionalArgs;
if (totalArgs > 0) {
var baseGreediness = functions.getGreediness(func);
var args = [func];
var positions = [newPos];
for (var i = 0; i < baseGroup.numArgs; i++) {
for (var i = 0; i < totalArgs; i++) {
var argType = baseGroup.argTypes && baseGroup.argTypes[i];
if (argType) {
var arg = this.parseSpecialGroup(newPos, argType, mode);
var arg;
if (i < baseGroup.numOptionalArgs) {
if (argType) {
arg = this.parseSpecialGroup(newPos, argType, mode, true);
} else {
arg = this.parseOptionalGroup(newPos, mode);
}
if (!arg) {
args.push(null);
positions.push(newPos);
continue;
}
} else {
var arg = this.parseGroup(newPos, mode);
}
if (!arg) {
throw new ParseError(
"Expected group after '" + baseGroup.result.result +
"'",
this.lexer, newPos);
if (argType) {
arg = this.parseSpecialGroup(newPos, argType, mode);
} else {
arg = this.parseGroup(newPos, mode);
}
if (!arg) {
throw new ParseError(
"Expected group after '" + baseGroup.result.result +
"'",
this.lexer, newPos);
}
}
var argNode;
if (arg.numArgs > 0) {
@ -483,25 +505,39 @@ Parser.prototype.parseFunction = function(pos, mode) {
*
* @return {?ParseFuncOrArgument}
*/
Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode) {
Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode, optional) {
if (mode === "color" || mode === "size") {
// color and size modes are special because they should have braces and
// should only lex a single symbol inside
var openBrace = this.lexer.lex(pos, outerMode);
this.expect(openBrace, "{");
if (optional && openBrace.text !== "[") {
// optional arguments should return null if they don't exist
return null;
}
this.expect(openBrace, optional ? "[" : "{");
var inner = this.lexer.lex(openBrace.position, mode);
var data;
if (mode === "color") {
data = inner.text;
} else {
data = inner.data;
}
var closeBrace = this.lexer.lex(inner.position, outerMode);
this.expect(closeBrace, "}");
this.expect(closeBrace, optional ? "]" : "}");
return new ParseFuncOrArgument(
new ParseResult(
new ParseNode("color", inner.text, outerMode),
new ParseNode(mode, data, outerMode),
closeBrace.position),
false);
} else if (mode === "text") {
// text mode is special because it should ignore the whitespace before
// it
var whitespace = this.lexer.lex(pos, "whitespace");
return this.parseGroup(whitespace.position, mode);
pos = whitespace.position;
}
if (optional) {
return this.parseOptionalGroup(pos, mode);
} else {
return this.parseGroup(pos, mode);
}
@ -516,9 +552,9 @@ Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode) {
Parser.prototype.parseGroup = function(pos, mode) {
var start = this.lexer.lex(pos, mode);
// Try to parse an open brace
if (start.type === "{") {
if (start.text === "{") {
// If we get a brace, parse an expression
var expression = this.parseExpression(start.position, mode);
var expression = this.parseExpression(start.position, mode, false, "}");
// Make sure we get a close brace
var closeBrace = this.lexer.lex(expression.position, mode);
this.expect(closeBrace, "}");
@ -533,6 +569,31 @@ Parser.prototype.parseGroup = function(pos, mode) {
}
};
/**
 * Parses an optional group, which is an expression wrapped in square brackets
 * (like "[x+y]"). When the next token is not an opening bracket there is no
 * optional group, and `null` is returned instead.
 *
 * @return {?ParseFuncOrArgument}
 */
Parser.prototype.parseOptionalGroup = function(pos, mode) {
    var open = this.lexer.lex(pos, mode);
    // Without an opening bracket there is nothing optional to parse
    if (open.text !== "[") {
        return null;
    }
    // Parse the bracketed expression, stopping at the matching "]"
    var inner = this.parseExpression(open.position, mode, false, "]");
    // The expression has to be followed by a close bracket
    var close = this.lexer.lex(inner.position, mode);
    this.expect(close, "]");
    // Wrap the contents up as an ordinary (non-function) group
    var group = new ParseNode("ordgroup", inner.result, mode);
    return new ParseFuncOrArgument(
        new ParseResult(group, close.position),
        false);
};
/**
* Parse a single symbol out of the string. Here, we handle both the functions
* we have defined, as well as the single character symbols
@ -542,9 +603,9 @@ Parser.prototype.parseGroup = function(pos, mode) {
Parser.prototype.parseSymbol = function(pos, mode) {
var nucleus = this.lexer.lex(pos, mode);
if (functions.funcs[nucleus.type]) {
if (functions.funcs[nucleus.text]) {
// If there is a function with this name, we use its data
var func = functions.funcs[nucleus.type];
var func = functions.funcs[nucleus.text];
// Here, we replace "original" argTypes with the current mode
var argTypes = func.argTypes;
@ -558,8 +619,8 @@ Parser.prototype.parseSymbol = function(pos, mode) {
}
return new ParseFuncOrArgument(
new ParseResult(nucleus.type, nucleus.position),
true, func.allowedInText, func.numArgs, argTypes);
new ParseResult(nucleus.text, nucleus.position),
true, func.allowedInText, func.numArgs, func.numOptionalArgs, argTypes);
} else if (symbols[mode][nucleus.text]) {
// Otherwise if this is a no-argument function, find the type it
// corresponds to in the symbols map

View File

@ -876,7 +876,15 @@ var groupTypes = {
// Make an empty span for the rule
var rule = makeSpan(["mord", "rule"], [], options.getColor());
// Calculate the width and height of the rule, and account for units
// Calculate the shift, width, and height of the rule, and account for units
var shift = 0;
if (group.value.shift) {
shift = group.value.shift.number;
if (group.value.shift.unit === "ex") {
shift *= fontMetrics.metrics.xHeight;
}
}
var width = group.value.width.number;
if (group.value.width.unit === "ex") {
width *= fontMetrics.metrics.xHeight;
@ -889,16 +897,19 @@ var groupTypes = {
// The sizes of rules are absolute, so make it larger if we are in a
// smaller style.
shift /= options.style.sizeMultiplier;
width /= options.style.sizeMultiplier;
height /= options.style.sizeMultiplier;
// Style the rule to the right size
rule.style.borderRightWidth = width + "em";
rule.style.borderTopWidth = height + "em";
rule.style.bottom = shift + "em";
// Record the height and width
rule.width = width;
rule.height = height;
rule.height = height + shift;
rule.depth = -shift;
return rule;
},

View File

@ -9,8 +9,9 @@ var ParseError = require("./ParseError");
* The data contains the following keys:
* - numArgs: The number of arguments the function takes.
* - argTypes: (optional) An array corresponding to each argument of the
* function, giving the type of argument that should be parsed.
* Valid types:
* function, giving the type of argument that should be parsed. Its
* length should be equal to `numArgs + numOptionalArgs`. Valid
* types:
* - "size": A size-like thing, such as "1em" or "5ex"
* - "color": An html color, like "#abc" or "blue"
* - "original": The same type as the environment that the
@ -45,6 +46,10 @@ var ParseError = require("./ParseError");
* The default value is `1`
* - allowedInText: (optional) Whether or not the function is allowed inside
* text mode (default false)
* - numOptionalArgs: (optional) The number of optional arguments the function
* should parse. If the optional arguments aren't found,
* `null` will be passed to the handler in their place.
* (default 0)
* - handler: The function that is called to handle this function and its
* arguments. The arguments are:
* - func: the text of the function
@ -65,7 +70,14 @@ var functions = {
// A normal square root
"\\sqrt": {
numArgs: 1,
handler: function(func, body) {
numOptionalArgs: 1,
handler: function(func, optional, body, positions) {
if (optional != null) {
throw new ParseError(
"Optional arguments to \\sqrt aren't supported yet",
this.lexer, positions[1] - 1);
}
return {
type: "sqrt",
body: body
@ -132,10 +144,12 @@ var functions = {
// A box of the width and height
"\\rule": {
numArgs: 2,
argTypes: ["size", "size"],
handler: function(func, width, height) {
numOptionalArgs: 1,
argTypes: ["size", "size", "size"],
handler: function(func, shift, width, height) {
return {
type: "rule",
shift: shift && shift.value,
width: width.value,
height: height.value
};
@ -448,6 +462,23 @@ var getGreediness = function(func) {
}
};
// Normalize every function definition so that the optional fields
// (greediness, allowedInText, numOptionalArgs) always carry a value
for (var f in functions) {
    // Skip anything inherited through the prototype chain
    if (!functions.hasOwnProperty(f)) {
        continue;
    }
    var func = functions[f];
    functions[f] = {
        numArgs: func.numArgs,
        argTypes: func.argTypes,
        greediness: func.greediness,
        // Any falsy value collapses to `false`
        allowedInText: func.allowedInText || false,
        numOptionalArgs: func.numOptionalArgs,
        handler: func.handler
    };
    // Only a missing (undefined) value falls back to the default
    if (functions[f].greediness === undefined) {
        functions[f].greediness = 1;
    }
    if (functions[f].numOptionalArgs === undefined) {
        functions[f].numOptionalArgs = 0;
    }
}
module.exports = {
funcs: functions,
getGreediness: getGreediness

View File

@ -286,6 +286,7 @@
.rule {
display: inline-block;
border-style: solid;
position: relative;
}
.overline {

View File

@ -804,6 +804,15 @@ describe("A rule parser", function() {
expect(hardNumberParse.value.width.number).toBeCloseTo(1.24);
expect(hardNumberParse.value.height.number).toBeCloseTo(2.45);
});
// Sizes may carry a leading minus sign; the second size also puts whitespace
// between the sign and the digits ("- 0.2em"), which should still be accepted.
it("should parse negative sizes", function() {
expect("\\rule{-1em}{- 0.2em}").toParse();
// The sign should be folded into the parsed number
var parse = parseTree("\\rule{-1em}{- 0.2em}")[0];
expect(parse.value.width.number).toBeCloseTo(-1);
expect(parse.value.height.number).toBeCloseTo(-0.2);
});
});
describe("A left/right parser", function() {
@ -1138,3 +1147,30 @@ describe("A parser error", function () {
}
});
});
// Specs for optional (square-bracket) arguments: when they are accepted, when
// they may be left out, and when parsing should fail.
describe("An optional argument parser", function() {
it("should not fail", function() {
// Note the "[1]" after \frac doesn't actually make an optional argument,
// but the input should still parse
expect("\\frac[1]{2}{3}").toParse();
// \rule takes one optional size before its two required sizes
expect("\\rule[0.2em]{1em}{1em}").toParse();
});
it("should fail on sqrts for now", function() {
// The \sqrt handler throws when an optional argument is supplied
expect("\\sqrt[3]{2}").toNotParse();
});
it("should work when the optional argument is missing", function() {
expect("\\sqrt{2}").toParse();
expect("\\rule{1em}{2em}").toParse();
});
it("should fail when the optional argument is malformed", function() {
// "1" has no unit, so it is not a valid size
expect("\\rule[1]{2em}{3em}").toNotParse();
});
it("should not work if the optional argument isn't closed", function() {
// An opening "[" must be matched by a closing "]"
expect("\\sqrt[").toNotParse();
});
});