diff --git a/src/Lexer.js b/src/Lexer.js index bee9db0ce..7e0c50999 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -19,37 +19,31 @@ function Lexer(input) { }; // The resulting token returned from `lex`. -function LexResult(type, text, position) { - this.type = type; +function Token(text, data, position) { this.text = text; + this.data = data; this.position = position; } // "normal" types of tokens. These are tokens which can be matched by a simple -// regex, and have a type which is listed. +// regex var mathNormals = [ - [/^[/|@."`0-9]/, "textord"], - [/^[a-zA-Z]/, "mathord"], - [/^[*+-]/, "bin"], - [/^[=<>:]/, "rel"], - [/^[,;]/, "punct"], - [/^'/, "'"], - [/^\^/, "^"], - [/^_/, "_"], - [/^{/, "{"], - [/^}/, "}"], - [/^[(\[]/, "open"], - [/^[)\]?!]/, "close"], - [/^~/, "spacing"] + /^[/|@.""`0-9a-zA-Z]/, // ords + /^[*+-]/, // bins + /^[=<>:]/, // rels + /^[,;]/, // punctuation + /^['\^_{}]/, // misc + /^[(\[]/, // opens + /^[)\]?!]/, // closes + /^~/, // spacing ]; // These are "normal" tokens like above, but should instead be parsed in text // mode. var textNormals = [ - [/^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, "textord"], - [/^{/, "{"], - [/^}/, "}"], - [/^~/, "spacing"] + /^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords + /^[{}]/, // grouping + /^~/, // spacing ]; // Regexes for matching whitespace @@ -77,29 +71,29 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) { // Do the funky concatenation of whitespace that happens in text mode. 
var whitespace = input.match(whitespaceConcatRegex); if (whitespace !== null) { - return new LexResult(" ", " ", pos + whitespace[0].length); + return new Token(" ", null, pos + whitespace[0].length); } } // If there's no more input to parse, return an EOF token if (input.length === 0) { - return new LexResult("EOF", null, pos); + return new Token("EOF", null, pos); } var match; if ((match = input.match(anyFunc))) { // If we match a function token, return it - return new LexResult(match[0], match[0], pos + match[0].length); + return new Token(match[0], null, pos + match[0].length); } else { // Otherwise, we look through the normal token regexes and see if it's // one of them. for (var i = 0; i < normals.length; i++) { var normal = normals[i]; - if ((match = input.match(normal[0]))) { + if ((match = input.match(normal))) { // If it is, return it - return new LexResult( - normal[1], match[0], pos + match[0].length); + return new Token( + match[0], null, pos + match[0].length); } } } @@ -125,7 +119,7 @@ Lexer.prototype._innerLexColor = function(pos) { var match; if ((match = input.match(cssColor))) { // If we look like a color, return a color - return new LexResult("color", match[0], pos + match[0].length); + return new Token(match[0], null, pos + match[0].length); } else { throw new ParseError("Invalid color", this, pos); } @@ -133,7 +127,7 @@ Lexer.prototype._innerLexColor = function(pos) { // A regex to match a dimension. Dimensions look like // "1.2em" or ".4pt" or "1 ex" -var sizeRegex = /^(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/; +var sizeRegex = /^(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/; /** * This function lexes a dimension. 
@@ -148,13 +142,13 @@ Lexer.prototype._innerLexSize = function(pos) { var match; if ((match = input.match(sizeRegex))) { - var unit = match[2]; + var unit = match[3]; // We only currently handle "em" and "ex" units if (unit !== "em" && unit !== "ex") { throw new ParseError("Invalid unit: '" + unit + "'", this, pos); } - return new LexResult("size", { - number: +match[1], + return new Token(match[0], { + number: +(match[1] + match[2]), unit: unit }, pos + match[0].length); } @@ -171,7 +165,7 @@ Lexer.prototype._innerLexWhitespace = function(pos) { var whitespace = input.match(whitespaceRegex)[0]; pos += whitespace.length; - return new LexResult("whitespace", whitespace, pos); + return new Token(whitespace, null, pos); }; /** diff --git a/src/Parser.js b/src/Parser.js index 865fb2b5a..65bbf9caa 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -69,7 +69,7 @@ function ParseResult(result, newPosition) { * An initial function (without its arguments), or an argument to a function. * The `result` argument should be a ParseResult. */ -function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argTypes) { +function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, numOptionalArgs, argTypes) { this.result = result; // Is this a function (i.e. is it something defined in functions.js)? this.isFunction = isFunction; @@ -77,6 +77,8 @@ function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argType this.allowedInText = allowedInText; // How many arguments? this.numArgs = numArgs; + // How many optional arguments? + this.numOptionalArgs = numOptionalArgs; // What types of arguments? this.argTypes = argTypes; } @@ -85,10 +87,10 @@ function ParseFuncOrArgument(result, isFunction, allowedInText, numArgs, argType * Checks a result to make sure it has the right type, and throws an * appropriate error otherwise. 
*/ -Parser.prototype.expect = function(result, type) { - if (result.type !== type) { +Parser.prototype.expect = function(result, text) { + if (result.text !== text) { throw new ParseError( - "Expected '" + type + "', got '" + result.type + "'", + "Expected '" + text + "', got '" + result.text + "'", this.lexer, result.position ); } @@ -110,7 +112,7 @@ Parser.prototype.parse = function(input) { */ Parser.prototype.parseInput = function(pos, mode) { // Parse an expression - var expression = this.parseExpression(pos, mode); + var expression = this.parseExpression(pos, mode, false, null); // If we succeeded, make sure there's an EOF at the end var EOF = this.lexer.lex(expression.position, mode); this.expect(EOF, "EOF"); @@ -118,25 +120,37 @@ Parser.prototype.parseInput = function(pos, mode) { }; /** - * Handles a body of an expression. + * Parses an "expression", which is a list of atoms. + * + * @param {boolean} breakOnInfix Should the parsing stop when we hit infix + * nodes? This happens when functions have higher precedence + * than infix nodes in implicit parses. + * + * @param {?string} breakOnToken The token that the expression should end with, + * or `null` if something else should end the expression. 
+ * + * @return {ParseResult} */ -Parser.prototype.handleExpressionBody = function(pos, mode, breakOnInfix) { +Parser.prototype.parseExpression = function(pos, mode, breakOnInfix, breakOnToken) { var body = []; - var atom; // Keep adding atoms to the body until we can't parse any more atoms (either // we reached the end, a }, or a \right) - while ((atom = this.parseAtom(pos, mode))) { + while (true) { + var lex = this.lexer.lex(pos, mode); + if (breakOnToken != null && lex.text === breakOnToken) { + break; + } + var atom = this.parseAtom(pos, mode); + if (!atom) { + break; + } if (breakOnInfix && atom.result.type === "infix") { break; - } else { - body.push(atom.result); - pos = atom.position; } + body.push(atom.result); + pos = atom.position; } - return { - body: this.handleInfixNodes(body, mode), - position: pos - }; + return new ParseResult(this.handleInfixNodes(body, mode), pos); }; /** @@ -191,16 +205,6 @@ Parser.prototype.handleInfixNodes = function (body, mode) { } }; -/** - * Parses an "expression", which is a list of atoms. 
- * - * @return {ParseResult} - */ -Parser.prototype.parseExpression = function(pos, mode) { - var body = this.handleExpressionBody(pos, mode); - return new ParseResult(body.body, body.position); -}; - // The greediness of a superscript or subscript var SUPSUB_GREEDINESS = 1; @@ -261,27 +265,27 @@ Parser.prototype.parseAtom = function(pos, mode) { var lex = this.lexer.lex(currPos, mode); var group; - if (lex.type === "^") { + if (lex.text === "^") { // We got a superscript start if (superscript) { throw new ParseError( "Double superscript", this.lexer, currPos); } var result = this.handleSupSubscript( - lex.position, mode, lex.type, "superscript"); + lex.position, mode, lex.text, "superscript"); currPos = result.position; superscript = result.result; - } else if (lex.type === "_") { + } else if (lex.text === "_") { // We got a subscript start if (subscript) { throw new ParseError( "Double subscript", this.lexer, currPos); } var result = this.handleSupSubscript( - lex.position, mode, lex.type, "subscript"); + lex.position, mode, lex.text, "subscript"); currPos = result.position; subscript = result.result; - } else if (lex.type === "'") { + } else if (lex.text === "'") { // We got a prime var prime = new ParseNode("textord", "\\prime", mode); @@ -289,7 +293,7 @@ Parser.prototype.parseAtom = function(pos, mode) { var primes = [prime]; currPos = lex.position; // Keep lexing tokens until we get something that's not a prime - while ((lex = this.lexer.lex(currPos, mode)).type === "'") { + while ((lex = this.lexer.lex(currPos, mode)).text === "'") { // For each one, add another prime to the list primes.push(prime); currPos = lex.position; @@ -354,7 +358,7 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) { // Parse the entire left function (including the delimiter) var left = this.parseFunction(pos, mode); // Parse out the implicit body - var body = this.handleExpressionBody(left.position, mode); + var body = this.parseExpression(left.position, mode, false, 
"}"); // Check the next token var rightLex = this.parseSymbol(body.position, mode); @@ -364,7 +368,7 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) { return new ParseResult( new ParseNode("leftright", { - body: body.body, + body: body.result, left: left.result.value.value, right: right.result.value.value }, mode), @@ -378,23 +382,23 @@ Parser.prototype.parseImplicitGroup = function(pos, mode) { return null; } else if (utils.contains(sizeFuncs, func)) { // If we see a sizing function, parse out the implict body - var body = this.handleExpressionBody(start.result.position, mode); + var body = this.parseExpression(start.result.position, mode, false, "}"); return new ParseResult( new ParseNode("sizing", { // Figure out what size to use based on the list of functions above size: "size" + (utils.indexOf(sizeFuncs, func) + 1), - value: body.body + value: body.result }, mode), body.position); } else if (utils.contains(styleFuncs, func)) { // If we see a styling function, parse out the implict body - var body = this.handleExpressionBody(start.result.position, mode, true); + var body = this.parseExpression(start.result.position, mode, true, "}"); return new ParseResult( new ParseNode("styling", { // Figure out what style to use by pulling out the style from // the function name style: func.slice(1, func.length - 5), - value: body.body + value: body.result }, mode), body.position); } else { @@ -422,22 +426,40 @@ Parser.prototype.parseFunction = function(pos, mode) { var newPos = baseGroup.result.position; var result; - if (baseGroup.numArgs > 0) { + + var totalArgs = baseGroup.numArgs + baseGroup.numOptionalArgs; + + if (totalArgs > 0) { var baseGreediness = functions.getGreediness(func); var args = [func]; var positions = [newPos]; - for (var i = 0; i < baseGroup.numArgs; i++) { + + for (var i = 0; i < totalArgs; i++) { var argType = baseGroup.argTypes && baseGroup.argTypes[i]; - if (argType) { - var arg = this.parseSpecialGroup(newPos, argType, mode); + var 
arg; + if (i < baseGroup.numOptionalArgs) { + if (argType) { + arg = this.parseSpecialGroup(newPos, argType, mode, true); + } else { + arg = this.parseOptionalGroup(newPos, mode); + } + if (!arg) { + args.push(null); + positions.push(newPos); + continue; + } } else { - var arg = this.parseGroup(newPos, mode); - } - if (!arg) { - throw new ParseError( - "Expected group after '" + baseGroup.result.result + - "'", - this.lexer, newPos); + if (argType) { + arg = this.parseSpecialGroup(newPos, argType, mode); + } else { + arg = this.parseGroup(newPos, mode); + } + if (!arg) { + throw new ParseError( + "Expected group after '" + baseGroup.result.result + + "'", + this.lexer, newPos); + } } var argNode; if (arg.numArgs > 0) { @@ -483,25 +505,39 @@ Parser.prototype.parseFunction = function(pos, mode) { * * @return {?ParseFuncOrArgument} */ -Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode) { +Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode, optional) { if (mode === "color" || mode === "size") { // color and size modes are special because they should have braces and // should only lex a single symbol inside var openBrace = this.lexer.lex(pos, outerMode); - this.expect(openBrace, "{"); + if (optional && openBrace.text !== "[") { + // optional arguments should return null if they don't exist + return null; + } + this.expect(openBrace, optional ? "[" : "{"); var inner = this.lexer.lex(openBrace.position, mode); + var data; + if (mode === "color") { + data = inner.text; + } else { + data = inner.data; + } var closeBrace = this.lexer.lex(inner.position, outerMode); - this.expect(closeBrace, "}"); + this.expect(closeBrace, optional ? 
"]" : "}"); return new ParseFuncOrArgument( new ParseResult( - new ParseNode("color", inner.text, outerMode), + new ParseNode(mode, data, outerMode), closeBrace.position), false); } else if (mode === "text") { // text mode is special because it should ignore the whitespace before // it var whitespace = this.lexer.lex(pos, "whitespace"); - return this.parseGroup(whitespace.position, mode); + pos = whitespace.position; + } + + if (optional) { + return this.parseOptionalGroup(pos, mode); } else { return this.parseGroup(pos, mode); } @@ -516,9 +552,9 @@ Parser.prototype.parseSpecialGroup = function(pos, mode, outerMode) { Parser.prototype.parseGroup = function(pos, mode) { var start = this.lexer.lex(pos, mode); // Try to parse an open brace - if (start.type === "{") { + if (start.text === "{") { // If we get a brace, parse an expression - var expression = this.parseExpression(start.position, mode); + var expression = this.parseExpression(start.position, mode, false, "}"); // Make sure we get a close brace var closeBrace = this.lexer.lex(expression.position, mode); this.expect(closeBrace, "}"); @@ -533,6 +569,31 @@ Parser.prototype.parseGroup = function(pos, mode) { } }; +/** + * Parses an optional group, which is an expression in brackets (like "[x+y]") + * + * @return {?ParseFuncOrArgument} + */ +Parser.prototype.parseOptionalGroup = function(pos, mode) { + var start = this.lexer.lex(pos, mode); + // Try to parse an open bracket + if (start.text === "[") { + // If we get a bracket, parse an expression + var expression = this.parseExpression(start.position, mode, false, "]"); + // Make sure we get a close bracket + var closeBracket = this.lexer.lex(expression.position, mode); + this.expect(closeBracket, "]"); + return new ParseFuncOrArgument( + new ParseResult( + new ParseNode("ordgroup", expression.result, mode), + closeBracket.position), + false); + } else { + // Otherwise, return null + return null; + } +}; + /** * Parse a single symbol out of the string. 
Here, we handle both the functions * we have defined, as well as the single character symbols @@ -542,9 +603,9 @@ Parser.prototype.parseGroup = function(pos, mode) { Parser.prototype.parseSymbol = function(pos, mode) { var nucleus = this.lexer.lex(pos, mode); - if (functions.funcs[nucleus.type]) { + if (functions.funcs[nucleus.text]) { // If there is a function with this name, we use its data - var func = functions.funcs[nucleus.type]; + var func = functions.funcs[nucleus.text]; // Here, we replace "original" argTypes with the current mode var argTypes = func.argTypes; @@ -558,8 +619,8 @@ Parser.prototype.parseSymbol = function(pos, mode) { } return new ParseFuncOrArgument( - new ParseResult(nucleus.type, nucleus.position), - true, func.allowedInText, func.numArgs, argTypes); + new ParseResult(nucleus.text, nucleus.position), + true, func.allowedInText, func.numArgs, func.numOptionalArgs, argTypes); } else if (symbols[mode][nucleus.text]) { // Otherwise if this is a no-argument function, find the type it // corresponds to in the symbols map diff --git a/src/buildTree.js b/src/buildTree.js index e20d23c9d..b249beacf 100644 --- a/src/buildTree.js +++ b/src/buildTree.js @@ -876,7 +876,15 @@ var groupTypes = { // Make an empty span for the rule var rule = makeSpan(["mord", "rule"], [], options.getColor()); - // Calculate the width and height of the rule, and account for units + // Calculate the shift, width, and height of the rule, and account for units + var shift = 0; + if (group.value.shift) { + shift = group.value.shift.number; + if (group.value.shift.unit === "ex") { + shift *= fontMetrics.metrics.xHeight; + } + } + var width = group.value.width.number; if (group.value.width.unit === "ex") { width *= fontMetrics.metrics.xHeight; @@ -889,16 +897,19 @@ var groupTypes = { // The sizes of rules are absolute, so make it larger if we are in a // smaller style. 
+ shift /= options.style.sizeMultiplier; width /= options.style.sizeMultiplier; height /= options.style.sizeMultiplier; // Style the rule to the right size rule.style.borderRightWidth = width + "em"; rule.style.borderTopWidth = height + "em"; + rule.style.bottom = shift + "em"; // Record the height and width rule.width = width; - rule.height = height; + rule.height = height + shift; + rule.depth = -shift; return rule; }, diff --git a/src/functions.js b/src/functions.js index 0c28dcda1..7a659dc92 100644 --- a/src/functions.js +++ b/src/functions.js @@ -9,8 +9,9 @@ var ParseError = require("./ParseError"); * The data contains the following keys: * - numArgs: The number of arguments the function takes. * - argTypes: (optional) An array corresponding to each argument of the - * function, giving the type of argument that should be parsed. - * Valid types: + * function, giving the type of argument that should be parsed. Its + * length should be equal to `numArgs + numOptionalArgs`. Valid + * types: * - "size": A size-like thing, such as "1em" or "5ex" * - "color": An html color, like "#abc" or "blue" * - "original": The same type as the environment that the @@ -45,6 +46,10 @@ var ParseError = require("./ParseError"); * The default value is `1` * - allowedInText: (optional) Whether or not the function is allowed inside * text mode (default false) + * - numOptionalArgs: (optional) The number of optional arguments the function + * should parse. If the optional arguments aren't found, + * `null` will be passed to the handler in their place. + * (default 0) * - handler: The function that is called to handle this function and its * arguments. 
The arguments are: * - func: the text of the function @@ -65,7 +70,14 @@ var functions = { // A normal square root "\\sqrt": { numArgs: 1, - handler: function(func, body) { + numOptionalArgs: 1, + handler: function(func, optional, body, positions) { + if (optional != null) { + throw new ParseError( + "Optional arguments to \\sqrt aren't supported yet", + this.lexer, positions[1] - 1); + } + return { type: "sqrt", body: body @@ -132,10 +144,12 @@ var functions = { // A box of the width and height "\\rule": { numArgs: 2, - argTypes: ["size", "size"], - handler: function(func, width, height) { + numOptionalArgs: 1, + argTypes: ["size", "size", "size"], + handler: function(func, shift, width, height) { return { type: "rule", + shift: shift && shift.value, width: width.value, height: height.value }; @@ -448,6 +462,23 @@ var getGreediness = function(func) { } }; +// Set default values of functions +for (var f in functions) { + if (functions.hasOwnProperty(f)) { + var func = functions[f]; + + functions[f] = { + numArgs: func.numArgs, + argTypes: func.argTypes, + greediness: (func.greediness === undefined) ? 1 : func.greediness, + allowedInText: func.allowedInText ? func.allowedInText : false, + numOptionalArgs: (func.numOptionalArgs === undefined) ? 
0 : + func.numOptionalArgs, + handler: func.handler + }; + } +} + module.exports = { funcs: functions, getGreediness: getGreediness diff --git a/static/katex.less b/static/katex.less index 669312ce8..fe18a0aff 100644 --- a/static/katex.less +++ b/static/katex.less @@ -286,6 +286,7 @@ .rule { display: inline-block; border-style: solid; + position: relative; } .overline { diff --git a/test/katex-spec.js b/test/katex-spec.js index 0f804f707..2239bbdbf 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -804,6 +804,15 @@ describe("A rule parser", function() { expect(hardNumberParse.value.width.number).toBeCloseTo(1.24); expect(hardNumberParse.value.height.number).toBeCloseTo(2.45); }); + + it("should parse negative sizes", function() { + expect("\\rule{-1em}{- 0.2em}").toParse(); + + var parse = parseTree("\\rule{-1em}{- 0.2em}")[0]; + + expect(parse.value.width.number).toBeCloseTo(-1); + expect(parse.value.height.number).toBeCloseTo(-0.2); + }); }); describe("A left/right parser", function() { @@ -1138,3 +1147,30 @@ describe("A parser error", function () { } }); }); + +describe("An optional argument parser", function() { + it("should not fail", function() { + // Note this doesn't actually make an optional argument, but still + // should work + expect("\\frac[1]{2}{3}").toParse(); + + expect("\\rule[0.2em]{1em}{1em}").toParse(); + }); + + it("should fail on sqrts for now", function() { + expect("\\sqrt[3]{2}").toNotParse(); + }); + + it("should work when the optional argument is missing", function() { + expect("\\sqrt{2}").toParse(); + expect("\\rule{1em}{2em}").toParse(); + }); + + it("should fail when the optional argument is malformed", function() { + expect("\\rule[1]{2em}{3em}").toNotParse(); + }); + + it("should not work if the optional argument isn't closed", function() { + expect("\\sqrt[").toNotParse(); + }); +});