
Summary: Make all of the parsing functions keep track of whether they are parsing in math mode or text mode. Then, add a separate lexing function to lex text mode, which is different from the normal mode because it does weird things with spacing and allows a different set of characters. Test Plan: - See that the normal tests work - See that the huxley screenshot looks reasonable - See that none of the other huxley screenshots changed Reviewers: alpert Reviewed By: alpert Differential Revision: http://phabricator.khanacademy.org/D7578
94 lines
2.5 KiB
JavaScript
94 lines
2.5 KiB
JavaScript
var ParseError = require("./ParseError");
/**
 * The main lexer class.
 *
 * Holds the complete input string. Tokens are pulled out of it one at a time,
 * by position, through the methods attached to `Lexer.prototype`.
 *
 * @constructor
 * @param {string} input The full source text to tokenize.
 */
function Lexer(input) {
    this._input = input;
}

/**
 * The result of a single lex.
 *
 * @constructor
 * @param {string} type The token type (e.g. "mathord", "EOF", or the
 *     function name itself for backslash commands).
 * @param {?string} text The matched source text; the lexer in this file
 *     passes `null` for the "EOF" token.
 * @param {number} position An index into the input. The lexer in this file
 *     passes the offset just past the end of the matched token.
 */
function LexResult(type, text, position) {
    this.type = type;
    this.text = text;
    this.position = position;
}

// "normal" types of tokens in math mode.
//
// Each entry is a [pattern, type] pair. Every pattern is anchored with `^`
// so it only matches at the start of the remaining input, and the entries
// are tried in order, so the first matching entry determines the token type.
var mathNormals = [
    [/^[/|@."`0-9]/, "textord"],
    [/^[a-zA-Z]/, "mathord"],
    [/^[*+-]/, "bin"],
    [/^[=<>:]/, "rel"],
    [/^[,;]/, "punct"],
    // The following tokens use their own text as the type.
    [/^'/, "'"],
    [/^\^/, "^"],
    [/^_/, "_"],
    [/^{/, "{"],
    [/^}/, "}"],
    [/^[(\[]/, "open"],
    [/^[)\]?!]/, "close"]
];

// "normal" types of tokens in text mode.
//
// Same [pattern, type] layout as the math-mode table: patterns are anchored
// at the start of the remaining input and tried in order.
var textNormals = [
    // The hyphen is escaped so that it is a literal "-". Written unescaped
    // between ")" and "=", it would form the range ")".."=", which also
    // admits "<" even though "<" is not one of the listed characters.
    [/^[a-zA-Z0-9`!@*()\-=+\[\]'";:?\/.,]/, "textord"],
    [/^{/, "{"],
    [/^}/, "}"]
];

// Regex that recognizes a function (backslash command) at the start of the
// remaining input: a backslash followed either by a run of ASCII letters
// (e.g. "\frac") or by exactly one other character (e.g. "\{"). Because "."
// does not match line terminators, a backslash directly before a newline, or
// a lone trailing backslash, does not match.
var anyFunc = /^\\(?:[a-zA-Z]+|.)/;

/**
 * Lex a single token starting at `pos`, using the supplied table of "normal"
 * token patterns.
 *
 * @param {number} pos Index into the input at which to start lexing.
 * @param {Array} normals Ordered list of [regex, type] pairs; the first
 *     regex that matches the remaining input determines the token type.
 * @param {boolean} ignoreWhitespace When true, leading whitespace is skipped
 *     before lexing. When false, a leading run of spaces (or a backslash
 *     followed by spaces) is returned as a single " " token.
 * @return {LexResult} The token found; its `position` is the index just past
 *     the consumed text. An "EOF" token with `null` text is returned when no
 *     input remains.
 * @throws {ParseError} If the remaining input starts with a character that
 *     neither the function regex nor any entry of `normals` matches.
 */
Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
    var input = this._input.slice(pos);
    var whitespace;

    if (ignoreWhitespace) {
        // Get rid of whitespace, advancing `pos` past it so that reported
        // positions stay relative to the full input.
        whitespace = input.match(/^\s*/)[0];
        pos += whitespace.length;
        input = input.slice(whitespace.length);
    } else {
        // Do the funky concatenation of whitespace: a whole run of spaces,
        // or a backslash followed by spaces, becomes one " " token. This is
        // checked before the function regex, so "\ " is handled here.
        whitespace = input.match(/^( +|\\ +)/);
        if (whitespace !== null) {
            return new LexResult(" ", " ", pos + whitespace[0].length);
        }
    }

    // If there's no more input to parse, return an EOF token
    if (input.length === 0) {
        return new LexResult("EOF", null, pos);
    }

    // Functions (backslash commands) use their own text as the token type.
    var match = input.match(anyFunc);
    if (match) {
        return new LexResult(match[0], match[0], pos + match[0].length);
    }

    // Otherwise, look through the normal token regexes in order and return
    // the first one that matches.
    for (var i = 0; i < normals.length; i++) {
        var normal = normals[i];
        match = input.match(normal[0]);
        if (match) {
            return new LexResult(
                normal[1], match[0], pos + match[0].length);
        }
    }

    // We didn't match any of the tokens, so throw an error.
    throw new ParseError("Unexpected character: '" + input[0] +
        "' at position " + pos);
};

/**
 * Lex a single token.
 *
 * @param {number} pos Index into the input at which to start lexing.
 * @param {string} mode Either "math" or "text". Math mode uses the math
 *     token table and skips leading whitespace; text mode uses the text
 *     token table and turns leading spaces into a " " token.
 * @return {LexResult|undefined} The token found. For any other `mode` value
 *     no branch applies and the result is `undefined`.
 */
Lexer.prototype.lex = function(pos, mode) {
    if (mode === "math") {
        return this._innerLex(pos, mathNormals, true);
    } else if (mode === "text") {
        return this._innerLex(pos, textNormals, false);
    }
};

// The Lexer constructor is this module's only export.
module.exports = Lexer;