Don't slice in lexer
Summary: Theoretically this allocates way less. In practice it seems to be exactly the same speed. Test Plan: make test Reviewers: emily Reviewed By: emily Differential Revision: https://phabricator.khanacademy.org/D16621
This commit is contained in:
parent
b2fbd08871
commit
0f6530096b
|
@ -25,5 +25,8 @@
|
||||||
"bin": "cli.js",
|
"bin": "cli.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "make lint test"
|
"test": "make lint test"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"match-at": "^0.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
72
src/Lexer.js
72
src/Lexer.js
|
@ -11,6 +11,8 @@
|
||||||
* kinds.
|
* kinds.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
var matchAt = require("match-at");
|
||||||
|
|
||||||
var ParseError = require("./ParseError");
|
var ParseError = require("./ParseError");
|
||||||
|
|
||||||
// The main lexer class
|
// The main lexer class
|
||||||
|
@ -28,31 +30,31 @@ function Token(text, data, position) {
|
||||||
// "normal" types of tokens. These are tokens which can be matched by a simple
|
// "normal" types of tokens. These are tokens which can be matched by a simple
|
||||||
// regex
|
// regex
|
||||||
var mathNormals = [
|
var mathNormals = [
|
||||||
/^[/|@.""`0-9a-zA-Z]/, // ords
|
/[/|@.""`0-9a-zA-Z]/, // ords
|
||||||
/^[*+-]/, // bins
|
/[*+-]/, // bins
|
||||||
/^[=<>:]/, // rels
|
/[=<>:]/, // rels
|
||||||
/^[,;]/, // punctuation
|
/[,;]/, // punctuation
|
||||||
/^['\^_{}]/, // misc
|
/['\^_{}]/, // misc
|
||||||
/^[(\[]/, // opens
|
/[(\[]/, // opens
|
||||||
/^[)\]?!]/, // closes
|
/[)\]?!]/, // closes
|
||||||
/^~/ // spacing
|
/~/ // spacing
|
||||||
];
|
];
|
||||||
|
|
||||||
// These are "normal" tokens like above, but should instead be parsed in text
|
// These are "normal" tokens like above, but should instead be parsed in text
|
||||||
// mode.
|
// mode.
|
||||||
var textNormals = [
|
var textNormals = [
|
||||||
/^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords
|
/[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords
|
||||||
/^[{}]/, // grouping
|
/[{}]/, // grouping
|
||||||
/^~/ // spacing
|
/~/ // spacing
|
||||||
];
|
];
|
||||||
|
|
||||||
// Regexes for matching whitespace
|
// Regexes for matching whitespace
|
||||||
var whitespaceRegex = /^\s*/;
|
var whitespaceRegex = /\s*/;
|
||||||
var whitespaceConcatRegex = /^( +|\\ +)/;
|
var whitespaceConcatRegex = / +|\\ +/;
|
||||||
|
|
||||||
// This regex matches any other TeX function, which is a backslash followed by a
|
// This regex matches any other TeX function, which is a backslash followed by a
|
||||||
// word or a single symbol
|
// word or a single symbol
|
||||||
var anyFunc = /^\\(?:[a-zA-Z]+|.)/;
|
var anyFunc = /\\(?:[a-zA-Z]+|.)/;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function lexes a single normal token. It takes a position, a list of
|
* This function lexes a single normal token. It takes a position, a list of
|
||||||
|
@ -60,29 +62,28 @@ var anyFunc = /^\\(?:[a-zA-Z]+|.)/;
|
||||||
* not.
|
* not.
|
||||||
*/
|
*/
|
||||||
Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
|
Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
|
||||||
var input = this._input.slice(pos);
|
var input = this._input;
|
||||||
var whitespace;
|
var whitespace;
|
||||||
|
|
||||||
if (ignoreWhitespace) {
|
if (ignoreWhitespace) {
|
||||||
// Get rid of whitespace.
|
// Get rid of whitespace.
|
||||||
whitespace = input.match(whitespaceRegex)[0];
|
whitespace = matchAt(whitespaceRegex, input, pos)[0];
|
||||||
pos += whitespace.length;
|
pos += whitespace.length;
|
||||||
input = input.slice(whitespace.length);
|
|
||||||
} else {
|
} else {
|
||||||
// Do the funky concatenation of whitespace that happens in text mode.
|
// Do the funky concatenation of whitespace that happens in text mode.
|
||||||
whitespace = input.match(whitespaceConcatRegex);
|
whitespace = matchAt(whitespaceConcatRegex, input, pos);
|
||||||
if (whitespace !== null) {
|
if (whitespace !== null) {
|
||||||
return new Token(" ", null, pos + whitespace[0].length);
|
return new Token(" ", null, pos + whitespace[0].length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there's no more input to parse, return an EOF token
|
// If there's no more input to parse, return an EOF token
|
||||||
if (input.length === 0) {
|
if (pos === input.length) {
|
||||||
return new Token("EOF", null, pos);
|
return new Token("EOF", null, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
var match;
|
var match;
|
||||||
if ((match = input.match(anyFunc))) {
|
if ((match = matchAt(anyFunc, input, pos))) {
|
||||||
// If we match a function token, return it
|
// If we match a function token, return it
|
||||||
return new Token(match[0], null, pos + match[0].length);
|
return new Token(match[0], null, pos + match[0].length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -91,7 +92,7 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
|
||||||
for (var i = 0; i < normals.length; i++) {
|
for (var i = 0; i < normals.length; i++) {
|
||||||
var normal = normals[i];
|
var normal = normals[i];
|
||||||
|
|
||||||
if ((match = input.match(normal))) {
|
if ((match = matchAt(normal, input, pos))) {
|
||||||
// If it is, return it
|
// If it is, return it
|
||||||
return new Token(
|
return new Token(
|
||||||
match[0], null, pos + match[0].length);
|
match[0], null, pos + match[0].length);
|
||||||
|
@ -99,26 +100,26 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new ParseError("Unexpected character: '" + input[0] +
|
throw new ParseError(
|
||||||
"'", this, pos);
|
"Unexpected character: '" + input[pos] + "'",
|
||||||
|
this, pos);
|
||||||
};
|
};
|
||||||
|
|
||||||
// A regex to match a CSS color (like #ffffff or BlueViolet)
|
// A regex to match a CSS color (like #ffffff or BlueViolet)
|
||||||
var cssColor = /^(#[a-z0-9]+|[a-z]+)/i;
|
var cssColor = /#[a-z0-9]+|[a-z]+/i;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function lexes a CSS color.
|
* This function lexes a CSS color.
|
||||||
*/
|
*/
|
||||||
Lexer.prototype._innerLexColor = function(pos) {
|
Lexer.prototype._innerLexColor = function(pos) {
|
||||||
var input = this._input.slice(pos);
|
var input = this._input;
|
||||||
|
|
||||||
// Ignore whitespace
|
// Ignore whitespace
|
||||||
var whitespace = input.match(whitespaceRegex)[0];
|
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
|
||||||
pos += whitespace.length;
|
pos += whitespace.length;
|
||||||
input = input.slice(whitespace.length);
|
|
||||||
|
|
||||||
var match;
|
var match;
|
||||||
if ((match = input.match(cssColor))) {
|
if ((match = matchAt(cssColor, input, pos))) {
|
||||||
// If we look like a color, return a color
|
// If we look like a color, return a color
|
||||||
return new Token(match[0], null, pos + match[0].length);
|
return new Token(match[0], null, pos + match[0].length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -128,21 +129,20 @@ Lexer.prototype._innerLexColor = function(pos) {
|
||||||
|
|
||||||
// A regex to match a dimension. Dimensions look like
|
// A regex to match a dimension. Dimensions look like
|
||||||
// "1.2em" or ".4pt" or "1 ex"
|
// "1.2em" or ".4pt" or "1 ex"
|
||||||
var sizeRegex = /^(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
|
var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function lexes a dimension.
|
* This function lexes a dimension.
|
||||||
*/
|
*/
|
||||||
Lexer.prototype._innerLexSize = function(pos) {
|
Lexer.prototype._innerLexSize = function(pos) {
|
||||||
var input = this._input.slice(pos);
|
var input = this._input;
|
||||||
|
|
||||||
// Ignore whitespace
|
// Ignore whitespace
|
||||||
var whitespace = input.match(whitespaceRegex)[0];
|
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
|
||||||
pos += whitespace.length;
|
pos += whitespace.length;
|
||||||
input = input.slice(whitespace.length);
|
|
||||||
|
|
||||||
var match;
|
var match;
|
||||||
if ((match = input.match(sizeRegex))) {
|
if ((match = matchAt(sizeRegex, input, pos))) {
|
||||||
var unit = match[3];
|
var unit = match[3];
|
||||||
// We only currently handle "em" and "ex" units
|
// We only currently handle "em" and "ex" units
|
||||||
if (unit !== "em" && unit !== "ex") {
|
if (unit !== "em" && unit !== "ex") {
|
||||||
|
@ -161,12 +161,12 @@ Lexer.prototype._innerLexSize = function(pos) {
|
||||||
* This function lexes a string of whitespace.
|
* This function lexes a string of whitespace.
|
||||||
*/
|
*/
|
||||||
Lexer.prototype._innerLexWhitespace = function(pos) {
|
Lexer.prototype._innerLexWhitespace = function(pos) {
|
||||||
var input = this._input.slice(pos);
|
var input = this._input;
|
||||||
|
|
||||||
var whitespace = input.match(whitespaceRegex)[0];
|
var whitespace = matchAt(whitespaceRegex, input, pos)[0];
|
||||||
pos += whitespace.length;
|
pos += whitespace.length;
|
||||||
|
|
||||||
return new Token(whitespace, null, pos);
|
return new Token(whitespace[0], null, pos);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue
Block a user