Add some more symbols (#502)

This adds support for the following input sequences:

    -- --- ` ' `` '' \degree \pounds \maltese

resulting in – — ‘ ’ “ ” ° £ ✠ symbols already present in our fonts.

As part of this modification, the recognition of multiple dashes was moved
from the lexer to the parser.
This is necessary since in math mode a sequence of hyphens is just a
sequence of minus signs.  Just like a pair of apostrophes in math mode is a
double prime not a right double quotation mark.
To make this easier, parseGroup and parseOptionalGroup have been merged.
This commit is contained in:
Martin von Gagern 2016-07-25 04:56:31 +02:00 committed by Kevin Barabash
parent befe1c1af7
commit 4a9c2acbf7
10 changed files with 73 additions and 44 deletions

View File

@ -63,7 +63,6 @@ Token.prototype.range = function(endToken, text) {
/* The following tokenRegex
* - matches typical whitespace (but not NBSP etc.) using its first group
* - matches symbol combinations which result in a single output character
* - does not match any control character \x00-\x1f except whitespace
* - does not match a bare backslash
* - matches any ASCII character except those just mentioned
@ -78,9 +77,8 @@ Token.prototype.range = function(endToken, text) {
* still reject the input.
*/
var tokenRegex = new RegExp(
"([ \r\n\t]+)|(" + // whitespace
"---?" + // special combinations
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
"([ \r\n\t]+)|" + // whitespace
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
"|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name
")"

View File

@ -545,7 +545,7 @@ Parser.prototype.parseArguments = function(func, funcData) {
if (argType) {
arg = this.parseGroupOfType(argType, true);
} else {
arg = this.parseOptionalGroup();
arg = this.parseGroup(true);
}
if (!arg) {
args.push(null);
@ -623,12 +623,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) {
}
// By the time we get here, innerMode is one of "text" or "math".
// We switch the mode of the parser, recurse, then restore the old mode.
var res;
if (optional) {
res = this.parseOptionalGroup();
} else {
res = this.parseGroup();
}
var res = this.parseGroup(optional);
this.switchMode(outerMode);
return res;
};
@ -638,7 +633,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) {
* brace-enclosed tokens plus some position information.
*
* @param {string} modeName Used to describe the mode in error messages
* @param {boolean} optional Whether the group is optional or required
* @param {boolean=} optional Whether the group is optional or required
*/
Parser.prototype.parseStringGroup = function(modeName, optional) {
if (optional && this.nextToken.text !== "[") {
@ -707,53 +702,71 @@ Parser.prototype.parseSizeGroup = function(optional) {
};
/**
* Parses a group, which is either a single nucleus (like "x") or an expression
* in braces (like "{x+y}")
* If the argument is false or absent, this parses an ordinary group,
* which is either a single nucleus (like "x") or an expression
* in braces (like "{x+y}").
* If the argument is true, it parses either a bracket-delimited expression
* (like "[x+y]") or returns null to indicate the absence of a
* bracket-enclosed group.
*
* @param {boolean=} optional Whether the group is optional or required
* @return {?ParseFuncOrArgument}
*/
Parser.prototype.parseGroup = function() {
Parser.prototype.parseGroup = function(optional) {
var firstToken = this.nextToken;
// Try to parse an open brace
if (this.nextToken.text === "{") {
if (this.nextToken.text === (optional ? "[" : "{")) {
// If we get a brace, parse an expression
this.consume();
var expression = this.parseExpression(false);
var expression = this.parseExpression(false, optional ? "]" : null);
var lastToken = this.nextToken;
// Make sure we get a close brace
this.expect("}");
this.expect(optional ? "]" : "}");
if (this.mode === "text") {
this.formLigatures(expression);
}
return new ParseFuncOrArgument(
new ParseNode("ordgroup", expression, this.mode,
firstToken, lastToken),
false);
} else {
// Otherwise, just return a nucleus
return this.parseSymbol();
// Otherwise, just return a nucleus, or nothing for an optional group
return optional ? null : this.parseSymbol();
}
};
/**
* Parses a group, which is an expression in brackets (like "[x+y]")
* Form ligature-like combinations of characters for text mode.
* This includes inputs like "--", "---", "``" and "''".
* The result will simply replace multiple textord nodes with a single
* character in each value by a single textord node having multiple
* characters in its value. The representation is still ASCII source.
*
* @return {?ParseFuncOrArgument}
* @param {Array.<ParseNode>} group the nodes of this group,
* list will be modified in place
*/
Parser.prototype.parseOptionalGroup = function() {
var firstToken = this.nextToken;
// Try to parse an open bracket
if (this.nextToken.text === "[") {
// If we get a brace, parse an expression
this.consume();
var expression = this.parseExpression(false, "]");
var lastToken = this.nextToken;
// Make sure we get a close bracket
this.expect("]");
return new ParseFuncOrArgument(
new ParseNode("ordgroup", expression, this.mode,
firstToken, lastToken),
false);
} else {
// Otherwise, return null,
return null;
Parser.prototype.formLigatures = function(group) {
    // `last` is the highest index that still has a right-hand neighbour to
    // pair with; it shrinks every time nodes are merged in place.
    var last = group.length - 1;
    for (var pos = 0; pos < last; pos++) {
        var head = group[pos];
        var ch = head.value;

        // Only dashes and the two quote characters can start a combination.
        if (ch !== "-" && ch !== "'" && ch !== "`") {
            continue;
        }
        // A combination needs the very next node to carry the same character.
        if (group[pos + 1].value !== ch) {
            continue;
        }

        // Quotes merge pairwise; dashes may additionally absorb a third dash
        // when one is available inside the group.
        var count = 2;
        if (ch === "-" && pos + 1 < last && group[pos + 2].value === "-") {
            count = 3;
        }

        // Capture the final merged node before splicing so the replacement
        // spans from the first to the last original node.
        var tail = group[pos + count - 1];
        var text = count === 3 ? "---" : ch + ch;
        group.splice(pos, count,
            new ParseNode("textord", text, "text", head, tail));
        last -= count - 1;
    }
};

View File

@ -23,9 +23,11 @@ var greekCapitals = [
"\\Omega",
];
var dotlessLetters = [
// The following have to be loaded from Main-Italic font, using class mainit
var mainitLetters = [
"\u0131", // dotless i, \imath
"\u0237", // dotless j, \jmath
"\u00a3", // \pounds
];
/**
@ -101,7 +103,7 @@ var mathit = function(value, mode, color, classes) {
if (/[0-9]/.test(value.charAt(0)) ||
// glyphs for \imath and \jmath do not exist in Math-Italic so we
// need to use Main-Italic instead
utils.contains(dotlessLetters, value) ||
utils.contains(mainitLetters, value) ||
utils.contains(greekCapitals, value)) {
return makeSymbol(
value, "Main-Italic", mode, color, classes.concat(["mainit"]));
@ -126,7 +128,7 @@ var makeOrd = function(group, options, type) {
var font = options.font;
if (font) {
if (font === "mathit" || utils.contains(dotlessLetters, value)) {
if (font === "mathit" || utils.contains(mainitLetters, value)) {
return mathit(value, mode, color, classes);
} else {
var fontName = fontMap[font].fontName;

View File

@ -589,6 +589,18 @@ defineSymbol(math, main, accent, "\u02d9", "\\dot");
defineSymbol(math, main, mathord, "\u0131", "\\imath");
defineSymbol(math, main, mathord, "\u0237", "\\jmath");
// Text-mode multi-character inputs: a run of two or three hyphens is
// registered as U+2013 (en dash) and U+2014 (em dash) respectively.
defineSymbol(text, main, textord, "\u2013", "--");
defineSymbol(text, main, textord, "\u2014", "---");
// Text-mode quotes: a single backtick / apostrophe is registered as
// U+2018 / U+2019, and the doubled forms as U+201C / U+201D.
defineSymbol(text, main, textord, "\u2018", "`");
defineSymbol(text, main, textord, "\u2019", "'");
defineSymbol(text, main, textord, "\u201c", "``");
defineSymbol(text, main, textord, "\u201d", "''");
// \degree (U+00B0) is registered for both math and text mode.
defineSymbol(math, main, textord, "\u00b0", "\\degree");
defineSymbol(text, main, textord, "\u00b0", "\\degree");
// \pounds (U+00A3) is registered for math mode only in this list.
defineSymbol(math, main, mathord, "\u00a3", "\\pounds");
// \maltese (U+2720) is registered under `ams` rather than `main`,
// for both math and text mode.
defineSymbol(math, ams, textord, "\u2720", "\\maltese");
defineSymbol(text, ams, textord, "\u2720", "\\maltese");
defineSymbol(text, main, spacing, "\u00a0", "\\ ");
defineSymbol(text, main, spacing, "\u00a0", " ");
defineSymbol(text, main, spacing, "\u00a0", "~");
@ -605,7 +617,7 @@ for (i = 0; i < mathTextSymbols.length; i++) {
}
// All of these are textords in text mode
var textSymbols = "0123456789`!@*()-=+[]'\";:?/.,";
var textSymbols = "0123456789!@*()-=+[]\";:?/.,";
for (i = 0; i < textSymbols.length; i++) {
ch = textSymbols.charAt(i);
defineSymbol(text, main, textord, ch, ch);

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

@ -36,6 +36,7 @@ Cases: |
Colors:
tex: \blue{a}\color{#0f0}{b}\color{red}{c}
nolatex: different syntax and different scope
DashesAndQuotes: \text{``a'' b---c -- d----`e'-{-}-f}--``x''
DeepFontSizing:
tex: |
a^{\big| x^{\big(}}_{\Big\uparrow} +
@ -109,6 +110,9 @@ SupSubHorizSpacing: |
SupSubLeftAlignReset: |
\omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}}
SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f
Symbols1: |
\maltese\degree\pounds\$
\text{\maltese\degree}
Text: \frac{a}{b}\text{c~ {ab} \ e}+fg
UnsupportedCmds:
tex: \err\,\frac\fracerr3\,2^\superr_\suberr\,\sqrt\sqrterr

View File

@ -1,6 +1,6 @@
\documentclass[10pt]{article}
\usepackage{amsmath,amssymb}
\usepackage{amsmath,amssymb,textcomp,gensymb}
\usepackage[mathscr]{eucal}
\usepackage{eufrak}
\usepackage[papersize={133pt,100pt},margin=0.5pt]{geometry}