zotero/chrome/content/zotero/xpcom/xregexp/addons/matchrecursive.js
2012-12-19 06:33:13 -06:00

191 lines
7.0 KiB
JavaScript

/*!
* XRegExp.matchRecursive 3.0.0-pre
* <http://xregexp.com/>
* Steven Levithan © 2009-2012 MIT License
*/
(function(XRegExp) {
'use strict';
/**
* Returns a match detail object composed of the provided values.
* @private
*/
function row(name, value, start, end) {
return {
name: name,
value: value,
start: start,
end: end
};
}
/**
* Returns an array of match strings between outermost left and right delimiters, or an array of
* objects with detailed match parts and position data. An error is thrown if delimiters are
* unbalanced within the data.
* @memberOf XRegExp
* @param {String} str String to search.
* @param {String} left Left delimiter as an XRegExp pattern.
* @param {String} right Right delimiter as an XRegExp pattern.
* @param {String} [flags] Any native or XRegExp flags, used for the left and right delimiters.
* @param {Object} [options] Lets you specify `valueNames` and `escapeChar` options.
* @returns {Array} Array of matches, or an empty array.
* @example
*
* // Basic usage
* var str = '(t((e))s)t()(ing)';
* XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
* // -> ['t((e))s', '', 'ing']
*
* // Extended information mode with valueNames
* str = 'Here is <div> <div>an</div></div> example';
* XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
* valueNames: ['between', 'left', 'match', 'right']
* });
* // -> [
* // {name: 'between', value: 'Here is ', start: 0, end: 8},
* // {name: 'left', value: '<div>', start: 8, end: 13},
* // {name: 'match', value: ' <div>an</div>', start: 13, end: 27},
* // {name: 'right', value: '</div>', start: 27, end: 33},
* // {name: 'between', value: ' example', start: 33, end: 41}
* // ]
*
* // Omitting unneeded parts with null valueNames, and using escapeChar
* str = '...{1}\\{{function(x,y){return y+x;}}';
* XRegExp.matchRecursive(str, '{', '}', 'g', {
* valueNames: ['literal', null, 'value', null],
* escapeChar: '\\'
* });
* // -> [
* // {name: 'literal', value: '...', start: 0, end: 3},
* // {name: 'value', value: '1', start: 4, end: 5},
* // {name: 'literal', value: '\\{', start: 6, end: 8},
* // {name: 'value', value: 'function(x,y){return y+x;}', start: 9, end: 35}
* // ]
*
* // Sticky mode via flag y
* str = '<1><<<2>>><3>4<5>';
* XRegExp.matchRecursive(str, '<', '>', 'gy');
* // -> ['1', '<<2>>', '3']
*/
XRegExp.matchRecursive = function(str, left, right, flags, options) {
flags = flags || '';
options = options || {};
var global = flags.indexOf('g') > -1,
sticky = flags.indexOf('y') > -1,
// Flag `y` is controlled internally
basicFlags = flags.replace(/y/g, ''),
escapeChar = options.escapeChar,
vN = options.valueNames,
output = [],
openTokens = 0,
delimStart = 0,
delimEnd = 0,
lastOuterEnd = 0,
outerStart,
innerStart,
leftMatch,
rightMatch,
esc;
left = XRegExp(left, basicFlags);
right = XRegExp(right, basicFlags);
if (escapeChar) {
if (escapeChar.length > 1) {
throw new Error('Cannot use more than one escape character');
}
escapeChar = XRegExp.escape(escapeChar);
// Using `XRegExp.union` safely rewrites backreferences in `left` and `right`
esc = new RegExp(
'(?:' + escapeChar + '[\\S\\s]|(?:(?!' +
XRegExp.union([left, right]).source +
')[^' + escapeChar + '])+)+',
// Flags `gy` not needed here
flags.replace(/[^im]+/g, '')
);
}
while (true) {
// If using an escape character, advance to the delimiter's next starting position,
// skipping any escaped characters in between
if (escapeChar) {
delimEnd += (XRegExp.exec(str, esc, delimEnd, 'sticky') || [''])[0].length;
}
leftMatch = XRegExp.exec(str, left, delimEnd);
rightMatch = XRegExp.exec(str, right, delimEnd);
// Keep the leftmost match only
if (leftMatch && rightMatch) {
if (leftMatch.index <= rightMatch.index) {
rightMatch = null;
} else {
leftMatch = null;
}
}
/* Paths (LM: leftMatch, RM: rightMatch, OT: openTokens):
* LM | RM | OT | Result
* 1 | 0 | 1 | loop
* 1 | 0 | 0 | loop
* 0 | 1 | 1 | loop
* 0 | 1 | 0 | throw
* 0 | 0 | 1 | throw
* 0 | 0 | 0 | break
* Doesn't include the sticky mode special case. The loop ends after the first
* completed match if not `global`.
*/
if (leftMatch || rightMatch) {
delimStart = (leftMatch || rightMatch).index;
delimEnd = delimStart + (leftMatch || rightMatch)[0].length;
} else if (!openTokens) {
break;
}
if (sticky && !openTokens && delimStart > lastOuterEnd) {
break;
}
if (leftMatch) {
if (!openTokens) {
outerStart = delimStart;
innerStart = delimEnd;
}
++openTokens;
} else if (rightMatch && openTokens) {
if (!--openTokens) {
if (vN) {
if (vN[0] && outerStart > lastOuterEnd) {
output.push(row(vN[0], str.slice(lastOuterEnd, outerStart), lastOuterEnd, outerStart));
}
if (vN[1]) {
output.push(row(vN[1], str.slice(outerStart, innerStart), outerStart, innerStart));
}
if (vN[2]) {
output.push(row(vN[2], str.slice(innerStart, delimStart), innerStart, delimStart));
}
if (vN[3]) {
output.push(row(vN[3], str.slice(delimStart, delimEnd), delimStart, delimEnd));
}
} else {
output.push(str.slice(innerStart, delimStart));
}
lastOuterEnd = delimEnd;
if (!global) {
break;
}
}
} else {
throw new Error('Unbalanced delimiter found in string');
}
// If the delimiter matched an empty string, avoid an infinite loop
if (delimStart === delimEnd) {
++delimEnd;
}
}
if (global && !sticky && vN && vN[0] && str.length > lastOuterEnd) {
output.push(row(vN[0], str.slice(lastOuterEnd), lastOuterEnd, str.length));
}
return output;
};
}(XRegExp));