Further improvements to the documentation search.

Revise the subword matching to have three different levels for full
match (= a permutation of the input), a generic match for all parts, and
everything else.
This commit is contained in:
Eli Barzilay 2012-06-14 13:48:05 -04:00
parent ecc43ff104
commit 6b2b784e79

View File

@ -249,10 +249,14 @@ function AdjustResultsNum() {
// Terms are compared using Compare(pat,str), which returns one of several
// constants, in increasing order of success:
// - C_fail: no match
// - C_words: all of the "subwords" in pat matched, where a word is an
// - C_words1: all of the "subwords" in pat matched, where a word is an
// alphanumeric or a punctuation substring (for example, "foo-bar!!" has
// these words: "foo", "-", "bar", "!!"), and a match means a prefix of a
// subword in str matched one of pat's subwords
// - C_words2: same, but all of the subwords in str were matched (so both str
// and pat have the same number of subwords)
// - C_words3: same, but all matches were complete (so str is a permutation of
// pat)
// - C_match: pat matched somewhere in str
// - C_prefix: pat is a prefix of str
// - C_exact: pat is equal to str
@ -265,11 +269,13 @@ function AdjustResultsNum() {
// module that is exactly `foo', but it will return C_exact which means that it
// doesn't change whether the match is considered exact or not).
var C_fail = 0, C_min = 0,
C_words = 1,
C_match = 2,
C_prefix = 3,
C_exact = 4,
C_rexact = 5, C_max = 5;
C_words1 = 1,
C_words2 = 2,
C_words3 = 3,
C_match = 4,
C_prefix = 5,
C_exact = 6,
C_rexact = 7, C_max = 7;
function Compare(pat, str) {
var i = str.indexOf(pat);
@ -278,14 +284,6 @@ function Compare(pat, str) {
if (pat.length == str.length) return C_rexact;
return C_prefix;
}
function CompareRx(pat, str) {
if (!(pat instanceof RegExp)) return Compare(pat,str);
var r = str.match(pat);
if (r == null || r.length == 0) return C_fail;
if (r[0] == str) return C_rexact;
if (str.indexOf(r[0]) == 0) return C_prefix;
return C_match;
}
function MaxCompares(pat, strs) {
var r = C_min, c;
for (var i=0; i<strs.length; i++) {
@ -294,14 +292,76 @@ function MaxCompares(pat, strs) {
}
return r;
}
function MinComparesRx(pats, str) {
var r = C_max, c;
for (var i=0; i<pats.length; i++) {
c = CompareRx(pats[i],str);
if (c < r) { if (c <= C_min) return c; else r = c; }
}
return r;
// Utilities for dealing with "subword" searches
function SortByLength(x, y) {
// use this to sort with descending length and otherwise lexicographically
var xlen = x.length, ylen = y.length;
if (xlen != ylen) return ylen - xlen;
else if (x < y) return -1;
else if (x > y) return +1;
else return 0;
}
function CompileWordCompare(term) {
var words = term.split(/\b/).sort(SortByLength);
var word_num = words.length;
var is_word = new Array(word_num);
for (var i=0; i<word_num; i++) is_word[i] = (words[i].search(/\w/) >= 0);
return function (str) {
var str_words = str.split(/\b/).sort(SortByLength);
var str_word_num = str_words.length;
if (str_word_num < word_num) return C_fail; // can't work
var r = C_words3;
for (var i=0; i<word_num; i++) {
var word = words[i];
var found = -1;
for (var j=0; j<str_word_num; j++) {
var str_word = str_words[j];
if (str_word != null) {
if (word == str_word) {
found = j;
break;
} else {
var k = str_word.indexOf(word);
if (k == 0 || (k > 0 && !is_word[i])) {
found = j;
r = C_words2;
break;
}
}
}
}
if (found >= 0) str_words[found] = null;
else return C_fail;
}
// either C_words3 for a complete permutation, or C_words2 when all of the
// subwords in str were matched
if (word_num == str_word_num) return r;
// otherwise return C_words1 regardless of whether all of the words in term
// matched exactly words in str
else return C_words1;
}
}
// Tests:
// console.log("testing...");
// function t(x,y,e) {
// var r = CompileWordCompare(x)(y);
// if (e != r) {
// console.log('CompileWordCompare("'+x+'")("'+y+'")'+
// " => "+r+" but expected "+e);
// }
// }
// t("x-y","x-y",C_words3);
// t("x-y","y-x",C_words3);
// t("x-y","y--x",C_words2);
// t("x-y","y=-x",C_words2);
// t("xx-yy","x-y",C_fail);
// t("xx-yy","xxx-yyy",C_words2);
// t("x-y-x","xxx-yyy",C_fail);
// t("x-y-x","xxx-yyy-xxx",C_words2);
// t("x-y-x","yyy-xxx-xxx",C_words2);
// t("x-y-xx","xx-y-xxx",C_words2);
// console.log("done");
function NormalizeSpaces(str) {
// use single spaces first, then trim edge spaces
@ -371,18 +431,13 @@ function CompileTerm(term) {
}
/* a case for "Q" is not needed -- same as the default case below */
default:
var words = term.split(/\b/);
for (var i=0; i<words.length; i++)
if (words[i].search(/^\w/) >= 0) words[i] = new RegExp("\\b"+words[i]);
// (note: seems like removing duplicates is not important since search
// strings will not have them, except for dashes in things like x-y-z)
var compare_words = CompileWordCompare(term);
return function(x) {
var r = Compare(term,x[0]);
// only bindings can be used for rexact matches
if (r >= C_rexact) return (x[3] ? r : C_exact);
if (r > C_words) return r;
if (MinComparesRx(words,x[0]) <= C_fail) return r;
return C_words;
if (r > C_words3) return r;
else return compare_words(x[0]);
}
}
}