Further improvements to the documentation search.
Revise the subword matching to have three different levels for full match (= a permutation of the input), a generic match for all parts, and everything else.
This commit is contained in:
parent
ecc43ff104
commit
6b2b784e79
|
@ -249,10 +249,14 @@ function AdjustResultsNum() {
|
|||
// Terms are compared using Compare(pat,str), which returns one of several
|
||||
// constants, in increasing order of success:
|
||||
// - C_fail: no match
|
||||
// - C_words: all of the "subwords" in pat matched, where a word is an
|
||||
// - C_words1: all of the "subwords" in pat matched, where a word is an
|
||||
// alphanumeric or a punctuation substring (for example, "foo-bar!!" has
|
||||
// these words: "foo", "-", "bar", "!!"), and a match means a prefix of a
|
||||
// subword in str matched one of pat's subwords
|
||||
// - C_words2: same, but all of the subwords in str were matched (so both str
|
||||
// and pat have the same number of subwords)
|
||||
// - C_words3: same, but all matches were complete (so str is a permutation of
|
||||
// pat)
|
||||
// - C_match: pat matched somewhere in str
|
||||
// - C_prefix: pat is a prefix of str
|
||||
// - C_exact: pat is equal to str
|
||||
|
@ -265,11 +269,13 @@ function AdjustResultsNum() {
|
|||
// module that is exactly `foo', but it will return C_exact which means that it
|
||||
// doesn't change whether the match is considered exact or not).
|
||||
var C_fail = 0, C_min = 0,
|
||||
C_words = 1,
|
||||
C_match = 2,
|
||||
C_prefix = 3,
|
||||
C_exact = 4,
|
||||
C_rexact = 5, C_max = 5;
|
||||
C_words1 = 1,
|
||||
C_words2 = 2,
|
||||
C_words3 = 3,
|
||||
C_match = 4,
|
||||
C_prefix = 5,
|
||||
C_exact = 6,
|
||||
C_rexact = 7, C_max = 7;
|
||||
|
||||
function Compare(pat, str) {
|
||||
var i = str.indexOf(pat);
|
||||
|
@ -278,14 +284,6 @@ function Compare(pat, str) {
|
|||
if (pat.length == str.length) return C_rexact;
|
||||
return C_prefix;
|
||||
}
|
||||
function CompareRx(pat, str) {
|
||||
if (!(pat instanceof RegExp)) return Compare(pat,str);
|
||||
var r = str.match(pat);
|
||||
if (r == null || r.length == 0) return C_fail;
|
||||
if (r[0] == str) return C_rexact;
|
||||
if (str.indexOf(r[0]) == 0) return C_prefix;
|
||||
return C_match;
|
||||
}
|
||||
function MaxCompares(pat, strs) {
|
||||
var r = C_min, c;
|
||||
for (var i=0; i<strs.length; i++) {
|
||||
|
@ -294,14 +292,76 @@ function MaxCompares(pat, strs) {
|
|||
}
|
||||
return r;
|
||||
}
|
||||
function MinComparesRx(pats, str) {
|
||||
var r = C_max, c;
|
||||
for (var i=0; i<pats.length; i++) {
|
||||
c = CompareRx(pats[i],str);
|
||||
if (c < r) { if (c <= C_min) return c; else r = c; }
|
||||
}
|
||||
return r;
|
||||
|
||||
// Utilities for dealing with "subword" searches
|
||||
function SortByLength(x, y) {
|
||||
// use this to sort with descending length and otherwise lexicographically
|
||||
var xlen = x.length, ylen = y.length;
|
||||
if (xlen != ylen) return ylen - xlen;
|
||||
else if (x < y) return -1;
|
||||
else if (x > y) return +1;
|
||||
else return 0;
|
||||
}
|
||||
function CompileWordCompare(term) {
|
||||
var words = term.split(/\b/).sort(SortByLength);
|
||||
var word_num = words.length;
|
||||
var is_word = new Array(word_num);
|
||||
for (var i=0; i<word_num; i++) is_word[i] = (words[i].search(/\w/) >= 0);
|
||||
return function (str) {
|
||||
var str_words = str.split(/\b/).sort(SortByLength);
|
||||
var str_word_num = str_words.length;
|
||||
if (str_word_num < word_num) return C_fail; // can't work
|
||||
var r = C_words3;
|
||||
for (var i=0; i<word_num; i++) {
|
||||
var word = words[i];
|
||||
var found = -1;
|
||||
for (var j=0; j<str_word_num; j++) {
|
||||
var str_word = str_words[j];
|
||||
if (str_word != null) {
|
||||
if (word == str_word) {
|
||||
found = j;
|
||||
break;
|
||||
} else {
|
||||
var k = str_word.indexOf(word);
|
||||
if (k == 0 || (k > 0 && !is_word[i])) {
|
||||
found = j;
|
||||
r = C_words2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found >= 0) str_words[found] = null;
|
||||
else return C_fail;
|
||||
}
|
||||
// either C_words3 for a complete permutation, or C_words2 when all of the
|
||||
// subwords in str were matched
|
||||
if (word_num == str_word_num) return r;
|
||||
// otherwise return C_words1 regardless of whether all of the words in term
|
||||
// matched exactly words in str
|
||||
else return C_words1;
|
||||
}
|
||||
}
|
||||
// Tests:
|
||||
// console.log("testing...");
|
||||
// function t(x,y,e) {
|
||||
// var r = CompileWordCompare(x)(y);
|
||||
// if (e != r) {
|
||||
// console.log('CompileWordCompare("'+x+'")("'+y+'")'+
|
||||
// " => "+r+" but expected "+e);
|
||||
// }
|
||||
// }
|
||||
// t("x-y","x-y",C_words3);
|
||||
// t("x-y","y-x",C_words3);
|
||||
// t("x-y","y--x",C_words2);
|
||||
// t("x-y","y=-x",C_words2);
|
||||
// t("xx-yy","x-y",C_fail);
|
||||
// t("xx-yy","xxx-yyy",C_words2);
|
||||
// t("x-y-x","xxx-yyy",C_fail);
|
||||
// t("x-y-x","xxx-yyy-xxx",C_words2);
|
||||
// t("x-y-x","yyy-xxx-xxx",C_words2);
|
||||
// t("x-y-xx","xx-y-xxx",C_words2);
|
||||
// console.log("done");
|
||||
|
||||
function NormalizeSpaces(str) {
|
||||
// use single spaces first, then trim edge spaces
|
||||
|
@ -371,18 +431,13 @@ function CompileTerm(term) {
|
|||
}
|
||||
/* a case for "Q" is not needed -- same as the default case below */
|
||||
default:
|
||||
var words = term.split(/\b/);
|
||||
for (var i=0; i<words.length; i++)
|
||||
if (words[i].search(/^\w/) >= 0) words[i] = new RegExp("\\b"+words[i]);
|
||||
// (note: seems like removing duplicates is not important since search
|
||||
// strings will not have them, except for dashes in things like x-y-z)
|
||||
var compare_words = CompileWordCompare(term);
|
||||
return function(x) {
|
||||
var r = Compare(term,x[0]);
|
||||
// only bindings can be used for rexact matches
|
||||
if (r >= C_rexact) return (x[3] ? r : C_exact);
|
||||
if (r > C_words) return r;
|
||||
if (MinComparesRx(words,x[0]) <= C_fail) return r;
|
||||
return C_words;
|
||||
if (r > C_words3) return r;
|
||||
else return compare_words(x[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user