/.*/
マッチを簡単に実装できない$ deno check --remote -r https://scrapbox.io/api/code/takker/Myersのbit-parallel法/mod.ts
mod.ts/** 検索函数 */
export interface Filter {
/** 与えた文字列に一致する編集距離のリストを計算する
*
* @param text 被検索文字列
* @return i番目に、textのi番目の近似文字列出現の編集距離を格納した配列
*/
(text: string): number[];
}
/** あいまい検索する
*
* @param query 検索文字列
* @return 検索用函数
*/
export const makeFilter = (query: string): Filter => {
// 末尾が最初の文字のビットを表す
const Peq = new Map<string, number>();
const rquery = [...query].reverse(); // 書記素に分割しておく
{
let i = 1;
for (const q of rquery) {
Peq.set(q, (Peq.get(q) ?? 0) | i);
const pil = q.toLowerCase();
Peq.set(pil, (Peq.get(pil) ?? 0) | i);
const piu = q.toUpperCase();
Peq.set(piu, (Peq.get(piu) ?? 0) | i);
i <<= 1;
}
}
const m = rquery.length; //文字列数
const Pv0 = ~(~(0) << m); // 右側からm個のbitsが立ったビット列
const accept = 1 << (m - 1);
return (text: string): number[] => {
let Mv = 0;
let Pv = Pv0;
const rtext = [...text].reverse();
const Cm: number[] = [];
let j = rtext.length;
Cm[j] = m;
for (const t of rtext) {
const Eq = Peq.get(t) ?? 0;
const Xv = Eq | Mv;
const Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
const Ph = Mv | ~(Xh | Pv);
const Mh = Pv & Xh;
Cm[j - 1] = Cm[j] +((Ph & accept) !== 0 ? 1 : (Mh & accept) !== 0 ? -1 : 0);
Pv = (Mh << 1) | ~(Xv | (Ph << 1));
Mv = (Ph << 1) & Xv;
j--;
}
return Cm;
};
};
test.jsimport { makeFilter } from "./mod.ts";
const getMaxDistance = [
0, // 空文字のとき
0, 0,
1, 1,
2, 2, 2, 2,
3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
];
window.search = (p, l) => {
const pages = scrapbox.Project.pages.slice(0, l); // 生成にやや時間がかかるので、計測範囲から外す
const tag = `search "/${scrapbox.Project.name}" for "${p}"`;
if (p.trim() === "") return [];
console.time(tag);
const filter = makeFilter(p);
const m = [...p].length;
const max = getMaxDistance[m];
const result = pages.flatMap(({ title, titleLc, updated }) => {
const { dist, pos } = filter(titleLc).reduce((prev, dist, i) => {
if(prev.dist <= dist)return prev;
prev.dist = dist;
prev.pos = i;
return prev;
}, { dist: m, pos: 0 });
return dist <= max ? [{ title, dist, pos, updated }] : [];
})
.sort((a, b) => {
const ldiff = a.dist - b.dist;
if (ldiff !== 0) return ldiff;
const pdiff = a.pos - b.pos;
if (pdiff !== 0) return pdiff;
const udiff = b.updated - a.updated;
if (udiff !== 0) return udiff;
return a.title.length - b.title.length;
});
console.timeEnd(tag);
return result;
};
permutation.jsimport { makeFilter } from "./mod.ts";
const getMaxDistance = [
0, // 空文字のとき
0, 0,
1, 1,
2, 2, 2, 2,
3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
];
const filter = (m, max, filter_, pages) => pages.flatMap(
({ title, updated, dist, matches }) => {
matches ??= [];
dist ??= 0;
const result = filter_(title)
// 別のqueryでマッチした箇所は除く
.flatMap((d, i) =>
dist + d <= max && matches.every(([s, e]) => i + m <= s || e < i) ?
[[i, d]] :
[]
);
if (result.length === 0) return [];
const newMatch = result.reduce((prev, [i, dist]) => {
if(prev.dist <= dist) return prev;
prev.dist = dist;
prev.start = i;
prev.end = i + m -1;
return prev;
}, { dist: m, start: 0, end: m - 1 });
const newDist = newMatch.dist + dist;
if (newDist > max) return [];
matches.push([newMatch.start, newMatch.end]);
return [{ title, updated, dist: newDist, matches }];
}
);
const sorter = (a, b) => {
const ldiff = a.dist - b.dist;
if (ldiff !== 0) return ldiff;
// マッチ位置が早い順に並べる
const sa = a.matches.map(([s]) => s).sort();
const sb = b.matches.map(([s]) => s).sort();
for (let i = 0; i < sa.length; i++) {
const sdiff = sa[i] - (sb[i] ?? sb.length);
if (sdiff !== 0) return sdiff;
}
const udiff = b.updated - a.updated;
if (udiff !== 0) return udiff;
return a.title.length - b.title.length;
};
window.search = (p, l) => {
const pages = scrapbox.Project.pages.slice(0, l); // 生成にやや時間がかかるので、計測範囲から外す
/** キーワードリスト
*
* - 空白は取り除く
* - `_`は空白とみなす
* - 長い順に並び替えておく
* - 長いqueryから検索したほうが、少なく絞り込める
*/
const queries = p.trim().replaceAll("_", " ").split(/\s+/)
.sort((a, b) => b.length -a.length);
if (queries.length === 0 || queries.every((q) => q === "")) return [];
const tag = `search "/${scrapbox.Project.name}" for "${queries}"`;
console.time(tag);
let result = pages;
const max = getMaxDistance[[...queries.join("")].length];
for (const query of queries) {
const m = [...query].length;
result = filter(m, max, makeFilter(query), result);
}
result.sort(sorter)
console.timeEnd(tag);
return result;
};