generated at
asearch

Scrapboxのタイトル検索で利用

>はやい
> byte列を比較しているだけ
> 結果はtrue/falseで返ってくる
> あいまい度は指定できる(0〜3まで)
> 他のライブラリに依存していない、pure javascript


とてもわかり易い仕組みの図解

2021-05-19 21:38:33 Unicode対応した

UserScript/denoから使えるcode
/takker/fizzSearchのコードを改変してある
script.js
export class Asearch { get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000 get MAXCHAR() {return 0x100;} // 256文字が上限 get INITSTATE() {return [this.INITPAT, 0, 0, 0];} constructor(source) { this.shiftpat = Array(this.MAXCHAR).map(_ => 0); this.epsilon = 0; let mask = this.INITPAT; for (const item of this.unpack(source)) { // 0x20 is a space if (item === 0x20) { this.epsilon |= mask; } else { this.shiftpat[item] |= mask; this.shiftpat[this.toupper(item)] |= mask; this.shiftpat[this.tolower(item)] |= mask; mask >>>= 1; } } this.acceptpat = mask; } tolower(c) { return this.isupper(c) ? c + 0x20 : c; } toupper(c) { return this.islower(c) ? c - 0x20 : c; } isupper(c) { // 0x41 = A, 0x5a = Z return (c >= 0x41) && (c <= 0x5a); } islower(c) { // 0x61 = a, 0x7a = z return (c >= 0x61) && (c <= 0x7a); } state(str = '') { let i0 = this.INITSTATE[0]; let i1 = this.INITSTATE[1]; let i2 = this.INITSTATE[2]; let i3 = this.INITSTATE[3]; for (const item of this.unpack(str)) { const mask = this.shiftpat[item]; i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2; i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1; i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0; i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1); i1 |= i0 >>> 1; i2 |= i1 >>> 1; i3 |= i2 >>> 1; } return [i0, i1, i2, i3]; } match(str, ambig = 0) { ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1; const s = this.state(str); return (s[ambig] & this.acceptpat) !== 0; } unpack(str) { return str.split('').map(char => char.charCodeAt(0)); } // UTF-16 code pointを上8bitと下8bitとに分割する /*unpack(str) { return str.split('') .flatMap(char => { const code = char.charCodeAt(0); if (code > 0xFF) { return [(code & 0xFF00) >>> 8, code & 0xFF]; } return [code & 0xFF]; // 下の8ビットを取得する }); }*/ }


test code
shokai/node-asearchをJSに書き換えたのを使っている
js
(async () => { const {Asearch} = await import('/api/code/programming-notes/asearch/script.js'); const a = new Asearch('abcde'); assert(a, 'abcde'); assert(a, 'AbCdE'); assert(a, 'abcd'); assert(a, 'abcd', 1); assert(a, 'ab de', 1); assert(a, 'abe', 1); assert(a, 'abe', 2); const b = new Asearch('cheese burger'); assert(b, 'cheese burger'); assert(b, 'chess burger'); assert(b, 'chess burger', 2); assert(b, 'chess', 2); const c = new Asearch('漢字文字列'); assert(c, '漢字文字列'); assert(c, '漢字文字烈'); assert(c, '漢字文字烈', 1); function assert(asearch, text, ambig = 0) { console.log(`text = ${text}, ambig = ${ambig} => ${asearch.match(text, ambig)}`); } })();

WebWorker用
worker.js
class Asearch { get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000 get MAXCHAR() {return 0x100;} // 256文字が上限 get INITSTATE() {return [this.INITPAT, 0, 0, 0];} constructor(source) { this.shiftpat = Array(this.MAXCHAR).map(_ => 0); this.epsilon = 0; let mask = this.INITPAT; for (const item of this.unpack(source)) { // 0x20 is a space if (item === 0x20) { this.epsilon |= mask; } else { this.shiftpat[item] |= mask; this.shiftpat[this.toupper(item)] |= mask; this.shiftpat[this.tolower(item)] |= mask; mask >>>= 1; } } this.acceptpat = mask; } tolower(c) { return this.isupper(c) ? c + 0x20 : c; } toupper(c) { return this.islower(c) ? c - 0x20 : c; } isupper(c) { // 0x41 = A, 0x5a = Z return (c >= 0x41) && (c <= 0x5a); } islower(c) { // 0x61 = a, 0x7a = z return (c >= 0x61) && (c <= 0x7a); } state(str = '') { let i0 = this.INITSTATE[0]; let i1 = this.INITSTATE[1]; let i2 = this.INITSTATE[2]; let i3 = this.INITSTATE[3]; for (const item of this.unpack(str)) { const mask = this.shiftpat[item]; i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2; i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1; i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0; i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1); i1 |= i0 >>> 1; i2 |= i1 >>> 1; i3 |= i2 >>> 1; } return [i0, i1, i2, i3]; } match(str, ambig = 0) { ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1; const s = this.state(str); return (s[ambig] & this.acceptpat) !== 0; } // UTF-16 code pointを上8bitと下8bitとに分割する unpack(str) { return str.split('') .flatMap(char => { const code = char.charCodeAt(0); if (code > 0xFF) { return [(code & 0xFF00) >>> 8, code & 0xFF]; } return [code & 0xFF]; // 下の8ビットを取得する }); } }

JavaScript
Deno