>はやい
> byte列を比較しているだけ
> 結果はtrue/falseで返ってくる
> あいまい度は指定できる(0〜3まで)
> 他のライブラリに依存していない、pure javascript
script.jsexport class Asearch {
get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000
get MAXCHAR() {return 0x100;} // 256文字が上限
get INITSTATE() {return [this.INITPAT, 0, 0, 0];}
constructor(source) {
this.shiftpat = Array(this.MAXCHAR).map(_ => 0);
this.epsilon = 0;
let mask = this.INITPAT;
for (const item of this.unpack(source)) {
// 0x20 is a space
if (item === 0x20) {
this.epsilon |= mask;
} else {
this.shiftpat[item] |= mask;
this.shiftpat[this.toupper(item)] |= mask;
this.shiftpat[this.tolower(item)] |= mask;
mask >>>= 1;
}
}
this.acceptpat = mask;
}
tolower(c) {
return this.isupper(c) ? c + 0x20 : c;
}
toupper(c) {
return this.islower(c) ? c - 0x20 : c;
}
isupper(c) {
// 0x41 = A, 0x5a = Z
return (c >= 0x41) && (c <= 0x5a);
}
islower(c) {
// 0x61 = a, 0x7a = z
return (c >= 0x61) && (c <= 0x7a);
}
state(str = '') {
let i0 = this.INITSTATE[0];
let i1 = this.INITSTATE[1];
let i2 = this.INITSTATE[2];
let i3 = this.INITSTATE[3];
for (const item of this.unpack(str)) {
const mask = this.shiftpat[item];
i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2;
i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1;
i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0;
i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1);
i1 |= i0 >>> 1;
i2 |= i1 >>> 1;
i3 |= i2 >>> 1;
}
return [i0, i1, i2, i3];
}
match(str, ambig = 0) {
ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1;
const s = this.state(str);
return (s[ambig] & this.acceptpat) !== 0;
}
unpack(str) {
return str.split('').map(char => char.charCodeAt(0));
}
// UTF-16 code pointを上8bitと下8bitとに分割する
/*unpack(str) {
return str.split('')
.flatMap(char => {
const code = char.charCodeAt(0);
if (code > 0xFF) {
return [(code & 0xFF00) >>> 8, code & 0xFF];
}
return [code & 0xFF]; // 下の8ビットを取得する
});
}*/
}
js(async () => {
const {Asearch} = await import('/api/code/programming-notes/asearch/script.js');
const a = new Asearch('abcde');
assert(a, 'abcde');
assert(a, 'AbCdE');
assert(a, 'abcd');
assert(a, 'abcd', 1);
assert(a, 'ab de', 1);
assert(a, 'abe', 1);
assert(a, 'abe', 2);
const b = new Asearch('cheese burger');
assert(b, 'cheese burger');
assert(b, 'chess burger');
assert(b, 'chess burger', 2);
assert(b, 'chess', 2);
const c = new Asearch('漢字文字列');
assert(c, '漢字文字列');
assert(c, '漢字文字烈');
assert(c, '漢字文字烈', 1);
function assert(asearch, text, ambig = 0) {
console.log(`text = ${text}, ambig = ${ambig} => ${asearch.match(text, ambig)}`);
}
})();
worker.jsclass Asearch {
get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000
get MAXCHAR() {return 0x100;} // 256文字が上限
get INITSTATE() {return [this.INITPAT, 0, 0, 0];}
constructor(source) {
this.shiftpat = Array(this.MAXCHAR).map(_ => 0);
this.epsilon = 0;
let mask = this.INITPAT;
for (const item of this.unpack(source)) {
// 0x20 is a space
if (item === 0x20) {
this.epsilon |= mask;
} else {
this.shiftpat[item] |= mask;
this.shiftpat[this.toupper(item)] |= mask;
this.shiftpat[this.tolower(item)] |= mask;
mask >>>= 1;
}
}
this.acceptpat = mask;
}
tolower(c) {
return this.isupper(c) ? c + 0x20 : c;
}
toupper(c) {
return this.islower(c) ? c - 0x20 : c;
}
isupper(c) {
// 0x41 = A, 0x5a = Z
return (c >= 0x41) && (c <= 0x5a);
}
islower(c) {
// 0x61 = a, 0x7a = z
return (c >= 0x61) && (c <= 0x7a);
}
state(str = '') {
let i0 = this.INITSTATE[0];
let i1 = this.INITSTATE[1];
let i2 = this.INITSTATE[2];
let i3 = this.INITSTATE[3];
for (const item of this.unpack(str)) {
const mask = this.shiftpat[item];
i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2;
i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1;
i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0;
i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1);
i1 |= i0 >>> 1;
i2 |= i1 >>> 1;
i3 |= i2 >>> 1;
}
return [i0, i1, i2, i3];
}
match(str, ambig = 0) {
ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1;
const s = this.state(str);
return (s[ambig] & this.acceptpat) !== 0;
}
// UTF-16 code pointを上8bitと下8bitとに分割する
unpack(str) {
return str.split('')
.flatMap(char => {
const code = char.charCodeAt(0);
if (code > 0xFF) {
return [(code & 0xFF00) >>> 8, code & 0xFF];
}
return [code & 0xFF]; // 下の8ビットを取得する
});
}
}