generated at
advanced-link-searcher
拡張Scrapbox入力補完で使う、リンクとアイコン検索用module
scrapbox-link-databaseから取得したデータからasearchで検索をかける

interface
script.d.ts
export class SearchEngine { constructor(projects: string[], option: {icon?: boolean; expired?: number}); search(query: string, option?: {timeout?: number; limit?:number; ambig?:number;}): Promise<{state: 'cancelled';} | {result: string[]; state: 'fullfilled';}>; reload(): Promise<void>; }
検索エンジンの作成
const engine = new SearchEngine(projects, {icon: true, expired: 600})
projects に指定したscrapbox projectからリンクを読み込む
instanceを生成した段階で、リンクの読み込みを開始する
icon: true にすると、画像があるリンクのみ取得する
expired に、cacheの寿命を秒単位で指定する
defaultは1時間
検索
engine.search(query, {timeout, limit, ambig})
戻り値
あいまい度順に並べた配列が返ってくる
ts
type Return = { project: string; title: string; }[][];
async-singletonを使うことで、たくさん検索の依頼が来ても、最初と最後のしか検索しないようにした
project は検索しない
title のみをあいまい検索する

実装
補完ソースはscrapbox-link-databaseに格納する
WebWorker用asearchで検索する
設計
workerからUI threadへの通信には文字列配列を渡す
直接object配列を渡しても速度には影響しないかな?

dependencies
script.js
import {CacheStorage} from '../scrapbox-link-database/script.js'; import {openDB} from '../idb/with-async-ittr.js'; import {singletonWorker} from '../singletonWorker/script.js'; // TODO: async-singletonで包んでおく const workers = [...Array(navigator.hardwareConcurrency)].map(_ =>new singletonWorker('/api/code/programming-notes/advanced-link-searcher/worker.js')); // 相対パスは使えないみたい let idCounter = 0; export class SearchEngine { constructor(projects, {icon = false, expired = 3600, verbose = false} = {}) { this._id = idCounter++; this._projects = projects; this._cacheStorage = new CacheStorage(expired); this._icon = icon; this._workerStoreName = this._icon ? SearchEngine.iconStoreName : SearchEngine.linkStoreName; this._verbose = verbose; this._initialized = this._initialize(); } static databaseName = 'UserScript-SearchEngine'; static databaseVersion = 3; static iconStoreName = 'icon-search-list'; static linkStoreName = 'link-search-list'; async search(query, {timeout = 5000, limit = 30, ambig} = {}) { await this._initialized; this._log('search', 'Start a search', {query, timeout, limit, ambig}); const raw = await Promise.all(workers.map((worker, i) => worker.delegate({ id: [this._id, i], databaseName: SearchEngine.databaseName, databaseVersion: SearchEngine.databaseVersion, storeName: this._workerStoreName, query, timeout, limit, ambig, verbose: this._verbose, }) )); // 一つでも'cancelled'が混じっていたら{state: 'cancelled'}を返す if (raw.some(({state}) => state === 'cancelled')) return {state: 'cancelled'}; // 転置して曖昧度順に結果を並べて、limit分だけ返す const results = raw.map(({data: {result}}) => result); this._log('search', 'Finish a search', {raw, results}); return { result: transpose(results).flatMap(lists => lists.flat()).slice(0, limit), state: 'fullfilled', }; } async reload() { const projects = await this.update(this._projects, {icon: this._icon, reload: true}); if (projects.length > 0) await this._createWorkerSource(); } // Workerが検索に使うリストを作る async _createWorkerSource() { const data = await this._cacheStorage.get(this._projects, {icon: this._icon}); const source = this._icon ? data .flatMap(({project, titles}) => titles.map(title => {return {project, title};})) // 長さ順→辞書順に並べ替える .sort((a, b) => a.title.length === b.title.length ? a.title.localeCompare(b.title) : a.title.length - b.title.length ) : // 適当にかき混ぜる shuffle(data.flatMap( ({project, pages}) => { const titles = [...new Set(pages.flatMap(({title, links}) => [title, ...links]))]; return titles.map(title => {return {project, title};}); } )); this._log('_createWorkerSource', `Create ${source.length} search items.`); // 検索用文字列リストをworkers分だけ分割する const chunkLength = Math.floor(source.length/workers.length) + 1; const lists = workers.map((_, i) => source.slice(i * chunkLength, (i + 1) * chunkLength)); // databaseに格納する this._log('_createWorkerSource', `Put ${source.length} search items on Indexed DB.`); const tx = this._workerDB.transaction(this._workerStoreName, 'readwrite'); const store = tx.objectStore(this._workerStoreName); const keys = await store.getAllKeys(); await Promise.all(lists.map((list, index) => { const id = [this._id, index]; return keys.some(key => key[0] === id[0] && key[1] === id[1]) ? store.put({list, id}) : store.add({list, id}); })); await tx.done; this._log('_createWorkerSource', `Finish putting on Indexed DB.`); } async _initialize() { this._log('_initialize', 'Start initializing'); this._workerDB = await openDB(SearchEngine.databaseName, SearchEngine.databaseVersion, { upgrade: (db) => { // Object Storeをすべて消す [...db.objectStoreNames].forEach(storeName => db.deleteObjectStore(storeName) ); db.createObjectStore(SearchEngine.iconStoreName, {keyPath: 'id'}); db.createObjectStore(SearchEngine.linkStoreName, {keyPath: 'id'}); }, }); await this._createWorkerSource(); this._log('_initialize', 'Finish initializing'); } _log(functionName, ...messages) { if (!this._verbose) return; console.log(`[${functionName}@advanced-link-searcher]`, ...messages); } }

script.js
const transpose = a => a[0].map((_, c) => a.map(r => r[c])); function shuffle(array) { let result = array; for (let i = result.length; 1 < i; i--) { const k = Math.floor(Math.random() * i); [result[k], result[i - 1]] = [result[i - 1], result[k]]; } return result; }

workerのcode
worker.js
self.importScripts('../asearch/worker.js'); self.importScripts('../idb/worker.js'); let db = undefined; let verbose = false; self.addEventListener('message', async ({data: {id, databaseName, databaseVersion, storeName, verbose: _verbose, ...rest}}) => { await openDB(databaseName, databaseVersion); const source = (await db.get(storeName, id))?.list ?? []; if (_verbose) verbose = _verbose; self.postMessage({result: search({source, ...rest}), id,}); }); async function openDB(databaseName, databaseVersion) { if (db) return; db = await idb.openDB(databaseName, databaseVersion); } function search({source, query, ambig, limit, timeout}) { if (!source || source.length === 0) return []; // 値のcheck if (typeof query !== 'string') throw Error('query is not a string.'); if (typeof ambig !== 'number' && ambig !== undefined) throw Error('ambig is not a number'); if (typeof limit !== 'number' && limit !== undefined) throw Error('limit is not a number'); if (limit <= 0) throw Error('limit is not more than 0.'); if (typeof timeout !== 'number') throw Error('timeout is not a number.'); if (timeout <= 0) throw Error('timeout is not more than 0.'); // 検索語句が空のときは、検索候補の先頭limit個を取得する if (query.trim() === '') { _log('search', `Finish a search for "${query}"`); return [source.slice(0, limit).map(({project, title}) => `/${project}/${title}`)]; } _log('search', 'Start fuzzy search for ', {query, ambig, limit, timeout, source}); // 空白文字で区切った文字列を並び替え、曖昧検索objectを作る const asearches = getPermutation(query.split(/\s/)) .map(wordList => new Asearch(` ${wordList.join(' ')} `)); // ambigの最大値を計算する let maxAmbig = ambig ?? Math.floor(` ${query} `.length / 4) + 1; maxAmbig = maxAmbig > 4 ? 4 : maxAmbig; // 検索する const result = []; const totalResults = new Set(); const start = (new Date()).getTime(); let matches; let cancel = false; // 計算を中断するflag for (let ambig = 0; ambig < maxAmbig; ambig++) { matches = new Set(); for (const asearch of asearches) { // 検索した文字列を重複を取り除いて追加する for (const {project, title} of source) { if (limit && totalResults.size >= limit) { cancel = true; break; } const path = `/${project}/${title}`; if (totalResults.has(path) || !asearch.match(title, ambig)) continue; matches.add(path); totalResults.add(path); } if (start + timeout < (new Date()).getTime()) { _log('search', `time out`, {query}); cancel = true; } if (cancel) break; } result.push([...matches]); if (cancel) break; } _log('search', `Finish fuzzy search for "${query}": `, result); return result; }

worker.js
// 重複は考慮していない function getPermutation(list) { if (list.length == 0) return list; if (list.length == 1) return [list]; if (list.length == 2) return [[list[0],list[1]],[list[1],list[0]]]; return list.flatMap(first => { const restList = list.filter(item => item !== first); return getPermutation(restList).map(permutation => [first, ...permutation]); }); } function _log(functionName, ...messages) { if (!verbose) return; console.log(`[${functionName}@advanced-link-searcher-worker]`, ...messages); }

test code
08:02:11 大体バグは潰せたと思う
09:04:15 バグつぶし終了
js
import('/api/code/programming-notes/advanced-link-searcher/test1.js')
test1.js
import {SearchEngine} from './script.js'; import {projects} from '../scrapbox-link-database/list.js'; const engine = new SearchEngine(projects); window.engine = engine;
js
import('/api/code/programming-notes/advanced-link-searcher/test2.js')
test2.js
import {SearchEngine} from './script.js'; const engine = new SearchEngine(['icons', 'icons2', 'emoji'], {icon: true}); window.engine = engine;

#2021-03-18 11:48:38
#2021-03-16 06:39:04