generated at
Scrapbox書籍を作るUserScript@0.2.0
scrapbox書籍を作るUserScript

必要なもの
gyazoの画像リスト
JSON形式
Gyazo APIのAccess Token

コードの一例
for Deno
deno-cliffyでUIを作った
なぜかspinnerの大部分が消えずに残ってしまう
しかも3回重複する
バグッてる?
03:05:18 spinnerのmessageに改行が含まれていたのが原因だった
\n を取り除いたら直った
$ deno check --remote -r=https://scrapbox.io https://scrapbox.io/api/code/takker/Scrapbox書籍を作るUserScript@0.2.0/main.ts
$ deno run --allow-read --allow-write --allow-net=api.gyazo.com -r=https://scrapbox.io https://scrapbox.io/api/code/takker/Scrapbox書籍を作るUserScript@0.2.0/main.ts
必須オプションとして -t <gyazoのaccess token> を渡す
引数に①gyazoの画像リストのファイルパス②scrapbox書籍用目次のcsvファイルへのパスを順に渡す
main.ts
/// <reference lib="deno.ns" />
import { getOCRs } from "../GyazoのURLリストからOCRテキストを一括取得するscript/mod.ts";
import { IndexParseStream, PageSortStream, PageStringifyStream, Page, PageSource } from "../Scrapbox書籍のformat@0.2.0/mod.ts";
import { getGyazoToken } from "../scrapbox-userscript-std/rest.ts";
import { CsvParseStream } from "jsr:@std/csv@1/parse-stream";
import type { ImportedLightPage } from "../scrapbox-jp%2Ftypes/rest.ts";
import { isAbsolute, toFileUrl, resolve } from "jsr:@std/path@1/posix";
import { Command } from "jsr:@cliffy/command@1.0.0-rc.7";
import { Spinner } from "jsr:@std/cli@1/unstable-spinner";
import { isErr, unwrapErr, unwrapOk } from "npm:option-t@50/plain_result";

// CLI entry point: builds a Scrapbox import JSON from a Gyazo URL list (JSON)
// and a table-of-contents CSV.
//
// Positional arguments: <gyazoListPath> <csvPath>
// Options: -t/--token (required), -o/--outfile, --offset
const {
  args: [gyazoListPath, csvPath],
  options: { token, offset, outfile },
} = await new Command()
  .name("Scrapbox書籍を作るUserScript")
  .description("GyazoのURLリストとscrapbox書籍用目次CSVからScrapbox書籍のJSONファイルを作る")
  .version("v0.2.0")
  .arguments("<gyazoListPath:string> <csvPath:string>")
  // The token is mandatory: without it every OCR request would fail, so let
  // the CLI parser reject the invocation instead of asserting `token!` later.
  .option("-t, --token <token:string>", "Gyazo Access Token", { required: true })
  .option("-o, --outfile <outfile:string>", "出力先ファイルパス", { default: "scrapbox.json" })
  .option("--offset <offset:number>", "scrapbox書籍用目次CSVに書かれたページ番号をoffsetだけずらす", { default: 0 })
  .parse(Deno.args);

// The Gyazo list is read through fetch(), so turn the path into a file: URL.
const listURL = toFileUrl(
  isAbsolute(gyazoListPath)
    ? gyazoListPath
    : resolve(Deno.cwd(), gyazoListPath),
);
const gyazoList = (await (await fetch(listURL)).json()) as string[];

// Fetch the OCR results
let counter = 0;
// 1-based positions (in arrival order) of the OCR requests that failed.
const errors = [] as number[];
const spinner = new Spinner();
const source: AsyncIterable<PageSource> = (async function* () {
  spinner.message = "Download OCRs...";
  spinner.start();
  const header = () =>
    `${counter - errors.length}/${gyazoList.length} got, ${errors.length} failed: `;
  for await (const result of getOCRs(gyazoList, token)) {
    counter++;
    if (isErr(result)) {
      const reason = unwrapErr(result);
      // Anything that is not an Error is treated as fatal.
      if (!(reason instanceof Error)) throw result;
      console.error(result);
      errors.push(counter);
      spinner.message = `${header()}${reason}`;
      continue;
    }
    const ocr = unwrapOk(result);
    yield ocr;
    // Newlines in the message break the spinner's redraw, so strip them
    // before showing the first 10 characters of the OCR text.
    spinner.message = `${header()}${[...ocr.text.replaceAll("\n", "")].slice(0, 10).join("")}`;
  }
  spinner.message = "creating the json file...";
})();

// The CSV may be given either as a URL or as a (relative or absolute) path.
const csvURL = URL.canParse(csvPath)
  ? new URL(csvPath)
  : toFileUrl(isAbsolute(csvPath) ? csvPath : resolve(Deno.cwd(), csvPath));

try {
  const pages = await Array.fromAsync(
    (await fetch(csvURL)).body!
      .pipeThrough(new TextDecoderStream())
      // Read the table of contents
      .pipeThrough(new CsvParseStream())
      .pipeThrough(new IndexParseStream())
      .pipeThrough(new PageSortStream())
      .pipeThrough(new PageStringifyStream(source))
      // Build the JSON
      .pipeThrough(
        new TransformStream<string[], ImportedLightPage>({
          transform(lines, controller) {
            controller.enqueue({ title: lines[0], lines });
          },
        }),
      ),
  );
  await Deno.writeTextFile(outfile, JSON.stringify({ pages }));
} finally {
  // Stop the spinner on failure as well, so an error is not printed on top
  // of a still-animating spinner line.
  spinner.stop();
}
console.log("created the json file.");
テキストのみ
$ deno check --remote -r=https://scrapbox.io https://scrapbox.io/api/code/takker/Scrapbox書籍を作るUserScript@0.2.0/onlyText.ts
$ deno run --allow-read --allow-write -r=https://scrapbox.io https://scrapbox.io/api/code/takker/Scrapbox書籍を作るUserScript@0.2.0/onlyText.ts
onlyText.ts
import { parse, sort, stringify, Page } from "../Scrapbox書籍のformat@0.2.0/mod.ts";
import Parser from "../papaparse/mod.ts";
import type { ImportedLightPage } from "../scrapbox-jp%2Ftypes/rest.ts";

// Text-only variant: builds scrapbox.json from a JSON array of page texts and
// a table-of-contents CSV.
//
// Usage: onlyText.ts <text list JSON path> <index CSV path>
const [ocrPath, csvPath] = Deno.args;
if (!ocrPath || !csvPath) {
  // Fail with a readable message instead of an opaque readTextFile error.
  console.error("usage: onlyText.ts <text list JSON path> <index CSV path>");
  Deno.exit(1);
}

const ocrs = JSON.parse(await Deno.readTextFile(ocrPath)) as string[];
console.log(`creating the json file...`);

// Read the table of contents
const csv = await Deno.readTextFile(csvPath);
const { data } = Parser.parse<string[]>(csv);

// Build the JSON
const pages: ImportedLightPage[] = [];
for (const page of sort(parse(data))) {
  // The second argument is a list of empty strings, one per text entry.
  // NOTE(review): presumably this is the slot for per-page image URLs, left
  // blank in the text-only flow; confirm against stringify's signature.
  const lines = stringify(page, new Array<string>(ocrs.length).fill(""), ocrs).split("\n");
  pages.push({ title: lines[0], lines });
}
await Deno.writeTextFile("scrapbox.json", JSON.stringify({ pages }));
console.log("created the json file.");
text fileをjsonにする (web browser用)
PDFの場合は、予めPDFからテキスト情報を取得して、テキストファイルに書き込んでおく
$ deno check --remote -r=https://scrapbox.io https://scrapbox.io/api/code/takker/Scrapbox書籍を作るUserScript@0.2.0/makeJson.ts
makeJson.ts
import { useStatusBar } from "../scrapbox-userscript-std/dom.ts";
import { upload } from "../scrapbox-file-uploader/mod.ts";

/**
 * Lets the user pick plain-text files, reads them in name order, and opens a
 * JSON array of their contents in a new window.
 *
 * Progress is reported through the status bar, which is disposed one second
 * after the function finishes, whether it succeeded, failed or was cancelled.
 */
const main = async () => {
  const textList: string[] = [];
  const { render, dispose } = useStatusBar();
  try {
    const fileList = await upload({ accept: "text/plain", multiple: true });
    // Nothing was selected.
    if (!fileList) return;

    // Sort by file name using the default locale collation.
    const compare = new Intl.Collator().compare;
    const files = Array.from(fileList).sort((a, b) => compare(a.name, b.name));

    let counter = 0;
    for (const file of files) {
      textList.push(await file.text());
      // Count the file that was just read; without this the status bar
      // would stay at "0 proceed" for the whole run.
      counter++;
      render(
        { type: "spinner" },
        {
          type: "text",
          text: `${files.length} files, ${counter} proceed`,
        },
      );
    }
    render(
      { type: "check-circle" },
      { type: "text", text: `Finish process.` },
    );
    console.log(textList);

    const blob = new Blob([JSON.stringify(textList)], { type: "application/json" });
    window.open(URL.createObjectURL(blob));
  } finally {
    // Keep the final status visible briefly before removing it.
    setTimeout(() => {
      dispose();
    }, 1000);
  }
};

await main();

#2024-11-10 17:07:56 Web Streams APIを使って main.ts を書き換えた
かなり早くなった
#2024-06-21 14:45:25
#2024-02-01 02:30:30 deno-cliffyとdeno_std/cliを入れた
#2023-04-09 10:24:23
#2023-03-08 10:10:27
#2022-11-03 02:05:05
#2022-08-20 14:10:28 json作成時のエラー処理を追加
#2022-08-12 09:55:26 status-bar (scrapbox)に進捗を表示する
#2022-08-04 14:52:54