generated at
PDFjsのviewerから画像データとテキストデータを取得するUserScript
pdf.jsで実装されたviewerから、PDFの画像とテキストデータを取得して、zipにしてdownloadするUserScript
ブラウザの開発者コンソールに貼り付けて実行する
Firefox PDF viewerなどで使うことを想定している

script.js
await (async () => {

canvasからblobを取り出す
script.js
/**
 * Encodes a canvas as PNG and hands the result back as a Blob.
 *
 * Wraps the callback-based `canvas.toBlob` in a Promise.
 *
 * @param {HTMLCanvasElement} canvas - Canvas whose `toBlob` method is called.
 * @returns {Promise<Blob>} Resolves with the blob passed to the `toBlob` callback.
 *   Rejects with an `Error` when that callback receives a falsy blob.
 */
const getBlob = (canvas) =>
  new Promise((resolve, reject) => {
    canvas.toBlob((blob) => {
      if (!blob) {
        reject(new Error("Failed to create Blob from canvas"));
        return;
      }
      resolve(blob);
    }, "image/png");
  });

1枚ずつcanvasとテキストを取得する
canvasが読み込まれるのを待ってから返す
末尾まで読み終わったら終了する
script.js
/**
 * Waits until a page view has a canvas and no loading indicator, then
 * resolves with that canvas and the page's text.
 *
 * @param {object} page - Entry of `PDFViewerApplication.pdfViewer._pages`.
 * @param {number} pollInterval - Milliseconds between readiness checks.
 * @param {number} settleDelay - Extra milliseconds to wait once the page looks ready.
 * @returns {Promise<{canvas: HTMLCanvasElement, text: string}>}
 */
function waitForRenderedPage(page, pollInterval, settleDelay) {
  return new Promise((resolve, reject) => {
    const timer = setInterval(() => {
      try {
        // Keep polling until the canvas exists and the loading icon is gone.
        const canvas = page.div.getElementsByTagName("canvas")[0];
        if (!canvas) return;
        if (page.div.getElementsByClassName("loadingIcon").length > 0) return;
        clearInterval(timer);

        // Give the viewer some time to finish painting before handing the canvas out.
        setTimeout(() => {
          try {
            // `textLayer` may be missing (presumably when the viewer has no text
            // layer for the page); fall back to an empty string instead of
            // throwing inside the timer, which would leave the promise pending.
            const text = page.textLayer?.textContentItemsStr?.join?.("\n") ?? "";
            resolve({ canvas, text });
          } catch (error) {
            reject(error);
          }
        }, settleDelay);
      } catch (error) {
        // An exception in a timer callback would otherwise be lost and the
        // generator would wait forever.
        clearInterval(timer);
        reject(error);
      }
    }, pollInterval);
  });
}

/**
 * Yields the canvas and text of each page, one page at a time.
 *
 * Each page is scrolled into view, and the generator waits for its canvas to
 * be loaded before yielding. Iteration ends when there is no page at the next
 * index.
 *
 * @param {object} [options]
 * @param {number} [options.pollInterval=100] - Milliseconds between readiness checks.
 * @param {number} [options.settleDelay=2000] - Milliseconds to wait after the page
 *   looks ready, before reading the text and yielding.
 * @yields {{canvas: HTMLCanvasElement, text: string}} `text` is the text layer's
 *   items joined by newlines, or "" when they are unavailable.
 */
async function* readPages({ pollInterval = 100, settleDelay = 2000 } = {}) {
  for (let index = 0; ; index++) {
    const page = PDFViewerApplication.pdfViewer._pages[index];
    if (!page) break;
    page.div.scrollIntoView();
    const rendered = await waitForRenderedPage(page, pollInterval, settleDelay);
    yield rendered;
  }
}

画像編集
script.js
/**
 * Cuts the margins off a canvas. Margin sizes are given as ratios.
 *
 * @param {HTMLCanvasElement} inputCanvas - Canvas to crop; it is not modified.
 * @param {number} paddingW - Fraction of the width removed from the left and from the right.
 * @param {number} paddingH - Fraction of the height removed from the top and from the bottom.
 * @returns {HTMLCanvasElement} A newly created canvas holding the cropped region.
 */
const trim = (inputCanvas, paddingW, paddingH) => {
  const { width: sourceWidth, height: sourceHeight } = inputCanvas;

  const cropped = document.createElement("canvas");
  cropped.width = sourceWidth * (1 - 2 * paddingW);
  cropped.height = sourceHeight * (1 - 2 * paddingH);

  // Read the size back from the canvas so the drawn region uses whatever
  // dimensions the canvas actually holds after the assignments above.
  const { width: croppedWidth, height: croppedHeight } = cropped;
  const offsetX = sourceWidth * paddingW;
  const offsetY = sourceHeight * paddingH;

  cropped
    .getContext("2d")
    .drawImage(
      inputCanvas,
      offsetX,
      offsetY,
      croppedWidth,
      croppedHeight,
      0,
      0,
      croppedWidth,
      croppedHeight,
    );
  return cropped;
};
画像を分割する
script.js
/**
 * Splits a canvas into a grid of equally sized tiles, row by row from the top left.
 *
 * NOTE: a single canvas is reused for every tile and redrawn before each
 * yield, so a consumer must finish with the tile (e.g. encode it) before
 * asking for the next one.
 *
 * @param {HTMLCanvasElement} inputCanvas - Canvas to cut up.
 * @param {number} [splitW=1] - Number of columns.
 * @param {number} [splitH=1] - Number of rows.
 * @yields {HTMLCanvasElement} The shared tile canvas, holding the current tile.
 */
async function* split(inputCanvas, splitW = 1, splitH = 1) {
  const columns = Math.floor(splitW);
  const rows = Math.floor(splitH);
  // Nothing to yield for zero, negative, NaN or infinite counts. Bailing out
  // here also avoids a loop that could never terminate.
  if (!Number.isFinite(columns) || !Number.isFinite(rows) || columns < 1 || rows < 1) {
    return;
  }

  const unitW = inputCanvas.width / splitW;
  const unitH = inputCanvas.height / splitH;

  const canvas = document.createElement("canvas");
  canvas.width = unitW;
  canvas.height = unitH;
  const ctx = canvas.getContext("2d");

  // Derive each offset from an integer index instead of accumulating
  // `+= unit`: repeated floating-point addition can overshoot the canvas
  // size and silently skip the last row or column.
  for (let row = 0; row < rows; row++) {
    const y = row * unitH;
    for (let column = 0; column < columns; column++) {
      const x = column * unitW;
      ctx.clearRect(0, 0, canvas.width, canvas.height);
      ctx.drawImage(inputCanvas, x, y, unitW, unitH, 0, 0, unitW, unitH);
      yield canvas;
    }
  }
}
JSZipのinstall
script.js
/**
 * Makes the JSZip library available by injecting its CDN script into the page.
 *
 * Resolves immediately when a global `JSZip` already exists. A leftover script
 * tag without a usable global (for example from an earlier failed attempt) is
 * removed and the script is loaded again.
 *
 * @returns {Promise<void>} Resolves once the script's `load` event fires.
 *   Rejects with an `Error` (the original event is attached as `cause`) when
 *   the script fails to load.
 */
const installJSZip = () =>
  new Promise((resolve, reject) => {
    const id = "userscript-jszip";
    const src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.0/jszip.min.js";

    if (typeof globalThis.JSZip !== "undefined") {
      resolve();
      return;
    }

    // A tag with our id but no JSZip global did not finish loading
    // successfully; drop it so the load can be retried.
    document.getElementById(id)?.remove();

    const script = document.createElement("script");
    script.addEventListener("load", () => {
      resolve();
    });
    script.addEventListener("error", (e) => {
      // Remove the failed tag so a later call does not mistake it for a
      // completed installation.
      script.remove();
      reject(new Error(`Failed to load JSZip from ${src}`, { cause: e }));
    });
    script.src = src;
    script.id = id;
    document.head.append(script);
  });

メインプログラム
script.js
await installJSZip(); const zip = new JSZip(); const pages = []; for await (const { canvas, text } of readPages()) { for await (const canvas2 of split( trim(canvas, 0.1, 0.1), 2, 2 )) { pages.push({ image: await getBlob(canvas2), text }); } } for (let i = 0; i < pages.length; i++) { const name = `${ `${i}`.padStart(`${pages.length}`.length, "0") }`; zip.file(`${name}.png`, pages[i].image); zip.file(`${name}.txt`, pages[i].text); } const zipBlob = await zip.generateAsync({ type: "blob", compression: "DEFLATE", compressionOptions: { level: 9, }, }); const a = document.createElement("a"); a.href = URL.createObjectURL(zipBlob); const title = document.title.replace?.(/\.pdf$/, ""); a.download = `${title}.zip`; document.body.append(a); a.click(); a.remove(); })();

#2023-04-11 15:29:47
#2022-09-27 17:53:29