scrapbox-pdftoimage
2022-09-10
11:35:35 async generatorで画像を一枚ずつ返すやつ `read` を追加した
$ deno check --remote -r=https://scrapbox.io https://scrapbox.io/api/code/takker/scrapbox-pdftoimage/mod.ts
mod.ts/// <reference no-default-lib="true" />
/// <reference lib="esnext" />
/// <reference lib="dom" />
import { load } from "./load.ts";
import type { Metadata } from "https://raw.githubusercontent.com/takker99/deno-pdfjs-dist-types/0.1.0/src/display/metadata.d.ts";
/** Options accepted by `pdfConverter` when opening a PDF document. */
export interface PdfConverterOptions {
  /**
   * URL of the directory that hosts the cMap files.
   *
   * When omitted, the copy hosted by ci7lus is used.
   */
  cMapUrl?: string;
}
/** Per-call settings controlling how a page is rendered and encoded. */
export interface ConvertOptions {
  /**
   * Resolution used when turning a page into an image.
   *
   * @default 150
   */
  printResolution?: number;
  /**
   * MIME type of the generated image.
   *
   * @default "image/png"
   */
  mimeType?: "image/png" | "image/jpeg";
  /**
   * Factor expressing the image quality.
   *
   * Only takes effect when `mimeType` is `"image/jpeg"`.
   */
  quality?: number;
}
/** Handle on an opened PDF that turns its pages into images. */
export interface Converter {
  /** Metadata of the PDF. */
  metadata: {
    info: Record<string, unknown>;
    metadata: Metadata;
  };

  /** Number of pages in the PDF. */
  count: number;

  /**
   * Convert a single page into an image.
   *
   * @param pageNum number of the page to convert; numbering starts at 1
   * @param options conversion options
   * @returns a Promise that resolves with the image as a Blob
   */
  convert: (pageNum: number, options?: ConvertOptions) => Promise<Blob>;

  /**
   * Convert the pages into images one at a time.
   *
   * This is simply `convert` exposed as an async generator.
   *
   * @param options conversion options
   * @returns an AsyncGenerator yielding the image Blobs
   */
  read(options?: ConvertOptions): AsyncGenerator<Blob, void, unknown>;
}
/**
 * Open a PDF with pdf.js and return a {@link Converter} for turning its pages
 * into image Blobs.
 *
 * @param data PDF data handed to pdf.js `getDocument` as its `data` field
 * @param options settings used while opening the document
 * @returns a Promise resolving to a `Converter` bound to the opened document
 */
export const pdfConverter = async (
  data: string | Uint8Array | number[],
  options?: PdfConverterOptions,
): Promise<Converter> => {
  // pdf.js aborts with
  //   "Error: The `Array.prototype` contains unexpected enumerable properties:
  //    getIndexByTitleLc; thus breaking e.g. `for...in` iteration of `Array`s."
  // whenever Array.prototype carries an extra property, so `getIndexByTitleLc`
  // is taken off the prototype while the document is being opened.
  // (Not needed on Firefox.)
  const pollutingKey = "getIndexByTitleLc";
  // Keep the full descriptor so the property comes back exactly as it was, and
  // so nothing is created on the prototype when it was never there.
  const savedDescriptor = Object.getOwnPropertyDescriptor(
    Array.prototype,
    pollutingKey,
  );
  const removed = savedDescriptor !== undefined &&
    Reflect.deleteProperty(Array.prototype, pollutingKey);

  try {
    const pdfjsLib = await load("2.13.216");
    const pdf = await pdfjsLib.getDocument({
      data,
      cMapUrl: options?.cMapUrl ??
        "https://storage.googleapis.com/chrono-lexica/ci7lus-assets/pdfjs/cmaps/",
      cMapPacked: true,
    }).promise;
    const metadata = await pdf.getMetadata();

    /** Render page `pageNum` onto an off-screen canvas and encode it as a Blob. */
    const convert = async (
      pageNum: number,
      options?: ConvertOptions,
    ): Promise<Blob> => {
      const page = await pdf.getPage(pageNum);
      // NOTE(review): the viewport is scaled by devicePixelRatio and the render
      // transform below scales again by printResolution / 72, so the output
      // size depends on both. Confirm this is intended.
      const viewport = page.getViewport({
        scale: window.devicePixelRatio ?? 1.5,
      });
      const canvas = document.createElement("canvas");
      const ctx = canvas.getContext("2d");
      if (!ctx) throw Error("2D rendering on <canvas> is not supported");

      // Scale factor applied on top of the viewport: requested resolution / 72.
      const PRINT_UNITS = (options?.printResolution ?? 150) / 72.0;
      const renderContext = {
        canvasContext: ctx,
        viewport: viewport,
        transform: [PRINT_UNITS, 0, 0, PRINT_UNITS, 0, 0],
      };
      canvas.height = Math.floor(viewport.height * PRINT_UNITS);
      canvas.width = Math.floor(viewport.width * PRINT_UNITS);
      await page.render(renderContext).promise;

      // canvas.toBlob is callback-based; wrap it so a null Blob becomes a rejection.
      return new Promise<Blob>(
        (resolve, reject) =>
          canvas.toBlob(
            (blob) =>
              !blob ? reject(new Error("Faild to create Blob")) : resolve(blob),
            options?.mimeType ?? "image/png",
            options?.quality,
          ),
      );
    };

    return {
      metadata: {
        info: metadata.info as Record<string, unknown>,
        metadata: metadata.metadata,
      },
      count: pdf.numPages,
      // Out-of-range page numbers are clamped into [1, numPages].
      convert: (pageNum: number, options?: ConvertOptions) =>
        convert(Math.min(Math.max(1, pageNum), pdf.numPages), options),
      read: async function* (options?: ConvertOptions) {
        for (let i = 1; i <= pdf.numPages; i++) {
          yield await convert(i, options);
        }
      },
    };
  } finally {
    // Put the property back only if it was actually removed above; a plain
    // assignment here would add an enumerable `undefined` entry to
    // Array.prototype when the property never existed.
    if (removed && savedDescriptor) {
      Object.defineProperty(Array.prototype, pollutingKey, savedDescriptor);
    }
  }
};
load.ts/// <reference no-default-lib="true" />
/// <reference lib="esnext" />
/// <reference lib="dom" />
import type {
getDocument,
GlobalWorkerOptions,
} from "https://raw.githubusercontent.com/takker99/deno-pdfjs-dist-types/0.1.0/mod.d.ts";
とりあえず使うものだけ定義した
本当はもっといろいろ生えている
load.tsdeclare global {
// Global augmentation: declares the `pdfjsLib` property that this module reads
// from `window`.
interface Window {
/** The pdf.js global object.
 *
 * Optional because it may not be present yet (`ensurePDFjsLib` polls for it).
 * Only the members used here are declared; the real object has more.
 */
pdfjsLib?: {
getDocument: typeof getDocument;
GlobalWorkerOptions: typeof GlobalWorkerOptions;
};
}
}
/** Type of `window.pdfjsLib` with `undefined` excluded. */
export type PDFjsLib = NonNullable<Window["pdfjsLib"]>;
/**
 * Resolve with `window.pdfjsLib` once it is available.
 *
 * Resolves right away when the global already exists; otherwise checks once
 * per second until it appears. The returned promise is never rejected.
 */
const ensurePDFjsLib = (): Promise<PDFjsLib> =>
  new Promise<PDFjsLib>((resolve) => {
    if (window.pdfjsLib) {
      // Already present: resolve now and do not start the polling timer at all.
      resolve(window.pdfjsLib);
      return;
    }
    const timer = setInterval(() => {
      if (!window.pdfjsLib) return;
      clearInterval(timer);
      resolve(window.pdfjsLib);
    }, 1000);
  });
/**
 * Load pdf.js from cdnjs and return its `pdfjsLib` global.
 *
 * If a `<script>` for the same URL is already in the document, no new element
 * is added and the function only waits for the global to appear. Otherwise the
 * script is injected and, once loaded, `GlobalWorkerOptions.workerSrc` is
 * pointed at the matching worker script of the same version.
 *
 * @param version pdf.js version to fetch, as it appears in the cdnjs path
 * @returns the `pdfjsLib` global
 * @throws {Error} when the injected script fails to load; the failed element
 *   is removed so that a later call can try again
 */
export const load = async (version: string): Promise<PDFjsLib> => {
  const baseUrl = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${version}`;
  const src = `${baseUrl}/pdf.min.js`;

  // A matching <script> means an earlier call already injected it.
  if (document.querySelector(`script[src="${src}"]`)) {
    return await ensurePDFjsLib();
  }

  const script = document.createElement("script");
  script.src = src;
  await new Promise<void>((resolve, reject) => {
    script.addEventListener("load", () => resolve());
    script.addEventListener("error", () => {
      // Leaving the dead element behind would make every later call take the
      // "already injected" path above and wait forever for the global.
      script.remove();
      reject(new Error(`Failed to load pdf.js from ${src}`));
    });
    document.body.append(script);
  });

  const pdfjsLib = await ensurePDFjsLib();
  pdfjsLib.GlobalWorkerOptions.workerSrc = `${baseUrl}/pdf.worker.min.js`;
  return pdfjsLib;
};