松岡正剛の千夜千冊をscrapbox書籍にするscript
使い方
ドメインが同じならどのページでも良い
3. かなり待つとzipをdownloadできるようになる
fetchにかなり時間がかかるので注意
1秒おきに取得するようにしているのに加えて、server自体の応答が遅い
4. json filesを全部importする
1つだけ欠番があります
code
script.js(() => {
// Entry point: fetch every review, convert to Scrapbox import JSON, zip, download.
(async () => {
  const latest = 1772;
  // The range of pages to fetch is decided here.
  const result = await fetchReviews(1, latest);
  console.log('Fetched: ', result.pages);
  console.log('Pages failed to fetch: ', result.failed);

  const chunks = convertScrapboxJSON(result.pages);
  const archive = await zipAll(chunks);
  downloadBlob(archive.data, archive.fileName);
})();
fetch
script.js const sleep = (milliseconds) => new Promise(resolve => setTimeout(resolve, milliseconds)); // Returns a promise that resolves after `milliseconds` ms (via setTimeout); never rejects.
/**
 * Fetches review pages `from`..`to` (inclusive) one at a time and parses each.
 *
 * Every number is zero-padded to four digits and requested as `/<id>.html`
 * relative to the current origin. A page whose request fails, whose HTTP
 * status is not 2xx, or whose HTML cannot be parsed is logged and recorded in
 * `failed`; the loop then continues with the next number.
 *
 * @param {number} from - First review number to fetch.
 * @param {number} to - Last review number to fetch (inclusive).
 * @returns {Promise<{pages: object[], failed: string[]}>} Parsed pages (each
 *   with its padded `id`) in fetch order, and the padded ids that failed.
 */
async function fetchReviews(from, to) {
  const pages = [];
  const failed = [];
  for (let i = from; i <= to; i++) {
    const id = `${i}`.padStart(4, '0');
    try {
      const res = await fetch(`/${id}.html`);
      // fetch() only rejects on network errors; an HTTP error page has to be
      // rejected explicitly or its body would be handed to the parser.
      if (!res.ok) {
        throw new Error(`Failed to fetch /${id}.html: HTTP ${res.status}`);
      }
      const page = {id, ...parse(await res.text())};
      console.log(page);
      pages.push(page);
    } catch (e) {
      console.error(e);
      failed.push(id);
    }
    // Pause one second between requests; nothing follows the last one.
    if (i < to) {
      await sleep(1000);
    }
  }
  return {pages, failed};
}
parser
htmlから著者名、書籍名、本文を取得する
// script.js
/**
 * Extracts the author, the book title and the body blocks from a review page.
 *
 * The author is read from `#default_style_area .min`, the title from
 * `#default_style_area .goth`, and the body from the direct children of the
 * first `.entry-content` element: `.fig01` children become figures, `<p>`
 * children become paragraphs, everything else is ignored. Figures without a
 * source and paragraphs with empty text are dropped.
 *
 * @param {string} html - HTML source of one review page.
 * @returns {{author: string, title: string, body: object[]}} Parsed page.
 * @throws {Error} When the author, title or `.entry-content` element is missing.
 */
function parse(html) {
  const dom = new DOMParser().parseFromString(html, 'text/html');
  const authorElement = dom.querySelector('#default_style_area .min');
  const titleElement = dom.querySelector('#default_style_area .goth');
  const content = dom.getElementsByClassName('entry-content')[0];
  // Fail with a readable message instead of a null-dereference TypeError.
  if (!authorElement || !titleElement || !content) {
    throw new Error('Not a review page: author, title or .entry-content element is missing');
  }
  const body = [...content.children].flatMap((child) => {
    if (child.classList.contains('fig01')) {
      const fig = parseFigure(child);
      return fig ? [fig] : [];
    }
    // Paragraphs are matched by tag name because, for some reason, `.fontL`
    // is missing from the HTML when it is obtained via fetch.
    if (child.localName === 'p') {
      const para = parseParagraph(child);
      return para.text === '' ? [] : [para];
    }
    return [];
  });
  return {author: authorElement.textContent, title: titleElement.textContent, body};
}
本文を変換する
// script.js
/**
 * Converts one paragraph element into a `{type: 'paragraph', text}` block.
 *
 * The element is modified in place before its text is read:
 * - every `<br>` becomes a newline;
 * - a link whose href contains `<digits>.html` becomes `[digits]` when its
 *   text ends with '夜', otherwise `<link text>([digits])`;
 * - any other link is removed together with its text.
 *
 * @param {Element} fontL - Paragraph element; it is mutated by this call.
 * @returns {{type: 'paragraph', text: string}} The paragraph's `innerText`
 *   after the replacements above.
 */
function parseParagraph(fontL) {
  // replaceWith() inserts its string argument as a text node, so link text
  // containing `<` or `&` is kept literally instead of being re-parsed as
  // markup (which assigning to outerHTML would do).
  fontL.querySelectorAll('br').forEach((br) => br.replaceWith('\n'));
  fontL.querySelectorAll('a').forEach((a) => {
    const match = a.href.match(/(\d+)\.html/);
    if (!match) {
      a.remove();
      return;
    }
    const [, id] = match;
    const label = a.textContent;
    a.replaceWith(label.endsWith('夜') ? `[${id}]` : `${label}([${id}])`);
  });
  return {type: 'paragraph', text: fontL.innerText};
}
本文中の画像と動画を変換する
// script.js
/**
 * Converts a `.fig01` element (an image or embedded video inside the body)
 * into a `{type: 'figure', src, caption, description}` block.
 *
 * `src` comes from the first `img` or `iframe` descendant. The caption is the
 * text of the first child node of the first `.fig9` descendant; the
 * description is the text of its last child node, and is only read when
 * `.fig9` has more than one child element. Both are '' when `.fig9` is absent
 * or has no child elements.
 *
 * NOTE(review): presence is tested with childElementCount (elements) while the
 * text is read from firstChild/lastChild (any node) - confirm the markup never
 * puts a whitespace text node first or last.
 *
 * @param {Element} fig01 - Figure wrapper element.
 * @returns {{type: 'figure', src: string, caption: string, description: string}|undefined}
 *   The figure block, or undefined when there is no img/iframe with a src.
 */
function parseFigure(fig01) {
  const src = fig01.querySelector('img, iframe')?.src;
  if (!src) return undefined;

  // getElementsByClassName always returns a collection; only its first entry
  // can be missing.
  const fig9 = fig01.getElementsByClassName('fig9')[0];
  if (!fig9 || fig9.childElementCount === 0) {
    return {type: 'figure', src, caption: '', description: ''};
  }

  // textContent of a child node of an element is never null, so no fallback
  // is needed here.
  const caption = fig9.firstChild.textContent;
  const description = fig9.childElementCount > 1 ? fig9.lastChild.textContent : '';
  return {type: 'figure', src, caption, description};
}
converter
ファイルサイズが巨大なので、500pagesずつに分割する
でないとimport時にエラーが出てしまう
// script.js
/**
 * Makes a string usable inside a Scrapbox page title or hashtag: square
 * brackets (Scrapbox link syntax) become their full-width forms and newlines
 * become spaces.
 *
 * The previous replacements mapped '[' to '[' and ']' to ']', i.e. did
 * nothing; full-width brackets are presumably what was intended. They are
 * written as escapes (U+FF3B, U+FF3D) so they cannot be mistaken for, or
 * normalized back to, the ASCII characters.
 *
 * @param {string} text - Raw author or title text.
 * @returns {string} Text without ASCII square brackets or newlines.
 */
const format = text => text.replaceAll('[', '\uFF3B').replaceAll(']', '\uFF3D').replaceAll('\n', ' ');
/**
 * Converts parsed review pages into Scrapbox import JSON, split into chunks
 * because a single huge file fails on import.
 *
 * Each page becomes `{title, lines}` where the title is `author『title』` and
 * the lines are: the title, the body, a blank line, `#author` and `#id`.
 * In the body, every paragraph contributes its non-blank lines followed by a
 * blank line, with `[digits]` references rewritten to the title of the page
 * with that id when it is known; every figure contributes its caption, its
 * description (if any) and its bracketed source, each indented by one space.
 *
 * File names carry the 1-based positions of the first and last page in the
 * chunk, e.g. `import0001-0500.json`.
 *
 * @param {{id: string, author: string, title: string, body: object[]}[]} pages
 *   Parsed pages.
 * @param {number} [chunkSize=500] - Maximum pages per file; a positive integer.
 * @returns {{data: {pages: object[]}, fileName: string}[]} One entry per
 *   file; empty when `pages` is empty.
 */
function convertScrapboxJSON(pages, chunkSize = 500) {
  // id -> Scrapbox title; also used to resolve `[id]` references in paragraphs.
  const titles = Object.fromEntries(
    pages.map(({title, author, id}) => [id, `${format(author)}『${format(title)}』`])
  );

  const converted = pages.map(({author, body, id}) => ({
    title: titles[id],
    lines: [
      titles[id],
      ...body.flatMap((block) => {
        switch (block.type) {
          case 'paragraph':
            return [
              ...block.text
                .split('\n')
                .filter((line) => line.trim() !== '')
                .map((line) => line.replace(/\[(\d+)\]/g, (_, ref) => `[${titles[ref] ?? ref}]`)),
              '',
            ];
          case 'figure':
            return [
              ` ${block.caption}`,
              ...(block.description === '' ? [] : [` ${block.description}`]),
              ` [${block.src}]`,
            ];
          default:
            // Unknown block types add no lines rather than leaking `undefined`
            // (serialized as null) into the output.
            return [];
        }
      }),
      '',
      `#${format(author)}`,
      `#${id}`,
    ],
  }));

  // ceil, not floor + 1: an exact multiple of chunkSize must not produce an
  // extra, empty file.
  const chunkCount = Math.ceil(converted.length / chunkSize);
  return Array.from({length: chunkCount}, (_, i) => {
    const start = i * chunkSize;
    const end = Math.min(start + chunkSize, converted.length);
    return {
      data: {pages: converted.slice(start, end)},
      // `end` is already the 1-based position of the chunk's last page.
      fileName: `import${`${start + 1}`.padStart(4, '0')}-${`${end}`.padStart(4, '0')}.json`,
    };
  });
}
JSONをBlobに変換する
script.js function toBlob(json) {
// Serializes `json` with JSON.stringify and returns the text wrapped in a Blob
// whose MIME type is application/json.
return new Blob([JSON.stringify(json)], {type: 'application/json'});
}
downloadする
// script.js
/**
 * Starts a browser download of `blob` under the file name `fileName`.
 *
 * A hidden anchor pointing at an object URL for the blob is attached to the
 * document, clicked and removed again. The object URL is released after a
 * delay rather than immediately.
 *
 * @param {Blob} blob - Data to download.
 * @param {string} fileName - Suggested file name for the download.
 * @returns {void}
 */
function downloadBlob(blob, fileName) {
  // Create the download link.
  const url = URL.createObjectURL(blob);

  // Use a hidden <a> element to trigger the download.
  const a = document.createElement('a');
  a.href = url;
  a.download = fileName;
  a.style.display = 'none';

  // Attach before clicking: a click on a detached anchor is not honoured by
  // every browser (reported for older Firefox versions).
  document.body.appendChild(a);
  a.click();
  a.remove();

  // Clean up later: revoking right after click() can invalidate the URL
  // before the browser has started reading the blob.
  setTimeout(() => URL.revokeObjectURL(url), 10000);
}
zipにまとめる
圧縮をかけてある
// script.js
/**
 * Packs the given JSON documents into one DEFLATE-compressed zip archive.
 *
 * JSZip is loaded first when the `JSZip` global is not defined yet. Testing
 * the global instead of the script tag means a tag left behind by a failed
 * load no longer prevents a retry.
 *
 * @param {{data: object, fileName: string}[]} data - Documents to add; each
 *   `data` is serialized with toBlob and stored under `fileName`.
 * @returns {Promise<{data: Blob, fileName: string}>} The archive as a Blob
 *   and the name `imports.zip`.
 */
async function zipAll(data) {
  if (typeof JSZip === 'undefined') {
    await useJSZip();
  }
  const zip = new JSZip();
  for (const {data: json, fileName} of data) {
    zip.file(fileName, toBlob(json));
  }
  return {
    data: await zip.generateAsync({
      type: 'blob',
      compression: 'DEFLATE',
      compressionOptions: {
        level: 9, // highest compression level
      },
    }),
    fileName: 'imports.zip',
  };
}
// script.js
/**
 * Loads JSZip 3.5.0 from cdnjs by appending a <script> element to the
 * document head.
 *
 * Any element already carrying the id `scrapbox-userscript-jszip` is removed
 * first, so calling this again reloads the library.
 *
 * @returns {Promise<void>} Resolves when the script has loaded; rejects with
 *   an Error (the error event is attached as `cause`) when loading fails.
 */
async function useJSZip() {
  const id = 'scrapbox-userscript-jszip';
  // Explicit https: a protocol-relative URL would load the script over plain
  // http on http pages and does not resolve at all on file: pages.
  const src = 'https://cdnjs.cloudflare.com/ajax/libs/jszip/3.5.0/jszip.min.js';
  return new Promise((resolve, reject) => {
    document.getElementById(id)?.remove();
    const script = document.createElement('script');
    script.addEventListener('load', () => {
      resolve();
    });
    script.addEventListener('error', (e) => {
      reject(new Error(`Failed to load ${src}`, {cause: e}));
    });
    script.src = src;
    script.id = id;
    document.head.appendChild(script);
  });
}