ePub: download and insert article images before rendering

This includes the feature image as well as other inline images.
This commit is contained in:
Hippo 2021-04-03 21:29:01 +05:30
parent d5fd82e421
commit d475a5bde9
3 changed files with 193 additions and 5 deletions

View file

@ -20,6 +20,7 @@
"babel-preset-env": "^1.7.0",
"body-parser": "^1.19.0",
"bulma": "^0.8.2",
"cheerio": "^1.0.0-rc.5",
"commander": "^3.0.2",
"convict": "^5.2.0",
"css-loader": "^3.5.3",

View file

@ -9,8 +9,10 @@ const readline = require('readline')
const { markdown } = require('markdown')
const GhostAdminAPI = require('@tryghost/admin-api')
const { Canvas, Image } = require('canvas')
const slugify = require('underscore.string/slugify')
const Rembrandt = require('rembrandt')
const nodepub = require('nodepub')
const cheerio = require('cheerio')
const config = require('./config')
@ -742,6 +744,77 @@ class Seance {
allPosts.push(post)
}
// prepare for image downloads
let pics = []
let picFolder = path.join(options.outputFolder, 'seance-images')
if (!fs.existsSync(picFolder)) {
fs.mkdirSync(picFolder, { recursive: true })
}
// prepare array to collect processed posts
let processedPosts = []
for (let post of allPosts) {
// decide a post slug, for future files
let postSlug = slugify(post.title)
// get the cover pic
let featurePicTag
if (!!post.feature_image) {
let imgUrl = post.feature_image
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
let response = await (await r2.get(imgUrl).response).buffer()
let ext = post.feature_image.split('.').pop()
await await fs.promises.writeFile(path.join(picFolder, `${postSlug}.${ext}`), response, 'base64')
featurePicTag = `<img src="../images/${postSlug}.${ext}"/>`
pics.push(`${picFolder}/${postSlug}.${ext}`)
}
let c = cheerio.load(`${featurePicTag}<h1>${post.title}</h1>${post.html}`)
// hunt for other pics
// TODO: make asynchronous
let picCounter = 0
c('img').each(async function() {
// skip if it's a local image
if (c(this).attr('src').indexOf('../images') == 0) {
return
}
// first, process the url
let imgUrl = c(this).attr('src')
console.log('Downloading:', imgUrl)
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
// now decide an output name
let ext = c(this).attr('src').split('.').pop()
let imageFile = path.join(picFolder, `${postSlug}-insert-${picCounter}.${ext}`)
// note down our calculations
c(this).attr('src', `../images/${postSlug}-insert-${picCounter}.${ext}`)
pics.push(imageFile)
picCounter = picCounter + 1
// finally, download the images
let response = await (await r2.get(imgUrl).response).buffer()
await fs.promises.writeFile(imageFile, response, 'base64')
console.log('Downloaded to:', imageFile)
})
processedPosts.push({
title: post.title,
body: c.html(),
})
}
// decide metadata
let metadata = {
id: 'seance-test', // FIXME
@ -751,19 +824,19 @@ class Seance {
contents: 'Table of Contents',
genre: options.genre,
cover: options.coverImage,
images: [],
images: pics,
}
// create the ePub
let epub = nodepub.document(metadata)
for (let post of allPosts) {
console.log(`Adding: ${post.title}`)
epub.addSection(post.title, `<h1>${post.title}</h1>${post.html}`)
// add the documents
for (let post of processedPosts) {
epub.addSection(post.title, post.body)
}
// generate it!
await epub.writeEPUB(options.outputFolder, options.title) // FIXME
await epub.writeEPUB(options.outputFolder, options.title)
}
}

114
yarn.lock
View file

@ -1143,6 +1143,11 @@ body-parser@1.19.0, body-parser@^1.19.0:
raw-body "2.4.0"
type-is "~1.6.17"
boolbase@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24=
brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@ -1444,6 +1449,30 @@ chalk@^2.0.1, chalk@^2.4.1, chalk@^2.4.2:
escape-string-regexp "^1.0.5"
supports-color "^5.3.0"
cheerio-select-tmp@^0.1.0:
version "0.1.1"
resolved "https://registry.yarnpkg.com/cheerio-select-tmp/-/cheerio-select-tmp-0.1.1.tgz#55bbef02a4771710195ad736d5e346763ca4e646"
integrity sha512-YYs5JvbpU19VYJyj+F7oYrIE2BOll1/hRU7rEy/5+v9BzkSo3bK81iAeeQEMI92vRIxz677m72UmJUiVwwgjfQ==
dependencies:
css-select "^3.1.2"
css-what "^4.0.0"
domelementtype "^2.1.0"
domhandler "^4.0.0"
domutils "^2.4.4"
cheerio@^1.0.0-rc.5:
version "1.0.0-rc.5"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.5.tgz#88907e1828674e8f9fee375188b27dadd4f0fa2f"
integrity sha512-yoqps/VCaZgN4pfXtenwHROTp8NG6/Hlt4Jpz2FEP0ZJQ+ZUkVDd0hAPDNKhj3nakpfPt/CNs57yEtxD1bXQiw==
dependencies:
cheerio-select-tmp "^0.1.0"
dom-serializer "~1.2.0"
domhandler "^4.0.0"
entities "~2.1.0"
htmlparser2 "^6.0.0"
parse5 "^6.0.0"
parse5-htmlparser2-tree-adapter "^6.0.0"
chokidar@^2.1.8:
version "2.1.8"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-2.1.8.tgz#804b3a7b6a99358c3c5c61e71d8728f041cff917"
@ -1916,6 +1945,17 @@ css-loader@^3.5.3:
schema-utils "^2.6.6"
semver "^6.3.0"
css-select@^3.1.2:
version "3.1.2"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-3.1.2.tgz#d52cbdc6fee379fba97fb0d3925abbd18af2d9d8"
integrity sha512-qmss1EihSuBNWNNhHjxzxSfJoFBM/lERB/Q4EnsJQQC62R2evJDW481091oAdOr9uh46/0n4nrg0It5cAnj1RA==
dependencies:
boolbase "^1.0.0"
css-what "^4.0.0"
domhandler "^4.0.0"
domutils "^2.4.3"
nth-check "^2.0.0"
css-selector-tokenizer@^0.7.0:
version "0.7.2"
resolved "https://registry.yarnpkg.com/css-selector-tokenizer/-/css-selector-tokenizer-0.7.2.tgz#11e5e27c9a48d90284f22d45061c303d7a25ad87"
@ -1930,6 +1970,11 @@ css-unit-converter@^1.1.1:
resolved "https://registry.yarnpkg.com/css-unit-converter/-/css-unit-converter-1.1.1.tgz#d9b9281adcfd8ced935bdbaba83786897f64e996"
integrity sha1-2bkoGtz9jO2TW9urqDeGiX9k6ZY=
css-what@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-4.0.0.tgz#35e73761cab2eeb3d3661126b23d7aa0e8432233"
integrity sha512-teijzG7kwYfNVsUh2H/YN62xW3KK9YhXEgSlbxMlcyjPNvdKJqFx5lrwlJgoFP1ZHlB89iGDlo/JyshKeRhv5A==
cssesc@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-3.0.0.tgz#37741919903b868565e1c09ea747445cd18983ee"
@ -2146,11 +2191,41 @@ diffie-hellman@^5.0.0:
miller-rabin "^4.0.0"
randombytes "^2.0.0"
dom-serializer@^1.0.1, dom-serializer@~1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.2.0.tgz#3433d9136aeb3c627981daa385fc7f32d27c48f1"
integrity sha512-n6kZFH/KlCrqs/1GHMOd5i2fd/beQHuehKdWvNNffbGHTr/almdhuVvTVFb3V7fglz+nC50fFusu3lY33h12pA==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.0.0"
entities "^2.0.0"
domain-browser@^1.1.1:
version "1.2.0"
resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
integrity sha512-jnjyiM6eRyZl2H+W8Q/zLMA481hzi0eszAaBUzIVnmYVDBbnLxVNnfu1HgEBvCbL+71FrxMl3E6lpKH7Ge3OXA==
domelementtype@^2.0.1, domelementtype@^2.1.0, domelementtype@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57"
integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==
domhandler@^4.0.0, domhandler@^4.1.0:
version "4.1.0"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.1.0.tgz#c1d8d494d5ec6db22de99e46a149c2a4d23ddd43"
integrity sha512-/6/kmsGlMY4Tup/nGVutdrK9yQi4YjWVcVeoQmixpzjOUK1U7pQkvAPHBJeUxOgxF0J8f8lwCJSlCfD0V4CMGQ==
dependencies:
domelementtype "^2.2.0"
domutils@^2.4.3, domutils@^2.4.4:
version "2.5.1"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.5.1.tgz#9b8e84b5d9f788499ae77506ea832e9b4f9aa1c0"
integrity sha512-hO1XwHMGAthA/1KL7c83oip/6UWo3FlUNIuWiWKltoiQ5oCOiqths8KknvY2jpOohUoUgnwa/+Rm7UpwpSbY/Q==
dependencies:
dom-serializer "^1.0.1"
domelementtype "^2.2.0"
domhandler "^4.1.0"
dotenv@^8.2.0:
version "8.2.0"
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.2.0.tgz#97e619259ada750eea3e4ea3e26bceea5424b16a"
@ -2240,6 +2315,16 @@ entities@^1.1.1:
resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==
entities@^2.0.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
entities@~2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-2.1.0.tgz#992d3129cf7df6870b96c57858c249a120f8b8b5"
integrity sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==
errno@^0.1.3, errno@~0.1.7:
version "0.1.7"
resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.7.tgz#4684d71779ad39af177e3f007996f7c67c852618"
@ -2899,6 +2984,16 @@ html-comment-regex@^1.1.0:
resolved "https://registry.yarnpkg.com/html-comment-regex/-/html-comment-regex-1.1.2.tgz#97d4688aeb5c81886a364faa0cad1dda14d433a7"
integrity sha512-P+M65QY2JQ5Y0G9KKdlDpo0zK+/OHptU5AaBwUfAIDJZk1MYf32Frm84EcOytfJE0t5JvkAnKlmjsXDnWzCJmQ==
htmlparser2@^6.0.0:
version "6.0.1"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.0.1.tgz#422521231ef6d42e56bd411da8ba40aa36e91446"
integrity sha512-GDKPd+vk4jvSuvCbyuzx/unmXkk090Azec7LovXP8as1Hn8q9p3hbjmDGbUqqhknw0ajwit6LiiWqfiTUPMK7w==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.0.0"
domutils "^2.4.4"
entities "^2.0.0"
http-errors@1.7.2:
version "1.7.2"
resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.2.tgz#4f5029cf13239f31036e5b2e55292bcfbcc85c8f"
@ -4222,6 +4317,13 @@ npm-packlist@^1.1.6:
gauge "~2.7.3"
set-blocking "~2.0.0"
nth-check@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.0.0.tgz#1bb4f6dac70072fc313e8c9cd1417b5074c0a125"
integrity sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==
dependencies:
boolbase "^1.0.0"
null-loader@^0.1.1:
version "0.1.1"
resolved "https://registry.yarnpkg.com/null-loader/-/null-loader-0.1.1.tgz#17be9abfcd3ff0e1512f6fc4afcb1f5039378fae"
@ -4409,6 +4511,18 @@ parse-json@^4.0.0:
error-ex "^1.3.1"
json-parse-better-errors "^1.0.1"
parse5-htmlparser2-tree-adapter@^6.0.0:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz#2cdf9ad823321140370d4dbf5d3e92c7c8ddc6e6"
integrity sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==
dependencies:
parse5 "^6.0.1"
parse5@^6.0.0, parse5@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"
integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==
parseurl@~1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"