seance/seance.js
Hippo 1ddc5f4e7c Add fonts and styling for articles
Note: Due to nodepub limitations, the fonts are only named, but
not actually embedded. To embed them, you'll have to do something
roundabout like opening the ePub in the Calibre ePub Editor and
embedding them from there. Hopefully this will get resolved at
some point :/
2021-04-03 22:02:21 +05:30

893 lines
25 KiB
JavaScript

const r2 = require('r2')
const path = require('path')
const fs = require('fs')
const { once, EventEmitter } = require('events')
const { inherits } = require('util')
const getPost = require('mediumexporter').getPost
const { createClient } = require('webdav')
const readline = require('readline')
const { markdown } = require('markdown')
const GhostAdminAPI = require('@tryghost/admin-api')
const { Canvas, Image } = require('canvas')
const slugify = require('underscore.string/slugify')
const Rembrandt = require('rembrandt')
const nodepub = require('nodepub')
const cheerio = require('cheerio')
const config = require('./config')
class Seance {
constructor(...args) {
this.MEDIUM_IMG_CDN = 'https://miro.medium.com/fit/c/'
try {
this.ghostAdmin = new GhostAdminAPI({
url: config.ghost.url,
version: config.ghost.version,
key: config.ghost.admin_key,
})
} catch(err) {
console.error('Your Ghost isn\'t configured. Please run `seance setup` to fix this!')
}
/**
* function [fetchFromMedium]
* @returns [string] status
*/
}
async fetchFromMedium (mediumUrl, options = {
json: null,
}) {
this.emit('update', {
status: 'starting',
message: `Fetching: ${mediumUrl}`,
loglevel: 'info'
})
var output = path.join(process.env.PWD, 'content')
var json
if (!options.json) {
json = await this.fetchMediumJSON(mediumUrl)
} else {
json = options.json
}
// use mediumexporter's getPost function to fetch a Medium post
const post = await getPost(mediumUrl, {
returnObject: true,
output: output,
postJSON: json
}).catch((err) => {
return {
error: err,
}
})
// set output folder path
// this is based on what mediumexporter chooses as the output folder
var outputFolder = path.join(output, post.slug)
this.emit('update', {
status: 'normal',
message: `Saving to: ${outputFolder}`,
loglevel: 'info'
})
if (!fs.existsSync(path.join(outputFolder, post.slug))) {
fs.mkdirSync(outputFolder, { recursive: true })
}
// mediumexporter writes a plain .md file if the post has no media
// if that is the case, we should create the subfolder manually
// and copy the data there.
if (fs.existsSync(path.join(output, post.slug + '.md'))) {
fs.renameSync(
path.join(output, post.slug + '.md'),
path.join(outputFolder, 'index.md')
)
}
// generate metadata
const metadata = JSON.stringify({
title: post.title,
subtitle: post.subtitle,
author: post.author || "",
authors: post.authors || [],
date: new Date(post.date),
tags: post.tags,
url: post.url,
slug: post.slug,
images: post.images,
featuredImage: post.featuredImage,
})
// write metadata to output folder
await fs.promises.writeFile(path.join(outputFolder, 'metadata.json'), metadata)
return post
};
/**
* function [pushToGhost]
* @description
* Pre-processes and uploads the given article to Ghost
*
* @param {Boolean} options.noUpload Skip uploading of images
* @param {Boolean} options.noPush Skip pushing to Ghost; just generate the file
* @param {Boolean} options.dryRun Combination of noUpload and noPush
* @returns [object] object containing details of the uploaded Ghost post
*/
async pushToGhost (postSlug, options={}) {
this.emit('update', {
status: 'starting',
message: 'Starting upload: ' + postSlug,
loglevel: 'info'
})
if (!!options.dryRun) {
options.noUpload = true
options.noPush = true
}
console.log('noUpload', options.noUpload)
// Decide working path
var postFolder = path.resolve('content/' + postSlug)
// Verify file exists
if (!fs.existsSync(postFolder)) {
this.emit('error', {
message: 'Could not find post folder! Is it fetched?',
})
return false
}
// Decide file
const postContent = path.join(postFolder, 'index.md')
const postOutput = path.join(postFolder, 'ghost.md')
// Verify post exists
if (!fs.existsSync(postContent)) {
this.emit('error', {
message: "Could not find 'index.md' in " + postSlug + "! Is it fetched?",
})
return false
}
// Decide WebDAV upload path
var current_date = new Date()
const uploadPath = path.join(
current_date.getUTCFullYear().toString(),
(current_date.getUTCMonth() + 1).toString(),
postSlug
)
// Path where WebDAV files will be placed (eg. https://example.com:2078)
const davPath = path.join(config.webdav.path_prefix, uploadPath)
// Public path to upload those files (eg. https://media.example.com/uploads)
// We'll do it directly since path.join mangles the protocol
const uploadedPath = config.webdav.uploaded_path_prefix + '/' + uploadPath
// load metadata file
this.emit('update', {
status: 'starting',
message: 'Loading metadata',
loglevel: 'debug'
})
var postMetaFile = path.join(postFolder, 'metadata.json')
let postMeta = await JSON.parse(await fs.promises.readFile(postMetaFile))
// Process lines
const readInterface = readline.createInterface({
input: fs.createReadStream(postContent),
output: process.stdout,
terminal: false
})
const outStream = fs.createWriteStream(postOutput, { encoding: 'utf-8' })
// We'll calculate these later since Medium messes it up sometimes
let title = null
let subtitle = null
let reImage = new RegExp('^!\\[(.*)\\]\\((\\S+?)\\)(.*)')
let reTitle = new RegExp('^#\ (.*)')
let reSubtitle = new RegExp('^#+\ (.*)$')
// Note down uploaded images
var uploadedImages = []
this.emit('update', {
status: 'progress',
progress: null, // we don't know the percentage
message: 'Parsing post',
loglevel: 'info'
})
for await (const line of readInterface) {
// Line to output
// Default is to make it same as input
var newLine = line
// Skip the header (and preceding blank lines)
if (!title) {
// blanks
if (!line) continue
// starting with a # (must be the title)
let match = await reTitle.exec(line)
if (match) {
title = match[1]
continue // no need to add line; it'll come automatically
}
} else if (!subtitle) {
// check if it's a repeat of the title (Medium does that)
if (line.endsWith(title)) continue
// otherwise set the subtitle if it doesn't exist
// or if it's a repeat of the title (Medium does that too)
if (!subtitle && postMeta.subtitle == postMeta.title) {
let match = await reSubtitle.exec(line)
if (match) {
subtitle = match[1]
postMeta.subtitle = match[1]
}
}
}
// check for images
var m = await reImage.exec(line)
if (m) {
// Get image name
var imageAlt = m[1]
var imageName = m[2].replace('*', '')
var imagePath = path.join(postFolder, 'images', imageName)
if (!fs.existsSync(imagePath)) {
console.warn('Skipping missing image: ' + imageName)
} else {
// check for separator image
var isScissors = await this.checkScissors(imagePath)
if (isScissors) {
newLine = '\n---\n'
} else {
// upload pic to server
console.debug(`Adding to upload queue: ${imageName}`)
uploadedImages.push(imageName)
// Let's wait for the upload, just to avoid conflicts
if (!options.noUpload) {
await this.uploadDav(davPath, imagePath)
}
newLine = '![' + imageAlt + '](' + uploadedPath + '/' + imageName + ')'
}
}
}
outStream.write(newLine + '\n')
}
// Upload feature_image, if required
var featuredImagePath
if (!!postMeta.featuredImage) {
var imageName = postMeta.featuredImage.replace('*', '')
// if the image is listed in postMeta.images, it would have
// already been uploaded
if (uploadedImages.indexOf(imageName) != -1) {
this.emit('update', {
status: 'progress',
progress: 95, // we don't know the percentage
message: `Skipping feature image ${imageName}: already listed for upload`,
loglevel: 'info'
})
} else {
var imagePath = path.join(postFolder, 'images', imageName)
// We can only upload if the file exists!
if (!fs.existsSync(imagePath)) {
this.emit('update', {
status: 'progress',
progress: 95, // we don't know the percentage
message: `Skipping feature image "${imageName}": file not found`,
loglevel: 'warning'
})
} else {
this.emit('update', {
status: 'progress',
progress: 95, // we don't know the percentage
message: `Uploading feature image: ${imageName}`,
loglevel: 'info'
})
if (!options.noUpload) {
this.uploadDav(davPath, imagePath)
}
featuredImagePath = uploadedPath + '/' + imageName
}
}
}
// calculate users
let users = []
postMeta.authors.forEach((user) => {
users.push({slug: user.username})
})
// This will happen once all the line reading is finished
// Uploads will continue in paralell though
this.emit('update', {
status: 'progress',
progress: 100, // we don't know the percentage
message: 'Uploading to Ghost',
loglevel: 'info'
})
if (!options.noPush) {
let res = await this.ghostAdmin.posts.add({
title: postMeta.title,
custom_excerpt: postMeta.subtitle || null,
tags: postMeta.tags,
authors: users,
html: markdown.toHTML(await fs.promises.readFile(postOutput, 'utf-8')),
feature_image: featuredImagePath
}, {source: 'html'})
// Check if user was added
if (res.primary_author.id == 1) {
this.emit('notification', {
message: `WARNING: The admin editor, "${res.primary_author.name}", is set as author for this post. If this is incorrect, there was some problem matching usernames. Please check and set it manually.`,
})
}
this.emit('update', {
status: 'progress',
progress: 100, // we don't know the percentage
message: 'Post conveyed successfully',
loglevel: 'info'
})
return {
slug: res.slug,
id: res.id,
uuid: res.uuid,
preview_url: res.url,
primary_author: res.primary_author,
title: res.title,
subtitle: res.custom_excerpt,
status: res.status,
}
} else {
// just return without pushing to Ghost
return {
slug: postSlug,
id: 0,
uuid: 0,
preview_url: null,
primary_author: {},
title: postMeta.title,
subtitle: postMeta.subtitle,
status: 'none',
}
}
}
/**
* function [mediumToGhost]
* @returns [string] status
*/
mediumToGhost (mediumUrl) {
console.info('Copying: ' + mediumUrl);
}
async fetchMediumJSON(mediumUrl) {
var json
var text
if (mediumUrl.match(/^http/i)) {
// remove the anchors at the end
mediumUrl = mediumUrl.replace(/#.*$/, '')
// intelligently add ?json attribute
if (mediumUrl.indexOf('format=json') == -1) {
if (mediumUrl.indexOf('?') == -1) {
mediumUrl = `${mediumUrl}?format=json`
} else {
mediumUrl = `${mediumUrl}&format=json`
}
}
// let's get it!
const response = await fetch(mediumUrl)
text = await response.text()
} else if (fs.existsSync(mediumUrl)) {
text = (await fs.promises.readFile(mediumUrl)).toString()
} else {
throw { error: 'URL must be a Medium URL or existing JSON file' }
}
try {
json = await JSON.parse(text.substr(text.indexOf('{')))
} catch(err) {
throw { error: 'You JSON seems to be malformed' }
}
return json;
}
/**
* function [checkScissors]
* @returns [boolean] matchStatus
*/
async checkScissors (imagePath) {
// Decide "separator" image
// If set, images matching this will be ignored and replaced
// with a horizontal-rule ("---" in markdown) instead.
let scissors = config.scissors
// if scissors not set, return false
// (it's never a scissors since it never matches)
if (!scissors) {
this.emit('update', {
status: 'normal',
message: '[scissors] No scissors set, so rejecting all images',
loglevel: 'warning'
})
return false
} else {
/* First, check that the image has finished loading
* (we don't use this, because Rembrandt loads it again
* on its own, which is messy but what to do ¯\_(ツ)_/¯
*/
try {
let ctx = new Canvas().getContext('2d')
let img = new Image()
img.src = imagePath
ctx.drawImage(img, 0, 0, img.width, img.height)
} catch (err) {
this.emit('update', {
status: 'normal',
message: `[scissors] Skipping scissors check:${err.message}`,
loglevel: 'warning'
})
return false
}
// Check if given image matches the scissors
try {
let isScissors = new Rembrandt({
imageA: scissors,
imageB: imagePath,
thresholdType: Rembrandt.THRESHOLD_PERCENT,
maxThreshold: 0.1
})
let result = await isScissors.compare()
return result.passed
} catch (err) {
this.emit('update', {
status: 'normal',
message: `[scissors] Skipping scissors check:${err.message}`,
loglevel: 'warning'
})
return false
}
}
}
/**
* function [createUser]
* @returns [object] ghost data json
*/
async generateUserData (mediumUsername, email) {
this.emit('update', {
status: 'starting',
message: `Creating: @${mediumUsername} (email: ${email})`,
loglevel: 'debug'
})
const mediumUrl = `https://medium.com/@${mediumUsername}/?format=json`;
const json = await this.fetchMediumJSON(mediumUrl);
if (!json.success) {
this.emit('error', {
message: `Error: ${json.error}`,
})
return false
}
this.emit('update', {
status: 'normal',
message: `Name: ${json.payload.user.name}`,
loglevel: 'debug'
})
this.emit('update', {
status: 'normal',
message: `Bio: ${json.payload.user.bio}`,
loglevel: 'debug'
})
// Download and upload image
let imageId = json.payload.user.imageId
this.emit('update', {
status: 'normal',
message: `Profile pic: ${imageId}`,
loglevel: 'debug'
})
let imagePath = this.MEDIUM_IMG_CDN + '256/256/' + imageId
let filetype = imageId.split('.')[imageId.split('.').length - 1]
let fileName = `${mediumUsername}.${filetype}`
let filePath = path.join(process.env.PWD, fileName)
this.emit('update', {
status: 'normal',
message: `Fetching profile pic: ${imagePath}`,
loglevel: 'info'
})
const response = await (await r2.get(imagePath).response).buffer()
await await fs.promises.writeFile(filePath, response, 'base64')
this.emit('update', {
status: 'normal',
message: `Uploading profile pic: ${imagePath}`,
loglevel: 'info'
})
await this.uploadDav(path.join(config.webdav.path_prefix,'avatars'),
filePath)
// Generate Ghost JSON
const ghostData = {
data: {
users: [
{
id: 1,
slug: json.payload.user.username,
bio: json.payload.user.bio,
email: email,
name: json.payload.user.name,
profile_image: config.webdav.uploaded_path_prefix + '/avatars/' + fileName
}
]
},
meta: {
exported_on: new Date,
version: '2.14.0'
}
}
return(JSON.stringify(ghostData))
};
async createDirIfNotExist (client, folder) {
// recursively create subfolders if they don't exist.
//safety: don't touch directories outside WEBDAV_PATH_PREFIX
if (!folder.startsWith(config.webdav.path_prefix)) {
throw new Error(`Cannot create directories outside ${config.webdav.path_prefix}`)
}
// check the folder
await client.stat(folder)
.catch(async (err) => {
if (!err.response) {
// no response! Maybe a network error or something :P
console.error(`[dav-upload:folder] Error creating folder "${folder}"`)
console.error(`[dav-upload:folder] ${err.toJSON().message}`)
console.error('[dav-upload:folder] Please check your Internet connection and try again')
return false
} else if (err.response.status == 404) {
// it's a 404, so we'll create the directory
this.emit('update', {
status: 'normal',
message: `Noting missing subdirectory: ${folder}`,
loglevel: 'debug'
})
// first, create the parent directory (if required)
if (!await this.createDirIfNotExist(client, path.dirname(folder))) {
// if not created, we fail too :-/
return false
}
this.emit('update', {
status: 'normal',
message: `Creating missing subdirectory: ${folder}`,
loglevel: 'debug'
})
// then, create the current directory
await client.createDirectory(folder)
.catch(async (err) => {
if (err.response.status == 405) { // Method Not Allowed
// Maybe the directory's already been created in the meantime?
await client.stat(folder)
.catch((err2) => {
// Bad guess. Panic (and raise the original error)
this.emit('update', {
status: 'error',
message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`,
loglevel: 'error'
})
throw err
})
} else {
// what's this? Panic!
this.emit('update', {
status: 'error',
message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`,
loglevel: 'error'
})
throw err
}
})
} else {
// it's not a 404; we don't know how to handle this. Panic!
this.emit('update', {
status: 'error',
message: 'An unknown error occured. Help!',
loglevel: 'error'
})
console.error(err.toJSON())
throw err
}
})
return true
}
/**
* function [uploadDav]
* @returns [string] status
*/
async uploadDav (dirPath, filePath) {
// connect to webdav
const client = createClient(
config.webdav.server_url,
{
username: config.webdav.username,
password: config.webdav.password,
digest: config.webdav.use_digest
})
// create directory if not exists
console.debug(`[dav-upload] Loading ${dirPath}`)
if (!await this.createDirIfNotExist(client, dirPath)) {
console.error(`[dav-upload] Could not upload ${path.basename(filePath)} :(`)
return false
}
// upload a file
console.debug('Uploading file')
const outStream = client.createWriteStream(
path.join(dirPath, path.basename(filePath))
)
outStream.on('finish', () => console.debug('Uploaded successfully.'))
const inStream = fs.createReadStream(filePath)
.pipe(outStream)
return true
}
/**
* function [fetchToEpub]
* @description fetches posts from Ghost and packs them into an epub
* @options.id unique ID for the generated epub
* @options.title title of the generated epub
* @options.author author of the generated epub
* @options.language language of the book
* @genre genre of the book
* @cover cover image to use
* @returns [string] status
*/
async fetchToEpub (postSlugs, options = {}) {
if (!options.title) options.title = 'Seance Collection'
if (!options.author) options.author = 'Seance'
if (!options.language) options.language = 'en'
if (!options.genre) options.genre = 'Unknown'
if (!options.coverImage) options.coverImage = 'random-cover.jpg'
if (!options.outputFolder) options.outputFolder = '.'
console.log(`Fetching: ${postSlugs}`)
let allPosts = []
// first, fetch all the posts
for (let slug of postSlugs) {
console.log(`Fetching: ${slug}`)
let post = await this.ghostAdmin.posts.read({slug: slug}, {formats: ['html']})
allPosts.push(post)
}
// prepare for image downloads, starting with the scissors!
let pics = [path.join(__dirname, 'scissors.png')]
let picFolder = path.join(options.outputFolder, 'seance-images')
if (!fs.existsSync(picFolder)) {
fs.mkdirSync(picFolder, { recursive: true })
}
// prepare array to collect processed posts
let processedPosts = []
for (let post of allPosts) {
// decide a post slug, for future files
let postSlug = slugify(post.title)
// get the cover pic
let featurePicTag
if (!!post.feature_image) {
let imgUrl = post.feature_image
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
let response = await (await r2.get(imgUrl).response).buffer()
let ext = post.feature_image.split('.').pop()
await await fs.promises.writeFile(path.join(picFolder, `${postSlug}.${ext}`), response, 'base64')
featurePicTag = `<img src="../images/${postSlug}.${ext}"/>`
pics.push(`${picFolder}/${postSlug}.${ext}`)
}
let c = cheerio.load(`${featurePicTag}<h1>${post.title}</h1>${post.html}`)
// hunt for other pics
// TODO: make asynchronous
let picCounter = 0
c('img').each(async function() {
// skip if it's a local image
if (c(this).attr('src').indexOf('../images') == 0) {
return
}
// first, process the url
let imgUrl = c(this).attr('src')
console.log('Downloading:', imgUrl)
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
// now decide an output name
let ext = c(this).attr('src').split('.').pop()
let imageFile = path.join(picFolder, `${postSlug}-insert-${picCounter}.${ext}`)
// note down our calculations
c(this).attr('src', `../images/${postSlug}-insert-${picCounter}.${ext}`)
pics.push(imageFile)
picCounter = picCounter + 1
// finally, download the images
let response = await (await r2.get(imgUrl).response).buffer()
await fs.promises.writeFile(imageFile, response, 'base64')
console.log('Downloaded to:', imageFile)
})
processedPosts.push({
title: post.title,
body: c.html(),
})
}
// decide metadata
let metadata = {
id: 'seance-test', // FIXME
title: options.title,
author: options.author,
language: options.language,
contents: 'Table of Contents',
genre: options.genre,
cover: options.coverImage,
images: pics,
}
// create the ePub
let epub = nodepub.document(metadata)
// add the documents
for (let post of processedPosts) {
epub.addSection(post.title, post.body)
}
// add the styles
epub.addCSS(`
img {
width: 100%;
height: auto;
}
h1 {
font-family: "Abhaya Libre Extrabold";
text-transform: lowercase;
text-align: center;
font-size: 3.6em;
line-height: 1em;
margin-bottom: 0;
}
h1 + h2 {
font-family: "Open Sans Light";
font-variant: small-caps;
text-align: center;
font-size: 1.2em;
line-height: 1em;
margin-bottom: 2.4em;
}
p {
font-family: "Crimson Text Regular";
font-size: 1em;
line-height: 1.2em;
}
hr {
display: block;
border: 0px;
height: 1em;
background-image:url('../images/scissors.png');
background-size: contain;
background-repeat: no-repeat;
background-position: 50%;
margin-top: 1.5em;
margin-bottom: 1.5em;
}
`)
// generate it!
await epub.writeEPUB(options.outputFolder, options.title)
}
}
// Give Seance instances EventEmitter's methods (emit, on, ...).
// util.inherits only links the prototypes together; it does not run the
// EventEmitter constructor.
inherits(Seance, EventEmitter)

module.exports = { Seance: Seance }