// seance/seance.js

const r2 = require('r2')
const path = require('path')
const fs = require('fs')
const { once, EventEmitter } = require('events')
const { inherits } = require('util')
const getPost = require('mediumexporter').getPost
const { createClient } = require('webdav')
const readline = require('readline')
const { markdown } = require('markdown')
const GhostAdminAPI = require('@tryghost/admin-api')
const { Canvas, Image } = require('canvas')
const slugify = require('underscore.string/slugify')
const Rembrandt = require('rembrandt')
const nodepub = require('nodepub')
const cheerio = require('cheerio')
const config = require('./config')
class Seance {
constructor(...args) {
this.MEDIUM_IMG_CDN = 'https://miro.medium.com/fit/c/'
try {
this.ghostAdmin = new GhostAdminAPI({
url: config.ghost.url,
version: config.ghost.version,
key: config.ghost.admin_key,
})
} catch(err) {
console.error('Your Ghost isn\'t configured. Please run `seance setup` to fix this!')
}
}
/**
* function [fetchFromMedium]
* @description fetches a Medium post (via mediumexporter) into content/<slug>/
* @returns [object] the fetched post object
*/
async fetchFromMedium (mediumUrl, options = {
json: null,
}) {
this.emit('update', {
status: 'starting',
message: `Fetching: ${mediumUrl}`,
loglevel: 'info'
})
var output = path.join(process.env.PWD, 'content')
var json
if (!options.json) {
json = await this.fetchMediumJSON(mediumUrl)
} else {
json = options.json
}
// use mediumexporter's getPost function to fetch a Medium post
const post = await getPost(mediumUrl, {
returnObject: true,
output: output,
postJSON: json
}).catch((err) => {
return {
error: err,
}
})
// bail out early if mediumexporter failed, rather than writing a broken folder
if (post.error) {
this.emit('error', {
message: `Could not fetch post: ${post.error}`,
})
return false
}
// set output folder path
// this is based on what mediumexporter chooses as the output folder
var outputFolder = path.join(output, post.slug)
this.emit('update', {
status: 'normal',
message: `Saving to: ${outputFolder}`,
loglevel: 'info'
})
if (!fs.existsSync(outputFolder)) {
fs.mkdirSync(outputFolder, { recursive: true })
}
// mediumexporter writes a plain .md file if the post has no media
// if that is the case, we should create the subfolder manually
// and copy the data there.
if (fs.existsSync(path.join(output, post.slug + '.md'))) {
fs.renameSync(
path.join(output, post.slug + '.md'),
path.join(outputFolder, 'index.md')
)
}
// generate metadata
const metadata = JSON.stringify({
title: post.title,
subtitle: post.subtitle,
author: post.author || "",
authors: post.authors || [],
date: new Date(post.date),
tags: post.tags,
url: post.url,
slug: post.slug,
images: post.images,
featuredImage: post.featuredImage,
})
// write metadata to output folder
await fs.promises.writeFile(path.join(outputFolder, 'metadata.json'), metadata)
return post
};
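// A minimal usage sketch for fetchFromMedium (run inside an async function;
// the Medium URL below is a made-up placeholder):
//
//   const seance = new Seance()
//   seance.on('update', (e) => console.log(`[${e.loglevel}] ${e.message}`))
//   const post = await seance.fetchFromMedium('https://medium.com/@someone/example-post-123abc')
//   console.log(post.slug) // folder name used under ./content/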
/**
* function [pushToGhost]
* @description
* Pre-processes and uploads the given article to Ghost
*
* @param {Boolean} options.noUpload Skip uploading of images
* @param {Boolean} options.noPush Skip pushing to Ghost; just generate the file
* @param {Boolean} options.dryRun Combination of noUpload and noPush
* @returns [object] object containing details of the uploaded Ghost post
*/
async pushToGhost (postSlug, options={}) {
this.emit('update', {
status: 'starting',
message: 'Starting upload: ' + postSlug,
loglevel: 'info'
})
if (options.dryRun) {
options.noUpload = true
options.noPush = true
}
console.debug('noUpload:', options.noUpload)
// Decide working path
var postFolder = path.resolve('content/' + postSlug)
// Verify file exists
if (!fs.existsSync(postFolder)) {
this.emit('error', {
message: 'Could not find post folder! Is it fetched?',
})
return false
}
// Decide file
const postContent = path.join(postFolder, 'index.md')
const postOutput = path.join(postFolder, 'ghost.md')
// Verify post exists
if (!fs.existsSync(postContent)) {
this.emit('error', {
message: "Could not find 'index.md' in " + postSlug + "! Is it fetched?",
})
return false
}
// Decide WebDAV upload path
var current_date = new Date()
const uploadPath = path.join(
current_date.getUTCFullYear().toString(),
(current_date.getUTCMonth() + 1).toString(),
postSlug
)
// Path where WebDAV files will be placed (eg. https://example.com:2078)
const davPath = path.join(config.webdav.path_prefix, uploadPath)
// Public path to upload those files (eg. https://media.example.com/uploads)
// We'll do it directly since path.join mangles the protocol
const uploadedPath = config.webdav.uploaded_path_prefix + '/' + uploadPath
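// For illustration only (assumed values, not the real config): with
// path_prefix = '/webdav/uploads' and uploaded_path_prefix =
// 'https://media.example.com/uploads', a post with slug 'example-post'
// pushed in Dec 2019 gets davPath = '/webdav/uploads/2019/12/example-post'
// and uploadedPath = 'https://media.example.com/uploads/2019/12/example-post'.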
// load metadata file
this.emit('update', {
status: 'starting',
message: 'Loading metadata',
loglevel: 'debug'
})
var postMetaFile = path.join(postFolder, 'metadata.json')
let postMeta = JSON.parse(await fs.promises.readFile(postMetaFile))
// Process lines
const readInterface = readline.createInterface({
input: fs.createReadStream(postContent),
output: process.stdout,
terminal: false
})
const outStream = fs.createWriteStream(postOutput, { encoding: 'utf-8' })
// We'll calculate these later since Medium messes it up sometimes
let title = null
let subtitle = null
let reImage = /^!\[(.*)\]\((\S+?)\)(.*)/
let reTitle = /^# (.*)/
let reSubtitle = /^#+ (.*)$/
// Note down uploaded images
var uploadedImages = []
this.emit('update', {
status: 'progress',
progress: null, // we don't know the percentage
message: 'Parsing post',
loglevel: 'info'
})
for await (const line of readInterface) {
// Line to output
// Default is to make it same as input
var newLine = line
// Skip the header (and preceding blank lines)
if (!title) {
// blanks
if (!line) continue
// starting with a # (must be the title)
let match = await reTitle.exec(line)
if (match) {
title = match[1]
continue // no need to add line; it'll come automatically
}
} else if (!subtitle) {
// check if it's a repeat of the title (Medium does that)
if (line.endsWith(title)) continue
// otherwise, take this line as the subtitle, but only when the metadata
// subtitle is just a repeat of the title (Medium does that too)
if (postMeta.subtitle == postMeta.title) {
let match = await reSubtitle.exec(line)
if (match) {
subtitle = match[1]
postMeta.subtitle = match[1]
}
}
}
// check for images
var m = await reImage.exec(line)
if (m) {
// Get image name
var imageAlt = m[1]
var imageName = m[2].replace('*', '')
var imagePath = path.join(postFolder, 'images', imageName)
if (!fs.existsSync(imagePath)) {
console.warn('Skipping missing image: ' + imageName)
} else {
// check for separator image
var isScissors = await this.checkScissors(imagePath)
if (isScissors) {
newLine = '\n---\n'
} else {
// upload pic to server
console.debug(`Adding to upload queue: ${imageName}`)
uploadedImages.push(imageName)
// Let's wait for the upload, just to avoid conflicts
if (!options.noUpload) {
await this.uploadDav(davPath, imagePath)
}
newLine = '![' + imageAlt + '](' + uploadedPath + '/' + imageName + ')'
}
}
}
outStream.write(newLine + '\n')
}
// close the output file and wait for it to flush, since ghost.md is read
// back below when pushing to Ghost
outStream.end()
await once(outStream, 'finish')
// Upload feature_image, if required
var featuredImagePath
if (!!postMeta.featuredImage) {
var imageName = postMeta.featuredImage.replace('*', '')
// if the image is listed in postMeta.images, it would have
// already been uploaded
if (uploadedImages.indexOf(imageName) != -1) {
this.emit('update', {
status: 'progress',
progress: 95,
message: `Skipping feature image ${imageName}: already listed for upload`,
loglevel: 'info'
})
} else {
var imagePath = path.join(postFolder, 'images', imageName)
// We can only upload if the file exists!
if (!fs.existsSync(imagePath)) {
this.emit('update', {
status: 'progress',
progress: 95,
message: `Skipping feature image "${imageName}": file not found`,
loglevel: 'warning'
})
} else {
this.emit('update', {
status: 'progress',
progress: 95,
message: `Uploading feature image: ${imageName}`,
loglevel: 'info'
})
if (!options.noUpload) {
this.uploadDav(davPath, imagePath)
}
featuredImagePath = uploadedPath + '/' + imageName
}
}
}
// calculate users
let users = []
postMeta.authors.forEach((user) => {
users.push({slug: user.username})
})
// This will happen once all the line reading is finished
// Uploads will continue in parallel though
this.emit('update', {
status: 'progress',
progress: 100,
message: 'Uploading to Ghost',
loglevel: 'info'
})
if (!options.noPush) {
let res = await this.ghostAdmin.posts.add({
title: postMeta.title,
custom_excerpt: postMeta.subtitle || null,
tags: postMeta.tags,
authors: users,
html: markdown.toHTML(await fs.promises.readFile(postOutput, 'utf-8')),
feature_image: featuredImagePath
}, {source: 'html'})
// Check whether author matching fell back to the site owner (user id 1)
if (res.primary_author.id == 1) {
this.emit('notification', {
message: `WARNING: The admin editor, "${res.primary_author.name}", is set as author for this post. If this is incorrect, there was some problem matching usernames. Please check and set it manually.`,
})
}
this.emit('update', {
status: 'progress',
progress: 100,
message: 'Post conveyed successfully',
loglevel: 'info'
})
return {
slug: res.slug,
id: res.id,
uuid: res.uuid,
preview_url: res.url,
primary_author: res.primary_author,
title: res.title,
subtitle: res.custom_excerpt,
status: res.status,
}
} else {
// just return without pushing to Ghost
return {
slug: postSlug,
id: 0,
uuid: 0,
preview_url: null,
primary_author: {},
title: postMeta.title,
subtitle: postMeta.subtitle,
status: 'none',
}
}
}
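// A hedged sketch of how pushToGhost composes with fetchFromMedium (slug and
// option values are illustrative; reuse the `seance` instance from the sketch above):
//
//   // generate content/<slug>/ghost.md and upload images, but skip the Ghost API
//   await seance.pushToGhost('example-post', { noPush: true })
//   // or a full dry run: no image uploads and no Ghost post
//   const result = await seance.pushToGhost('example-post', { dryRun: true })
//   console.log(result.status) // 'none' when nothing was pushed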
/**
* function [mediumToGhost]
* @description stub for a combined fetch-and-push flow (currently only logs the URL)
* @returns [string] status
*/
mediumToGhost (mediumUrl) {
console.info('Copying: ' + mediumUrl);
}
async fetchMediumJSON(mediumUrl) {
var json
var text
if (mediumUrl.match(/^http/i)) {
// remove the anchors at the end
mediumUrl = mediumUrl.replace(/#.*$/, '')
// intelligently add ?json attribute
if (mediumUrl.indexOf('format=json') == -1) {
if (mediumUrl.indexOf('?') == -1) {
mediumUrl = `${mediumUrl}?format=json`
} else {
mediumUrl = `${mediumUrl}&format=json`
}
}
// let's get it! (use r2, which is already required above)
const response = await r2.get(mediumUrl).response
text = await response.text()
} else if (fs.existsSync(mediumUrl)) {
text = (await fs.promises.readFile(mediumUrl)).toString()
} else {
throw { error: 'URL must be a Medium URL or existing JSON file' }
}
try {
json = JSON.parse(text.substr(text.indexOf('{')))
} catch(err) {
throw { error: 'Your JSON seems to be malformed' }
}
return json;
}
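// fetchMediumJSON accepts either a live Medium URL (format=json is added
// automatically) or a path to a previously saved JSON file; both examples
// below use made-up locations:
//
//   const json = await seance.fetchMediumJSON('https://medium.com/@someone/example-post-123abc')
//   const cached = await seance.fetchMediumJSON('./content/example-post/post.json')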
/**
* function [checkScissors]
* @returns [boolean] matchStatus
*/
async checkScissors (imagePath) {
// Decide "separator" image
// If set, images matching this will be ignored and replaced
// with a horizontal-rule ("---" in markdown) instead.
let scissors = config.scissors
// if scissors not set, return false
// (it's never a scissors since it never matches)
if (!scissors) {
this.emit('update', {
status: 'normal',
message: '[scissors] No scissors image configured, so no image will be treated as a separator',
loglevel: 'warning'
})
return false
} else {
/* First, sanity-check that the image can be loaded at all.
* (The loaded canvas isn't used further, because Rembrandt loads the
* image again on its own, which is wasteful, but what to do ¯\_(ツ)_/¯)
*/
try {
let ctx = new Canvas().getContext('2d')
let img = new Image()
img.src = imagePath
ctx.drawImage(img, 0, 0, img.width, img.height)
} catch (err) {
this.emit('update', {
status: 'normal',
message: `[scissors] Skipping scissors check: ${err.message}`,
loglevel: 'warning'
})
return false
}
// Check if given image matches the scissors
try {
let isScissors = new Rembrandt({
imageA: scissors,
imageB: imagePath,
thresholdType: Rembrandt.THRESHOLD_PERCENT,
maxThreshold: 0.1
})
let result = await isScissors.compare()
return result.passed
} catch (err) {
this.emit('update', {
status: 'normal',
message: `[scissors] Skipping scissors check: ${err.message}`,
loglevel: 'warning'
})
return false
}
}
}
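// For reference, config.scissors is expected to point at the "separator"
// image to compare against (anything Rembrandt accepts as imageA); the value
// below is only an assumed example, not something shipped with Seance:
//
//   // config.js (excerpt)
//   // scissors: '/home/user/seance/assets/separator.png',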
/**
* function [generateUserData]
* @returns [string] Ghost-importable user data as a JSON string
*/
async generateUserData (mediumUsername, email) {
this.emit('update', {
status: 'starting',
message: `Creating: @${mediumUsername} (email: ${email})`,
loglevel: 'debug'
})
const mediumUrl = `https://medium.com/@${mediumUsername}/?format=json`;
const json = await this.fetchMediumJSON(mediumUrl);
if (!json.success) {
this.emit('error', {
message: `Error: ${json.error}`,
})
return false
}
this.emit('update', {
status: 'normal',
message: `Name: ${json.payload.user.name}`,
loglevel: 'debug'
})
this.emit('update', {
status: 'normal',
message: `Bio: ${json.payload.user.bio}`,
loglevel: 'debug'
})
// Download and upload image
let imageId = json.payload.user.imageId
this.emit('update', {
status: 'normal',
message: `Profile pic: ${imageId}`,
loglevel: 'debug'
})
let imagePath = this.MEDIUM_IMG_CDN + '256/256/' + imageId
let filetype = imageId.split('.').pop()
let fileName = `${mediumUsername}.${filetype}`
let filePath = path.join(process.env.PWD, fileName)
this.emit('update', {
status: 'normal',
message: `Fetching profile pic: ${imagePath}`,
loglevel: 'info'
})
const response = await (await r2.get(imagePath).response).buffer()
await fs.promises.writeFile(filePath, response, 'base64')
this.emit('update', {
status: 'normal',
message: `Uploading profile pic: ${imagePath}`,
loglevel: 'info'
})
await this.uploadDav(path.join(config.webdav.path_prefix,'avatars'),
filePath)
// Generate Ghost JSON
const ghostData = {
data: {
users: [
{
id: 1,
slug: json.payload.user.username,
bio: json.payload.user.bio,
email: email,
name: json.payload.user.name,
profile_image: config.webdav.uploaded_path_prefix + '/avatars/' + fileName
}
]
},
meta: {
exported_on: new Date,
version: '2.14.0'
}
}
return(JSON.stringify(ghostData))
};
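// Sketch: generating a Ghost-importable user JSON blob (the username, email
// and output file name are placeholders):
//
//   const userJson = await seance.generateUserData('someMediumUser', 'author@example.com')
//   if (userJson) await fs.promises.writeFile('ghost-user.json', userJson)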
async createDirIfNotExist (client, folder) {
// recursively create subfolders if they don't exist.
// safety: don't touch directories outside config.webdav.path_prefix
if (!folder.startsWith(config.webdav.path_prefix)) {
throw new Error(`Cannot create directories outside ${config.webdav.path_prefix}`)
}
// check the folder; resolve to true if it already exists
const ok = await client.stat(folder)
.then(() => true)
.catch(async (err) => {
if (!err.response) {
// no response! Maybe a network error or something :P
console.error(`[dav-upload:folder] Error creating folder "${folder}"`)
console.error(`[dav-upload:folder] ${err.toJSON().message}`)
console.error('[dav-upload:folder] Please check your Internet connection and try again')
return false
} else if (err.response.status == 404) {
// it's a 404, so we'll create the directory
this.emit('update', {
status: 'normal',
message: `Noting missing subdirectory: ${folder}`,
loglevel: 'debug'
})
// first, create the parent directory (if required)
if (!await this.createDirIfNotExist(client, path.dirname(folder))) {
// if not created, we fail too :-/
return false
}
this.emit('update', {
status: 'normal',
message: `Creating missing subdirectory: ${folder}`,
loglevel: 'debug'
})
// then, create the current directory
await client.createDirectory(folder)
.catch(async (err) => {
if (err.response.status == 405) { // Method Not Allowed
// Maybe the directory's already been created in the meantime?
await client.stat(folder)
.catch((err2) => {
// Bad guess. Panic (and raise the original error)
this.emit('update', {
status: 'error',
message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`,
loglevel: 'error'
})
throw err
})
} else {
// what's this? Panic!
this.emit('update', {
status: 'error',
message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`,
loglevel: 'error'
})
throw err
}
})
// directory created (or already present): report success
return true
} else {
// it's not a 404; we don't know how to handle this. Panic!
this.emit('update', {
status: 'error',
message: 'An unknown error occurred. Help!',
loglevel: 'error'
})
console.error(err.toJSON())
throw err
}
})
return ok
}
/**
* function [uploadDav]
* @returns [string] status
*/
async uploadDav (dirPath, filePath) {
// connect to webdav
const client = createClient(
config.webdav.server_url,
{
username: config.webdav.username,
password: config.webdav.password,
digest: config.webdav.use_digest
})
// create the remote directory if it doesn't exist
console.debug(`[dav-upload] Ensuring directory exists: ${dirPath}`)
if (!await this.createDirIfNotExist(client, dirPath)) {
console.error(`[dav-upload] Could not upload ${path.basename(filePath)} :(`)
return false
}
// upload the file, and wait for the write stream to finish so callers
// that await uploadDav() really do wait for the upload
console.debug('Uploading file')
const outStream = client.createWriteStream(
path.join(dirPath, path.basename(filePath))
)
fs.createReadStream(filePath).pipe(outStream)
await once(outStream, 'finish')
console.debug('Uploaded successfully.')
return true
}
/**
* function [fetchToEpub]
* @description fetches posts from Ghost and packs them into an epub
* @param {String} options.id unique ID for the generated epub
* @param {String} options.title title of the generated epub
* @param {String} options.author author of the generated epub
* @param {String} options.language language of the book
* @param {String} options.genre genre of the book
* @param {String} options.coverImage cover image to use
* @param {String} options.outputFolder folder to write the epub into
* @returns resolves once the epub has been written to options.outputFolder
*/
async fetchToEpub (postSlugs, options = {}) {
if (!options.title) options.title = 'Seance Collection'
if (!options.author) options.author = 'Seance'
if (!options.language) options.language = 'en'
if (!options.genre) options.genre = 'Unknown'
if (!options.coverImage) options.coverImage = 'random-cover.jpg'
if (!options.outputFolder) options.outputFolder = '.'
console.log(`Fetching: ${postSlugs}`)
let allPosts = []
// first, fetch all the posts
for (let slug of postSlugs) {
console.log(`Fetching: ${slug}`)
let post = await this.ghostAdmin.posts.read({slug: slug}, {formats: ['html']})
allPosts.push(post)
}
// prepare for image downloads
let pics = []
let picFolder = path.join(options.outputFolder, 'seance-images')
if (!fs.existsSync(picFolder)) {
fs.mkdirSync(picFolder, { recursive: true })
}
// prepare array to collect processed posts
let processedPosts = []
for (let post of allPosts) {
// decide a post slug, for future files
let postSlug = slugify(post.title)
// get the cover pic
let featurePicTag = '' // stays empty if the post has no feature image
if (!!post.feature_image) {
let imgUrl = post.feature_image
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
let response = await (await r2.get(imgUrl).response).buffer()
let ext = post.feature_image.split('.').pop()
await fs.promises.writeFile(path.join(picFolder, `${postSlug}.${ext}`), response, 'base64')
featurePicTag = `<img src="../images/${postSlug}.${ext}"/>`
pics.push(`${picFolder}/${postSlug}.${ext}`)
}
let c = cheerio.load(`${featurePicTag}<h1>${post.title}</h1>${post.html}`)
// hunt for other pics
// cheerio's .each() won't wait for async callbacks, so collect the
// images first and download them one by one
let picCounter = 0
for (const el of c('img').toArray()) {
const img = c(el)
// skip if it's a local image
if (img.attr('src').indexOf('../images') == 0) {
continue
}
// first, process the url
let imgUrl = img.attr('src')
console.log('Downloading:', imgUrl)
if (/^\/\//i.test(imgUrl)) {
imgUrl = 'https:' + imgUrl
} else if (!/^https?:\/\//i.test(imgUrl)) {
imgUrl = 'https://' + imgUrl
}
// now decide an output name
let ext = img.attr('src').split('.').pop()
let imageFile = path.join(picFolder, `${postSlug}-insert-${picCounter}.${ext}`)
// note down our calculations
img.attr('src', `../images/${postSlug}-insert-${picCounter}.${ext}`)
pics.push(imageFile)
picCounter = picCounter + 1
// finally, download the images
let response = await (await r2.get(imgUrl).response).buffer()
await fs.promises.writeFile(imageFile, response, 'base64')
console.log('Downloaded to:', imageFile)
}
processedPosts.push({
title: post.title,
body: c.html(),
})
}
// decide metadata
let metadata = {
id: options.id || 'seance-test',
title: options.title,
author: options.author,
language: options.language,
contents: 'Table of Contents',
genre: options.genre,
cover: options.coverImage,
images: pics,
}
// create the ePub
let epub = nodepub.document(metadata)
// add the documents
for (let post of processedPosts) {
epub.addSection(post.title, post.body)
}
// generate it!
await epub.writeEPUB(options.outputFolder, options.title)
}
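// A hedged sketch of packing a couple of already-pushed posts into an epub
// (slugs, titles and paths are placeholders):
//
//   await seance.fetchToEpub(['example-post', 'another-post'], {
//     title: 'Selected Posts',
//     author: 'Example Author',
//     coverImage: 'cover.jpg', // handed to nodepub as the cover image
//     outputFolder: './books',
//   })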
}
// Make Seance an EventEmitter
inherits(Seance, EventEmitter)
module.exports = {
Seance
}
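// End-to-end sketch (assumes ./config is filled in and the Medium post is
// public; this is not a documented CLI, just how the class above composes):
//
//   const { Seance } = require('./seance')
//   const seance = new Seance()
//   seance.on('update', (e) => console.log(`[${e.loglevel}] ${e.message}`))
//   seance.on('error', (e) => console.error(e.message))
//
//   ;(async () => {
//     const post = await seance.fetchFromMedium('https://medium.com/@someone/example-post-123abc')
//     if (post) await seance.pushToGhost(post.slug, { dryRun: true })
//   })()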