const r2 = require('r2') const path = require('path') const fs = require('fs') const { once, EventEmitter } = require('events') const { inherits } = require('util') const getPost = require('mediumexporter').getPost const { createClient } = require('webdav') const readline = require('readline') const { markdown } = require('markdown') const GhostAdminAPI = require('@tryghost/admin-api') const { Canvas, Image } = require('canvas') const slugify = require('underscore.string/slugify') const Rembrandt = require('rembrandt') const nodepub = require('nodepub') const cheerio = require('cheerio') const config = require('./config') class Seance { constructor(...args) { this.MEDIUM_IMG_CDN = 'https://miro.medium.com/fit/c/' try { this.ghostAdmin = new GhostAdminAPI({ url: config.ghost.url, version: config.ghost.version, key: config.ghost.admin_key, }) } catch(err) { console.error('Your Ghost isn\'t configured. Please run `seance setup` to fix this!') } /** * function [fetchFromMedium] * @returns [string] status */ } async fetchFromMedium (mediumUrl, options = { json: null, }) { this.emit('update', { status: 'starting', message: `Fetching: ${mediumUrl}`, loglevel: 'info' }) var output = path.join(process.env.PWD, 'content') var json if (!options.json) { json = await this.fetchMediumJSON(mediumUrl) } else { json = options.json } // use mediumexporter's getPost function to fetch a Medium post const post = await getPost(mediumUrl, { returnObject: true, output: output, postJSON: json }).catch((err) => { return { error: err, } }) // set output folder path // this is based on what mediumexporter chooses as the output folder var outputFolder = path.join(output, post.slug) this.emit('update', { status: 'normal', message: `Saving to: ${outputFolder}`, loglevel: 'info' }) if (!fs.existsSync(path.join(outputFolder, post.slug))) { fs.mkdirSync(outputFolder, { recursive: true }) } // mediumexporter writes a plain .md file if the post has no media // if that is the case, we should create the subfolder manually // and copy the data there. if (fs.existsSync(path.join(output, post.slug + '.md'))) { fs.renameSync( path.join(output, post.slug + '.md'), path.join(outputFolder, 'index.md') ) } // generate metadata const metadata = JSON.stringify({ title: post.title, subtitle: post.subtitle, author: post.author || "", authors: post.authors || [], date: new Date(post.date), tags: post.tags, url: post.url, slug: post.slug, images: post.images, featuredImage: post.featuredImage, }) // write metadata to output folder await fs.promises.writeFile(path.join(outputFolder, 'metadata.json'), metadata) return post }; /** * function [pushToGhost] * @description * Pre-processes and uploads the given article to Ghost * * @param {Boolean} options.noUpload Skip uploading of images * @param {Boolean} options.noPush Skip pushing to Ghost; just generate the file * @param {Boolean} options.dryRun Combination of noUpload and noPush * @returns [object] object containing details of the uploaded Ghost post */ async pushToGhost (postSlug, options={}) { this.emit('update', { status: 'starting', message: 'Starting upload: ' + postSlug, loglevel: 'info' }) if (!!options.dryRun) { options.noUpload = true options.noPush = true } console.log('noUpload', options.noUpload) // Decide working path var postFolder = path.resolve('content/' + postSlug) // Verify file exists if (!fs.existsSync(postFolder)) { this.emit('error', { message: 'Could not find post folder! Is it fetched?', }) return false } // Decide file const postContent = path.join(postFolder, 'index.md') const postOutput = path.join(postFolder, 'ghost.md') // Verify post exists if (!fs.existsSync(postContent)) { this.emit('error', { message: "Could not find 'index.md' in " + postSlug + "! Is it fetched?", }) return false } // Decide WebDAV upload path var current_date = new Date() const uploadPath = path.join( current_date.getUTCFullYear().toString(), (current_date.getUTCMonth() + 1).toString(), postSlug ) // Path where WebDAV files will be placed (eg. https://example.com:2078) const davPath = path.join(config.webdav.path_prefix, uploadPath) // Public path to upload those files (eg. https://media.example.com/uploads) // We'll do it directly since path.join mangles the protocol const uploadedPath = config.webdav.uploaded_path_prefix + '/' + uploadPath // load metadata file this.emit('update', { status: 'starting', message: 'Loading metadata', loglevel: 'debug' }) var postMetaFile = path.join(postFolder, 'metadata.json') let postMeta = await JSON.parse(await fs.promises.readFile(postMetaFile)) // Process lines const readInterface = readline.createInterface({ input: fs.createReadStream(postContent), output: process.stdout, terminal: false }) const outStream = fs.createWriteStream(postOutput, { encoding: 'utf-8' }) // We'll calculate these later since Medium messes it up sometimes let title = null let subtitle = null let reImage = new RegExp('^!\\[(.*)\\]\\((\\S+?)\\)(.*)') let reTitle = new RegExp('^#\ (.*)') let reSubtitle = new RegExp('^#+\ (.*)$') // Note down uploaded images var uploadedImages = [] this.emit('update', { status: 'progress', progress: null, // we don't know the percentage message: 'Parsing post', loglevel: 'info' }) for await (const line of readInterface) { // Line to output // Default is to make it same as input var newLine = line // Skip the header (and preceding blank lines) if (!title) { // blanks if (!line) continue // starting with a # (must be the title) let match = await reTitle.exec(line) if (match) { title = match[1] continue // no need to add line; it'll come automatically } } else if (!subtitle) { // check if it's a repeat of the title (Medium does that) if (line.endsWith(title)) continue // otherwise set the subtitle if it doesn't exist // or if it's a repeat of the title (Medium does that too) if (!subtitle && postMeta.subtitle == postMeta.title) { let match = await reSubtitle.exec(line) if (match) { subtitle = match[1] postMeta.subtitle = match[1] } } } // check for images var m = await reImage.exec(line) if (m) { // Get image name var imageAlt = m[1] var imageName = m[2].replace('*', '') var imagePath = path.join(postFolder, 'images', imageName) if (!fs.existsSync(imagePath)) { console.warn('Skipping missing image: ' + imageName) } else { // check for separator image var isScissors = await this.checkScissors(imagePath) if (isScissors) { newLine = '\n---\n' } else { // upload pic to server console.debug(`Adding to upload queue: ${imageName}`) uploadedImages.push(imageName) // Let's wait for the upload, just to avoid conflicts if (!options.noUpload) { await this.uploadDav(davPath, imagePath) } newLine = '![' + imageAlt + '](' + uploadedPath + '/' + imageName + ')' } } } outStream.write(newLine + '\n') } // Upload feature_image, if required var featuredImagePath if (!!postMeta.featuredImage) { var imageName = postMeta.featuredImage.replace('*', '') // if the image is listed in postMeta.images, it would have // already been uploaded if (uploadedImages.indexOf(imageName) != -1) { this.emit('update', { status: 'progress', progress: 95, // we don't know the percentage message: `Skipping feature image ${imageName}: already listed for upload`, loglevel: 'info' }) } else { var imagePath = path.join(postFolder, 'images', imageName) // We can only upload if the file exists! if (!fs.existsSync(imagePath)) { this.emit('update', { status: 'progress', progress: 95, // we don't know the percentage message: `Skipping feature image "${imageName}": file not found`, loglevel: 'warning' }) } else { this.emit('update', { status: 'progress', progress: 95, // we don't know the percentage message: `Uploading feature image: ${imageName}`, loglevel: 'info' }) if (!options.noUpload) { this.uploadDav(davPath, imagePath) } featuredImagePath = uploadedPath + '/' + imageName } } } // calculate users let users = [] postMeta.authors.forEach((user) => { users.push({slug: user.username}) }) // This will happen once all the line reading is finished // Uploads will continue in paralell though this.emit('update', { status: 'progress', progress: 100, // we don't know the percentage message: 'Uploading to Ghost', loglevel: 'info' }) if (!options.noPush) { let res = await this.ghostAdmin.posts.add({ title: postMeta.title, custom_excerpt: postMeta.subtitle || null, tags: postMeta.tags, authors: users, html: markdown.toHTML(await fs.promises.readFile(postOutput, 'utf-8')), feature_image: featuredImagePath }, {source: 'html'}) // Check if user was added if (res.primary_author.id == 1) { this.emit('notification', { message: `WARNING: The admin editor, "${res.primary_author.name}", is set as author for this post. If this is incorrect, there was some problem matching usernames. Please check and set it manually.`, }) } this.emit('update', { status: 'progress', progress: 100, // we don't know the percentage message: 'Post conveyed successfully', loglevel: 'info' }) return { slug: res.slug, id: res.id, uuid: res.uuid, preview_url: res.url, primary_author: res.primary_author, title: res.title, subtitle: res.custom_excerpt, status: res.status, } } else { // just return without pushing to Ghost return { slug: postSlug, id: 0, uuid: 0, preview_url: null, primary_author: {}, title: postMeta.title, subtitle: postMeta.subtitle, status: 'none', } } } /** * function [mediumToGhost] * @returns [string] status */ mediumToGhost (mediumUrl) { console.info('Copying: ' + mediumUrl); } async fetchMediumJSON(mediumUrl) { var json var text if (mediumUrl.match(/^http/i)) { // remove the anchors at the end mediumUrl = mediumUrl.replace(/#.*$/, '') // intelligently add ?json attribute if (mediumUrl.indexOf('format=json') == -1) { if (mediumUrl.indexOf('?') == -1) { mediumUrl = `${mediumUrl}?format=json` } else { mediumUrl = `${mediumUrl}&format=json` } } // let's get it! const response = await fetch(mediumUrl) text = await response.text() } else if (fs.existsSync(mediumUrl)) { text = (await fs.promises.readFile(mediumUrl)).toString() } else { throw { error: 'URL must be a Medium URL or existing JSON file' } } try { json = await JSON.parse(text.substr(text.indexOf('{'))) } catch(err) { throw { error: 'You JSON seems to be malformed' } } return json; } /** * function [checkScissors] * @returns [boolean] matchStatus */ async checkScissors (imagePath) { // Decide "separator" image // If set, images matching this will be ignored and replaced // with a horizontal-rule ("---" in markdown) instead. let scissors = config.scissors // if scissors not set, return false // (it's never a scissors since it never matches) if (!scissors) { this.emit('update', { status: 'normal', message: '[scissors] No scissors set, so rejecting all images', loglevel: 'warning' }) return false } else { /* First, check that the image has finished loading * (we don't use this, because Rembrandt loads it again * on its own, which is messy but what to do ¯\_(ツ)_/¯ */ try { let ctx = new Canvas().getContext('2d') let img = new Image() img.src = imagePath ctx.drawImage(img, 0, 0, img.width, img.height) } catch (err) { this.emit('update', { status: 'normal', message: `[scissors] Skipping scissors check:${err.message}`, loglevel: 'warning' }) return false } // Check if given image matches the scissors try { let isScissors = new Rembrandt({ imageA: scissors, imageB: imagePath, thresholdType: Rembrandt.THRESHOLD_PERCENT, maxThreshold: 0.1 }) let result = await isScissors.compare() return result.passed } catch (err) { this.emit('update', { status: 'normal', message: `[scissors] Skipping scissors check:${err.message}`, loglevel: 'warning' }) return false } } } /** * function [createUser] * @returns [object] ghost data json */ async generateUserData (mediumUsername, email) { this.emit('update', { status: 'starting', message: `Creating: @${mediumUsername} (email: ${email})`, loglevel: 'debug' }) const mediumUrl = `https://medium.com/@${mediumUsername}/?format=json`; const json = await this.fetchMediumJSON(mediumUrl); if (!json.success) { this.emit('error', { message: `Error: ${json.error}`, }) return false } this.emit('update', { status: 'normal', message: `Name: ${json.payload.user.name}`, loglevel: 'debug' }) this.emit('update', { status: 'normal', message: `Bio: ${json.payload.user.bio}`, loglevel: 'debug' }) // Download and upload image let imageId = json.payload.user.imageId this.emit('update', { status: 'normal', message: `Profile pic: ${imageId}`, loglevel: 'debug' }) let imagePath = this.MEDIUM_IMG_CDN + '256/256/' + imageId let filetype = imageId.split('.')[imageId.split('.').length - 1] let fileName = `${mediumUsername}.${filetype}` let filePath = path.join(process.env.PWD, fileName) this.emit('update', { status: 'normal', message: `Fetching profile pic: ${imagePath}`, loglevel: 'info' }) const response = await (await r2.get(imagePath).response).buffer() await await fs.promises.writeFile(filePath, response, 'base64') this.emit('update', { status: 'normal', message: `Uploading profile pic: ${imagePath}`, loglevel: 'info' }) await this.uploadDav(path.join(config.webdav.path_prefix,'avatars'), filePath) // Generate Ghost JSON const ghostData = { data: { users: [ { id: 1, slug: json.payload.user.username, bio: json.payload.user.bio, email: email, name: json.payload.user.name, profile_image: config.webdav.uploaded_path_prefix + '/avatars/' + fileName } ] }, meta: { exported_on: new Date, version: '2.14.0' } } return(JSON.stringify(ghostData)) }; async createDirIfNotExist (client, folder) { // recursively create subfolders if they don't exist. //safety: don't touch directories outside WEBDAV_PATH_PREFIX if (!folder.startsWith(config.webdav.path_prefix)) { throw new Error(`Cannot create directories outside ${config.webdav.path_prefix}`) } // check the folder await client.stat(folder) .catch(async (err) => { if (!err.response) { // no response! Maybe a network error or something :P console.error(`[dav-upload:folder] Error creating folder "${folder}"`) console.error(`[dav-upload:folder] ${err.toJSON().message}`) console.error('[dav-upload:folder] Please check your Internet connection and try again') return false } else if (err.response.status == 404) { // it's a 404, so we'll create the directory this.emit('update', { status: 'normal', message: `Noting missing subdirectory: ${folder}`, loglevel: 'debug' }) // first, create the parent directory (if required) if (!await this.createDirIfNotExist(client, path.dirname(folder))) { // if not created, we fail too :-/ return false } this.emit('update', { status: 'normal', message: `Creating missing subdirectory: ${folder}`, loglevel: 'debug' }) // then, create the current directory await client.createDirectory(folder) .catch(async (err) => { if (err.response.status == 405) { // Method Not Allowed // Maybe the directory's already been created in the meantime? await client.stat(folder) .catch((err2) => { // Bad guess. Panic (and raise the original error) this.emit('update', { status: 'error', message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`, loglevel: 'error' }) throw err }) } else { // what's this? Panic! this.emit('update', { status: 'error', message: `Error: ${err.toJSON().message}\nWe're not sure what went wrong. Help!`, loglevel: 'error' }) throw err } }) } else { // it's not a 404; we don't know how to handle this. Panic! this.emit('update', { status: 'error', message: 'An unknown error occured. Help!', loglevel: 'error' }) console.error(err.toJSON()) throw err } }) return true } /** * function [uploadDav] * @returns [string] status */ async uploadDav (dirPath, filePath) { // connect to webdav const client = createClient( config.webdav.server_url, { username: config.webdav.username, password: config.webdav.password, digest: config.webdav.use_digest }) // create directory if not exists console.debug(`[dav-upload] Loading ${dirPath}`) if (!await this.createDirIfNotExist(client, dirPath)) { console.error(`[dav-upload] Could not upload ${path.basename(filePath)} :(`) return false } // upload a file console.debug('Uploading file') const outStream = client.createWriteStream( path.join(dirPath, path.basename(filePath)) ) outStream.on('finish', () => console.debug('Uploaded successfully.')) const inStream = fs.createReadStream(filePath) .pipe(outStream) return true } /** * function [fetchToEpub] * @description fetches posts from Ghost and packs them into an epub * @options.id unique ID for the generated epub * @options.title title of the generated epub * @options.author author of the generated epub * @options.language language of the book * @genre genre of the book * @cover cover image to use * @returns [string] status */ async fetchToEpub (postSlugs, options = {}) { if (!options.title) options.title = 'Seance Collection' if (!options.author) options.author = 'Seance' if (!options.language) options.language = 'en' if (!options.genre) options.genre = 'Unknown' if (!options.coverImage) options.coverImage = 'random-cover.jpg' if (!options.outputFolder) options.outputFolder = '.' console.log(`Fetching: ${postSlugs}`) let allPosts = [] // first, fetch all the posts for (let slug of postSlugs) { console.log(`Fetching: ${slug}`) let post = await this.ghostAdmin.posts.read({slug: slug}, {formats: ['html']}) allPosts.push(post) } // prepare for image downloads let pics = [] let picFolder = path.join(options.outputFolder, 'seance-images') if (!fs.existsSync(picFolder)) { fs.mkdirSync(picFolder, { recursive: true }) } // prepare array to collect processed posts let processedPosts = [] for (let post of allPosts) { // decide a post slug, for future files let postSlug = slugify(post.title) // get the cover pic let featurePicTag if (!!post.feature_image) { let imgUrl = post.feature_image if (/^\/\//i.test(imgUrl)) { imgUrl = 'https:' + imgUrl } else if (!/^https?:\/\//i.test(imgUrl)) { imgUrl = 'https://' + imgUrl } let response = await (await r2.get(imgUrl).response).buffer() let ext = post.feature_image.split('.').pop() await await fs.promises.writeFile(path.join(picFolder, `${postSlug}.${ext}`), response, 'base64') featurePicTag = `` pics.push(`${picFolder}/${postSlug}.${ext}`) } let c = cheerio.load(`${featurePicTag}

${post.title}

${post.html}`) // hunt for other pics // TODO: make asynchronous let picCounter = 0 c('img').each(async function() { // skip if it's a local image if (c(this).attr('src').indexOf('../images') == 0) { return } // first, process the url let imgUrl = c(this).attr('src') console.log('Downloading:', imgUrl) if (/^\/\//i.test(imgUrl)) { imgUrl = 'https:' + imgUrl } else if (!/^https?:\/\//i.test(imgUrl)) { imgUrl = 'https://' + imgUrl } // now decide an output name let ext = c(this).attr('src').split('.').pop() let imageFile = path.join(picFolder, `${postSlug}-insert-${picCounter}.${ext}`) // note down our calculations c(this).attr('src', `../images/${postSlug}-insert-${picCounter}.${ext}`) pics.push(imageFile) picCounter = picCounter + 1 // finally, download the images let response = await (await r2.get(imgUrl).response).buffer() await fs.promises.writeFile(imageFile, response, 'base64') console.log('Downloaded to:', imageFile) }) processedPosts.push({ title: post.title, body: c.html(), }) } // decide metadata let metadata = { id: 'seance-test', // FIXME title: options.title, author: options.author, language: options.language, contents: 'Table of Contents', genre: options.genre, cover: options.coverImage, images: pics, } // create the ePub let epub = nodepub.document(metadata) // add the documents for (let post of processedPosts) { epub.addSection(post.title, post.body) } // generate it! await epub.writeEPUB(options.outputFolder, options.title) } } // Make Seance an EventEmitter inherits(Seance, EventEmitter) module.exports = { Seance }