seance/seance.js
Hippo 1edee70807 Ignore duplicate titles and subtitles
The long-standing "make title proper" bug has finally been fixed.
Yayy! So now, basically, the title and subtitle get set properly
even if Medium messed it up by showing some of them twice.
2021-03-29 21:52:12 +05:30

679 lines
19 KiB
JavaScript

const r2 = require('r2')
const path = require('path')
const fs = require('fs')
const { once, EventEmitter } = require('events')
const { inherits } = require('util')
const getPost = require('mediumexporter').getPost
const { createClient } = require('webdav')
const readline = require('readline')
const { markdown } = require('markdown')
const GhostAdminAPI = require('@tryghost/admin-api')
const { Canvas, Image } = require('canvas')
const Rembrandt = require('rembrandt')
const config = require('./config')
/**
 * Seance conveys posts from Medium to a Ghost blog.
 *
 * Posts are fetched with mediumexporter into ./content/<slug>, their images
 * are uploaded to a WebDAV server and the rewritten markdown is pushed to
 * Ghost through the Admin API.
 *
 * Progress is reported through events:
 *   - 'update'       { status, message, loglevel[, progress] }
 *   - 'error'        { message }
 *   - 'notification' { message }
 */
class Seance extends EventEmitter {
  /**
   * Build the Ghost Admin API client from `config.ghost`.
   *
   * If that fails the problem is reported on the console and construction
   * continues; `this.ghostAdmin` is simply left unset.
   */
  constructor (...args) {
    super()

    this.MEDIUM_IMG_CDN = 'https://miro.medium.com/fit/c/'

    try {
      this.ghostAdmin = new GhostAdminAPI({
        url: config.ghost.url,
        version: config.ghost.version,
        key: config.ghost.admin_key,
      })
    } catch (err) {
      console.error('Your Ghost isn\'t configured. Please run `seance setup` to fix this!')
    }
  }

  /**
   * function [fetchFromMedium]
   *
   * Fetch a Medium post with mediumexporter and store it, together with a
   * `metadata.json`, in `<working dir>/content/<slug>`.
   *
   * @param {string} mediumUrl  anything `fetchMediumJSON` accepts
   * @param {object} [options]
   * @param {object} [options.json]  already-loaded post JSON; when given,
   *                                 nothing is downloaded
   * @returns {Promise<object>} the post object produced by mediumexporter
   * @throws {Error} when mediumexporter fails or returns a post without slug
   */
  async fetchFromMedium (mediumUrl, options = {
    json: null,
  }) {
    this.emit('update', {
      status: 'starting',
      message: `Fetching: ${mediumUrl}`,
      loglevel: 'info'
    })

    // PWD is not set in every environment, so fall back to the real cwd
    const output = path.join(process.env.PWD || process.cwd(), 'content')

    const json = options.json
      ? options.json
      : await this.fetchMediumJSON(mediumUrl)

    // use mediumexporter's getPost function to fetch a Medium post
    let post
    try {
      post = await getPost(mediumUrl, {
        returnObject: true,
        output: output,
        postJSON: json
      })
    } catch (err) {
      const failure = new Error(
        `Could not fetch Medium post "${mediumUrl}": ${(err && err.message) || err}`
      )
      failure.cause = err
      throw failure
    }

    // without a slug we cannot decide where the post lives on disk
    if (!post || typeof post.slug !== 'string') {
      throw new Error(`Could not fetch Medium post "${mediumUrl}": no slug returned`)
    }

    // set output folder path
    // this is based on what mediumexporter chooses as the output folder
    const outputFolder = path.join(output, post.slug)

    this.emit('update', {
      status: 'normal',
      message: `Saving to: ${outputFolder}`,
      loglevel: 'info'
    })

    // recursive mkdir is a no-op when the folder already exists
    fs.mkdirSync(outputFolder, { recursive: true })

    // mediumexporter writes a plain .md file if the post has no media
    // if that is the case, we move it into the subfolder ourselves
    const looseMarkdown = path.join(output, post.slug + '.md')
    if (fs.existsSync(looseMarkdown)) {
      fs.renameSync(looseMarkdown, path.join(outputFolder, 'index.md'))
    }

    // generate metadata
    const metadata = JSON.stringify({
      title: post.title,
      subtitle: post.subtitle,
      author: post.author || "",
      authors: post.authors || [],
      date: new Date(post.date),
      tags: post.tags,
      url: post.url,
      slug: post.slug,
      images: post.images,
      featuredImage: post.featuredImage,
    })

    // write metadata to output folder
    await fs.promises.writeFile(path.join(outputFolder, 'metadata.json'), metadata)

    return post
  }

  /**
   * function [pushToGhost]
   *
   * Upload a previously fetched post (`content/<postSlug>`) to Ghost.
   *
   * `index.md` is rewritten line by line into `ghost.md`: the leading title
   * (and its repeats) is dropped, images are uploaded over WebDAV and
   * re-linked, and images matching the configured "scissors" picture become
   * horizontal rules. The result is converted to HTML and added as a post.
   *
   * @param {string} postSlug  folder name below `content/`
   * @returns {Promise<false|undefined>} `false` when the post folder or its
   *          `index.md` is missing (an 'error' event is emitted as well);
   *          otherwise resolves once Ghost has accepted the post
   * @throws whatever the WebDAV upload or the Ghost API rejects with
   */
  async pushToGhost (postSlug) {
    this.emit('update', {
      status: 'starting',
      message: 'Starting upload: ' + postSlug,
      loglevel: 'info'
    })

    // Decide working path
    const postFolder = path.resolve('content/' + postSlug)

    // Verify file exists
    if (!fs.existsSync(postFolder)) {
      this.emit('error', {
        message: 'Could not find post folder! Is it fetched?',
      })
      return false
    }

    // Decide file
    const postContent = path.join(postFolder, 'index.md')
    const postOutput = path.join(postFolder, 'ghost.md')

    // Verify post exists
    if (!fs.existsSync(postContent)) {
      this.emit('error', {
        message: "Could not find 'index.md' in " + postSlug + "! Is it fetched?",
      })
      return false
    }

    // Decide WebDAV upload path: <year>/<month>/<slug>
    const now = new Date()
    const uploadPath = path.join(
      now.getUTCFullYear().toString(),
      (now.getUTCMonth() + 1).toString(),
      postSlug
    )

    // Path where WebDAV files will be placed (eg. https://example.com:2078)
    const davPath = path.join(config.webdav.path_prefix, uploadPath)

    // Public path to upload those files (eg. https://media.example.com/uploads)
    // We'll do it directly since path.join mangles the protocol
    const uploadedPath = config.webdav.uploaded_path_prefix + '/' + uploadPath

    // load metadata file
    this.emit('update', {
      status: 'starting',
      message: 'Loading metadata',
      loglevel: 'debug'
    })

    const postMetaFile = path.join(postFolder, 'metadata.json')
    const postMeta = JSON.parse(await fs.promises.readFile(postMetaFile, 'utf-8'))

    // Process lines
    const readInterface = readline.createInterface({
      input: fs.createReadStream(postContent),
      output: process.stdout,
      terminal: false
    })
    const outStream = fs.createWriteStream(postOutput, { encoding: 'utf-8' })

    // We'll calculate these while reading since Medium messes it up sometimes
    let title = null
    let subtitle = null

    const reImage = /^!\[(.*)\]\((\S+?)\)(.*)/
    const reTitle = /^# (.*)/
    const reSubtitle = /^#+ (.*)$/

    // Note down uploaded images
    const uploadedImages = []

    this.emit('update', {
      status: 'progress',
      progress: null, // we don't know the percentage
      message: 'Parsing post',
      loglevel: 'info'
    })

    for await (const line of readInterface) {
      // Line to output; default is to make it same as input
      let newLine = line

      if (!title) {
        // Skip the header (and preceding blank lines)
        if (!line) continue

        // starting with a single # (must be the title)
        const titleMatch = reTitle.exec(line)
        if (titleMatch) {
          title = titleMatch[1]
          continue // no need to add line; Ghost shows the title itself
        }
      } else if (!subtitle) {
        // check if it's a repeat of the title (Medium does that)
        if (line.endsWith(title)) continue

        // only look for a subtitle ourselves when the recorded one is
        // just a repeat of the title (Medium does that too)
        if (postMeta.subtitle == postMeta.title) {
          const subtitleMatch = reSubtitle.exec(line)
          if (subtitleMatch) {
            subtitle = subtitleMatch[1]
            postMeta.subtitle = subtitleMatch[1]
          }
        }
      }

      // check for images
      const imageMatch = reImage.exec(line)
      if (imageMatch) {
        const imageAlt = imageMatch[1]
        const imageName = imageMatch[2].replace('*', '')
        const imagePath = path.join(postFolder, 'images', imageName)

        if (!fs.existsSync(imagePath)) {
          console.warn('Skipping missing image: ' + imageName)
        } else if (await this.checkScissors(imagePath)) {
          // separator image: replace with a horizontal rule
          newLine = '\n---\n'
        } else {
          // upload pic to server
          console.debug(`Adding to upload queue: ${imageName}`)
          uploadedImages.push(imageName)

          // Let's wait for the upload, just to avoid conflicts
          await this.uploadDav(davPath, imagePath)
          newLine = '![' + imageAlt + '](' + uploadedPath + '/' + imageName + ')'
        }
      }

      outStream.write(newLine + '\n')
    }

    // Close ghost.md and wait until everything is on disk; otherwise the
    // read-back further down may see a truncated file.
    const flushed = once(outStream, 'finish')
    outStream.end()
    await flushed

    // Upload feature_image, if required
    let featuredImagePath
    if (postMeta.featuredImage) {
      const imageName = postMeta.featuredImage.replace('*', '')

      if (uploadedImages.indexOf(imageName) != -1) {
        // it was part of the post body, so it has been handled already
        this.emit('update', {
          status: 'progress',
          progress: 95, // rough guess; we don't know the percentage
          message: `Skipping feature image ${imageName}: already listed for upload`,
          loglevel: 'info'
        })
      } else {
        const imagePath = path.join(postFolder, 'images', imageName)

        // We can only upload if the file exists!
        if (!fs.existsSync(imagePath)) {
          this.emit('update', {
            status: 'progress',
            progress: 95, // rough guess; we don't know the percentage
            message: `Skipping feature image "${imageName}": file not found`,
            loglevel: 'warning'
          })
        } else {
          this.emit('update', {
            status: 'progress',
            progress: 95, // rough guess; we don't know the percentage
            message: `Uploading feature image: ${imageName}`,
            loglevel: 'info'
          })
          // awaited so that a failed upload is reported to the caller
          await this.uploadDav(davPath, imagePath)
          featuredImagePath = uploadedPath + '/' + imageName
        }
      }
    }

    // calculate users
    const users = (postMeta.authors || []).map((user) => ({ slug: user.username }))

    this.emit('update', {
      status: 'progress',
      progress: 100, // rough guess; we don't know the percentage
      message: 'Uploading to Ghost',
      loglevel: 'info'
    })

    // awaited so that a rejection reaches the caller instead of being lost
    const res = await this.ghostAdmin.posts.add({
      title: postMeta.title,
      custom_excerpt: postMeta.subtitle || null,
      tags: postMeta.tags,
      authors: users,
      html: markdown.toHTML(await fs.promises.readFile(postOutput, 'utf-8')),
      feature_image: featuredImagePath
    }, { source: 'html' })

    // Check if user was added
    if (res.primary_author.id == 1) {
      this.emit('notification', {
        message: `WARNING: The admin editor, "${res.primary_author.name}", is set as author for this post. If this is incorrect, there was some problem matching usernames. Please check and set it manually.`,
      })
    }

    this.emit('update', {
      status: 'progress',
      progress: 100, // rough guess; we don't know the percentage
      message: 'Post conveyed successfully',
      loglevel: 'info'
    })
  }

  /**
   * function [mediumToGhost]
   *
   * Currently only announces the URL on the console.
   *
   * @param {string} mediumUrl
   */
  mediumToGhost (mediumUrl) {
    console.info('Copying: ' + mediumUrl)
  }

  /**
   * function [fetchMediumJSON]
   *
   * Load Medium's JSON for a post or profile, either from an http(s) URL
   * (`format=json` is appended when missing) or from a local file.
   * Everything before the first `{` is discarded before parsing.
   *
   * @param {string} mediumUrl  http(s) URL or path of an existing JSON file
   * @returns {Promise<object>} the parsed JSON
   * @throws {Error} carrying the message in `.error` as well, for callers
   *         that read that property
   */
  async fetchMediumJSON (mediumUrl) {
    // Errors keep the historical `.error` property next to the usual message
    const fail = (message) => Object.assign(new Error(message), { error: message })

    let text

    if (/^https?:\/\//i.test(mediumUrl)) {
      // remove the anchors at the end
      let url = mediumUrl.replace(/#.*$/, '')

      // intelligently add ?json attribute
      if (url.indexOf('format=json') == -1) {
        url += (url.indexOf('?') == -1 ? '?' : '&') + 'format=json'
      }

      // let's get it! (r2 is what the rest of this file uses for HTTP)
      text = await r2.get(url).text
    } else if (fs.existsSync(mediumUrl)) {
      text = (await fs.promises.readFile(mediumUrl)).toString()
    } else {
      throw fail('URL must be a Medium URL or existing JSON file')
    }

    try {
      return JSON.parse(text.slice(text.indexOf('{')))
    } catch (err) {
      throw fail('You JSON seems to be malformed')
    }
  }

  /**
   * function [checkScissors]
   *
   * Tell whether an image matches the configured separator ("scissors")
   * picture. Matching images get replaced by a horizontal rule
   * ("---" in markdown) by the caller.
   *
   * @param {string} imagePath  image to compare
   * @returns {Promise<boolean>} matchStatus; `false` when no scissors image
   *          is configured or when the comparison could not be done
   */
  async checkScissors (imagePath) {
    const scissors = config.scissors

    // if scissors not set, nothing can ever match
    if (!scissors) {
      this.emit('update', {
        status: 'normal',
        message: '[scissors] No scissors set, so rejecting all images',
        loglevel: 'warning'
      })
      return false
    }

    /* First, check that the image can be loaded and drawn at all.
     * (The result is not used, because Rembrandt loads the image again
     * on its own, which is messy but what to do ¯\_(ツ)_/¯)
     */
    try {
      const ctx = new Canvas().getContext('2d')
      const img = new Image()
      img.src = imagePath
      ctx.drawImage(img, 0, 0, img.width, img.height)
    } catch (err) {
      this.emit('update', {
        status: 'normal',
        message: `[scissors] Skipping scissors check:${err.message}`,
        loglevel: 'warning'
      })
      return false
    }

    // Check if given image matches the scissors
    try {
      const comparison = new Rembrandt({
        imageA: scissors,
        imageB: imagePath,
        thresholdType: Rembrandt.THRESHOLD_PERCENT,
        maxThreshold: 0.1
      })
      const result = await comparison.compare()
      return result.passed
    } catch (err) {
      this.emit('update', {
        status: 'normal',
        message: `[scissors] Skipping scissors check:${err.message}`,
        loglevel: 'warning'
      })
      return false
    }
  }

  /**
   * function [generateUserData]
   *
   * Build Ghost import JSON for a Medium user. The profile picture (if the
   * user has one) is downloaded into the working directory and uploaded to
   * the `avatars` folder on the WebDAV server.
   *
   * @param {string} mediumUsername  Medium handle without the leading @
   * @param {string} email           address to store for the Ghost user
   * @returns {Promise<string|false>} ghost data json (stringified), or
   *          `false` when Medium reports a failure (an 'error' event is
   *          emitted as well)
   */
  async generateUserData (mediumUsername, email) {
    this.emit('update', {
      status: 'starting',
      message: `Creating: @${mediumUsername} (email: ${email})`,
      loglevel: 'debug'
    })

    const mediumUrl = `https://medium.com/@${mediumUsername}/?format=json`
    const json = await this.fetchMediumJSON(mediumUrl)

    if (!json.success) {
      this.emit('error', {
        message: `Error: ${json.error}`,
      })
      return false
    }

    const user = json.payload.user

    this.emit('update', {
      status: 'normal',
      message: `Name: ${user.name}`,
      loglevel: 'debug'
    })
    this.emit('update', {
      status: 'normal',
      message: `Bio: ${user.bio}`,
      loglevel: 'debug'
    })

    // Download and upload image
    const imageId = user.imageId
    this.emit('update', {
      status: 'normal',
      message: `Profile pic: ${imageId}`,
      loglevel: 'debug'
    })

    let profileImage = null
    if (imageId) {
      const imagePath = this.MEDIUM_IMG_CDN + '256/256/' + imageId
      const filetype = imageId.split('.').pop()
      const fileName = `${mediumUsername}.${filetype}`
      // PWD is not set in every environment, so fall back to the real cwd
      const filePath = path.join(process.env.PWD || process.cwd(), fileName)

      this.emit('update', {
        status: 'normal',
        message: `Fetching profile pic: ${imagePath}`,
        loglevel: 'info'
      })

      const response = await (await r2.get(imagePath).response).buffer()
      await fs.promises.writeFile(filePath, response)

      this.emit('update', {
        status: 'normal',
        message: `Uploading profile pic: ${imagePath}`,
        loglevel: 'info'
      })

      await this.uploadDav(path.join(config.webdav.path_prefix, 'avatars'), filePath)
      profileImage = config.webdav.uploaded_path_prefix + '/avatars/' + fileName
    } else {
      // nothing to download; leave the profile image empty
      this.emit('update', {
        status: 'normal',
        message: `No profile pic set for @${mediumUsername}; skipping upload`,
        loglevel: 'warning'
      })
    }

    // Generate Ghost JSON
    const ghostData = {
      data: {
        users: [
          {
            id: 1,
            slug: user.username,
            bio: user.bio,
            email: email,
            name: user.name,
            profile_image: profileImage
          }
        ]
      },
      meta: {
        exported_on: new Date(),
        version: '2.14.0'
      }
    }

    return JSON.stringify(ghostData)
  }

  /**
   * function [createDirIfNotExist]
   *
   * Make sure `folder` exists on the WebDAV server, creating it and any
   * missing parents below `config.webdav.path_prefix`.
   *
   * @param {object} client  WebDAV client (needs `stat` and `createDirectory`)
   * @param {string} folder  remote folder path
   * @returns {Promise<boolean>} `true` when the folder exists afterwards,
   *          `false` when the server could not be reached
   * @throws {Error} when `folder` lies outside the configured prefix, or
   *         when the server answers with an error we cannot handle
   */
  async createDirIfNotExist (client, folder) {
    // safety: don't touch directories outside WEBDAV_PATH_PREFIX
    if (!folder.startsWith(config.webdav.path_prefix)) {
      throw new Error(`Cannot create directories outside ${config.webdav.path_prefix}`)
    }

    // check the folder
    try {
      await client.stat(folder)
      return true
    } catch (err) {
      if (!err.response) {
        // no response! Maybe a network error or something :P
        console.error(`[dav-upload:folder] Error creating folder "${folder}"`)
        console.error(`[dav-upload:folder] ${err.message}`)
        console.error('[dav-upload:folder] Please check your Internet connection and try again')
        return false
      }

      if (err.response.status != 404) {
        // it's not a 404; we don't know how to handle this. Panic!
        this.emit('update', {
          status: 'error',
          message: 'An unknown error occured. Help!',
          loglevel: 'error'
        })
        console.error(typeof err.toJSON === 'function' ? err.toJSON() : err)
        throw err
      }
    }

    // it's a 404, so we'll create the directory
    this.emit('update', {
      status: 'normal',
      message: `Noting missing subdirectory: ${folder}`,
      loglevel: 'debug'
    })

    // first, create the parent directory (if required)
    if (!await this.createDirIfNotExist(client, path.dirname(folder))) {
      // if not created, we fail too :-/
      return false
    }

    this.emit('update', {
      status: 'normal',
      message: `Creating missing subdirectory: ${folder}`,
      loglevel: 'debug'
    })

    // then, create the current directory
    try {
      await client.createDirectory(folder)
    } catch (err) {
      const status = err.response && err.response.status

      // 405 (Method Not Allowed): maybe the directory's already been
      // created in the meantime? A successful stat confirms that guess.
      const existsNow = status == 405 &&
        await client.stat(folder).then(() => true, () => false)

      if (!existsNow) {
        // Panic (and raise the original error)
        this.emit('update', {
          status: 'error',
          message: `Error: ${err.message}\nWe're not sure what went wrong. Help!`,
          loglevel: 'error'
        })
        throw err
      }
    }

    return true
  }

  /**
   * function [uploadDav]
   *
   * Upload a local file into `dirPath` on the WebDAV server, creating the
   * directory first when needed. Resolves only after the upload stream has
   * finished, so callers can rely on `await` to serialise uploads.
   *
   * @param {string} dirPath   remote directory
   * @param {string} filePath  local file; its basename is kept remotely
   * @returns {Promise<boolean>} `true` after the upload, `false` when the
   *          remote directory could not be prepared
   * @throws when reading the file or writing to the server fails
   */
  async uploadDav (dirPath, filePath) {
    // connect to webdav
    const client = createClient(
      config.webdav.server_url,
      {
        username: config.webdav.username,
        password: config.webdav.password,
        digest: config.webdav.use_digest
      })

    // create directory if not exists
    console.debug(`[dav-upload] Loading ${dirPath}`)
    if (!await this.createDirIfNotExist(client, dirPath)) {
      console.error(`[dav-upload] Could not upload ${path.basename(filePath)} :(`)
      return false
    }

    // upload a file
    console.debug('Uploading file')
    const outStream = client.createWriteStream(
      path.join(dirPath, path.basename(filePath))
    )
    const inStream = fs.createReadStream(filePath)

    // pipe() does not forward source errors; without this a read failure
    // would leave us waiting for a 'finish' that never comes
    inStream.on('error', (err) => outStream.destroy(err))

    // once() rejects if the stream emits 'error' before 'finish'
    const uploaded = once(outStream, 'finish')
    inStream.pipe(outStream)
    await uploaded

    console.debug('Uploaded successfully.')
    return true
  }
}
// Wire EventEmitter's prototype methods (emit, on, ...) into Seance
inherits(Seance, EventEmitter)

// The class is this module's only export
module.exports = { Seance: Seance }