Skip to content

Commit

Permalink
下载知乎/掘金/简书文章时增加处理公式
Browse files Browse the repository at this point in the history
  • Loading branch information
kscript committed Feb 22, 2023
1 parent 455ba5a commit cc57ef3
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 137 deletions.
8 changes: 7 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "markdown-downloader",
"version": "1.0.2",
"version": "1.0.3",
"description": "markdown文章下载",
"main": "dist/index.js",
"scripts": {
Expand Down Expand Up @@ -42,6 +42,7 @@
"cross-env": "^7.0.3",
"html-to-md": "^0.5.3",
"jszip": "^3.7.1",
"mathjax": "^3.2.2",
"md5": "^2.3.0",
"path-browserify": "^1.0.1",
"webpack": "^5.63.0",
Expand Down
168 changes: 39 additions & 129 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,103 +1,22 @@
import md5 from 'md5'
import path from 'path-browserify'
import html2markdown from 'html-to-md'
import { websites, hooks } from './websites'
import merge from 'webpack-merge'
import 'mathjax/es5/tex-svg'
import {
isExtension,
getExt,
query,
getText,
getAttribute,
queryAll,
noop,
sendMessage,
formatDate,
insertAfter,
getUrl
} from './utils'

// True when a global `window` object exists, i.e. the script runs in a browser-like environment.
const isBroswer = typeof window !== 'undefined' && window instanceof Object
// Truthy when `window.chrome.runtime` is available. The value is the runtime object itself
// (or a falsy value), not a strict boolean.
const isExtension = isBroswer && window.chrome instanceof Object && window.chrome.runtime

/**
 * Returns the extension of a file name or path without the leading dot.
 * @param {string} fileName - File name or path to inspect.
 * @returns {string} The extension (e.g. `png`), or an empty string when there is none.
 */
const getExt = (fileName) => {
  const { ext } = path.parse(fileName)
  return ext.slice(1)
}

/**
 * Resolves a selector to a DOM node.
 * A value that is already a `Node` or `NodeList` is returned unchanged.
 * @param {string|Node|NodeList} selector - CSS selector or an existing DOM object.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {Node|NodeList|null} The passed-in DOM object, or the result of `context.querySelector`.
 */
const query = (selector, context = document) => {
  const isDomObject = selector instanceof NodeList || selector instanceof Node
  return isDomObject ? selector : context.querySelector(selector)
}

/**
 * Reads the `innerText` of the element resolved by `query`.
 * @param {string|Node|NodeList} selector - Selector or DOM object forwarded to `query`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {string} The element's `innerText`, or an empty string when nothing matches or the text is empty.
 */
const getText = (selector, context = document) => {
  const target = query(selector, context)
  if (!target) {
    return ''
  }
  return target.innerText || ''
}

/**
 * Reads an attribute from the element resolved by `query`.
 * @param {string} val - Name of the attribute to read.
 * @param {string|Node|NodeList} selector - Selector or DOM object forwarded to `query`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {string} The attribute value, or an empty string when the element or attribute is missing.
 */
const getAttribute = (val, selector, context = document) => {
  const target = query(selector, context)
  if (!target) {
    return ''
  }
  return target.getAttribute(val) || ''
}

/**
 * Collects every element matching a selector into a real array.
 * @param {string} selector - CSS selector passed to `querySelectorAll`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {Array} Array copy of the array-like returned by `context.querySelectorAll`.
 */
const queryAll = (selector, context = document) => {
  const matches = context.querySelectorAll(selector)
  return Array.prototype.slice.call(matches)
}

/**
 * Picks a callable: `func` if it is a function, otherwise `defaultFunc` if that is a
 * function, otherwise a fresh function that does nothing.
 * @param {*} func - Preferred callback.
 * @param {*} [defaultFunc] - Fallback callback.
 * @returns {Function} A function that is always safe to call.
 */
const noop = (func, defaultFunc) => {
  if (typeof func === 'function') {
    return func
  }
  if (typeof defaultFunc === 'function') {
    return defaultFunc
  }
  return () => {}
}

/**
 * Serializes an object into an `application/x-www-form-urlencoded` style string.
 * Both keys and values are URL-encoded, and only the object's own enumerable
 * properties are included (inherited ones are ignored).
 * @param {Object} data - Key/value pairs to serialize; `null`/`undefined` yield an empty string.
 * @returns {string} Pairs joined as `key=value&key=value`, or an empty string when there are none.
 */
const encodeUrlData = (data) => {
  if (data == null) {
    return ''
  }
  return Object.entries(data)
    .map(([key, value]) => encodeURIComponent(key) + '=' + encodeURIComponent(value))
    .join('&')
}

/**
 * URL-encodes `options.data` in place unless the caller opted out.
 * Encoding happens only when `options.stringify` is not `false` and
 * `typeof options.data` is `'object'`.
 * @param {Object} options - Request options; mutated when encoding applies.
 * @returns {Object} The same `options` object.
 */
const encodeOptionsData = (options) => {
  if (options.stringify === false || typeof options.data !== 'object') {
    return options
  }
  options.data = encodeUrlData(options.data)
  return options
}

/**
 * Sends a message through `chrome.runtime.sendMessage` and retries on failure.
 * Does nothing when `isExtension` is falsy.
 *
 * The reply is expected to be an array `[error, response, headers, xhr]`.
 * An attempt counts as failed when `error` is truthy, when `onsuccess` throws,
 * or when `onsuccess` returns anything other than `undefined`.
 *
 * @param {Object} options - Message payload; `options.data` is encoded in place by `encodeOptionsData`.
 * @param {Function} [onsuccess] - Called with `(response, headers, xhr)` when the reply carries no error.
 * @param {Function} [onerror] - Called with `(error, headers, xhr)` once no retries are left.
 * @param {number} [retry=3] - Number of additional attempts after a failed one.
 */
const sendMessage = (options, onsuccess, onerror, retry) => {
  if (!isExtension) {
    return
  }
  retry = isNaN(retry) ? 3 : +retry
  encodeOptionsData(options)
  chrome.runtime.sendMessage(options, (reply) => {
    // Chrome invokes the callback without arguments (and sets runtime.lastError) when the
    // message cannot be delivered; destructuring that directly would throw and skip the
    // retry/onerror handling, so it is turned into a regular failure instead.
    const [replyError, response, headers, xhr] = Array.isArray(reply)
      ? reply
      : [chrome.runtime.lastError || new Error('No response from extension runtime')]
    let error = replyError
    if (!error) {
      try {
        const result = noop(onsuccess)(response, headers, xhr)
        if (result === void 0) {
          return response
        }
        // A return value other than undefined from onsuccess is treated as a failed call
        error = result
      } catch (err) {
        // onsuccess itself threw
        error = err
      }
    }
    if (retry-- > 0) {
      sendMessage(options, onsuccess, onerror, retry)
    } else {
      noop(onerror)(error, headers, xhr)
    }
  })
}

/**
 * Formats a date according to a token pattern.
 *
 * Supported tokens (each must be a separate run of letters):
 * - `y` / `yyyy` full year, `yy` last two digits of the year
 * - `M`, `d`, `H`, `h`, `m`, `s` unpadded month, day, hours, hours modulo 12, minutes, seconds
 * - `MM`, `dd`, `HH`, `hh`, `mm`, `ss` the same values zero-padded to two digits
 * - `w` weekday as one of the characters `日一二三四五六`
 * Letter runs that are not tokens are left in the output unchanged.
 *
 * @param {string} str - Pattern, e.g. `yyyy-MM-dd HH:mm:ss`.
 * @param {string|number|Date} [t] - Strings and numeric values are passed to `new Date`;
 *   `undefined` or a value that does not yield a valid date falls back to the current time.
 * @returns {string} The pattern with every token replaced.
 */
const formatDate = (str, t) => {
  t = typeof t === 'string' || !isNaN(t) ? new Date(t) : t
  // An Invalid Date would render as "NaN"/"aN", so it also falls back to the current time
  if (t instanceof Date === false || isNaN(t.getTime())) {
    t = new Date()
  }
  const values = {
    yyyyyyyy: t.getFullYear(),
    yy: t.getFullYear(),
    MM: t.getMonth() + 1,
    dd: t.getDate(),
    HH: t.getHours(),
    hh: t.getHours() % 12,
    mm: t.getMinutes(),
    ss: t.getSeconds(),
    ww: '日一二三四五六'.split('')[t.getDay()]
  }
  const hasToken = (key) => Object.prototype.hasOwnProperty.call(values, key)
  return str.replace(/[a-z]+/ig, (token) => {
    // The table is keyed by the doubled token: "M" looks up "MM" and prints it unpadded
    const doubled = token + token
    if (hasToken(doubled)) {
      return String(values[doubled])
    }
    // A token that is itself a key ("MM", "yy", ...) prints the last two characters, zero-padded
    if (hasToken(token)) {
      return ('0' + values[token]).slice(-2)
    }
    // Not a token: keep literal text instead of mangling it
    return token
  })
}

const setInfo = (data) => {
data = Object.assign({
Expand Down Expand Up @@ -129,25 +48,6 @@ const getMarkdown = (markdownBody) => {
// }[s1] || s))
}

/**
 * Inserts `newElement` into the DOM directly after `targetElement`.
 * @param {Node} newElement - Node to insert.
 * @param {Node} targetElement - Existing node that must have a parent.
 */
const insertAfter = (newElement, targetElement) => {
  const { parentNode, nextSibling } = targetElement
  // With a null reference node (target is the last child) insertBefore appends at the end
  parentNode.insertBefore(newElement, nextSibling)
}

/**
 * Turns a link into an absolute URL using `prefix` as the base.
 * - Empty link: returns an empty string.
 * - Link starting with `http://` or `https://`: returned unchanged.
 * - Protocol-relative link (`//host/...`): gets the protocol part of `prefix`.
 * - Anything else: appended to `prefix` verbatim (no separator is inserted).
 * @param {string} prefix - Base URL, e.g. `location.origin`.
 * @param {string} link - Link to resolve.
 * @returns {string} The resolved URL, or an empty string for an empty link.
 */
const getUrl = (prefix, link) => {
  if (!link) return ''
  // Require the full "scheme://" so relative paths that merely start with "http"
  // (e.g. "httpbin/get") are still prefixed
  if (/^https?:\/\//i.test(link)) {
    return link
  }
  if (/^\/\//.test(link)) {
    return prefix.split('//')[0] + link
  }
  return prefix + link
}
const convert = async (options, customOptions) => {
const context = {}
const defaultOptions = {
Expand Down Expand Up @@ -189,6 +89,9 @@ const convert = async (options, customOptions) => {
return result
}
const markdownBody = query(selectors.body, options.context).cloneNode(true)
const fileName = (getText(selectors.title) || document.title)
const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_')
noop(hook.extract)(context, { markdownBody, fileName, realName })
queryAll(selectors.copyBtn, markdownBody).map(item => item.parentElement.removeChild(item))
queryAll('[data-id]', markdownBody).map(item => item.removeAttribute('data-id'))
if (selectors.invalid) {
Expand All @@ -214,9 +117,17 @@ const convert = async (options, customOptions) => {
})
}
const urls = []
const fileName = (getText(selectors.title) || document.title)
const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_')
const files = queryAll('img', markdownBody).map(item => {
const downloadName = item.getAttribute('downloadName')
const downloadUrl = item.getAttribute('downloadUrl')
if (downloadName && downloadUrl) {
item.src = './' + downloadName
options.urls !== false && urls.push(downloadUrl)
return {
name: downloadName,
downloadUrl
}
}
const src = item.getAttribute(options.lazyKey) || item.src
const url = src.replace(/\?$/, '')
const ext = getExt(url)
Expand All @@ -235,11 +146,12 @@ const convert = async (options, customOptions) => {
home: getUrl(location.origin, getAttribute('href', selectors.userLink)),
description: markdownBody.innerText.replace(/^([\n\s]+)/g, '').replace(/\n/g, ' ').slice(0, 50) + '...',
})
noop(hook.extract)(context)
const markdwonDoc = html2markdown(info + getMarkdown(markdownBody), {})
const copyright = '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') '
const content = await noop(hook.formatContent)(context, { markdownBody, markdwonDoc })
files.push({
name: realName + '.md',
content: markdwonDoc + '\n\n' + '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') '
content: (content && typeof content === 'string' ? content: markdwonDoc )+ '\n\n' + copyright
})
files.push({
name: realName + '/urls',
Expand All @@ -259,19 +171,17 @@ const extract = async (options, customOptions) => {
return datas
}

if (isBroswer) {
if (isExtension) {
chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => {
if (message instanceof Object) {
if (message.type === 'download') {
if (typeof websites[message.website] === 'function') {
await websites[message.website](extract)
}
if (isExtension) {
chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => {
if (message instanceof Object) {
if (message.type === 'download') {
if (typeof websites[message.website] === 'function') {
await websites[message.website](extract)
}
}
sendResponse('')
})
}
}
sendResponse('')
})
}

export default convert
131 changes: 131 additions & 0 deletions src/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import path from 'path-browserify'

// True when a global `window` object exists, i.e. the script runs in a browser-like environment.
export const isBroswer = typeof window !== 'undefined' && window instanceof Object
// Truthy when `window.chrome.runtime` is available. The value is the runtime object itself
// (or a falsy value), not a strict boolean.
export const isExtension = isBroswer && window.chrome instanceof Object && window.chrome.runtime
/**
 * Returns the extension of a file name or path without the leading dot.
 * @param {string} fileName - File name or path to inspect.
 * @returns {string} The extension (e.g. `png`), or an empty string when there is none.
 */
export const getExt = (fileName) => {
  const { ext } = path.parse(fileName)
  return ext.slice(1)
}
/**
 * Resolves a selector to a DOM node.
 * A value that is already a `Node` or `NodeList` is returned unchanged.
 * @param {string|Node|NodeList} selector - CSS selector or an existing DOM object.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {Node|NodeList|null} The passed-in DOM object, or the result of `context.querySelector`.
 */
export const query = (selector, context = document) => {
  const isDomObject = selector instanceof NodeList || selector instanceof Node
  return isDomObject ? selector : context.querySelector(selector)
}
/**
 * Reads the `innerText` of the element resolved by `query`.
 * @param {string|Node|NodeList} selector - Selector or DOM object forwarded to `query`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {string} The element's `innerText`, or an empty string when nothing matches or the text is empty.
 */
export const getText = (selector, context = document) => {
  const target = query(selector, context)
  if (!target) {
    return ''
  }
  return target.innerText || ''
}
/**
 * Reads an attribute from the element resolved by `query`.
 * @param {string} val - Name of the attribute to read.
 * @param {string|Node|NodeList} selector - Selector or DOM object forwarded to `query`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {string} The attribute value, or an empty string when the element or attribute is missing.
 */
export const getAttribute = (val, selector, context = document) => {
  const target = query(selector, context)
  if (!target) {
    return ''
  }
  return target.getAttribute(val) || ''
}
/**
 * Collects every element matching a selector into a real array.
 * @param {string} selector - CSS selector passed to `querySelectorAll`.
 * @param {Document|Element} [context=document] - Root to search in.
 * @returns {Array} Array copy of the array-like returned by `context.querySelectorAll`.
 */
export const queryAll = (selector, context = document) => {
  const matches = context.querySelectorAll(selector)
  return Array.prototype.slice.call(matches)
}
/**
 * Picks a callable: `func` if it is a function, otherwise `defaultFunc` if that is a
 * function, otherwise a fresh function that does nothing.
 * @param {*} func - Preferred callback.
 * @param {*} [defaultFunc] - Fallback callback.
 * @returns {Function} A function that is always safe to call.
 */
export const noop = (func, defaultFunc) => {
  if (typeof func === 'function') {
    return func
  }
  if (typeof defaultFunc === 'function') {
    return defaultFunc
  }
  return () => {}
}
/**
 * Serializes an object into an `application/x-www-form-urlencoded` style string.
 * Both keys and values are URL-encoded, and only the object's own enumerable
 * properties are included (inherited ones are ignored).
 * @param {Object} data - Key/value pairs to serialize; `null`/`undefined` yield an empty string.
 * @returns {string} Pairs joined as `key=value&key=value`, or an empty string when there are none.
 */
export const encodeUrlData = (data) => {
  if (data == null) {
    return ''
  }
  return Object.entries(data)
    .map(([key, value]) => encodeURIComponent(key) + '=' + encodeURIComponent(value))
    .join('&')
}
/**
 * URL-encodes `options.data` in place unless the caller opted out.
 * Encoding happens only when `options.stringify` is not `false` and
 * `typeof options.data` is `'object'`.
 * @param {Object} options - Request options; mutated when encoding applies.
 * @returns {Object} The same `options` object.
 */
export const encodeOptionsData = (options) => {
  if (options.stringify === false || typeof options.data !== 'object') {
    return options
  }
  options.data = encodeUrlData(options.data)
  return options
}
/**
 * Sends a message through `chrome.runtime.sendMessage` and retries on failure.
 * Does nothing when `isExtension` is falsy.
 *
 * The reply is expected to be an array `[error, response, headers, xhr]`.
 * An attempt counts as failed when `error` is truthy, when `onsuccess` throws,
 * or when `onsuccess` returns anything other than `undefined`.
 *
 * @param {Object} options - Message payload; `options.data` is encoded in place by `encodeOptionsData`.
 * @param {Function} [onsuccess] - Called with `(response, headers, xhr)` when the reply carries no error.
 * @param {Function} [onerror] - Called with `(error, headers, xhr)` once no retries are left.
 * @param {number} [retry=3] - Number of additional attempts after a failed one.
 */
export const sendMessage = (options, onsuccess, onerror, retry) => {
  if (!isExtension) {
    return
  }
  retry = isNaN(retry) ? 3 : +retry
  encodeOptionsData(options)
  chrome.runtime.sendMessage(options, (reply) => {
    // Chrome invokes the callback without arguments (and sets runtime.lastError) when the
    // message cannot be delivered; destructuring that directly would throw and skip the
    // retry/onerror handling, so it is turned into a regular failure instead.
    const [replyError, response, headers, xhr] = Array.isArray(reply)
      ? reply
      : [chrome.runtime.lastError || new Error('No response from extension runtime')]
    let error = replyError
    if (!error) {
      try {
        const result = noop(onsuccess)(response, headers, xhr)
        if (result === void 0) {
          return response
        }
        // A return value other than undefined from onsuccess is treated as a failed call
        error = result
      } catch (err) {
        // onsuccess itself threw
        error = err
      }
    }
    if (retry-- > 0) {
      sendMessage(options, onsuccess, onerror, retry)
    } else {
      noop(onerror)(error, headers, xhr)
    }
  })
}
/**
 * Formats a date according to a token pattern.
 *
 * Supported tokens (each must be a separate run of letters):
 * - `y` / `yyyy` full year, `yy` last two digits of the year
 * - `M`, `d`, `H`, `h`, `m`, `s` unpadded month, day, hours, hours modulo 12, minutes, seconds
 * - `MM`, `dd`, `HH`, `hh`, `mm`, `ss` the same values zero-padded to two digits
 * - `w` weekday as one of the characters `日一二三四五六`
 * Letter runs that are not tokens are left in the output unchanged.
 *
 * @param {string} str - Pattern, e.g. `yyyy-MM-dd HH:mm:ss`.
 * @param {string|number|Date} [t] - Strings and numeric values are passed to `new Date`;
 *   `undefined` or a value that does not yield a valid date falls back to the current time.
 * @returns {string} The pattern with every token replaced.
 */
export const formatDate = (str, t) => {
  t = typeof t === 'string' || !isNaN(t) ? new Date(t) : t
  // An Invalid Date would render as "NaN"/"aN", so it also falls back to the current time
  if (t instanceof Date === false || isNaN(t.getTime())) {
    t = new Date()
  }
  const values = {
    yyyyyyyy: t.getFullYear(),
    yy: t.getFullYear(),
    MM: t.getMonth() + 1,
    dd: t.getDate(),
    HH: t.getHours(),
    hh: t.getHours() % 12,
    mm: t.getMinutes(),
    ss: t.getSeconds(),
    ww: '日一二三四五六'.split('')[t.getDay()]
  }
  const hasToken = (key) => Object.prototype.hasOwnProperty.call(values, key)
  return str.replace(/[a-z]+/ig, (token) => {
    // The table is keyed by the doubled token: "M" looks up "MM" and prints it unpadded
    const doubled = token + token
    if (hasToken(doubled)) {
      return String(values[doubled])
    }
    // A token that is itself a key ("MM", "yy", ...) prints the last two characters, zero-padded
    if (hasToken(token)) {
      return ('0' + values[token]).slice(-2)
    }
    // Not a token: keep literal text instead of mangling it
    return token
  })
}
/**
 * Inserts `newElement` into the DOM directly after `targetElement`.
 * @param {Node} newElement - Node to insert.
 * @param {Node} targetElement - Existing node that must have a parent.
 */
export const insertAfter = (newElement, targetElement) => {
  const { parentNode, nextSibling } = targetElement
  // With a null reference node (target is the last child) insertBefore appends at the end
  parentNode.insertBefore(newElement, nextSibling)
}
/**
 * Turns a link into an absolute URL using `prefix` as the base.
 * - Empty link: returns an empty string.
 * - Link starting with `http://` or `https://`: returned unchanged.
 * - Protocol-relative link (`//host/...`): gets the protocol part of `prefix`.
 * - Anything else: appended to `prefix` verbatim (no separator is inserted).
 * @param {string} prefix - Base URL, e.g. `location.origin`.
 * @param {string} link - Link to resolve.
 * @returns {string} The resolved URL, or an empty string for an empty link.
 */
export const getUrl = (prefix, link) => {
  if (!link) return ''
  // Require the full "scheme://" so relative paths that merely start with "http"
  // (e.g. "httpbin/get") are still prefixed
  if (/^https?:\/\//i.test(link)) {
    return link
  }
  if (/^\/\//.test(link)) {
    return prefix.split('//')[0] + link
  }
  return prefix + link
}
/**
 * Replaces every `<ztext>…</ztext>` placeholder in a document with rendered SVG markup.
 * The placeholder content is URI-decoded into TeX, rendered with the global
 * `MathJax.tex2svg`, tagged with a `data-tex` attribute holding the TeX source and
 * forced to display inline.
 * NOTE(review): `MathJax` is read as a global — presumably provided by the
 * `mathjax/es5/tex-svg` import in index.js; confirm.
 * @param {string} markdwonDoc - Document text containing `<ztext>` placeholders.
 * @returns {string} The document with each placeholder replaced by the node's `outerHTML`.
 */
export const tex2svg = (markdwonDoc) => {
  const renderFormula = (placeholder, encodedTex) => {
    const tex = decodeURIComponent(encodedTex)
    const node = MathJax.tex2svg(tex)
    node.setAttribute('data-tex', tex)
    node.style.display = 'inline'
    return node.outerHTML
  }
  return markdwonDoc.replace(/<ztext>(.*?)<\/ztext>/g, renderFormula)
}

// Default export: every helper of this module gathered in one object, alongside the named exports.
export default {
isBroswer,
isExtension,
getExt,
query,
getText,
getAttribute,
queryAll,
noop,
encodeUrlData,
encodeOptionsData,
sendMessage,
formatDate,
insertAfter,
getUrl,
tex2svg
}
Loading

0 comments on commit cc57ef3

Please sign in to comment.