diff --git a/package-lock.json b/package-lock.json index cfcf1f5..91a6ba0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "markdown-downloader", - "version": "1.0.0", + "version": "1.0.3", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -2299,6 +2299,12 @@ "semver": "^6.0.0" } }, + "mathjax": { + "version": "3.2.2", + "resolved": "https://registry.npmmirror.com/mathjax/-/mathjax-3.2.2.tgz", + "integrity": "sha512-Bt+SSVU8eBG27zChVewOicYs7Xsdt40qm4+UpHyX7k0/O9NliPc+x77k1/FEsPsjKPZGJvtRZM1vO+geW0OhGw==", + "dev": true + }, "md5": { "version": "2.3.0", "resolved": "https://registry.npmmirror.com/md5/-/md5-2.3.0.tgz", diff --git a/package.json b/package.json index c35602b..8044b21 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "markdown-downloader", - "version": "1.0.2", + "version": "1.0.3", "description": "markdown文章下载", "main": "dist/index.js", "scripts": { @@ -42,6 +42,7 @@ "cross-env": "^7.0.3", "html-to-md": "^0.5.3", "jszip": "^3.7.1", + "mathjax": "^3.2.2", "md5": "^2.3.0", "path-browserify": "^1.0.1", "webpack": "^5.63.0", diff --git a/src/index.js b/src/index.js index 642d2f6..4b12513 100644 --- a/src/index.js +++ b/src/index.js @@ -1,103 +1,22 @@ import md5 from 'md5' -import path from 'path-browserify' import html2markdown from 'html-to-md' import { websites, hooks } from './websites' import merge from 'webpack-merge' +import 'mathjax/es5/tex-svg' +import { + isExtension, + getExt, + query, + getText, + getAttribute, + queryAll, + noop, + sendMessage, + formatDate, + insertAfter, + getUrl +} from './utils' -const isBroswer = typeof window !== 'undefined' && window instanceof Object -const isExtension = isBroswer && window.chrome instanceof Object && window.chrome.runtime - -const getExt = (fileName) => { - return path.parse(fileName).ext.slice(1) -} - -const query = (selector, context = document) => { - if (selector instanceof NodeList || selector instanceof Node) { - return selector - } 
- return context.querySelector(selector) -} - -const getText = (selector, context = document) => { - const el = query(selector, context) || {} - return el.innerText || '' -} - -const getAttribute = (val, selector, context = document) => { - const el = query(selector, context) - return el ? el.getAttribute(val) || '' : '' -} - -const queryAll = (selector, context = document) => { - return [].slice.apply(context.querySelectorAll(selector)) -} - -const noop = (func, defaultFunc) => { - return typeof func === 'function' ? func : typeof defaultFunc === 'function' ? defaultFunc : () => {} -} - -const encodeUrlData = (data) => { - let body = '' - for (let key in data) { - body += key + '=' + encodeURIComponent(data[key]) + '&' - } - return body.slice(0, -1) -} - -const encodeOptionsData = (options) => { - if (options.stringify !== false && typeof options.data === 'object') { - options.data = encodeUrlData(options.data) - } - return options -} - -const sendMessage = (options, onsuccess, onerror, retry) => { - if (isExtension) { - retry = isNaN(retry) ? 3 : +retry - encodeOptionsData(options) - chrome.runtime.sendMessage(options, ([error, response, headers, xhr]) => { - if (!error) { - try { - const result = noop(onsuccess)(response, headers, xhr) - if (result === void 0) { - return response - } - // onsuccess返回值不为undefined, 视为调用失败 - error = result - } catch (err) { - // 执行onsuccess代码出错 - error = err - } - } - if (retry-- > 0) { - sendMessage(options, onsuccess, onerror, retry) - } else { - noop(onerror)(error, headers, xhr) - } - }) - } -} - -const formatDate = (str, t) => { - t = typeof t === 'string' || !isNaN(t) ? 
new Date(t) : t - if (t instanceof Date === false) { - t = new Date() - } - const obj = { - yyyyyyyy: t.getFullYear(), - yy: t.getFullYear(), - MM: t.getMonth()+1, - dd: t.getDate(), - HH: t.getHours(), - hh: t.getHours() % 12, - mm: t.getMinutes(), - ss: t.getSeconds(), - ww: '日一二三四五六'.split('')[t.getDay()] - }; - return str.replace(/([a-z]+)/ig, function ($1){ - return (obj[$1+$1] === 0 ? '0' : obj[$1+$1]) || ('0' + obj[$1]).slice(-2); - }); -} const setInfo = (data) => { data = Object.assign({ @@ -129,25 +48,6 @@ const getMarkdown = (markdownBody) => { // }[s1] || s)) } -const insertAfter = (newElement, targetElement) => { - const parent = targetElement.parentNode - if(parent.lastChild === targetElement){ - parent.appendChild(newElement) - }else{ - parent.insertBefore(newElement, targetElement.nextSibling) - } -} - -const getUrl = (prefix, link) => { - if (!link) return '' - if (/^(http|https)/.test(link)) { - return link - } - if (/^\/\//.test(link)) { - return prefix.split('//')[0] + link - } - return prefix + link -} const convert = async (options, customOptions) => { const context = {} const defaultOptions = { @@ -189,6 +89,9 @@ const convert = async (options, customOptions) => { return result } const markdownBody = query(selectors.body, options.context).cloneNode(true) + const fileName = (getText(selectors.title) || document.title) + const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_') + noop(hook.extract)(context, { markdownBody, fileName, realName }) queryAll(selectors.copyBtn, markdownBody).map(item => item.parentElement.removeChild(item)) queryAll('[data-id]', markdownBody).map(item => item.removeAttribute('data-id')) if (selectors.invalid) { @@ -214,9 +117,17 @@ const convert = async (options, customOptions) => { }) } const urls = [] - const fileName = (getText(selectors.title) || document.title) - const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_') const files = queryAll('img', markdownBody).map(item => { + const downloadName = 
item.getAttribute('downloadName') + const downloadUrl = item.getAttribute('downloadUrl') + if (downloadName && downloadUrl) { + item.src = './' + downloadName + options.urls !== false && urls.push(downloadUrl) + return { + name: downloadName, + downloadUrl + } + } const src = item.getAttribute(options.lazyKey) || item.src const url = src.replace(/\?$/, '') const ext = getExt(url) @@ -235,11 +146,12 @@ const convert = async (options, customOptions) => { home: getUrl(location.origin, getAttribute('href', selectors.userLink)), description: markdownBody.innerText.replace(/^([\n\s]+)/g, '').replace(/\n/g, ' ').slice(0, 50) + '...', }) - noop(hook.extract)(context) const markdwonDoc = html2markdown(info + getMarkdown(markdownBody), {}) + const copyright = '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') ' + const content = await noop(hook.formatContent)(context, { markdownBody, markdwonDoc }) files.push({ name: realName + '.md', - content: markdwonDoc + '\n\n' + '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') ' + content: (content && typeof content === 'string' ? 
content: markdwonDoc )+ '\n\n' + copyright }) files.push({ name: realName + '/urls', @@ -259,19 +171,17 @@ const extract = async (options, customOptions) => { return datas } -if (isBroswer) { - if (isExtension) { - chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => { - if (message instanceof Object) { - if (message.type === 'download') { - if (typeof websites[message.website] === 'function') { - await websites[message.website](extract) - } +if (isExtension) { + chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => { + if (message instanceof Object) { + if (message.type === 'download') { + if (typeof websites[message.website] === 'function') { + await websites[message.website](extract) } } - sendResponse('') - }) - } + } + sendResponse('') + }) } export default convert \ No newline at end of file diff --git a/src/utils.js b/src/utils.js new file mode 100644 index 0000000..3049401 --- /dev/null +++ b/src/utils.js @@ -0,0 +1,131 @@ +import path from 'path-browserify' + +export const isBroswer = typeof window !== 'undefined' && window instanceof Object +export const isExtension = isBroswer && window.chrome instanceof Object && window.chrome.runtime +export const getExt = (fileName) => { + return path.parse(fileName).ext.slice(1) +} +export const query = (selector, context = document) => { + if (selector instanceof NodeList || selector instanceof Node) { + return selector + } + return context.querySelector(selector) +} +export const getText = (selector, context = document) => { + const el = query(selector, context) || {} + return el.innerText || '' +} +export const getAttribute = (val, selector, context = document) => { + const el = query(selector, context) + return el ? el.getAttribute(val) || '' : '' +} +export const queryAll = (selector, context = document) => { + return [].slice.apply(context.querySelectorAll(selector)) +} +export const noop = (func, defaultFunc) => { + return typeof func === 'function' ? 
func : typeof defaultFunc === 'function' ? defaultFunc : () => {} +} +export const encodeUrlData = (data) => { + let body = '' + for (let key in data) { + body += key + '=' + encodeURIComponent(data[key]) + '&' + } + return body.slice(0, -1) +} +export const encodeOptionsData = (options) => { + if (options.stringify !== false && typeof options.data === 'object') { + options.data = encodeUrlData(options.data) + } + return options +} +export const sendMessage = (options, onsuccess, onerror, retry) => { + if (isExtension) { + retry = isNaN(retry) ? 3 : +retry + encodeOptionsData(options) + chrome.runtime.sendMessage(options, ([error, response, headers, xhr]) => { + if (!error) { + try { + const result = noop(onsuccess)(response, headers, xhr) + if (result === void 0) { + return response + } + // onsuccess返回值不为undefined, 视为调用失败 + error = result + } catch (err) { + // 执行onsuccess代码出错 + error = err + } + } + if (retry-- > 0) { + sendMessage(options, onsuccess, onerror, retry) + } else { + noop(onerror)(error, headers, xhr) + } + }) + } +} +export const formatDate = (str, t) => { + t = typeof t === 'string' || !isNaN(t) ? new Date(t) : t + if (t instanceof Date === false) { + t = new Date() + } + const obj = { + yyyyyyyy: t.getFullYear(), + yy: t.getFullYear(), + MM: t.getMonth()+1, + dd: t.getDate(), + HH: t.getHours(), + hh: t.getHours() % 12, + mm: t.getMinutes(), + ss: t.getSeconds(), + ww: '日一二三四五六'.split('')[t.getDay()] + }; + return str.replace(/([a-z]+)/ig, function ($1){ + return (obj[$1+$1] === 0 ? 
'0' : obj[$1+$1]) || ('0' + obj[$1]).slice(-2); + }); +} +export const insertAfter = (newElement, targetElement) => { + const parent = targetElement.parentNode + if(parent.lastChild === targetElement){ + parent.appendChild(newElement) + }else{ + parent.insertBefore(newElement, targetElement.nextSibling) + } +} +export const getUrl = (prefix, link) => { + if (!link) return '' + if (/^(http|https)/.test(link)) { + return link + } + if (/^\/\//.test(link)) { + return prefix.split('//')[0] + link + } + return prefix + link +} +export const tex2svg = (markdwonDoc) => { + return markdwonDoc.replace(/<ztext>(.*?)<\/ztext>/g, (s, s1) => { + const tex = decodeURIComponent(s1) + const svg = MathJax.tex2svg(tex) + svg.setAttribute('data-tex', tex) + svg.style.display = 'inline' + return svg.outerHTML + }) +} + +export default { + isBroswer, + isExtension, + getExt, + query, + getText, + getAttribute, + queryAll, + noop, + encodeUrlData, + encodeOptionsData, + sendMessage, + formatDate, + insertAfter, + getUrl, + tex2svg +} \ No newline at end of file diff --git a/src/websites/jianshu.js b/src/websites/jianshu.js index 0ed0227..db66e33 100644 --- a/src/websites/jianshu.js +++ b/src/websites/jianshu.js @@ -1,3 +1,4 @@ +import md5 from 'md5' export const hosts = ['www.jianshu.com', 'jianshu.com'] export const options = { @@ -7,7 +8,7 @@ export const options = { code: false, selectors: { title: 'section+h1', - body: 'header+div article', + body: 'header+div article,.post .article', copyBtn: '.VJbwyy', userName: '._3U4Smb ._1OhGeD', userLink: '._3U4Smb ._1OhGeD', @@ -16,7 +17,16 @@ export const options = { } } -export const hook = {} +export const hook = { + extract (context, { markdownBody, realName }) { + markdownBody.querySelectorAll('.math-block,.math-inline').forEach(item => { + const ext = 'svg' + const name = realName + '/' + md5(item.src) + (ext ? '.' 
+ ext : '') + item.setAttribute('downloadName', name) + item.setAttribute('downloadUrl', item.src) + }) + } +} export const config = { hosts, diff --git a/src/websites/juejin.js b/src/websites/juejin.js index f0b9411..7a70f3e 100644 --- a/src/websites/juejin.js +++ b/src/websites/juejin.js @@ -1,3 +1,4 @@ +import md5 from 'md5' export const hosts = ['juejin.im', 'juejin.cn'] export const options = { @@ -19,7 +20,18 @@ export const options = { } } -export const hook = {} +export const hook = { + extract (context, { markdownBody, realName }) { + markdownBody.querySelectorAll('img').forEach(item => { + if (typeof item.src === 'string' && /\/equation\?tex=/.test(item.src)) { + const ext = 'svg' + const name = realName + '/' + md5(item.src) + (ext ? '.' + ext : '') + item.setAttribute('downloadName', name) + item.setAttribute('downloadUrl', item.src) + } + }) + } +} export const config = { hosts, diff --git a/src/websites/zhihu.js b/src/websites/zhihu.js index c17a6ac..d13a00b 100644 --- a/src/websites/zhihu.js +++ b/src/websites/zhihu.js @@ -1,3 +1,4 @@ +import { tex2svg } from '../utils' export const hosts = ['zhuanlan.zhihu.com'] export const options = { @@ -5,18 +6,30 @@ export const options = { link: false, br: true, code: false, + lazyKey: 'data-original', selectors: { title: '.Post-Title', body: '.Post-RichText', copyBtn: '.copy-code-btn', - userName: '.AuthorInfo-name .Popover .UserLink-link', - userLink: '.AuthorInfo-name .Popover .UserLink-link', + userName: '.AuthorInfo-name .UserLink-link', + userLink: '.AuthorInfo-name .UserLink-link', invalid: 'noscript,.ZVideoLinkCard-author', unpack: 'p,figure' } } -export const hook = {} +export const hook = { + extract (context, { markdownBody }) { + markdownBody.querySelectorAll('.ztext-math').forEach(item => { + const ztext = document.createElement('ztext') + ztext.innerText = encodeURIComponent(item.getAttribute('data-tex')) + item.parentElement.replaceChild(ztext, item) + }) + }, + formatContent (context, { 
markdwonDoc }) { + return tex2svg(markdwonDoc) + } +} export const config = { hosts,