From 14cc8bf3b4d45b19cba15a388a243d908a99e1ad Mon Sep 17 00:00:00 2001 From: kscript <1258@live.cn> Date: Sat, 22 Jul 2023 12:25:28 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E4=BB=A3=E7=A0=81,=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E8=AF=B7=E6=B1=82=E9=87=8D=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 2 +- src/broswer.js | 14 ++-- src/download.js | 53 +++++++----- src/index.js | 172 ++------------------------------------ src/markdown.js | 187 ++++++++++++++++++++++++++++++++++++++++++ src/utils.js | 18 ++-- src/websites/index.js | 2 +- 7 files changed, 244 insertions(+), 204 deletions(-) create mode 100644 src/markdown.js diff --git a/package.json b/package.json index 8044b21..2790323 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "markdown-downloader", - "version": "1.0.3", + "version": "1.0.4", "description": "markdown文章下载", "main": "dist/index.js", "scripts": { diff --git a/src/broswer.js b/src/broswer.js index 3d21361..0941176 100644 --- a/src/broswer.js +++ b/src/broswer.js @@ -1,19 +1,21 @@ import convert from './' import download from './download' +import * as downloadMarkdown from './markdown' import { configs as websiteConfigs } from './websites' export { convert, download, websiteConfigs } -export const markdownDownload = (options, customOptions) => { +export const downloader = (options, customOptions) => { const {fileName, files} = convert(options, customOptions) download(fileName, files) } if (typeof window !== 'undefined') { - markdownDownload.websiteConfigs = websiteConfigs - markdownDownload.convert = convert - markdownDownload.download = download - window.markdownDownload = markdownDownload + downloader.websiteConfigs = websiteConfigs + downloader.convert = convert + downloader.download = download + downloader.downloadMarkdown = downloadMarkdown + window.downloader = downloader } -export default markdownDownload \ No newline at end of file +export default downloader \ No newline at end of file diff --git a/src/download.js b/src/download.js index 2413c74..20577ea 100644 --- a/src/download.js +++ b/src/download.js @@ -4,7 +4,8 @@ import FileSaver from 'jszip/vendor/FileSaver' const defaultOptions = { partLimit: 1e3, - requestLimit: 5 + requestLimit: 5, + retry: 3 } const options = Object.assign({}, defaultOptions) @@ -19,30 +20,40 @@ export const noop = (func, defaultFunc) => { } export const ajax = (options) => { - var xhr = new XMLHttpRequest() - options.method = options.method || 'get' - xhr.responseType = options.dataType || 'json'; - xhr.onreadystatechange = () => { - if (xhr.readyState == 4) { - try { - noop(options.success)(xhr.response, xhr) - } catch (err) { + options = Object.assign({}, defaultOptions, options) + const core = (retry = 3) => { + const xhr = new XMLHttpRequest() + options.method = options.method || 'get' + xhr.responseType = options.dataType || 'json'; + xhr.onreadystatechange = () => { + if (xhr.readyState == 4) { + try { + noop(options.success)(xhr.response, xhr) + } catch (err) { + noop(options.error)(err, xhr) + } + } + } + xhr.error = (err) => { + if (retry--) { + console.log(err) noop(options.error)(err, xhr) + } else { + setTimeout(() => { + core(retry - 1) + }, 3e3) } } + if (/post/i.test(options.method)) { + xhr.open(options.method, options.url, options.async !== false) + xhr.setRequestHeader('Content-type', /json/i.test(options.dataType) ? 'application/json' : 'application/x-www-form-urlencoded') + xhr.send(options.data) + } else { + xhr.open(options.method, options.url, options.async !== false) + xhr.send() + } } - xhr.error = (err) => { - console.log(err) - noop(options.error)(err, xhr) - } - if (/post/i.test(options.method)) { - xhr.open(options.method, options.url, options.async !== false) - xhr.setRequestHeader('Content-type', /json/i.test(options.dataType) ? 'application/json' : 'application/x-www-form-urlencoded') - xhr.send(options.data) - } else { - xhr.open(options.method, options.url, options.async !== false) - xhr.send() - } + core(options.retry) } export const fetchBlobFile = (file) =>{ diff --git a/src/index.js b/src/index.js index 4b12513..17a10cb 100644 --- a/src/index.js +++ b/src/index.js @@ -1,173 +1,13 @@ -import md5 from 'md5' -import html2markdown from 'html-to-md' -import { websites, hooks } from './websites' -import merge from 'webpack-merge' -import 'mathjax/es5/tex-svg' +import { websites } from './websites' import { isExtension, - getExt, - query, - getText, - getAttribute, - queryAll, - noop, sendMessage, - formatDate, - insertAfter, - getUrl } from './utils' +import { downloadMarkdown } from './markdown' - -const setInfo = (data) => { - data = Object.assign({ - date: formatDate('yyyy-MM-dd HH:mm:ss'), - coypright: false, - url: location.href, - description: '转载', - }, data instanceof Object ? data : {}) - return `--- - title: {{title}} - date: {{date}} - copyright: {{coypright}} - author: {{author}} - home: {{home}} - origin: {{origin}} - url: {{url}} - tag: {{tag}} - categories: {{categories}} - description: {{description}} - --- - `.replace(/\n\s+/g, '\n').replace(/\{\{(.*?)\}\}/g, (s, s1) => data[s1] === void 0 ? '' : data[s1]) -} - -const getMarkdown = (markdownBody) => { - return markdownBody.innerHTML - // .replace(/<(\/|)(pre|p|figcaption|figure)>/g, '') - // .replace(/(<|>)/g, (s, s1) => ({ - // '<': '<', '>': '>' - // }[s1] || s)) -} - -const convert = async (options, customOptions) => { - const context = {} - const defaultOptions = { - origin: 'juejin', - // 处理链接 - link: true, - // 处理换行 - br: false, - // 处理代码块 - code: false, - lazyKey: 'data-src', - selectors: { - title: '.article-title', - body: '.markdown-body', - copyBtn: '.copy-code-btn', - userName: '.username .name', - userLink: '.username', - invalid: 'style', - unpack: '' - } - } - customOptions = customOptions instanceof Object ? customOptions : {} - options = merge({}, defaultOptions, options instanceof Object ? options : {}, customOptions) - if (options.context) { - if (typeof options.context === 'string') { - const el = document.createElement('div') - el.innerHTML = options.context - options.context = el - } else { - options.context = options.context instanceof Node ? options.context : void 0 - } - } - const {origin, selectors} = options - const hook = hooks[origin] || {} - const result = await noop(hook.beforeExtract)(Object.assign(context, { - options - })) - if (result instanceof Object) { - return result - } - const markdownBody = query(selectors.body, options.context).cloneNode(true) - const fileName = (getText(selectors.title) || document.title) - const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_') - noop(hook.extract)(context, { markdownBody, fileName, realName }) - queryAll(selectors.copyBtn, markdownBody).map(item => item.parentElement.removeChild(item)) - queryAll('[data-id]', markdownBody).map(item => item.removeAttribute('data-id')) - if (selectors.invalid) { - queryAll(selectors.invalid, markdownBody).map(item => item.parentElement.removeChild(item)) - } - if (selectors.unpack) { - queryAll(selectors.unpack, markdownBody).map(item => { - const span = document.createElement('span') - span.innerHTML = item.innerHTML - insertAfter(document.createElement('br'), item) - item.parentElement.replaceChild(span, item) - }) - } - if (options.link) { - queryAll('a', markdownBody).map(item => item.href = item.title) - } - if (options.code) { - queryAll('code', markdownBody).map(item => { - const br = options.br || /copyable/.test(item.className) ? '\n' : '' - const lang = item.getAttribute('lang') || (item.className.split('-') || {})[1] || '' - const text = '```' + (lang ? ' ' + lang : '') + br + item.innerText + br + '```' + br - item.parentElement.replaceChild(document.createTextNode(text), item) - }) - } - const urls = [] - const files = queryAll('img', markdownBody).map(item => { - const downloadName = item.getAttribute('downloadName') - const downloadUrl = item.getAttribute('downloadUrl') - if (downloadName && downloadUrl) { - item.src = './' + downloadName - options.urls !== false && urls.push(downloadUrl) - return { - name: downloadName, - downloadUrl - } - } - const src = item.getAttribute(options.lazyKey) || item.src - const url = src.replace(/\?$/, '') - const ext = getExt(url) - const name = realName + '/' + md5(url) + (ext ? '.' + ext : '') - item.src = './' + name - options.urls !== false && urls.push(url) - return { - name, - downloadUrl: url - } - }) - const info = setInfo({ - title: fileName, - origin: origin, - author: getText(selectors.userName), - home: getUrl(location.origin, getAttribute('href', selectors.userLink)), - description: markdownBody.innerText.replace(/^([\n\s]+)/g, '').replace(/\n/g, ' ').slice(0, 50) + '...', - }) - const markdwonDoc = html2markdown(info + getMarkdown(markdownBody), {}) - const copyright = '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') ' - const content = await noop(hook.formatContent)(context, { markdownBody, markdwonDoc }) - files.push({ - name: realName + '.md', - content: (content && typeof content === 'string' ? content: markdwonDoc )+ '\n\n' + copyright - }) - files.push({ - name: realName + '/urls', - content: urls.join('\n') - }) - noop(hook.afterExtract)(Object.assign(context, { files })) - return { - type: 'download', - fileName, - files - } -} - -const extract = async (options, customOptions) => { - const datas = await convert(options, customOptions) - sendMessage(datas) +const extract = async (options, customOptions, hook) => { + const datas = await downloadMarkdown(options, customOptions, hook) + datas && sendMessage(datas) return datas } @@ -184,4 +24,4 @@ if (isExtension) { }) } -export default convert \ No newline at end of file +export default downloadMarkdown \ No newline at end of file diff --git a/src/markdown.js b/src/markdown.js new file mode 100644 index 0000000..d89b16c --- /dev/null +++ b/src/markdown.js @@ -0,0 +1,187 @@ +import merge from 'webpack-merge' +import md5 from 'md5' +import html2markdown from 'html-to-md' +import 'mathjax/es5/tex-svg' +import { query, getExt, getText, getUrl, queryAll, insertAfter, getAttribute, formatDate, exec } from './utils' + +const setInfo = (data) => { + data = Object.assign({ + date: formatDate('yyyy-MM-dd HH:mm:ss'), + coypright: false, + url: location.href, + description: '转载', + }, data instanceof Object ? data : {}) + return `--- + title: {{title}} + date: {{date}} + copyright: {{coypright}} + author: {{author}} + home: {{home}} + origin: {{origin}} + url: {{url}} + tag: {{tag}} + categories: {{categories}} + description: {{description}} + --- + `.replace(/\n\s+/g, '\n').replace(/\{\{(.*?)\}\}/g, (s, s1) => data[s1] === void 0 ? '' : data[s1]) +} +const formatCopyRight = (fileName) => { + return `> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [${fileName}](${location.href}) ` +} +const getMarkdown = (markdownBody) => { + return markdownBody.innerHTML + // .replace(/<(\/|)(pre|p|figcaption|figure)>/g, '') + // .replace(/(<|>)/g, (s, s1) => ({ + // '<': '<', '>': '>' + // }[s1] || s)) +} + +export const tex2svg = (markdwonDoc) => { + return markdwonDoc.replace(/(.*?)<\/ztext>/g, (s, s1) => { + const tex = decodeURIComponent(s1) + const svg = MathJax.tex2svg(tex) + svg.setAttribute('data-tex', tex) + svg.style.display = 'inline' + return svg.outerHTML + }) +} + +const formatParams = (options, customOptions, hook) => { + const defaultOptions = { + origin: 'juejin', + // 处理链接 + link: true, + // 处理换行 + br: false, + // 处理代码块 + code: false, + lazyKey: 'data-src', + selectors: { + title: '.article-title', + body: '.markdown-body', + copyBtn: '.copy-code-btn', + userName: '.username .name', + userLink: '.username', + invalid: 'style', + unpack: '' + } + } + customOptions = customOptions instanceof Object ? customOptions : {} + options = merge({}, defaultOptions, options instanceof Object ? options : {}, customOptions) + hook = hook instanceof Object ? hook : {} + return { + options, customOptions, hook + } +} +const getContainer = (container) => { + if (container) { + if (typeof container === 'string') { + const el = document.createElement('div') + el.innerHTML = container + return el + } else { + return container instanceof Node ? container : document + } + } + return document +} + +export const formatMarkdownBody = (container, selectors, options, exec) => { + const markdownBody = query(selectors.body, container).cloneNode(true) + queryAll(selectors.copyBtn, markdownBody).map(item => item.parentElement.removeChild(item)) + queryAll('[data-id]', markdownBody).map(item => item.removeAttribute('data-id')) + if (selectors.invalid) { + queryAll(selectors.invalid, markdownBody).map(item => item.parentElement.removeChild(item)) + } + if (selectors.unpack) { + queryAll(selectors.unpack, markdownBody).map(item => { + const span = document.createElement('span') + span.innerHTML = item.innerHTML + insertAfter(document.createElement('br'), item) + item.parentElement.replaceChild(span, item) + }) + } + if (options.link) { + queryAll('a', markdownBody).map(item => item.href = item.title) + } + if (options.code) { + queryAll('code', markdownBody).map(item => { + const br = options.br || /copyable/.test(item.className) ? '\n' : '' + const lang = item.getAttribute('lang') || (item.className.split('-') || {})[1] || '' + const text = '```' + (lang ? ' ' + lang : '') + br + item.innerText + br + '```' + br + item.parentElement.replaceChild(document.createTextNode(text), item) + }) + } + return markdownBody +} + +const extract = async (markdownBody, selectors, options, exec) => { + const { origin } = options + const fileName = getText(selectors.title) || document.title + const realName = fileName.replace(/[\\\/\?<>:'\*\|]/g, '_') + const files = queryAll('img', markdownBody).map(item => { + const downloadName = item.getAttribute('downloadName') + const downloadUrl = item.getAttribute('downloadUrl') + if (downloadName && downloadUrl) { + item.src = './' + downloadName + return { + name: downloadName, + downloadUrl + } + } + const src = item.getAttribute(options.lazyKey) || item.src + const url = src.replace(/\?$/, '') + const ext = getExt(url) + const name = realName + '/' + md5(url) + (ext ? '.' + ext : '') + item.src = './' + name + return { + name, + downloadUrl: url + } + }) + const info = setInfo({ + title: fileName, + origin: origin, + author: getText(selectors.userName), + home: getUrl(location.origin, getAttribute('href', selectors.userLink)), + description: markdownBody.innerText.replace(/^([\n\s]+)/g, '').replace(/\n/g, ' ').slice(0, 50) + '...', + }) + const markdwonDoc = html2markdown(info + getMarkdown(markdownBody), {}) + const copyright = formatCopyRight(fileName) + const content = await exec('formatContent', { markdownBody, markdwonDoc }) + files.push({ + name: realName + '.md', + content: (content && typeof content === 'string' ? content : markdwonDoc) + '\n\n' + copyright + }) + return { + fileName, + files + } +} + +export const downloadMarkdown = async (...rest) => { + const context = {} + const { options, hook } = formatParams(...rest) + const { selectors, el = document } = options + const container = getContainer(el) + const state = { container, options } + const verify = async (hookName, data) => { + return await exec(hook[hookName], context, Object.assign(state, data instanceof Object ? data : {})) instanceof Object + } + + if (await verify('beforeExtract')) return exec() + + const markdownBody = formatMarkdownBody(container, selectors, options, exec) + if (await verify('extract', { markdownBody })) return exec() + + const { fileName, files } = await extract(markdownBody, selectors, options, exec) + if (await verify('afterExtract', { fileName, files })) return exec() + + return { + type: 'download', + fileName, + files + } +} + +export default downloadMarkdown \ No newline at end of file diff --git a/src/utils.js b/src/utils.js index 3049401..890e4aa 100644 --- a/src/utils.js +++ b/src/utils.js @@ -102,14 +102,14 @@ export const getUrl = (prefix, link) => { } return prefix + link } -export const tex2svg = (markdwonDoc) => { - return markdwonDoc.replace(/(.*?)<\/ztext>/g, (s, s1) => { - const tex = decodeURIComponent(s1) - const svg = MathJax.tex2svg(tex) - svg.setAttribute('data-tex', tex) - svg.style.display = 'inline' - return svg.outerHTML - }) +export const exec = async (...rest) => { + if (!rest.length) return exec.returnValue + exec.returnValue = false + try { + exec.returnValue = typeof rest[0] === 'function' && await rest[0](...rest.slice(1)) + } catch (err) { + console.warn(err) + } } export default { @@ -127,5 +127,5 @@ export default { formatDate, insertAfter, getUrl, - tex2svg + exec } \ No newline at end of file diff --git a/src/websites/index.js b/src/websites/index.js index 2456a1c..73f4f88 100644 --- a/src/websites/index.js +++ b/src/websites/index.js @@ -14,8 +14,8 @@ files.keys().forEach(key => { ].includes(website)) { const config = files(key) const { hook, options, customOptions = {} } = config - websites[website] = (extract) => extract(options, customOptions) hooks[website] = hook instanceof Object ? hook : {} + websites[website] = (extract) => extract(options, customOptions, hooks[website]) if (assigns[website]) { assigns[website] = Object.assign({ website