From 2d41ff767269863cc77f55cacede9d18ea2d1c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B3=A1=E6=B3=A1=E6=9C=BA=E4=B8=8D=E5=86=92=E6=B3=A1?= =?UTF-8?q?=E4=BA=86?= Date: Wed, 24 Feb 2021 10:12:02 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=8F=91=E5=B8=83?= =?UTF-8?q?=EF=BC=8C=E5=87=8F=E5=B0=91=E5=8F=91=E5=B8=83=E6=97=B6=E9=97=B4?= =?UTF-8?q?=EF=BC=9B=E4=BF=AE=E5=A4=8Ddocker=20image=E7=94=9F=E6=88=90?= =?UTF-8?q?=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 9 ++- backend/package.json | 2 +- config.js | 11 +++- docker-compose.yaml | 7 +- docker_init.sh | 2 +- exec.js | 4 +- package.json | 2 +- server.js | 8 ++- spiders/base.js | 50 +++++++------- spiders/cnblogs.js | 21 +++++- spiders/config.js | 95 +++++++++++++++------------ spiders/devtoutiao.js | 8 ++- spiders/juejin.js | 19 +++--- spiders/oschina.js | 10 ++- spiders/toutiao.js | 3 + src/pages/ArticleList/ArticleList.tsx | 12 ++-- 16 files changed, 161 insertions(+), 102 deletions(-) diff --git a/Dockerfile b/Dockerfile index ac48f14..850fd95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,19 @@ -FROM jelastic/nodejs:8.17.0-npm AS frontend +FROM jelastic/nodejs:15.9.0-npm AS frontend WORKDIR /app ADD . /app RUN npm install RUN npm run build -FROM jelastic/nodejs:8.17.0-npm +FROM jelastic/nodejs:15.9.0-npm RUN yum install -y nginx && yum clean all COPY --from=frontend /app/dist /frontend WORKDIR /app ADD . /app RUN cp ./backend/package.json . && \ - cp /app/nginx/artipub.conf /etc/nginx/conf.d + cp /app/nginx/artipub.conf /etc/nginx/conf.d/artipub.conf RUN npm install + +EXPOSE 3000 8000 CMD /app/docker_init.sh + diff --git a/backend/package.json b/backend/package.json index 4c6d9cc..9fb5727 100644 --- a/backend/package.json +++ b/backend/package.json @@ -20,7 +20,7 @@ "log4js": "^5.1.0", "mongoose": "^5.6.12", "morgan": "^1.9.1", - "puppeteer-chromium-resolver": "^2.0.1", + "puppeteer-chromium-resolver": "^5.2.0", "cheerio": "^1.0.0-rc.3", "request": "^2.88.0", "request-promise-native": "^1.0.7", diff --git a/config.js b/config.js index 6ce4019..bc43415 100644 --- a/config.js +++ b/config.js @@ -1,8 +1,15 @@ +//docker --link 连接独立部署的mongo容器时使用 +const mongodbContainerAddr = Object.entries(process.env).find((entry) => entry[0].endsWith('_TCP_ADDR')); +const addr = mongodbContainerAddr ? mongodbContainerAddr[1] : '127.0.0.1'; + +const mongodbContainerPort = Object.entries(process.env).find((entry) => entry[0].endsWith('_TCP_PORT')); +const port = mongodbContainerPort ? mongodbContainerPort[1] : '27017'; + module.exports = { HOST: '0.0.0.0', PORT: 3000, - MONGO_HOST: process.env.MONGO_HOST ? process.env.MONGO_HOST : 'localhost', - MONGO_PORT: process.env.MONGO_PORT ? process.env.MONGO_PORT : '27017', + MONGO_HOST: process.env.MONGO_HOST ? process.env.MONGO_HOST : addr, + MONGO_PORT: process.env.MONGO_PORT ? process.env.MONGO_PORT : port, MONGO_DB: process.env.MONGO_DB ? process.env.MONGO_DB : 'artipub', MONGO_USERNAME: process.env.MONGO_USERNAME ? process.env.MONGO_USERNAME : '', MONGO_PASSWORD: process.env.MONGO_PASSWORD ? process.env.MONGO_PASSWORD : '', diff --git a/docker-compose.yaml b/docker-compose.yaml index 2726cc5..9ff6d0f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,7 @@ version: "3.3" services: app: - image: "tikazyq/artipub:latest" + image: "artipub:1.0" environment: MONGO_HOST: "mongo" # MONGO_USERNAME: root @@ -15,8 +15,7 @@ services: mongo: image: mongo:latest restart: always - environment: - # MONGO_INITDB_ROOT_USERNAME: root - # MONGO_INITDB_ROOT_PASSWORD: example + volumes: + - "E:\\mongodb:/data/db" ports: - "27017:27017" diff --git a/docker_init.sh b/docker_init.sh index 78238f8..8863a3c 100755 --- a/docker_init.sh +++ b/docker_init.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash # replace original url to new one if [ "${ARTIPUB_API_ADDRESS}" = "" ]; diff --git a/exec.js b/exec.js index 4fa4fb5..9823a31 100644 --- a/exec.js +++ b/exec.js @@ -11,9 +11,9 @@ const StatsFetcher = require('./lib/StatsFetcher') // mongodb连接 mongoose.Promise = global.Promise if (config.MONGO_USERNAME) { - mongoose.connect(`mongodb://${config.MONGO_USERNAME}:${config.MONGO_PASSWORD}@${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}?authSource=${config.MONGO_AUTH_DB}`, { useNewUrlParser: true }) + mongoose.connect(`mongodb://${config.MONGO_USERNAME}:${config.MONGO_PASSWORD}@${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}?authSource=${config.MONGO_AUTH_DB}`, { useNewUrlParser: true , useUnifiedTopology: true}) } else { - mongoose.connect(`mongodb://${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}`, { useNewUrlParser: true }) + mongoose.connect(`mongodb://${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}`, { useNewUrlParser: true, useUnifiedTopology: true }) } class Runner { diff --git a/package.json b/package.json index 12425ef..c8e5eec 100644 --- a/package.json +++ b/package.json @@ -103,7 +103,6 @@ "eslint": "^5.16.0", "express": "^4.17.1", "gh-pages": "^2.0.1", - "husky": "^3.0.0", "import-sort-cli": "^6.0.0", "import-sort-parser-babylon": "^6.0.0", "import-sort-parser-typescript": "^6.0.0", @@ -118,6 +117,7 @@ "serverless-http": "^2.0.2", "slash2": "^2.0.0", "stylelint": "^10.1.0", + "typescript": "^4.1.5", "umi-plugin-ga": "^1.1.3", "umi-plugin-pro": "^1.0.2", "umi-types": "^0.3.8", diff --git a/server.js b/server.js index 657f807..ed55320 100644 --- a/server.js +++ b/server.js @@ -19,9 +19,13 @@ console.log(process.env) mongoose.Promise = global.Promise if (config.MONGO_USERNAME) { const mongoUrl = `mongodb://${config.MONGO_USERNAME}:${config.MONGO_PASSWORD}@${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}?authSource=${config.MONGO_AUTH_DB}` - mongoose.connect(`mongodb://${config.MONGO_USERNAME}:${config.MONGO_PASSWORD}@${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}?authSource=${config.MONGO_AUTH_DB}`, { useNewUrlParser: true }) + mongoose.connect(mongoUrl, { useNewUrlParser: true, useUnifiedTopology: true } + ); } else { - mongoose.connect(`mongodb://${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}`, { useNewUrlParser: true }) + mongoose.connect( + `mongodb://${config.MONGO_HOST}:${config.MONGO_PORT}/${config.MONGO_DB}`, + { useNewUrlParser: true, useUnifiedTopology: true } + ); } // bodyParser中间件 diff --git a/spiders/base.js b/spiders/base.js index 035976a..1da9cd6 100644 --- a/spiders/base.js +++ b/spiders/base.js @@ -59,10 +59,9 @@ class BaseSpider { // 打开开发者工具, 当此值为true时, headless总为false devtools: false, // 关闭headless模式, 不会打开浏览器 - headless: enableChromeDebug !== 'Y', - args: [ - '--no-sandbox', - ], + headless: enableChromeDebug !== "Y", + args: ["--no-sandbox", '--start-maximized'], + defaultViewport: null }); // 页面 @@ -76,20 +75,12 @@ class BaseSpider { }; // 配置 - this.config = config[this.platform.name]; - if (!config) { + const platformConfig = config[this.platform.name]; + if (!platformConfig) { throw new Error(`config (platform: ${this.platform.name}) cannot be found`); } - // URL信息 - this.urls = this.config.urls; - - // 登陆选择器 - this.loginSel = this.config.loginSel; - - // 编辑器选择器 - this.editorSel = this.config.editorSel; - + Object.assign(this, platformConfig); // 脚注内容 this.footerContent = { @@ -212,8 +203,13 @@ class BaseSpider { */ async goToEditor() { logger.info(`navigating to ${this.urls.editor}`); - await this.page.goto(this.urls.editor); - await this.page.waitFor(5000); + await Promise.all([ + this.page.goto(this.urls.editor), + this.page.waitForNavigation({ + waitUntil: ['load', 'domcontentloaded', 'networkidle2'] + }) + ]); + await this.afterGoToEditor(); } @@ -243,7 +239,7 @@ class BaseSpider { const el = document.querySelector(editorSel.content); el.focus(); try { - HTMLPreElement.prototype.select = function() { + HTMLPreElement.prototype.select = function () { let range = document.createRange(); range.selectNodeContents(this); @@ -277,7 +273,6 @@ class BaseSpider { logger.info(`input editor title`); // 输入标题 await this.page.evaluate(this.inputTitle, this.article, this.editorSel, this.task); - await this.page.waitFor(3000); // 输入内容 logger.info(`input editor content`); @@ -288,8 +283,6 @@ class BaseSpider { await this.page.evaluate(this.inputFooter, this.article, this.editorSel); await this.page.waitFor(3000); - await this.page.waitFor(10000); - // 后续处理 await this.afterInputEditor(); } @@ -309,7 +302,14 @@ class BaseSpider { logger.info(`publishing article`); // 发布文章 const elPub = await this.page.$(this.editorSel.publish); - await elPub.click(); + + //发布后地址会变更用waitForNavigation,不会变更用固定时间,尽量减少等待时间 + await Promise.all([ + this.page.$eval(this.editorSel.publish, submit => submit.click()), + this.publishNavigationChange + ? this.page.waitForNavigation() + : this.page.waitForTimeout(1500) + ]); // 后续处理 await this.afterPublish(); @@ -434,7 +434,7 @@ class BaseSpider { console.log(url); let text = res.data; if (this.platform.name === constants.platform.TOUTIAO) { - this.platform.loggedIn = text.includes('userName'); + this.platform.loggedIn = !text.includes('login-button'); } else if (this.platform.name === constants.platform.CSDN) { text = text.message this.platform.loggedIn = text.includes('成功'); @@ -459,6 +459,10 @@ class BaseSpider { } console.log(this.platform.loggedIn); await this.platform.save(); + }) + .catch(error => { + console.error(`${url} 登录态校验异常`); + console.error(error); }); } diff --git a/spiders/cnblogs.js b/spiders/cnblogs.js index 57b41ec..ac59065 100644 --- a/spiders/cnblogs.js +++ b/spiders/cnblogs.js @@ -13,8 +13,25 @@ class CnblogsSpider extends BaseSpider { // iframeWindow.document.execCommand('insertHTML', false, content) // } - async inputFooter(article, editorSel) { - // do nothing + async afterGoToEditor() { + const isMarkdownEditor = await this.page.evaluate(() => { + return document.querySelector('#editor-switcher').innerText.includes('markdown'); + }); + + //切换到markdown编辑器 + if (!isMarkdownEditor) { + await this.page.click('#editor-switcher'); + await this.page.click('#dropdown-menu > button:nth-child(2)'); + } + + //推荐到首页候选区,需要满足字数 + // await this.page.click('#site-publish-candidate'); + + } + + async afterInputEditor() { + //点击预览,触发编辑器事件,不然保存时取不到文本域的值 + await this.page.click('.tab-bar li:nth-child(2)'); } async afterPublish() { diff --git a/spiders/config.js b/spiders/config.js index da14776..b67a956 100644 --- a/spiders/config.js +++ b/spiders/config.js @@ -10,10 +10,11 @@ module.exports = { submit: '.btn:nth-child(3)' }, editorSel: { - title: '.title-input', - content: '.CodeMirror', - publish: '.publish-btn' - } + title: ".title-input", + content: ".CodeMirror", + publish: ".publish-btn" + }, + publishNavigationChange: true }, segmentfault: { @@ -27,10 +28,11 @@ module.exports = { submit: 'button[type="submit"]' }, editorSel: { - title: '#title', - content: '.CodeMirror', - publish: '#sureSubmitBtn' - } + title: "input[name=title]", + content: ".CodeMirror", + publish: "#sureSubmitBtn" + }, + publishNavigationChange: true }, jianshu: { @@ -47,7 +49,8 @@ module.exports = { title: 'input:not([name="name"])', content: '#arthur-editor', publish: 'a[data-action="publicize"]' - } + }, + publishNavigationChange: false }, csdn: { @@ -60,10 +63,11 @@ module.exports = { password: '' }, editorSel: { - title: '.article-bar__title--input', - content: '.editor__inner', - publish: '.btn-b-red' - } + title: ".article-bar__title--input", + content: ".editor__inner", + publish: ".btn-b-red" + }, + publishNavigationChange: false }, devtoutiao: { @@ -79,7 +83,8 @@ module.exports = { title: '#post_title', content: '.CodeMirror', publish: 'input[type=submit]' - } + }, + publishNavigationChange: true }, '51cto': { @@ -95,7 +100,8 @@ module.exports = { title: '#title', content: '.CodeMirror', publish: '#submit' - } + }, + publishNavigationChange: true }, zhihu: { @@ -108,10 +114,11 @@ module.exports = { password: '' }, editorSel: { - title: '.WriteIndex-titleInput > .Input', - content: '.public-DraftEditor-content', - publish: '.PublishPanel-stepTwoButton' - } + title: ".WriteIndex-titleInput > .Input", + content: ".public-DraftEditor-content", + publish: ".PublishPanel-stepTwoButton" + }, + publishNavigationChange: true }, oschina: { @@ -125,9 +132,10 @@ module.exports = { }, editorSel: { title: 'input[name="title"]', - content: '.cke_editable', - publish: '.submit' - } + content: ".cke_editable", + publish: ".submit" + }, + publishNavigationChange: true }, toutiao: { @@ -143,7 +151,8 @@ module.exports = { title: ".editor-title textarea", content: ".ProseMirror", publish: ".publish-btn-last" - } + }, + publishNavigationChange: true }, cnblogs: { @@ -156,10 +165,11 @@ module.exports = { password: '' }, editorSel: { - title: '#post-title', - content: '#md-editor', - publish: 'button[data-el-locator="publishBtn"]' - } + title: "#post-title", + content: "#md-editor", + publish: 'button[cnbellocator="publishBtn"]' + }, + publishNavigationChange: true }, baijiahao: { @@ -176,8 +186,8 @@ module.exports = { title: ".input-container .ant-input", content: "ueditor", publish: '.op-publish' - } - + }, + publishNavigationChange: true }, v2ex: { @@ -190,10 +200,11 @@ module.exports = { password: '' }, editorSel: { - title: '#topic_title', - content: '#editor', - publish: '.super.normal.button' - } + title: "#topic_title", + content: "#editor", + publish: ".super.normal.button" + }, + publishNavigationChange: true }, wechat: { @@ -207,10 +218,11 @@ module.exports = { password: '' }, editorSel: { - title: '', - content: '', - publish: '' - } + title: "", + content: "", + publish: "" + }, + publishNavigationChange: true }, aliyun: { urls: { @@ -222,9 +234,10 @@ module.exports = { password: '' }, editorSel: { - title: '#title', - content: '.textarea', - publish: '.next-btn-primary' - } + title: "#title", + content: ".textarea", + publish: ".next-btn-primary" + }, + publishNavigationChange: true } } diff --git a/spiders/devtoutiao.js b/spiders/devtoutiao.js index 8307a85..9e4d12f 100644 --- a/spiders/devtoutiao.js +++ b/spiders/devtoutiao.js @@ -12,7 +12,7 @@ class DevTouTiaoSpider extends BaseSpider { } async afterInputEditor() { - + } async inputFooter(article, editorSel) { @@ -26,7 +26,11 @@ class DevTouTiaoSpider extends BaseSpider { async afterPublish() { await this.page.waitForSelector('.user-nav-tabs'); - await this.page.click('.user-nav-tabs li:nth-child(2) a'); + await Promise.all([ + this.page.click('.user-nav-tabs li:nth-child(2) a'), + this.page.waitForNavigation() + ]); + const articleLink = await this.page.$('.posts .post:nth-child(1) .title a'); const url = await (await articleLink.getProperty('href')).jsonValue(); diff --git a/spiders/juejin.js b/spiders/juejin.js index 15231f0..2b75b14 100644 --- a/spiders/juejin.js +++ b/spiders/juejin.js @@ -13,16 +13,14 @@ class JuejinSpider extends BaseSpider { async inputFooter(article, editorSel) { // do nothing } - async afterGoToEditor() { - await this.page.goto(this.urls.editor) - await this.page.waitFor(5000) - } + async afterInputEditor() { // 点击发布文章 - const elPubBtn = await this.page.$('.publish-popup') - await elPubBtn.click() - await this.page.waitFor(5000) + await this.page.click('.publish-popup'); + await this.page.waitForSelector('.publish-popup .panel', { + visible: true + }); // 选择类别 await this.page.evaluate((task) => { @@ -32,7 +30,6 @@ class JuejinSpider extends BaseSpider { } }) }, this.task) - await this.page.waitFor(5000) // 选择标签 const elTagButton = await this.page.$('.add-btn-item') @@ -40,11 +37,12 @@ class JuejinSpider extends BaseSpider { const elTagInput = await this.page.$('.tag-input > input') console.log(this.task.tag) await elTagInput.type(this.task.tag) - await this.page.waitFor(5000) + await this.page.waitForSelector('.suggested-tag-list > .tag:nth-child(1)'); await this.page.evaluate(() => { document.querySelector('.suggested-tag-list > .tag:nth-child(1)').click() }) - await this.page.waitFor(5000) + //要等会才能点按钮, 选择完标签后,发布按钮会变成disabled,然后又马上变回可以点击 + await this.page.waitFor(1000) } async afterPublish() { @@ -53,6 +51,7 @@ class JuejinSpider extends BaseSpider { return 'https://juejin.cn' + el.getAttribute('href') }) this.task.updateTs = new Date() + this.task.error = null; this.task.status = constants.status.FINISHED await this.task.save() } diff --git a/spiders/oschina.js b/spiders/oschina.js index 8ecce0c..e392be0 100644 --- a/spiders/oschina.js +++ b/spiders/oschina.js @@ -19,8 +19,14 @@ class OschinaSpider extends BaseSpider { if (!url) throw new Error('editor url cannot be empty') - await this.page.goto(url) - await this.page.waitFor(5000) + await Promise.all([ + this.page.goto(url), + this.page.waitForNavigation() + ]); + + //切换到HTML编辑器 + await this.page.click('#editorTabList a'); + await this.page.waitForSelector('.cke_wysiwyg_frame'); } async inputContent(article, editorSel) { diff --git a/spiders/toutiao.js b/spiders/toutiao.js index 3ae1cc2..6584d38 100644 --- a/spiders/toutiao.js +++ b/spiders/toutiao.js @@ -11,6 +11,9 @@ class ToutiaoSpider extends BaseSpider { } async afterGoToEditor() { + await this.page.waitForSelector(this.editorSel.title); + + //关闭模态提醒框 const modalTip = await this.page.evaluate(() => { let title = document.querySelector('.byte-modal-title')?.innerText; let text = document.querySelector('.byte-modal-content')?.innerText; diff --git a/src/pages/ArticleList/ArticleList.tsx b/src/pages/ArticleList/ArticleList.tsx index 367c597..1ade3a3 100644 --- a/src/pages/ArticleList/ArticleList.tsx +++ b/src/pages/ArticleList/ArticleList.tsx @@ -450,7 +450,7 @@ const ArticleList: React.FC = props => { - 标题字符数区间[5,30]; + 标题字符数区间[5,30]; 正文要有图片; 正文不能包含非图片的外链 @@ -480,9 +480,9 @@ const ArticleList: React.FC = props => { - 只支持登陆验证; - 需先实名认证; - 每天最多发布5篇; + 只支持登陆验证; + 需先实名认证; + 每天最多发布5篇; 且文章正文有符合尺寸的图片 } @@ -603,7 +603,7 @@ const ArticleList: React.FC = props => { /> - + ); }, @@ -883,7 +883,7 @@ const ArticleList: React.FC = props => { - +
record._id}/> ); From 882bf2bab7d508010286f9eea631d7bfcbec5459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B3=A1=E6=B3=A1=E6=9C=BA=E4=B8=8D=E5=86=92=E6=B3=A1?= =?UTF-8?q?=E4=BA=86?= Date: Wed, 24 Feb 2021 10:32:01 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=80=9D=E5=90=A6?= =?UTF-8?q?=E5=8F=91=E5=B8=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spiders/baijiahao.js | 2 +- spiders/config.js | 2 +- spiders/segmentfault.js | 110 ++++++++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 45 deletions(-) diff --git a/spiders/baijiahao.js b/spiders/baijiahao.js index ae640de..e612cf5 100644 --- a/spiders/baijiahao.js +++ b/spiders/baijiahao.js @@ -77,7 +77,7 @@ class BaiJiaHaoSpider extends BaseSpider { if (codeClose) { const boundingBox = await codeClose.boundingBox(); - //移动鼠标去关闭验证弹框 + //移动鼠标去关闭验证弹框, 估计关闭事件绑定在伪元素上,用了坐标判断,元素click方法没法触发 await this.page.mouse.move(boundingBox.x, boundingBox.y, { steps: 20 }); diff --git a/spiders/config.js b/spiders/config.js index b67a956..730e3c2 100644 --- a/spiders/config.js +++ b/spiders/config.js @@ -158,7 +158,7 @@ module.exports = { cnblogs: { urls: { login: '', - editor: 'https://i.cnblogs.com/articles/edit' + editor: 'https://i.cnblogs.com/posts/edit' }, loginSel: { username: '', diff --git a/spiders/segmentfault.js b/spiders/segmentfault.js index 14e62c9..d4602f0 100644 --- a/spiders/segmentfault.js +++ b/spiders/segmentfault.js @@ -3,10 +3,21 @@ const constants = require('../constants') class SegmentfaultSpider extends BaseSpider { async inputContent(article, editorSel) { - const footerContent = `
本篇文章由一文多发平台ArtiPub自动发布` - const content = article.content + footerContent - const el = document.querySelector('.CodeMirror') - el.CodeMirror.setValue(content) + //关掉提示 “你的专栏文章正在审核中,请耐心等待" + let warnWindow = document.querySelector(".btn-secondary"); + if(warnWindow) { + warnWindow.click(); + } + // const footerContent = `
本篇文章由一文多发平台ArtiPub自动发布`; + const footerContent = ""; + const content = article.content + footerContent; + console.log(content); + const el = document.querySelector(editorSel.content); + el.CodeMirror.setValue(content); + //输入tab,触发markdown 更新 + el.CodeMirror.execCommand("goDocEnd"); + el.CodeMirror.execCommand("insertTab"); + } async inputFooter(article, editorSel) { @@ -15,70 +26,83 @@ class SegmentfaultSpider extends BaseSpider { async afterInputEditor() { // 点击添加标签 - await this.page.click('#add-tag-btn') - await this.page.waitFor(3000) + await this.page.click(".tag-wrap"); + await this.page.waitFor(3000); // 输入并选择标签 - const tags = this.task.tag.split(',') - const elTagInput = await this.page.$('#searchTag') + const tags = this.task.tag.split(","); + const elTagInput = await this.page.$("#searchTag"); for (const tag of tags) { // 清除已有内容 await this.page.evaluate(() => { - const el = document.querySelector('#searchTag') - el.select() - document.execCommand('delete', false) - }) - await this.page.waitFor(1000) + const el = document.querySelector("#searchTag"); + el.select(); + document.execCommand("delete", false); + }); + await this.page.waitFor(1000); // 输入标签 - await elTagInput.type(tag) - await this.page.waitFor(3000) + await elTagInput.type(tag); + await this.page.waitFor(3000); await this.page.evaluate(() => { - const el = document.querySelector('#tagSearchResult > a:nth-child(1)') + const el = document.querySelector("#tagSearchResult > a:nth-child(1)"); if (el) { - el.click() + el.click(); } - }) - await this.page.waitFor(3000) + }); + await this.page.waitFor(3000); } - // 点击发布文章 - await this.page.click('#submitDiv > button') - await this.page.waitFor(1000) + + } + + async publish() { + // 发布文章 + //触发"发布文章"按钮的下拉列表 + await this.page.click(".show ,.dropdown"); + await this.page.waitFor(1000); + // 点击"确认发布" + await this.page.click("#sureSubmitBtn"); + await this.page.waitFor(10000); + + // 后续处理 + await this.afterPublish(); } async afterPublish() { - this.task.url = this.page.url() - this.task.updateTs = new Date() - this.task.status = constants.status.FINISHED - if (this.task.url.includes('https://segmentfault.com/a/')) { - await this.task.save() + this.task.url = this.page.url(); + this.task.updateTs = new Date(); + this.task.status = constants.status.FINISHED; + if (this.task.url.includes("https://segmentfault.com/a/")) { + await this.task.save(); } } async fetchStats() { - if (!this.task.url) return - await this.page.goto(this.task.url, { timeout: 60000 }) - await this.page.waitFor(5000) + if (!this.task.url) return; + await this.page.goto(this.task.url, { timeout: 60000 }); + await this.page.waitFor(5000); const stats = await this.page.evaluate(() => { - const text = document.querySelector('body').innerText - const mRead = text.match(/(\d+) 次阅读/) - const mComment = text.match(/(\d+) 条评论/) - const readNum = mRead ? Number(mRead[1]) : 0 - const likeNum = Number(document.querySelector('#side-widget-votes-num').innerText) - const commentNum = mComment ? Number(mComment[1]) : 0 + const text = document.querySelector("body").innerText; + const mRead = text.match(/(\d+) 次阅读/); + const mComment = text.match(/(\d+) 条评论/); + const readNum = mRead ? Number(mRead[1]) : 0; + const likeNum = Number( + document.querySelector("#side-widget-votes-num").innerText + ); + const commentNum = mComment ? Number(mComment[1]) : 0; return { readNum, likeNum, commentNum - } - }) - this.task.readNum = stats.readNum - this.task.likeNum = stats.likeNum - this.task.commentNum = stats.commentNum - await this.task.save() - await this.page.waitFor(3000) + }; + }); + this.task.readNum = stats.readNum; + this.task.likeNum = stats.likeNum; + this.task.commentNum = stats.commentNum; + await this.task.save(); + await this.page.waitFor(3000); } } From 0a28581519c00db547d7788c2caab048fe3ca05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B3=A1=E6=B3=A1=E6=9C=BA=E4=B8=8D=E5=86=92=E6=B3=A1?= =?UTF-8?q?=E4=BA=86?= Date: Wed, 24 Feb 2021 13:41:47 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E5=85=BC=E5=AE=B9headless=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=A4=B4=E6=9D=A1=E5=8F=91=E5=B8=83=EF=BC=8C=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E9=95=9C=E5=83=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 28 +++++++++++++++++++++------- docker-compose.yaml | 6 +++--- spiders/oschina.js | 1 + spiders/toutiao.js | 16 ++++++++++++++-- 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 5ba50c0..1c8931f 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ ArtiPub 目前支持文章编辑、文章发布、数据统计的功能,后期 #### NPM 或源码安装 - MongoDB: 3.6+ -- NodeJS: 8.12+ +- NodeJS: 10+ ## 安装方式 @@ -71,18 +71,22 @@ ArtiPub 提供 3 种安装方式如下。 ### 通过 Docker 安装 -通过 Docker,可以免去安装 MongoDB 的步骤,也是我们最推荐的安装方式。使用 Docker 安装 ArtiPub 前,请确保您安装了 Docker 以及 Docker Compose。 +通过 Docker,可以免去安装 MongoDB 的步骤,也是我们最推荐的安装方式。使用 Docker 安装 ArtiPub 前,请确保您安装了 Docker 以及 Docker Compose。docker运行 ArtiPub 有两种方式。 -在您的项目目录下创建 `docker-compose.yaml` 文件,输入如下内容。 +- 通过 docker-compose.yaml 启动 + +适用于你本地之前没有运行 `mongodb` 容器。 在您的项目目录下创建 `docker-compose.yaml` 文件,输入如下内容。 ```yaml -version: '3.3' +version: "3.3" services: app: - image: "tikazyq/artipub:latest" + image: "tanliyuan123/artipub:1.0" environment: MONGO_HOST: "mongo" - ARTIPUB_API_ADDRESS: "http://localhost:3000" # 后端 API 地址,如果安装地址不在本机,请修改为协议 + 服务器 IP 地址 + 端口号(默认为 3000) + # MONGO_USERNAME: root + # MONGO_PASSWORD: example + ARTIPUB_API_ADDRESS: "http://localhost:3000" # 后端API地址,如果安装地址不在本机,请修改为协议+服务器IP地址+端口号(默认为3000) ports: - "8000:8000" # frontend - "3000:3000" # backend @@ -91,11 +95,13 @@ services: mongo: image: mongo:latest restart: always + #volumes: + # - "E:\\mongodb:/data/db" ports: - "27017:27017" ``` -然后在命令行中输入如下命令。 +然后在命令行中输入如下命令。如果你想再次启动容器时上次内容不会被销毁,去掉 `volumes` 两行的注释,改成自己本地路径即可。 ```bash docker-compose up @@ -105,6 +111,14 @@ docker-compose up 注意⚠️,如果您的 Docker 宿主机不是本机,例如您用了 Docker Machine 或者 Docker 服务在其他机器上,您需要将环境变量 `ARTIPUB_API_ADDRESS` 改为宿主机 IP + 端口号(默认 3000)。然后,在浏览器输入 `http://< 宿主机 IP>:8000` 即可看到界面。 + - 独立启动 artipub 镜像 + + 如果你本地已有启动的mongodb容器,不想用上面方式再起一个。其中 `goofy_ganguly` 为本地已启动的 mongodb 容器名, 替换成你本地的即可。 + + ```bash + docker run --rm -it --link goofy_ganguly -p 3000:3000/tcp -p 8000:8000/tcp tanliyuan123/artipub:1.0 + ``` + ### 通过 npm 包安装 如果您对 npm 熟悉,且已经有 MongoDB 的环境,这是最为快捷的方式。 diff --git a/docker-compose.yaml b/docker-compose.yaml index 9ff6d0f..262eaee 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,7 @@ version: "3.3" services: app: - image: "artipub:1.0" + image: "tanliyuan123/artipub:1.0" environment: MONGO_HOST: "mongo" # MONGO_USERNAME: root @@ -15,7 +15,7 @@ services: mongo: image: mongo:latest restart: always - volumes: - - "E:\\mongodb:/data/db" + #volumes: + # - "E:\\mongodb:/data/db" ports: - "27017:27017" diff --git a/spiders/oschina.js b/spiders/oschina.js index e392be0..4ca59ad 100644 --- a/spiders/oschina.js +++ b/spiders/oschina.js @@ -27,6 +27,7 @@ class OschinaSpider extends BaseSpider { //切换到HTML编辑器 await this.page.click('#editorTabList a'); await this.page.waitForSelector('.cke_wysiwyg_frame'); + await this.page.waitForTimeout(500); } async inputContent(article, editorSel) { diff --git a/spiders/toutiao.js b/spiders/toutiao.js index 6584d38..51a6745 100644 --- a/spiders/toutiao.js +++ b/spiders/toutiao.js @@ -26,6 +26,10 @@ class ToutiaoSpider extends BaseSpider { await this.page.waitForTimeout(100); } + await this.page.waitForSelector(this.editorSel.content, { + visible: true + }); + } async inputFooter(article, editorSel) { @@ -50,13 +54,21 @@ class ToutiaoSpider extends BaseSpider { } async afterInputEditor() { + //部分分辨率会展开右侧发文助手,影响点击 + await this.page.evaluate(() => { + document.querySelector('.byte-drawer-close-icon')?.click(); + }); + await this.page.waitForTimeout(1000); + //处理图片,要点击下 const editLinks = await this.page.$$('.editor-image-menu > .image-menu-event-prevent:nth-child(2) > a'); + for (let element of editLinks) { await element.click(); - await this.page.waitForTimeout(2000); + + await this.page.waitForTimeout(1000); await this.page.click('.btns button:nth-child(2)'); - await this.page.waitForTimeout(2000); + await this.page.waitForTimeout(1000); }; }