Skip to content

Commit

Permalink
TestSelenium爬虫小说测试
Browse files (browse the repository at this point in the history)
  • Loading branch information
dolyw committed Apr 20, 2022
1 parent 7b10b28 commit 3ea26f6
Showing 1 changed file with 35 additions and 8 deletions.
43 changes: 35 additions & 8 deletions TestWeb/src/test/java/com/wang/other/TestSelenium.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,7 +102,7 @@ public void test02() throws Exception {
}

@Test
public void test03() throws Exception {
public void test03() throws Exception{
String fileName = "D:\\Program Files (x86)\\Reader_v1.9.3.2\\天命大反派.txt";
if (!FileUtil.exist(fileName)) {
File file = new File(fileName);
Expand Down Expand Up @@ -134,18 +134,19 @@ public void test03() throws Exception {
WebDriver chromeDriver = new ChromeDriver(chromeOptions);

TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/21404025.html", fileName);
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70349769.html", fileName);
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70963309.html", fileName);

chromeDriver.close();
}

public static void text(WebDriver chromeDriver, String url, String fileName) {
public static void text(WebDriver chromeDriver, String url, String fileName) throws Exception {
chromeDriver.get(url);
Thread.sleep(100);
WebElement boxWebElement = chromeDriver.findElement(By.className("box_con"));
WebElement titleElement = boxWebElement.findElement(By.tagName("h1"));

// System.out.println(titleElement.getText());
if (titleElement.getText().contains("950")) {
if (titleElement.getText().contains("1030")) {
return;
}
if ("玄幻:我!天命大反派".equals(titleElement.getText())) {
Expand All @@ -154,17 +155,43 @@ public static void text(WebDriver chromeDriver, String url, String fileName) {

// 标题
List<String> lines = new ArrayList<>();
lines.add("第" + titleElement.getText().substring(0,4) + "章 " + titleElement.getText().substring(4));
lines.add("");
String title = "第" + titleElement.getText().substring(0,4).trim() + "章 " + titleElement.getText().substring(4).trim();
title.replaceAll("/?", "");
title.replaceAll(":", "");
System.out.println(title);
lines.add(title);

// 正文
WebElement conWebElement = chromeDriver.findElement(By.id("content"));
String con = conWebElement.getText();
// con = con.replaceAll("<br/>", "/r/n");
lines.add(con);
con = con.replaceAll("<br/>", "");
con = con.replaceAll("\n", "");
con = con.replaceAll(",", ",");
String[] conArray = con.split("。");
for (String text : conArray) {
if (text.length() > 50) {
String[] conArray2 = text.split(",");
for (int i = 0; i < conArray2.length; i++) {
if (i + 2 < conArray2.length) {
lines.add(conArray2[i] + "," + conArray2[++i] + "," + conArray2[++i] + ",");
} else if (i + 1 < conArray2.length) {
lines.add(conArray2[i] + "," + conArray2[++i] + ",");
} else {
lines.add(conArray2[i] + "。");
}
lines.add("");
}
lines.add("");
} else {
lines.add(text + "。");
lines.add("");
}
}

FileUtil.appendUtf8Lines(lines, fileName);

Thread.sleep(10);

WebElement btnWebElement = chromeDriver.findElement(By.className("bottem2"));
List<WebElement> btnListWebElement = btnWebElement.findElements(By.tagName("a"));

Expand Down

0 comments on commit 3ea26f6

Please sign in to comment.