From 016f0a5cc517a07f5d8b3cfb57b1502b95704bc9 Mon Sep 17 00:00:00 2001 From: KoalaBear Date: Sun, 12 Jul 2020 21:51:56 +0200 Subject: [PATCH] - Very ugly way to speed up the last resort (link parsing) when the whole HTML does not contain a .fileSize class --- OpenDirectoryDownloader/DirectoryParser.cs | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/OpenDirectoryDownloader/DirectoryParser.cs b/OpenDirectoryDownloader/DirectoryParser.cs index e7ebc7cd..6b6942c9 100644 --- a/OpenDirectoryDownloader/DirectoryParser.cs +++ b/OpenDirectoryDownloader/DirectoryParser.cs @@ -154,7 +154,8 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri if (tables.Any()) { - return ParseTablesDirectoryListing(baseUrl, parsedWebDirectory, tables); + bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null; + return ParseTablesDirectoryListing(baseUrl, parsedWebDirectory, tables, containsFileSizeClass); } IHtmlCollection materialDesignListItems = htmlDocument.QuerySelectorAll("ul.mdui-list li"); @@ -173,7 +174,8 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri if (listItems.Any()) { - WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems); + bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null; + WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems, containsFileSizeClass); if (result.ParsedSuccesfully || result.Error) { @@ -185,7 +187,8 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri if (listItems.Any()) { - WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems); + bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null; + WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems, containsFileSizeClass); if (result.ParsedSuccesfully || result.Error) { @@ -198,7 +201,8 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri if (links.Any()) { - parsedWebDirectory = ParseLinksDirectoryListing(baseUrl, parsedWebDirectory, links); + bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null; + parsedWebDirectory = ParseLinksDirectoryListing(baseUrl, parsedWebDirectory, links, containsFileSizeClass); } parsedWebDirectory = await ParseDirectoryListingModel01(baseUrl, parsedWebDirectory, htmlDocument, httpClient); @@ -621,7 +625,7 @@ private static WebDirectory ParseH5aiDirectoryListing(string baseUrl, WebDirecto return parsedWebDirectory; } - private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection tables) + private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection tables, bool containsFileSizeClass) { // Dirty solution.. bool hasSeperateDirectoryAndFilesTables = false; @@ -648,7 +652,7 @@ private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirec { if (table.QuerySelector("a") != null) { - webDirectoryCopy = ParseLinksDirectoryListing(baseUrl, webDirectoryCopy, table.QuerySelectorAll("a")); + webDirectoryCopy = ParseLinksDirectoryListing(baseUrl, webDirectoryCopy, table.QuerySelectorAll("a"), containsFileSizeClass); } } else @@ -1542,7 +1546,7 @@ private static WebDirectory ParseDirectoryListerDirectoryListing(string baseUrl, return parsedWebDirectory; } - private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection listItems) + private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection listItems, bool containsFileSizeClass) { bool firstLink = true; @@ -1562,7 +1566,7 @@ private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDi if (link != null) { - ProcessLink(baseUrl, parsedWebDirectory, link, "ParseListItemsDirectoryListing"); + ProcessLink(baseUrl, parsedWebDirectory, link, "ParseListItemsDirectoryListing", containsFileSizeClass); } } @@ -1571,11 +1575,11 @@ private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDi return parsedWebDirectory; } - private static WebDirectory ParseLinksDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection links) + private static WebDirectory ParseLinksDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection links, bool containsFileSizeClass) { foreach (IElement link in links) { - ProcessLink(baseUrl, parsedWebDirectory, link, "ParseLinksDirectoryListing"); + ProcessLink(baseUrl, parsedWebDirectory, link, "ParseLinksDirectoryListing", containsFileSizeClass); } CheckParsedResults(parsedWebDirectory); @@ -1583,7 +1587,7 @@ private static WebDirectory ParseLinksDirectoryListing(string baseUrl, WebDirect return parsedWebDirectory; } - private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, IElement link, string parser) + private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, IElement link, string parser, bool containsFileSizeClass) { if (link.HasAttribute("href")) { @@ -1617,7 +1621,7 @@ private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, { parsedWebDirectory.Parser = parser; - long fileSize = FileSizeHelper.ParseFileSize(link.ParentElement?.QuerySelector(".fileSize")?.TextContent); + long fileSize = FileSizeHelper.ParseFileSize(containsFileSizeClass ? link.ParentElement?.QuerySelector(".fileSize")?.TextContent : null); string fileName = Path.GetFileName(WebUtility.UrlDecode(linkHref)); urlEncodingParser = new UrlEncodingParser(fileName);