Skip to content

Commit

Permalink
- Very ugly way to speed up the last resort (link parsing) when the whole HTML does not contain a .fileSize class
Browse files Browse the repository at this point in the history
  • Loading branch information
KoalaBear84 committed Jul 12, 2020
1 parent 35d906f commit 016f0a5
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions OpenDirectoryDownloader/DirectoryParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

if (tables.Any())
{
return ParseTablesDirectoryListing(baseUrl, parsedWebDirectory, tables);
bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null;
return ParseTablesDirectoryListing(baseUrl, parsedWebDirectory, tables, containsFileSizeClass);
}

IHtmlCollection<IElement> materialDesignListItems = htmlDocument.QuerySelectorAll("ul.mdui-list li");
Expand All @@ -173,7 +174,8 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

if (listItems.Any())
{
WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems);
bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null;
WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems, containsFileSizeClass);

if (result.ParsedSuccesfully || result.Error)
{
Expand All @@ -185,7 +187,8 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

if (listItems.Any())
{
WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems);
bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null;
WebDirectory result = ParseListItemsDirectoryListing(baseUrl, parsedWebDirectory, listItems, containsFileSizeClass);

if (result.ParsedSuccesfully || result.Error)
{
Expand All @@ -198,7 +201,8 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

if (links.Any())
{
parsedWebDirectory = ParseLinksDirectoryListing(baseUrl, parsedWebDirectory, links);
bool containsFileSizeClass = htmlDocument.QuerySelector(".fileSize") != null;
parsedWebDirectory = ParseLinksDirectoryListing(baseUrl, parsedWebDirectory, links, containsFileSizeClass);
}

parsedWebDirectory = await ParseDirectoryListingModel01(baseUrl, parsedWebDirectory, htmlDocument, httpClient);
Expand Down Expand Up @@ -621,7 +625,7 @@ private static WebDirectory ParseH5aiDirectoryListing(string baseUrl, WebDirecto
return parsedWebDirectory;
}

private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> tables)
private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> tables, bool containsFileSizeClass)
{
// Dirty solution..
bool hasSeperateDirectoryAndFilesTables = false;
Expand All @@ -648,7 +652,7 @@ private static WebDirectory ParseTablesDirectoryListing(string baseUrl, WebDirec
{
if (table.QuerySelector("a") != null)
{
webDirectoryCopy = ParseLinksDirectoryListing(baseUrl, webDirectoryCopy, table.QuerySelectorAll("a"));
webDirectoryCopy = ParseLinksDirectoryListing(baseUrl, webDirectoryCopy, table.QuerySelectorAll("a"), containsFileSizeClass);
}
}
else
Expand Down Expand Up @@ -1542,7 +1546,7 @@ private static WebDirectory ParseDirectoryListerDirectoryListing(string baseUrl,
return parsedWebDirectory;
}

private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> listItems)
private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> listItems, bool containsFileSizeClass)
{
bool firstLink = true;

Expand All @@ -1562,7 +1566,7 @@ private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDi

if (link != null)
{
ProcessLink(baseUrl, parsedWebDirectory, link, "ParseListItemsDirectoryListing");
ProcessLink(baseUrl, parsedWebDirectory, link, "ParseListItemsDirectoryListing", containsFileSizeClass);
}
}

Expand All @@ -1571,19 +1575,19 @@ private static WebDirectory ParseListItemsDirectoryListing(string baseUrl, WebDi
return parsedWebDirectory;
}

private static WebDirectory ParseLinksDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> links)
private static WebDirectory ParseLinksDirectoryListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlCollection<IElement> links, bool containsFileSizeClass)
{
foreach (IElement link in links)
{
ProcessLink(baseUrl, parsedWebDirectory, link, "ParseLinksDirectoryListing");
ProcessLink(baseUrl, parsedWebDirectory, link, "ParseLinksDirectoryListing", containsFileSizeClass);
}

CheckParsedResults(parsedWebDirectory);

return parsedWebDirectory;
}

private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, IElement link, string parser)
private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, IElement link, string parser, bool containsFileSizeClass)
{
if (link.HasAttribute("href"))
{
Expand Down Expand Up @@ -1617,7 +1621,7 @@ private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory,
{
parsedWebDirectory.Parser = parser;

long fileSize = FileSizeHelper.ParseFileSize(link.ParentElement?.QuerySelector(".fileSize")?.TextContent);
long fileSize = FileSizeHelper.ParseFileSize(containsFileSizeClass ? link.ParentElement?.QuerySelector(".fileSize")?.TextContent : null);

string fileName = Path.GetFileName(WebUtility.UrlDecode(linkHref));
urlEncodingParser = new UrlEncodingParser(fileName);
Expand Down

0 comments on commit 016f0a5

Please sign in to comment.