-
-
Notifications
You must be signed in to change notification settings - Fork 95
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
64c29d1
commit 02ede0b
Showing
3 changed files
with
229 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
154 changes: 154 additions & 0 deletions
154
src/OpenDirectoryDownloader/Site/Copyparty/CopypartyParser.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
using AngleSharp.Dom; | ||
using AngleSharp.Html.Dom; | ||
using OpenDirectoryDownloader.Helpers; | ||
using OpenDirectoryDownloader.Shared.Models; | ||
using System.Net; | ||
using System.Text.RegularExpressions; | ||
|
||
namespace OpenDirectoryDownloader.Site.Copyparty; | ||
|
||
/// <summary>
/// Parser for Copyparty directory listings. Rows of the <c>table#files</c> element are used
/// when the page has any; otherwise the listing is read from the <c>ls0 = ...;</c>
/// JavaScript assignment embedded in the page source.
/// </summary>
public static class Copyparty
{
	private const string Parser = "Copyparty";

	// Captures everything assigned to `ls0` up to the last ';' on that line.
	// The optional \r is needed because in Multiline mode `$` only matches before '\n',
	// so a page served with CRLF line endings would otherwise never match.
	private static readonly Regex JsListingRegex = new("ls0\\s?=\\s?(?<Listing>.*);\\r?$", RegexOptions.Multiline);

	/// <summary>
	/// Parses a Copyparty page into <paramref name="webDirectory"/>.
	/// </summary>
	/// <param name="baseUrl">URL of the page; relative links are resolved against it.</param>
	/// <param name="httpClient">Passed through to the scan; no request is made by this parser.</param>
	/// <param name="webDirectory">Directory to fill with the subdirectories and files found.</param>
	/// <param name="htmlDocument">Parsed DOM of the page.</param>
	/// <param name="html">Raw page source, searched for the JavaScript listing.</param>
	/// <returns>The directory returned by the scan.</returns>
	/// <remarks>Any exception escaping the scan is logged, recorded in the session and rethrown.</remarks>
	public static async Task<WebDirectory> ParseIndex(string baseUrl, HttpClient httpClient, WebDirectory webDirectory, IHtmlDocument htmlDocument, string html)
	{
		try
		{
			webDirectory = await ScanAsync(baseUrl, httpClient, webDirectory, htmlDocument, html);
		}
		catch (Exception ex)
		{
			Program.Logger.Error(ex, "Error parsing {parser} for '{url}'", Parser, webDirectory.Url);
			RecordError(webDirectory);

			throw;
		}

		return webDirectory;
	}

	/// <summary>
	/// Fills <paramref name="webDirectory"/> from the HTML table, or from the JavaScript
	/// listing when the table is missing or has no rows.
	/// </summary>
	/// <remarks>
	/// Exceptions raised while reading the listing are logged and recorded but not rethrown;
	/// the directory is then returned with <c>Error</c> set.
	/// </remarks>
	private static async Task<WebDirectory> ScanAsync(string baseUrl, HttpClient httpClient, WebDirectory webDirectory, IHtmlDocument htmlDocument, string html)
	{
		Program.Logger.Debug("Processing listings for '{url}'", webDirectory.Uri);

		webDirectory.Parser = Parser;

		try
		{
			// QuerySelector returns null when the page has no table#files; treat that the
			// same as an empty table so the JavaScript fallback still gets its chance.
			IHtmlCollection<IElement> entries = htmlDocument.QuerySelector("table#files")?.QuerySelectorAll("tbody tr");

			if (entries is null || !entries.Any())
			{
				return ParseCopypartyJavaScriptListing(baseUrl, webDirectory, htmlDocument, html);
			}

			foreach (IElement entry in entries)
			{
				// Rows without a link in the second cell carry no entry; skip them
				// instead of dereferencing a null anchor.
				if (entry.QuerySelector("td:nth-child(2) a") is not IHtmlAnchorElement link)
				{
					continue;
				}

				Library.ProcessUrl(baseUrl, link, out _, out _, out string fullUrl);

				string linkText = link.TextContent;

				// A trailing slash in the link text marks a directory.
				if (linkText.EndsWith('/'))
				{
					webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
					{
						Parser = Parser,
						Url = fullUrl,
						Name = linkText.TrimEnd('/')
					});

					continue;
				}

				WebFile webFile = new()
				{
					Url = fullUrl,
					// Drop the query string before decoding so it does not end up in the file name.
					FileName = Path.GetFileName(WebUtility.UrlDecode(fullUrl.Split('?')[0]))
				};

				// The size lives in the third cell; when that cell is absent the size is
				// left at WebFile's default rather than failing the whole directory.
				if (entry.QuerySelector("td:nth-child(3)") is IHtmlTableCellElement fileSize)
				{
					webFile.FileSize = FileSizeHelper.ParseFileSize(fileSize.TextContent);
				}

				webDirectory.Files.Add(webFile);
			}

			webDirectory.ParsedSuccessfully = true;
		}
		catch (Exception ex)
		{
			Program.Logger.Error(ex, "Error processing {parser} for '{url}'", Parser, webDirectory.Url);
			RecordError(webDirectory);
		}

		return webDirectory;
	}

	/// <summary>
	/// Fills <paramref name="parsedWebDirectory"/> from the JSON assigned to <c>ls0</c> in
	/// <paramref name="html"/>.
	/// </summary>
	/// <returns>
	/// The same directory instance. It is returned untouched (not marked as parsed) when the
	/// assignment is not found or deserializes to null.
	/// </returns>
	private static WebDirectory ParseCopypartyJavaScriptListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlDocument htmlDocument, string html)
	{
		Match jsListingRegexMatch = JsListingRegex.Match(html);

		if (!jsListingRegexMatch.Success)
		{
			return parsedWebDirectory;
		}

		CopypartyListing copypartyListing = CopypartyListing.FromJson(jsListingRegexMatch.Groups["Listing"].Value);

		// A literal JSON `null` deserializes to a null object; there is nothing to read then.
		if (copypartyListing is null)
		{
			return parsedWebDirectory;
		}

		Uri baseUri = new(baseUrl);

		// Either array is null when its key is missing from the JSON, hence the fallbacks.
		foreach (Dir dir in copypartyListing.Dirs ?? Array.Empty<Dir>())
		{
			parsedWebDirectory.Subdirectories.Add(new WebDirectory(parsedWebDirectory)
			{
				Parser = Parser,
				Url = new Uri(baseUri, dir.Href).ToString(),
				Name = dir.Name?.TrimEnd('/')
			});
		}

		foreach (Dir file in copypartyListing.Files ?? Array.Empty<Dir>())
		{
			parsedWebDirectory.Files.Add(new WebFile
			{
				Url = new Uri(baseUri, file.Href).ToString(),
				FileName = file.Name,
				FileSize = file.Sz
			});
		}

		parsedWebDirectory.ParsedSuccessfully = true;

		return parsedWebDirectory;
	}

	/// <summary>
	/// Flags <paramref name="webDirectory"/> as failed, bumps the session error counter and
	/// adds the directory URL to the session's error list if it is not already there.
	/// </summary>
	private static void RecordError(WebDirectory webDirectory)
	{
		webDirectory.Error = true;

		OpenDirectoryIndexer.Session.Errors++;

		if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
		{
			OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
		}
	}
}
71 changes: 71 additions & 0 deletions
71
src/OpenDirectoryDownloader/Site/Copyparty/CopypartyResult.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
using Newtonsoft.Json; | ||
using Newtonsoft.Json.Converters; | ||
using System.Globalization; | ||
|
||
namespace OpenDirectoryDownloader.Site.Copyparty; | ||
|
||
/// <summary>
/// JSON model of a Copyparty listing: the "dirs", "files" and "taglist" properties.
/// </summary>
public partial class CopypartyListing
{
	/// <summary>Entries of the "dirs" JSON property.</summary>
	[JsonProperty("dirs")] public Dir[] Dirs { get; set; }

	/// <summary>Entries of the "files" JSON property; they share the <see cref="Dir"/> shape.</summary>
	[JsonProperty("files")] public Dir[] Files { get; set; }

	/// <summary>Raw values of the "taglist" JSON property.</summary>
	[JsonProperty("taglist")] public object[] Taglist { get; set; }
}
|
||
/// <summary>
/// JSON model of a single listing entry. The same shape is used for both the "dirs" and
/// the "files" arrays of <see cref="CopypartyListing"/>.
/// </summary>
public partial class Dir
{
	/// <summary>Value of the "dt" JSON property.</summary>
	[JsonProperty("dt")] public DateTimeOffset Dt { get; set; }

	/// <summary>Value of the "ext" JSON property.</summary>
	[JsonProperty("ext")] public string Ext { get; set; }

	/// <summary>Value of the "href" JSON property.</summary>
	[JsonProperty("href")] public string Href { get; set; }

	/// <summary>Value of the "lead" JSON property.</summary>
	[JsonProperty("lead")] public string Lead { get; set; }

	/// <summary>Value of the "name" JSON property.</summary>
	[JsonProperty("name")] public string Name { get; set; }

	/// <summary>Value of the "sz" JSON property.</summary>
	[JsonProperty("sz")] public long Sz { get; set; }

	/// <summary>Value of the "tags" JSON property.</summary>
	[JsonProperty("tags")] public Tags Tags { get; set; }

	/// <summary>Value of the "ts" JSON property.</summary>
	[JsonProperty("ts")] public long Ts { get; set; }
}
|
||
/// <summary>
/// Type of the "tags" property on <see cref="Dir"/>; declares no members of its own.
/// </summary>
public partial class Tags { }
|
||
public partial class CopypartyListing
{
	/// <summary>
	/// Deserializes <paramref name="json"/> into a <see cref="CopypartyListing"/> using
	/// <see cref="Converter.Settings"/>.
	/// </summary>
	/// <param name="json">JSON text of the listing.</param>
	/// <returns>The deserialized listing.</returns>
	public static CopypartyListing FromJson(string json)
	{
		return JsonConvert.DeserializeObject<CopypartyListing>(json, Converter.Settings);
	}
}
|
||
/// <summary>
/// Serialization extension for <see cref="CopypartyListing"/>.
/// </summary>
public static class Serialize
{
	/// <summary>
	/// Serializes <paramref name="self"/> to JSON using <see cref="Converter.Settings"/>.
	/// </summary>
	/// <param name="self">Listing to serialize.</param>
	/// <returns>The JSON text.</returns>
	public static string ToJson(this CopypartyListing self)
	{
		return JsonConvert.SerializeObject(self, Converter.Settings);
	}
}
|
||
/// <summary>
/// Holds the Json.NET settings shared by the listing's serialize and deserialize helpers.
/// </summary>
internal static class Converter
{
	/// <summary>
	/// Settings that ignore metadata properties, turn off automatic date parsing and register
	/// an <see cref="IsoDateTimeConverter"/> that assumes universal time.
	/// </summary>
	public static readonly JsonSerializerSettings Settings = new()
	{
		MetadataPropertyHandling = MetadataPropertyHandling.Ignore,
		DateParseHandling = DateParseHandling.None,
		Converters =
		{
			new IsoDateTimeConverter
			{
				DateTimeStyles = DateTimeStyles.AssumeUniversal
			}
		}
	};
}