diff --git a/Grimoire.Commons/Grimoire.Commons.csproj b/Grimoire.Commons/Grimoire.Commons.csproj
index 2a83277..43a4cb7 100644
--- a/Grimoire.Commons/Grimoire.Commons.csproj
+++ b/Grimoire.Commons/Grimoire.Commons.csproj
@@ -6,7 +6,7 @@
-
+
diff --git a/Grimoire.Commons/HtmlParser.cs b/Grimoire.Commons/HtmlParser.cs
index f72ff27..5d2ac4d 100644
--- a/Grimoire.Commons/HtmlParser.cs
+++ b/Grimoire.Commons/HtmlParser.cs
@@ -13,25 +13,6 @@ public class HtmlParser(ILogger logger,
Configuration.Default.WithDefaultLoader()
);
- public async Task ParseAsync(string url) {
- var retries = 0;
- IDocument document;
- do {
- var content = await GetContentAsync(url);
- await using var stream = await content.ReadAsStreamAsync();
- document = await _context.OpenAsync(x => x.Content(stream));
- await document.WaitForReadyAsync();
- if (document.All.Length == 3) {
- retries++;
- continue;
- }
-
- break;
- } while (retries <= configuration.GetValue("Http:Retries"));
-
- return document;
- }
-
public async Task GetContentAsync(string url) {
try {
var requestMessage = new HttpRequestMessage {
@@ -69,15 +50,71 @@ public Task ParseHtmlAsync(string html) {
+    /// <summary>
+    /// Downloads the resource at <paramref name="url"/> into the <paramref name="output"/> directory.
+    /// </summary>
+    /// <remarks>
+    /// The file name is the Content-Disposition <c>filename*</c> value when the response carries one,
+    /// otherwise the last '/'-separated segment of the URL; either way it is passed through <c>Clean()</c>.
+    /// Any failure (non-success status code, network error, target file already present, ...) is
+    /// logged and swallowed, so this method does not throw to its caller.
+    /// </remarks>
+    /// <param name="url">Absolute URL of the resource to fetch.</param>
+    /// <param name="output">Directory in which the file is created.</param>
     public async Task DownloadAsync(string url, string output) {
         try {
-            var content = await GetContentAsync(url);
-            var fileName =
-                (content.Headers.ContentDisposition?.FileNameStar
-                 ?? url.Split('/')[^1]).Clean();
+            using var requestMessage = new HttpRequestMessage {
+                Method = HttpMethod.Get,
+                RequestUri = new Uri(url),
+                Headers = {
+                    {
+                        "User-Agent", configuration.GetSection("Http:UserAgents").Get().RandomItem()
+                    }
+                }
+            };
+
+            // Wait a random amount of time, bounded by the Http:Delay setting, before sending.
+            await Task.Delay(Random.Shared.Next(configuration.GetValue("Http:Delay")));
+
+            // ResponseHeadersRead returns as soon as the headers arrive, so the body is streamed
+            // into the file below instead of being buffered in memory first.
+            using var responseMessage = await httpClient.SendAsync(
+                requestMessage, HttpCompletionOption.ResponseHeadersRead);
+            if (!responseMessage.IsSuccessStatusCode) {
+                // ReasonPhrase may be null or empty, so the numeric status is always in the message.
+                // The catch below logs this exception; no separate log call is needed here.
+                throw new HttpRequestException(
+                    $"Request failed with status {(int)responseMessage.StatusCode} ({responseMessage.ReasonPhrase})",
+                    null,
+                    responseMessage.StatusCode);
+            }
+
+            var fileName = (responseMessage.Content.Headers.ContentDisposition?.FileNameStar
+                            ?? url.Split('/')[^1]).Clean();
             await using var fs = new FileStream($"{output}/{fileName}", FileMode.CreateNew);
-            await content.CopyToAsync(fs);
+            await responseMessage.Content.CopyToAsync(fs);
+        }
+        catch (Exception exception) {
+            // Pass the exception as the first argument so the logger records it (type, message,
+            // stack trace) as an exception rather than as formatted text.
+            logger.LogError(exception, "Failed to download {Url}", url);
+        }
+    }
+
+    /// <summary>
+    /// Fetches <paramref name="url"/> and parses the response body into a document.
+    /// </summary>
+    /// <remarks>
+    /// When the parsed document contains exactly three elements the request is repeated, for as long
+    /// as the retry counter does not exceed the Http:Retries setting. If every attempt ends that way,
+    /// the document from the last attempt is returned as-is.
+    /// A non-success status code is not retried: it is reported as an
+    /// <see cref="HttpRequestException"/>. Every failure is logged and then rethrown.
+    /// </remarks>
+    /// <param name="url">Absolute URL of the page to fetch.</param>
+    /// <returns>The document parsed from the last response received.</returns>
+    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
+    public async Task ParseAsync(string url) {
+        try {
+            var retries = 0;
+            IDocument document;
+            do {
+                using var requestMessage = new HttpRequestMessage {
+                    Method = HttpMethod.Get,
+                    RequestUri = new Uri(url),
+                    Headers = {
+                        {
+                            "User-Agent", configuration.GetSection("Http:UserAgents").Get().RandomItem()
+                        }
+                    }
+                };
+
+                // Wait a random amount of time, bounded by the Http:Delay setting, before each attempt.
+                await Task.Delay(Random.Shared.Next(configuration.GetValue("Http:Delay")));
+                using var responseMessage = await httpClient.SendAsync(requestMessage);
+                if (!responseMessage.IsSuccessStatusCode) {
+                    // ReasonPhrase may be null or empty, so the numeric status is always in the message.
+                    // The catch below logs this exception; no separate log call is needed here.
+                    throw new HttpRequestException(
+                        $"Request failed with status {(int)responseMessage.StatusCode} ({responseMessage.ReasonPhrase})",
+                        null,
+                        responseMessage.StatusCode);
+                }
+
+                await using var stream = await responseMessage.Content.ReadAsStreamAsync();
+                document = await _context.OpenAsync(x => x.Content(stream));
+                await document.WaitForReadyAsync();
+
+                // NOTE(review): three elements is presumably the bare html/head/body skeleton the
+                // parser builds for an empty page - confirm that this is the intended "empty" check.
+                if (document.All.Length == 3) {
+                    retries++;
+                    continue;
+                }
+
+                break;
+            } while (retries <= configuration.GetValue("Http:Retries"));
+
+            return document;
         }
-        catch {
-            logger.LogError("Failed to download {}", url);
+        catch (Exception exception) {
+            // Pass the exception as the first argument so the logger records it (type, message,
+            // stack trace) as an exception rather than as formatted text.
+            logger.LogError(exception, "Failed to get {Url}", url);
+            throw;
         }
     }
}
\ No newline at end of file