From a73e45f8468fcb87714859f7db0f5a35443d76d6 Mon Sep 17 00:00:00 2001 From: KoalaBear Date: Sun, 26 May 2019 13:24:55 +0200 Subject: [PATCH] - Moved some things to Constants class --- OpenDirectoryDownloader/Constants.cs | 13 +++++++++ OpenDirectoryDownloader/DirectoryParser.cs | 3 +- .../OpenDirectoryIndexer.cs | 29 +++++++++---------- 3 files changed, 29 insertions(+), 16 deletions(-) create mode 100644 OpenDirectoryDownloader/Constants.cs diff --git a/OpenDirectoryDownloader/Constants.cs b/OpenDirectoryDownloader/Constants.cs new file mode 100644 index 00000000..0d00103c --- /dev/null +++ b/OpenDirectoryDownloader/Constants.cs @@ -0,0 +1,13 @@ +namespace OpenDirectoryDownloader +{ + public class Constants + { + public const string GoogleDriveDomain = "drive.google.com"; + + public class UserAgent + { + public const string Curl = "curl/7.55.1"; + public const string Chrome = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3800.0 Safari/537.36"; + } + } +} diff --git a/OpenDirectoryDownloader/DirectoryParser.cs b/OpenDirectoryDownloader/DirectoryParser.cs index 13c31cb5..1ee6694a 100644 --- a/OpenDirectoryDownloader/DirectoryParser.cs +++ b/OpenDirectoryDownloader/DirectoryParser.cs @@ -51,7 +51,7 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri try { - if (webDirectory.Uri.Host == "drive.google.com") + if (webDirectory.Uri.Host == Constants.GoogleDriveDomain) { return await GoogleDriveIndexer.IndexAsync(webDirectory); //return GoogleDriveParser.ParseGoogleDriveHtml(html, webDirectory); @@ -1362,6 +1362,7 @@ private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory, { Url = fullUrl, FileName = Path.GetFileName(WebUtility.UrlDecode(linkHref)), + //FileName = link.TextContent.Trim(), FileSize = fileSize, }); } diff --git a/OpenDirectoryDownloader/OpenDirectoryIndexer.cs b/OpenDirectoryDownloader/OpenDirectoryIndexer.cs index 082f60a5..892c7f4b 100644 --- a/OpenDirectoryDownloader/OpenDirectoryIndexer.cs +++ b/OpenDirectoryDownloader/OpenDirectoryIndexer.cs @@ -58,9 +58,6 @@ public class OpenDirectoryIndexer } ); - private const string UserAgent_Curl = "curl/7.55.1"; - private const string UserAgent_Chrome = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3642.0 Safari/537.36"; - public OpenDirectoryIndexer(OpenDirectoryIndexerSettings openDirectoryIndexerSettings) { OpenDirectoryIndexerSettings = openDirectoryIndexerSettings; @@ -81,8 +78,8 @@ public OpenDirectoryIndexer(OpenDirectoryIndexerSettings openDirectoryIndexerSet WebDirectoryProcessors = new Task[OpenDirectoryIndexerSettings.Threads]; WebFileFileSizeProcessors = new Task[OpenDirectoryIndexerSettings.Threads]; - //HttpClient.DefaultRequestHeaders.Add("User-Agent", UserAgent_Curl); - //HttpClient.DefaultRequestHeaders.Add("User-Agent", UserAgent_Chrome); + //HttpClient.DefaultRequestHeaders.Add("User-Agent", Constants.UserAgent.Curl); + //HttpClient.DefaultRequestHeaders.Add("User-Agent", Constants.UserAgent.Chrome); } public async void StartIndexingAsync() @@ -109,7 +106,7 @@ public async void StartIndexingAsync() }; } - if (Session.Root.Uri.Host == "drive.google.com") + if (Session.Root.Uri.Host == Constants.GoogleDriveDomain) { Logger.Warn("Google Drive scanning is limited to 10 directories per second!"); } @@ -372,7 +369,7 @@ private async Task WebDirectoryProcessor(ConcurrentQueue queue, st AddProcessedWebDirectory(webDirectory, parsedWebDirectory); } else - if (Session.Root.Uri.Host == "drive.google.com") + if (Session.Root.Uri.Host == Constants.GoogleDriveDomain) { string baseUrl = webDirectory.Url; @@ -387,9 +384,11 @@ private async Task WebDirectoryProcessor(ConcurrentQueue queue, st { Logger.Debug($"[{name}] Start download '{webDirectory.Url}'"); Session.TotalHttpRequests++; - Context pollyContext = new Context(); - pollyContext.Add("Processor", name); - pollyContext.Add("WebDirectory", webDirectory); + Context pollyContext = new Context + { + { "Processor", name }, + { "WebDirectory", webDirectory } + }; await RetryPolicy.ExecuteAsync(ctx => ProcessWebDirectoryAsync(name, webDirectory), pollyContext); } else @@ -455,7 +454,7 @@ private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirecto { Logger.Warn("First request fails, using Curl fallback User-Agent"); HttpClient.DefaultRequestHeaders.UserAgent.Clear(); - HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Curl); + HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Curl); httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url); if (httpResponseMessage.IsSuccessStatusCode) @@ -463,7 +462,7 @@ private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirecto SetRootUrl(httpResponseMessage); html = await GetHtml(httpResponseMessage); - Logger.Warn("Yes, this Curl User-Agent did the trick!"); + Logger.Warn("Yes, the Curl User-Agent did the trick!"); } } @@ -471,7 +470,7 @@ private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirecto { Logger.Warn("First request fails, using Chrome fallback User-Agent"); HttpClient.DefaultRequestHeaders.UserAgent.Clear(); - HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Chrome); + HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Chrome); httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url); if (httpResponseMessage.IsSuccessStatusCode) @@ -621,7 +620,7 @@ private void AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa { if (!Session.ProcessedUrls.Contains(subdirectory.Url)) { - if (subdirectory.Uri.Host != "drive.google.com" && (subdirectory.Uri.Host != Session.Root.Uri.Host || !subdirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))) + if (subdirectory.Uri.Host != Constants.GoogleDriveDomain && (subdirectory.Uri.Host != Session.Root.Uri.Host || !subdirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))) { Logger.Debug($"Removed subdirectory {subdirectory.Uri} from parsed webdirectory because it is not the same host"); } @@ -646,7 +645,7 @@ private void AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa { Uri uri = new Uri(f.Url); - if (uri.Host == "drive.google.com") + if (uri.Host == Constants.GoogleDriveDomain) { return false; }