Skip to content

Commit

Permalink
- Moved some things to Constants class
Browse files Browse the repository at this point in the history
  • Loading branch information
KoalaBear84 committed May 26, 2019
1 parent a4b0f21 commit a73e45f
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 16 deletions.
13 changes: 13 additions & 0 deletions OpenDirectoryDownloader/Constants.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace OpenDirectoryDownloader
{
    /// <summary>
    /// Single home for string literals that are shared between several classes
    /// of the project, so they are spelled out in exactly one place.
    /// </summary>
    public class Constants
    {
        /// <summary>
        /// Host name of Google Drive. Compared against <c>Uri.Host</c> to decide
        /// whether a URL points at Google Drive.
        /// </summary>
        public const string GoogleDriveDomain = "drive.google.com";

        /// <summary>
        /// Ready-made <c>User-Agent</c> header values that can be applied to
        /// outgoing HTTP requests.
        /// </summary>
        public class UserAgent
        {
            /// <summary>User-Agent string in the format sent by a desktop Chrome browser on Windows.</summary>
            public const string Chrome = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3800.0 Safari/537.36";

            /// <summary>User-Agent string in the format sent by the curl command-line client.</summary>
            public const string Curl = "curl/7.55.1";
        }
    }
}
3 changes: 2 additions & 1 deletion OpenDirectoryDownloader/DirectoryParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

try
{
if (webDirectory.Uri.Host == "drive.google.com")
if (webDirectory.Uri.Host == Constants.GoogleDriveDomain)
{
return await GoogleDriveIndexer.IndexAsync(webDirectory);
//return GoogleDriveParser.ParseGoogleDriveHtml(html, webDirectory);
Expand Down Expand Up @@ -1362,6 +1362,7 @@ private static void ProcessLink(string baseUrl, WebDirectory parsedWebDirectory,
{
Url = fullUrl,
FileName = Path.GetFileName(WebUtility.UrlDecode(linkHref)),
//FileName = link.TextContent.Trim(),
FileSize = fileSize,
});
}
Expand Down
29 changes: 14 additions & 15 deletions OpenDirectoryDownloader/OpenDirectoryIndexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ public class OpenDirectoryIndexer
}
);

private const string UserAgent_Curl = "curl/7.55.1";
private const string UserAgent_Chrome = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3642.0 Safari/537.36";

public OpenDirectoryIndexer(OpenDirectoryIndexerSettings openDirectoryIndexerSettings)
{
OpenDirectoryIndexerSettings = openDirectoryIndexerSettings;
Expand All @@ -81,8 +78,8 @@ public OpenDirectoryIndexer(OpenDirectoryIndexerSettings openDirectoryIndexerSet
WebDirectoryProcessors = new Task[OpenDirectoryIndexerSettings.Threads];
WebFileFileSizeProcessors = new Task[OpenDirectoryIndexerSettings.Threads];

//HttpClient.DefaultRequestHeaders.Add("User-Agent", UserAgent_Curl);
//HttpClient.DefaultRequestHeaders.Add("User-Agent", UserAgent_Chrome);
//HttpClient.DefaultRequestHeaders.Add("User-Agent", Constants.UserAgent.Curl);
//HttpClient.DefaultRequestHeaders.Add("User-Agent", Constants.UserAgent.Chrome);
}

public async void StartIndexingAsync()
Expand All @@ -109,7 +106,7 @@ public async void StartIndexingAsync()
};
}

if (Session.Root.Uri.Host == "drive.google.com")
if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
{
Logger.Warn("Google Drive scanning is limited to 10 directories per second!");
}
Expand Down Expand Up @@ -372,7 +369,7 @@ private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, st
AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
}
else
if (Session.Root.Uri.Host == "drive.google.com")
if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
{
string baseUrl = webDirectory.Url;

Expand All @@ -387,9 +384,11 @@ private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, st
{
Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
Session.TotalHttpRequests++;
Context pollyContext = new Context();
pollyContext.Add("Processor", name);
pollyContext.Add("WebDirectory", webDirectory);
Context pollyContext = new Context
{
{ "Processor", name },
{ "WebDirectory", webDirectory }
};
await RetryPolicy.ExecuteAsync(ctx => ProcessWebDirectoryAsync(name, webDirectory), pollyContext);
}
else
Expand Down Expand Up @@ -455,23 +454,23 @@ private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirecto
{
Logger.Warn("First request fails, using Curl fallback User-Agent");
HttpClient.DefaultRequestHeaders.UserAgent.Clear();
HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Curl);
HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Curl);
httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

if (httpResponseMessage.IsSuccessStatusCode)
{
SetRootUrl(httpResponseMessage);

html = await GetHtml(httpResponseMessage);
Logger.Warn("Yes, this Curl User-Agent did the trick!");
Logger.Warn("Yes, the Curl User-Agent did the trick!");
}
}

if (FirstRequest && !httpResponseMessage.IsSuccessStatusCode || httpResponseMessage.IsSuccessStatusCode && string.IsNullOrWhiteSpace(html))
{
Logger.Warn("First request fails, using Chrome fallback User-Agent");
HttpClient.DefaultRequestHeaders.UserAgent.Clear();
HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Chrome);
HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Chrome);
httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

if (httpResponseMessage.IsSuccessStatusCode)
Expand Down Expand Up @@ -621,7 +620,7 @@ private void AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa
{
if (!Session.ProcessedUrls.Contains(subdirectory.Url))
{
if (subdirectory.Uri.Host != "drive.google.com" && (subdirectory.Uri.Host != Session.Root.Uri.Host || !subdirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath)))
if (subdirectory.Uri.Host != Constants.GoogleDriveDomain && (subdirectory.Uri.Host != Session.Root.Uri.Host || !subdirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath)))
{
Logger.Debug($"Removed subdirectory {subdirectory.Uri} from parsed webdirectory because it is not the same host");
}
Expand All @@ -646,7 +645,7 @@ private void AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa
{
Uri uri = new Uri(f.Url);

if (uri.Host == "drive.google.com")
if (uri.Host == Constants.GoogleDriveDomain)
{
return false;
}
Expand Down

0 comments on commit a73e45f

Please sign in to comment.