From 765dcb07243dcc2b006f63cdb7c01f2baf5317be Mon Sep 17 00:00:00 2001 From: KoalaBear Date: Thu, 9 Jul 2020 09:43:10 +0200 Subject: [PATCH] - Added support for GoIndex (already gone) and alternatives like goindex-drive, goindex-backup, goindex-theme-acrou, gdindex, go2index --- .../Models/Session.cs | 4 +- OpenDirectoryDownloader/Constants.cs | 1 + OpenDirectoryDownloader/DirectoryParser.cs | 40 +++- .../Models/DirectoryListingModel01.cs | 2 +- .../Models/FriendlyException.cs | 11 + .../OpenDirectoryIndexer.cs | 6 + .../BlitzfilesTech/BlitzfilesTechParser.cs | 9 +- .../Site/GoIndex/BhadooIndexParser.cs | 162 ++++++++++++++ .../Site/GoIndex/BhadooIndexResult.cs | 79 +++++++ .../Site/GoIndex/GdIndexParser.cs | 200 +++++++++++++++++ .../Site/GoIndex/GdIndexResult.cs | 64 ++++++ .../Site/GoIndex/Go2IndexParser.cs | 203 ++++++++++++++++++ .../Site/GoIndex/Go2IndexResult.cs | 88 ++++++++ .../Site/GoIndex/GoIndexParser.cs | 183 ++++++++++++++++ .../Site/GoIndex/GoIndexResult.cs | 76 +++++++ 15 files changed, 1122 insertions(+), 6 deletions(-) create mode 100644 OpenDirectoryDownloader/Models/FriendlyException.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/BhadooIndexParser.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/BhadooIndexResult.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/GdIndexParser.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/GdIndexResult.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/Go2IndexParser.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/Go2IndexResult.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/GoIndexParser.cs create mode 100644 OpenDirectoryDownloader/Site/GoIndex/GoIndexResult.cs diff --git a/OpenDirectoryDownloader.Shared/Models/Session.cs b/OpenDirectoryDownloader.Shared/Models/Session.cs index 40f78eb8..c73adfa4 100644 --- a/OpenDirectoryDownloader.Shared/Models/Session.cs +++ b/OpenDirectoryDownloader.Shared/Models/Session.cs @@ -1,4 +1,4 @@ -using 
Newtonsoft.Json; +using Newtonsoft.Json; using Roslyn.Utilities; using System; using System.Collections.Generic; @@ -17,6 +17,8 @@ public class Session public int TotalFiles { get; set; } public long TotalFileSizeEstimated { get; set; } public int Errors { get; set; } + [JsonIgnore] + public int MaxThreads { get; set; } public int Skipped { get; set; } [JsonIgnore] public bool StopLogging { get; set; } diff --git a/OpenDirectoryDownloader/Constants.cs b/OpenDirectoryDownloader/Constants.cs index fee73c20..7e50c66e 100644 --- a/OpenDirectoryDownloader/Constants.cs +++ b/OpenDirectoryDownloader/Constants.cs @@ -6,6 +6,7 @@ public class Constants public const string BlitzfilesTechDomain = "blitzfiles.tech"; public const string DateTimeFormat = "yyyy-MM-dd HH:mm:ss"; public const string Parameters_Password = "PASSWORD"; + public const string Parameters_GdIndex_RootId = "GdIndex_RootId"; public class UserAgent { diff --git a/OpenDirectoryDownloader/DirectoryParser.cs b/OpenDirectoryDownloader/DirectoryParser.cs index 82a83b5d..f9296229 100644 --- a/OpenDirectoryDownloader/DirectoryParser.cs +++ b/OpenDirectoryDownloader/DirectoryParser.cs @@ -3,12 +3,15 @@ using AngleSharp.Html.Parser; using Newtonsoft.Json; using NLog; -using OpenDirectoryDownloader.GoogleDrive; using OpenDirectoryDownloader.Helpers; using OpenDirectoryDownloader.Models; using OpenDirectoryDownloader.Shared; using OpenDirectoryDownloader.Shared.Models; using OpenDirectoryDownloader.Site.BlitzfilesTech; +using OpenDirectoryDownloader.Site.GoIndex; +using OpenDirectoryDownloader.Site.GoIndex.Bhadoo; +using OpenDirectoryDownloader.Site.GoIndex.GdIndex; +using OpenDirectoryDownloader.Site.GoIndex.Go2Index; using System; using System.Collections.Generic; using System.Diagnostics; @@ -62,6 +65,32 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri return await BlitzfilesTechParser.ParseIndex(httpClient, webDirectory); } + if 
(htmlDocument.QuerySelector("script[src*=\"goindex-theme-acrou\"]") != null) + { + return await Go2IndexParser.ParseIndex(httpClient, webDirectory); + } + + if (htmlDocument.QuerySelector("script[src*=\"Bhadoo-Drive-Index\"]") != null) + { + return await BhadooIndexParser.ParseIndex(httpClient, webDirectory); + } + + // goindex, goindex-drive, goindex-backup + if (htmlDocument.QuerySelector("script[src*=\"goindex\"]") != null) + { + return await GoIndexParser.ParseIndex(httpClient, webDirectory); + } + + if (htmlDocument.QuerySelector("script[src*=\"gdindex\"]") != null) + { + return await GdIndexParser.ParseIndex(httpClient, webDirectory, html); + } + + if (htmlDocument.QuerySelector("script[src*=\"/go2index/\"]") != null) + { + return await Go2IndexParser.ParseIndex(httpClient, webDirectory); + } + htmlDocument.QuerySelectorAll("#sidebar").ToList().ForEach(e => e.Remove()); htmlDocument.QuerySelectorAll("nav").ToList().ForEach(e => e.Remove()); @@ -95,7 +124,7 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri if (pureTableRows.Any()) { - return ParsePureDirectoryListing(ref baseUrl, parsedWebDirectory, htmlDocument, pureTableRows); + return ParsePureDirectoryListing(ref baseUrl, parsedWebDirectory, htmlDocument, pureTableRows); } // Remove it after ParsePureDirectoryListing (.breadcrumb is used in it) @@ -178,6 +207,12 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri return parsedWebDirectory; } + catch (FriendlyException ex) + { + Logger.Error(ex.Message); + + parsedWebDirectory.Error = true; + } catch (Exception ex) { Logger.Error(ex); @@ -1980,6 +2015,7 @@ private static bool IsValidLink(IElement link) linkHref?.ToLower().StartsWith("javascript") == false && linkHref?.ToLower().StartsWith("mailto:") == false && link.TextContent.ToLower() != "parent directory" && + link.TextContent.ToLower() != "[to parent directory]" && link.TextContent.Trim() != "Name" && linkHref?.Contains("&expand") == false && (!new 
Regex(@"\?[NMSD]=?[AD]").IsMatch(linkHref) || linkHref.StartsWith("DirectoryList.asp")) && diff --git a/OpenDirectoryDownloader/Models/DirectoryListingModel01.cs b/OpenDirectoryDownloader/Models/DirectoryListingModel01.cs index e4a90ec4..4dc15c29 100644 --- a/OpenDirectoryDownloader/Models/DirectoryListingModel01.cs +++ b/OpenDirectoryDownloader/Models/DirectoryListingModel01.cs @@ -18,6 +18,6 @@ public class DirectoryListingModel01 public List Items { get; set; } [JsonProperty(PropertyName = "size")] - public long Size{ get; set; } + public long Size { get; set; } } } diff --git a/OpenDirectoryDownloader/Models/FriendlyException.cs b/OpenDirectoryDownloader/Models/FriendlyException.cs new file mode 100644 index 00000000..b07b607f --- /dev/null +++ b/OpenDirectoryDownloader/Models/FriendlyException.cs @@ -0,0 +1,11 @@ +using System; + +namespace OpenDirectoryDownloader.Models +{ + public class FriendlyException : Exception + { + public FriendlyException() : base() { } + public FriendlyException(string message) : base(message) { } + public FriendlyException(string message, Exception innerException) : base(message, innerException) { } + } +} diff --git a/OpenDirectoryDownloader/OpenDirectoryIndexer.cs b/OpenDirectoryDownloader/OpenDirectoryIndexer.cs index 5f7a670d..5f8cae1f 100644 --- a/OpenDirectoryDownloader/OpenDirectoryIndexer.cs +++ b/OpenDirectoryDownloader/OpenDirectoryIndexer.cs @@ -574,6 +574,12 @@ private bool SameHostAndDirectory(Uri baseUri, Uri checkUri) private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirectory, CancellationToken cancellationToken) { + if (Session.Parameters.ContainsKey(Constants.Parameters_GdIndex_RootId)) + { + await Site.GoIndex.GdIndex.GdIndexParser.ParseIndex(HttpClient, webDirectory, string.Empty); + return; + } + HttpResponseMessage httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url, cancellationToken); string html = null; diff --git 
a/OpenDirectoryDownloader/Site/BlitzfilesTech/BlitzfilesTechParser.cs b/OpenDirectoryDownloader/Site/BlitzfilesTech/BlitzfilesTechParser.cs index 73749587..d5924f72 100644 --- a/OpenDirectoryDownloader/Site/BlitzfilesTech/BlitzfilesTechParser.cs +++ b/OpenDirectoryDownloader/Site/BlitzfilesTech/BlitzfilesTechParser.cs @@ -1,4 +1,5 @@ using NLog; +using OpenDirectoryDownloader.Models; using OpenDirectoryDownloader.Shared.Models; using System; using System.Net.Http; @@ -18,6 +19,11 @@ public static class BlitzfilesTechParser public static async Task ParseIndex(HttpClient httpClient, WebDirectory webDirectory) { + if (OpenDirectoryIndexer.Session.MaxThreads > 1) + { + throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1"); + } + try { string driveHash = GetDriveHash(webDirectory); @@ -75,8 +81,7 @@ private static string GetDriveHash(WebDirectory webDirectory) throw new Exception("Error getting drivehash"); } - string driveHash = driveHashRegexMatch.Groups["DriveHash"].Value; - return driveHash; + return driveHashRegexMatch.Groups["DriveHash"].Value; } private static async Task ScanAsync(HttpClient httpClient, WebDirectory webDirectory) diff --git a/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexParser.cs b/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexParser.cs new file mode 100644 index 00000000..031ae2fc --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexParser.cs @@ -0,0 +1,162 @@ +using NLog; +using OpenDirectoryDownloader.Models; +using OpenDirectoryDownloader.Shared.Models; +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Threading.Tasks; + +namespace OpenDirectoryDownloader.Site.GoIndex.Bhadoo +{ + /// + /// Similar to GoIndex + /// + public static class BhadooIndexParser + { + private static readonly Logger Logger = LogManager.GetCurrentClassLogger(); + private const string FolderMimeType = "application/vnd.google-apps.folder"; + const string 
Parser = "BhadooIndex"; + + public static async Task ParseIndex(HttpClient httpClient, WebDirectory webDirectory) + { + if (OpenDirectoryIndexer.Session.MaxThreads > 1) + { + throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1"); + } + + try + { + if (!OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password)) + { + Console.WriteLine($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + Logger.Info($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + OpenDirectoryIndexer.Session.MaxThreads = 1; + + Console.WriteLine("Check if password is needed (unsupported currently)..."); + Logger.Info("Check if password is needed (unsupported currently)..."); + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = ""; + + Dictionary postValues = new Dictionary + { + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "page_token", string.Empty }, + { "page_index", "0" }, + { "q", "" } + }; + HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, webDirectory.Uri) { Content = new FormUrlEncodedContent(postValues) }; + HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage); + + if (httpResponseMessage.IsSuccessStatusCode) + { + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + + BhadooIndexResponse response = BhadooIndexResponse.FromJson(responseJson); + + webDirectory = await ScanAsync(httpClient, webDirectory); + } + } + else + { + webDirectory = await ScanAsync(httpClient, webDirectory); + } + } + catch (Exception ex) + { + Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + 
OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + throw; + } + + return webDirectory; + } + + private static async Task ScanAsync(HttpClient httpClient, WebDirectory webDirectory) + { + Logger.Debug($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + + webDirectory.Parser = Parser; + + try + { + if (!webDirectory.Url.EndsWith("/")) + { + webDirectory.Url += "/"; + } + + long pageIndex = 0; + string nextPageToken = string.Empty; + + do + { + Logger.Warn($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}, page {pageIndex + 1}"); + + Dictionary postValues = new Dictionary + { + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "page_token", nextPageToken }, + { "page_index", pageIndex.ToString() } + }; + HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, webDirectory.Uri) { Content = new FormUrlEncodedContent(postValues) }; + HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage); + + webDirectory.ParsedSuccesfully = httpResponseMessage.IsSuccessStatusCode; + httpResponseMessage.EnsureSuccessStatusCode(); + + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + + BhadooIndexResponse indexResponse = BhadooIndexResponse.FromJson(responseJson); + + nextPageToken = indexResponse.NextPageToken; + pageIndex = indexResponse.CurPageIndex + 1; + + foreach (File file in indexResponse.Data.Files) + { + if (file.MimeType == FolderMimeType) + { + webDirectory.Subdirectories.Add(new WebDirectory(webDirectory) + { + Parser = Parser, + // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space... 
+ Url = $"{webDirectory.Uri}{file.Name}/", + Name = file.Name + }); + } + else + { + webDirectory.Files.Add(new WebFile + { + Url = new Uri(webDirectory.Uri, file.Name).ToString(), + FileName = file.Name, + FileSize = file.Size + }); + } + } + } while (!string.IsNullOrWhiteSpace(nextPageToken)); + } + catch (Exception ex) + { + Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + //throw; + } + + return webDirectory; + } + } +} diff --git a/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexResult.cs b/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexResult.cs new file mode 100644 index 00000000..ac051573 --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/BhadooIndexResult.cs @@ -0,0 +1,79 @@ +// +// +// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do: +// +// using QuickType; +// +// var bhadooIndexResponse = BhadooIndexResponse.FromJson(jsonString); + +namespace OpenDirectoryDownloader.Site.GoIndex.Bhadoo +{ + using System; + using System.Collections.Generic; + + using System.Globalization; + using Newtonsoft.Json; + using Newtonsoft.Json.Converters; + + public partial class BhadooIndexResponse + { + [JsonProperty("nextPageToken")] + public string NextPageToken { get; set; } + + [JsonProperty("curPageIndex")] + public long CurPageIndex { get; set; } + + [JsonProperty("data")] + public Data Data { get; set; } + } + + public partial class Data + { + [JsonProperty("nextPageToken")] + public string NextPageToken { get; set; } + + [JsonProperty("files")] + public List Files { get; set; } + } + + public partial class File + { + [JsonProperty("id")] + public string Id { get; set; } + + [JsonProperty("name")] + public string Name { get; set; } + + [JsonProperty("mimeType")] + public string MimeType { 
get; set; } + + [JsonProperty("modifiedTime")] + public DateTimeOffset ModifiedTime { get; set; } + + [JsonProperty("size")] + public long Size { get; set; } + } + + public partial class BhadooIndexResponse + { + public static BhadooIndexResponse FromJson(string json) => JsonConvert.DeserializeObject(json, Converter.Settings); + } + + public static class Serialize + { + public static string ToJson(this BhadooIndexResponse self) => JsonConvert.SerializeObject(self, Converter.Settings); + } + + internal static class Converter + { + public static readonly JsonSerializerSettings Settings = new JsonSerializerSettings + { + MetadataPropertyHandling = MetadataPropertyHandling.Ignore, + DateParseHandling = DateParseHandling.None, + Converters = + { + new IsoDateTimeConverter { DateTimeStyles = DateTimeStyles.AssumeUniversal } + }, + }; + } +} diff --git a/OpenDirectoryDownloader/Site/GoIndex/GdIndexParser.cs b/OpenDirectoryDownloader/Site/GoIndex/GdIndexParser.cs new file mode 100644 index 00000000..0a9c0c9a --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/GdIndexParser.cs @@ -0,0 +1,200 @@ +using Newtonsoft.Json; +using NLog; +using OpenDirectoryDownloader.Models; +using OpenDirectoryDownloader.Shared.Models; +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Text.RegularExpressions; +using System.Threading.Tasks; + +namespace OpenDirectoryDownloader.Site.GoIndex.GdIndex +{ + public static class GdIndexParser + { + private static readonly Logger Logger = LogManager.GetCurrentClassLogger(); + private const string FolderMimeType = "application/vnd.google-apps.folder"; + private static readonly Regex RootIdRegex = new Regex(@"default_root_id: '(?.*?)'"); + const string Parser = "GdIndex"; + + public static async Task ParseIndex(HttpClient httpClient, WebDirectory webDirectory, string html) + { + if (OpenDirectoryIndexer.Session.MaxThreads > 1) + { + throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, 
please call with -t 1 or --threads 1"); + } + + try + { + string rootId = string.Empty; + + if (OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_GdIndex_RootId)) + { + rootId = OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId]; + } + else + { + rootId = GetRootId(html); + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId] = rootId; + } + + if (!OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password)) + { + Console.WriteLine($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + Logger.Info($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + OpenDirectoryIndexer.Session.MaxThreads = 1; + + Console.WriteLine("Check if password is needed..."); + Logger.Info("Check if password is needed..."); + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = null; + + HttpResponseMessage httpResponseMessage = await httpClient.PostAsync($"{webDirectory.Uri}?rootId={rootId}", null); + + GdIndexResponse indexResponse = null; + + if (httpResponseMessage.IsSuccessStatusCode) + { + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + indexResponse = GdIndexResponse.FromJson(responseJson); + + if (indexResponse == null) + { + Console.WriteLine("Directory is password protected, please enter password:"); + Logger.Info("Directory is password protected, please enter password."); + + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = Console.ReadLine(); /* Stored under the shared password key: the retry request below and ScanIndexAsync both read Constants.Parameters_Password. */ + + Console.WriteLine($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + Logger.Info($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + + httpResponseMessage = await httpClient.PostAsync($"{webDirectory.Uri}?rootId={rootId}", new StringContent(JsonConvert.SerializeObject(new Dictionary + { + {
"page_index", 0 }, + { "page_token", null }, + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "q", "" } + }))); + + if (httpResponseMessage.IsSuccessStatusCode) + { + responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + indexResponse = GdIndexResponse.FromJson(responseJson); + } + } + } + + if (indexResponse != null) + { + Console.WriteLine("Password OK!"); + Logger.Info("Password OK!"); + + webDirectory = await ScanIndexAsync(httpClient, webDirectory); + } + else + { + OpenDirectoryIndexer.Session.Parameters.Remove(Constants.Parameters_Password); + Console.WriteLine($"Error. Stopping."); + Logger.Error($"Error. Stopping."); + } + } + else + { + webDirectory = await ScanIndexAsync(httpClient, webDirectory); + } + } + catch (Exception ex) + { + Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + throw; + } + + return webDirectory; + } + + private static string GetRootId(string html) + { + Match rootIdRegexMatch = RootIdRegex.Match(html); + + if (!rootIdRegexMatch.Success) + { + return "root"; + } + + return rootIdRegexMatch.Groups["RootId"].Value; + } + + private static async Task ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory) + { + Logger.Debug($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + + webDirectory.Parser = Parser; + + try + { + if (!webDirectory.Url.EndsWith("/")) + { + webDirectory.Url += "/"; + } + + HttpResponseMessage httpResponseMessage = await httpClient.PostAsync($"{OpenDirectoryIndexer.Session.Root.Url}{Uri.EscapeDataString(webDirectory.Url.Replace(OpenDirectoryIndexer.Session.Root.Url, 
string.Empty).TrimEnd('/'))}/?rootId={OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId]}", null); + + webDirectory.ParsedSuccesfully = httpResponseMessage.IsSuccessStatusCode; + httpResponseMessage.EnsureSuccessStatusCode(); + + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + + GdIndexResponse indexResponse = GdIndexResponse.FromJson(responseJson); + + webDirectory.ParsedSuccesfully = indexResponse != null; + + foreach (File file in indexResponse.Files) + { + if (file.MimeType == FolderMimeType) + { + webDirectory.Subdirectories.Add(new WebDirectory(webDirectory) + { + Parser = Parser, + // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space... + Url = $"{webDirectory.Uri}{file.Name}/", + Name = file.Name + }); + } + else + { + webDirectory.Files.Add(new WebFile + { + Url = new Uri(webDirectory.Uri, file.Name).ToString(), + FileName = file.Name, + FileSize = file.Size + }); + } + } + } + catch (Exception ex) + { + Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + //throw; + } + + return webDirectory; + } + } +} diff --git a/OpenDirectoryDownloader/Site/GoIndex/GdIndexResult.cs b/OpenDirectoryDownloader/Site/GoIndex/GdIndexResult.cs new file mode 100644 index 00000000..f6737bdb --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/GdIndexResult.cs @@ -0,0 +1,64 @@ +// +// +// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do: +// +// using QuickType; +// +// var gdIndexResponse = GdIndexResponse.FromJson(jsonString); + +namespace OpenDirectoryDownloader.Site.GoIndex.GdIndex +{ + using System; + using System.Collections.Generic; + + using System.Globalization; + using 
Newtonsoft.Json; + using Newtonsoft.Json.Converters; + + public partial class GdIndexResponse + { + [JsonProperty("files")] + public List Files { get; set; } + } + + public partial class File + { + [JsonProperty("id")] + public string Id { get; set; } + + [JsonProperty("name")] + public string Name { get; set; } + + [JsonProperty("mimeType")] + public string MimeType { get; set; } + + [JsonProperty("modifiedTime")] + public DateTimeOffset ModifiedTime { get; set; } + + [JsonProperty("size", NullValueHandling = NullValueHandling.Ignore)] + public long Size { get; set; } + } + + public partial class GdIndexResponse + { + public static GdIndexResponse FromJson(string json) => JsonConvert.DeserializeObject(json, Converter.Settings); + } + + public static class Serialize + { + public static string ToJson(this GdIndexResponse self) => JsonConvert.SerializeObject(self, Converter.Settings); + } + + internal static class Converter + { + public static readonly JsonSerializerSettings Settings = new JsonSerializerSettings + { + MetadataPropertyHandling = MetadataPropertyHandling.Ignore, + DateParseHandling = DateParseHandling.None, + Converters = + { + new IsoDateTimeConverter { DateTimeStyles = DateTimeStyles.AssumeUniversal } + }, + }; + } +} diff --git a/OpenDirectoryDownloader/Site/GoIndex/Go2IndexParser.cs b/OpenDirectoryDownloader/Site/GoIndex/Go2IndexParser.cs new file mode 100644 index 00000000..1bb0f0fa --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/Go2IndexParser.cs @@ -0,0 +1,203 @@ +using Newtonsoft.Json; +using NLog; +using OpenDirectoryDownloader.Models; +using OpenDirectoryDownloader.Shared.Models; +using System; +using System.Collections.Generic; +using System.Net; +using System.Net.Http; +using System.Threading.Tasks; + +namespace OpenDirectoryDownloader.Site.GoIndex.Go2Index +{ + public static class Go2IndexParser + { + private static readonly Logger Logger = LogManager.GetCurrentClassLogger(); + private const string FolderMimeType = 
"application/vnd.google-apps.folder"; + const string Parser = "Go2Index"; + + public static async Task ParseIndex(HttpClient httpClient, WebDirectory webDirectory) + { + if (OpenDirectoryIndexer.Session.MaxThreads > 1) + { + throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1"); + } + + try + { + if (!OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password)) + { + Console.WriteLine($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + Logger.Info($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors."); + OpenDirectoryIndexer.Session.MaxThreads = 1; + + Console.WriteLine("Check if password is needed..."); + Logger.Info("Check if password is needed..."); + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = null; + + HttpResponseMessage httpResponseMessage = await httpClient.PostAsync(webDirectory.Uri, new StringContent(JsonConvert.SerializeObject(new Dictionary + { + { "page_index", 0 }, + { "page_token", null }, + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "q", "" } + }))); + + Go2IndexResponse indexResponse = null; + + if (httpResponseMessage.IsSuccessStatusCode) + { + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + indexResponse = Go2IndexResponse.FromJson(responseJson); + + if (indexResponse.Error?.Code == (int)HttpStatusCode.Unauthorized) + { + Console.WriteLine("Directory is password protected, please enter password:"); + Logger.Info("Directory is password protected, please enter password."); + + OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = Console.ReadLine(); /* Stored under the shared password key: the retry request below and ScanIndexAsync both read Constants.Parameters_Password. */ + + Console.WriteLine($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + Logger.Info($"Using password:
{OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + + httpResponseMessage = await httpClient.PostAsync(webDirectory.Uri, new StringContent(JsonConvert.SerializeObject(new Dictionary + { + { "page_index", 0 }, + { "page_token", null }, + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "q", "" } + }))); + + if (httpResponseMessage.IsSuccessStatusCode) + { + responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + indexResponse = Go2IndexResponse.FromJson(responseJson); + } + } + } + + if (indexResponse.Error == null) + { + Console.WriteLine("Password OK!"); + Logger.Info("Password OK!"); + + webDirectory = await ScanIndexAsync(httpClient, webDirectory); + } + else + { + OpenDirectoryIndexer.Session.Parameters.Remove(Constants.Parameters_Password); + Console.WriteLine($"Error. Code: {indexResponse.Error.Code}, Message: {indexResponse.Error.Message}. Stopping."); + Logger.Error($"Error. Code: {indexResponse.Error.Code}, Message: {indexResponse.Error.Message}. 
Stopping."); + } + } + else + { + webDirectory = await ScanIndexAsync(httpClient, webDirectory); + } + } + catch (Exception ex) + { + Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + throw; + } + + return webDirectory; + } + + private static async Task ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory) + { + Logger.Debug($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}"); + + webDirectory.Parser = Parser; + + try + { + if (!webDirectory.Url.EndsWith("/")) + { + webDirectory.Url += "/"; + } + + long pageIndex = 0; + string nextPageToken = null; + + do + { + Logger.Warn($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}, page {pageIndex + 1}"); + + HttpResponseMessage httpResponseMessage = await httpClient.PostAsync(webDirectory.Uri, new StringContent(JsonConvert.SerializeObject(new Dictionary + { + { "page_index", pageIndex }, + { "page_token", nextPageToken }, + { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }, + { "q", "" } + }))); + + webDirectory.ParsedSuccesfully = httpResponseMessage.IsSuccessStatusCode; + httpResponseMessage.EnsureSuccessStatusCode(); + + string responseJson = await httpResponseMessage.Content.ReadAsStringAsync(); + + Go2IndexResponse indexResponse = Go2IndexResponse.FromJson(responseJson); + + webDirectory.ParsedSuccesfully = indexResponse.Error == null; + + if (indexResponse.Error != null) + { + throw new Exception($"{indexResponse.Error.Code} | {indexResponse.Error.Message}"); + } + + nextPageToken = indexResponse.NextPageToken; + pageIndex = indexResponse.CurPageIndex + 
1; + + foreach (File file in indexResponse.Data.Files) + { + if (file.MimeType == FolderMimeType) + { + webDirectory.Subdirectories.Add(new WebDirectory(webDirectory) + { + Parser = Parser, + // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space... + Url = $"{webDirectory.Uri}{file.Name}/", + Name = file.Name + }); + } + else + { + webDirectory.Files.Add(new WebFile + { + Url = new Uri(webDirectory.Uri, file.Name).ToString(), + FileName = file.Name, + FileSize = file.Size + }); + } + } + } while (!string.IsNullOrWhiteSpace(nextPageToken)); + } + catch (Exception ex) + { + Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + //throw; + } + + return webDirectory; + } + } +} diff --git a/OpenDirectoryDownloader/Site/GoIndex/Go2IndexResult.cs b/OpenDirectoryDownloader/Site/GoIndex/Go2IndexResult.cs new file mode 100644 index 00000000..85f98309 --- /dev/null +++ b/OpenDirectoryDownloader/Site/GoIndex/Go2IndexResult.cs @@ -0,0 +1,88 @@ +// +// +// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do: +// +// using QuickType; +// +// var go2IndexResponse = Go2IndexResponse.FromJson(jsonString); + +namespace OpenDirectoryDownloader.Site.GoIndex.Go2Index +{ + using System; + using System.Collections.Generic; + + using System.Globalization; + using Newtonsoft.Json; + using Newtonsoft.Json.Converters; + + public partial class Go2IndexResponse + { + [JsonProperty("nextPageToken")] + public string NextPageToken { get; set; } + + [JsonProperty("curPageIndex")] + public int CurPageIndex { get; set; } + + [JsonProperty("data")] + public Data Data { get; set; } + + [JsonProperty("error")] + public Error Error { get; set; } + } + + public partial class Data + { 
+        /// <summary>Entries of the requested directory listing.</summary>
+        [JsonProperty("files")]
+        public List<File> Files { get; set; }
+    }
+
+    /// <summary>
+    /// One entry of a listing. The parser treats an entry as a folder when
+    /// <see cref="MimeType"/> equals "application/vnd.google-apps.folder"; anything else is a file.
+    /// </summary>
+    public partial class File
+    {
+        [JsonProperty("id")]
+        public string Id { get; set; }
+
+        /// <summary>Entry name; the parser appends it to the directory URL.</summary>
+        [JsonProperty("name")]
+        public string Name { get; set; }
+
+        [JsonProperty("mimeType")]
+        public string MimeType { get; set; }
+
+        [JsonProperty("modifiedTime")]
+        public DateTimeOffset ModifiedTime { get; set; }
+
+        /// <summary>Size as reported by the index; a null value in the JSON is skipped and leaves the default of 0.</summary>
+        [JsonProperty("size", NullValueHandling = NullValueHandling.Ignore)]
+        public long Size { get; set; }
+    }
+
+    /// <summary>Error object carried in the "error" property of a response.</summary>
+    public partial class Error
+    {
+        [JsonProperty("code")]
+        public int Code { get; set; }
+
+        [JsonProperty("message")]
+        public string Message { get; set; }
+    }
+
+    public partial class Go2IndexResponse
+    {
+        /// <summary>Deserializes a response body using <see cref="Converter.Settings"/>.</summary>
+        /// <param name="json">Raw JSON text of the response.</param>
+        /// <returns>The deserialized response.</returns>
+        public static Go2IndexResponse FromJson(string json) => JsonConvert.DeserializeObject<Go2IndexResponse>(json, Converter.Settings);
+    }
+
+    public static class Serialize
+    {
+        /// <summary>Serializes a response back to JSON using <see cref="Converter.Settings"/>.</summary>
+        public static string ToJson(this Go2IndexResponse self) => JsonConvert.SerializeObject(self, Converter.Settings);
+    }
+
+    internal static class Converter
+    {
+        /// <summary>
+        /// Serializer settings shared by FromJson and ToJson: metadata properties are ignored,
+        /// date-looking strings are not parsed automatically, and dates are handled by an
+        /// <see cref="IsoDateTimeConverter"/> configured with <see cref="DateTimeStyles.AssumeUniversal"/>.
+        /// </summary>
+        public static readonly JsonSerializerSettings Settings = new JsonSerializerSettings
+        {
+            MetadataPropertyHandling = MetadataPropertyHandling.Ignore,
+            DateParseHandling = DateParseHandling.None,
+            Converters =
+            {
+                new IsoDateTimeConverter { DateTimeStyles = DateTimeStyles.AssumeUniversal }
+            },
+        };
+    }
+}
diff --git a/OpenDirectoryDownloader/Site/GoIndex/GoIndexParser.cs b/OpenDirectoryDownloader/Site/GoIndex/GoIndexParser.cs
new file mode 100644
index 00000000..070054d1
--- /dev/null
+++ b/OpenDirectoryDownloader/Site/GoIndex/GoIndexParser.cs
@@ -0,0 +1,183 @@
+using Newtonsoft.Json;
+using NLog;
+using OpenDirectoryDownloader.Models;
+using OpenDirectoryDownloader.Shared.Models;
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Http;
+using System.Threading.Tasks;
+
+namespace OpenDirectoryDownloader.Site.GoIndex
+{
+    public static class GoIndexParser
+    {
+        private static readonly Logger Logger = LogManager.GetCurrentClassLogger();
+        /// <summary>MIME type that marks a listing entry as a folder.</summary>
+        private const string FolderMimeType = "application/vnd.google-apps.folder";
+
+        /// <summary>Parser name used in log messages and assigned to the Parser property of scanned directories.</summary>
+        private const string Parser = "GoIndex";
+
+        /// <summary>
+        /// Indexes a GoIndex directory. On the first call of a session (no password parameter stored yet)
+        /// the index is probed with the placeholder password "null"; when it answers with a 401 error
+        /// object the password is read from the console and the probe is repeated. Once the probe reports
+        /// no error, and on every later call, the directory is scanned.
+        /// </summary>
+        /// <param name="httpClient">Client used to POST the listing requests.</param>
+        /// <param name="webDirectory">Directory to index; its Subdirectories and Files are filled in.</param>
+        /// <returns>The <paramref name="webDirectory"/> instance that was passed in.</returns>
+        /// <exception cref="FriendlyException">The session is configured for more than one thread.</exception>
+        /// <exception cref="HttpRequestException">The initial probe returned a non-success HTTP status.</exception>
+        public static async Task<WebDirectory> ParseIndex(HttpClient httpClient, WebDirectory webDirectory)
+        {
+            if (OpenDirectoryIndexer.Session.MaxThreads > 1)
+            {
+                throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1");
+            }
+
+            try
+            {
+                if (OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password))
+                {
+                    // The password question was already settled by an earlier call in this session.
+                    return await ScanIndexAsync(httpClient, webDirectory);
+                }
+
+                Console.WriteLine($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors.");
+                Logger.Info($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors.");
+                OpenDirectoryIndexer.Session.MaxThreads = 1;
+
+                Console.WriteLine("Check if password is needed...");
+                Logger.Info("Check if password is needed...");
+                OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = "null";
+
+                HttpResponseMessage httpResponseMessage = await PostPasswordAsync(httpClient, webDirectory);
+
+                // A non-success status means there is no JSON body to inspect, so surface the
+                // HTTP error here instead of dereferencing a missing response further down.
+                httpResponseMessage.EnsureSuccessStatusCode();
+
+                string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();
+                GoIndexResponse indexResponse = GoIndexResponse.FromJson(responseJson);
+
+                if (indexResponse.Error?.Code == (int)HttpStatusCode.Unauthorized)
+                {
+                    Console.WriteLine("Directory is password protected, please enter password:");
+                    Logger.Info("Directory is password protected, please enter password.");
+
+                    // Must be stored under the key every request reads; with any other key the
+                    // retry below would resend the "null" placeholder and fail again.
+                    OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = Console.ReadLine();
+
+                    Console.WriteLine($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");
+                    Logger.Info($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");
+
+                    httpResponseMessage = await PostPasswordAsync(httpClient, webDirectory);
+
+                    // On an HTTP failure the earlier 401 response is kept, so the error branch below reports it.
+                    if (httpResponseMessage.IsSuccessStatusCode)
+                    {
+                        responseJson = await httpResponseMessage.Content.ReadAsStringAsync();
+                        indexResponse = GoIndexResponse.FromJson(responseJson);
+                    }
+                }
+
+                if (indexResponse.Error == null)
+                {
+                    Console.WriteLine("Password OK!");
+                    Logger.Info("Password OK!");
+
+                    webDirectory = await ScanIndexAsync(httpClient, webDirectory);
+                }
+                else
+                {
+                    // Forget the rejected password so the next call probes (and asks) again.
+                    OpenDirectoryIndexer.Session.Parameters.Remove(Constants.Parameters_Password);
+                    Console.WriteLine($"Error. Code: {indexResponse.Error.Code}, Message: {indexResponse.Error.Message}. Stopping.");
+                    Logger.Error($"Error. Code: {indexResponse.Error.Code}, Message: {indexResponse.Error.Message}. Stopping.");
+                }
+            }
+            catch (Exception ex)
+            {
+                Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}");
+                webDirectory.Error = true;
+
+                OpenDirectoryIndexer.Session.Errors++;
+
+                if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
+                {
+                    OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
+                }
+
+                throw;
+            }
+
+            return webDirectory;
+        }
+
+        /// <summary>POSTs the session's current password as a JSON body to the directory URL.</summary>
+        private static async Task<HttpResponseMessage> PostPasswordAsync(HttpClient httpClient, WebDirectory webDirectory)
+        {
+            string payload = JsonConvert.SerializeObject(new Dictionary<string, string>
+            {
+                { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] }
+            });
+
+            return await httpClient.PostAsync(webDirectory.Uri, new StringContent(payload));
+        }
+
+        /// <summary>
+        /// Requests the listing of <paramref name="webDirectory"/> and adds every returned entry to its
+        /// Subdirectories (folder MIME type) or Files (everything else). Failures are logged and recorded
+        /// on the directory and the session, but are not rethrown.
+        /// </summary>
+        /// <returns>The <paramref name="webDirectory"/> instance that was passed in.</returns>
+        private static async Task<WebDirectory> ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory)
+        {
+            Logger.Debug($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");
+
+            webDirectory.Parser = Parser;
+
+            try
+            {
+                // Entry URLs are built by appending names to the directory URL, so it must end with a slash.
+                if (!webDirectory.Url.EndsWith("/", StringComparison.Ordinal))
+                {
+                    webDirectory.Url += "/";
+                }
+
+                HttpResponseMessage httpResponseMessage = await PostPasswordAsync(httpClient, webDirectory);
+
+                webDirectory.ParsedSuccesfully = httpResponseMessage.IsSuccessStatusCode;
+                httpResponseMessage.EnsureSuccessStatusCode();
+
+                string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();
+
+                GoIndexResponse indexResponse = GoIndexResponse.FromJson(responseJson);
+
+                webDirectory.ParsedSuccesfully = indexResponse.Error == null;
+
+                if (indexResponse.Error != null)
+                {
+                    throw new Exception($"{indexResponse.Error.Code} | {indexResponse.Error.Message}");
+                }
+
+                foreach (File file in indexResponse.Files)
+                {
+                    if (file.MimeType == FolderMimeType)
+                    {
+                        webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
+                        {
+                            Parser = Parser,
+                            // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
+                            Url = $"{webDirectory.Uri}{file.Name}/",
+                            Name = file.Name
+                        });
+                    }
+                    else
+                    {
+                        webDirectory.Files.Add(new WebFile
+                        {
+                            Url = new Uri(webDirectory.Uri, file.Name).ToString(),
+                            FileName = file.Name,
+                            FileSize = file.Size
+                        });
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}");
+                webDirectory.Error = true;
+
+                OpenDirectoryIndexer.Session.Errors++;
+
+                if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
+                {
+                    OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
+                }
+
+                // Not rethrown: the directory is returned flagged with Error = true.
+            }
+
+            return webDirectory;
+        }
+    }
+}
diff --git a/OpenDirectoryDownloader/Site/GoIndex/GoIndexResult.cs b/OpenDirectoryDownloader/Site/GoIndex/GoIndexResult.cs
new file mode 100644
index 00000000..b89a0ac0
--- /dev/null
+++ b/OpenDirectoryDownloader/Site/GoIndex/GoIndexResult.cs
@@ -0,0 +1,76 @@
+//
+//
+// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do:
+//
+//    using QuickType;
+//
+//    var goIndexResponse = GoIndexResponse.FromJson(jsonString);
+
+namespace OpenDirectoryDownloader.Site.GoIndex
+{
+    using System;
+
using System.Collections.Generic;
+
+    using System.Globalization;
+    using Newtonsoft.Json;
+    using Newtonsoft.Json.Converters;
+
+    /// <summary>Body of a GoIndex listing response: either an error object or the list of entries.</summary>
+    public partial class GoIndexResponse
+    {
+        /// <summary>Error reported by the index; null when the request was accepted.</summary>
+        [JsonProperty("error")]
+        public Error Error { get; set; }
+
+        /// <summary>Entries of the requested directory listing.</summary>
+        [JsonProperty("files")]
+        public List<File> Files { get; set; }
+    }
+
+    /// <summary>Error object carried in the "error" property of a response.</summary>
+    public partial class Error
+    {
+        [JsonProperty("code")]
+        public long Code { get; set; }
+
+        [JsonProperty("message")]
+        public string Message { get; set; }
+    }
+
+    /// <summary>One entry of a listing, deserialized from an element of the "files" array.</summary>
+    public partial class File
+    {
+        [JsonProperty("id")]
+        public string Id { get; set; }
+
+        [JsonProperty("name")]
+        public string Name { get; set; }
+
+        [JsonProperty("mimeType")]
+        public string MimeType { get; set; }
+
+        [JsonProperty("modifiedTime")]
+        public DateTimeOffset ModifiedTime { get; set; }
+
+        /// <summary>Size as reported by the index; a null value in the JSON is skipped and leaves the default of 0.</summary>
+        [JsonProperty("size", NullValueHandling = NullValueHandling.Ignore)]
+        public long Size { get; set; }
+    }
+
+    public partial class GoIndexResponse
+    {
+        /// <summary>Deserializes a response body using <see cref="Converter.Settings"/>.</summary>
+        /// <param name="json">Raw JSON text of the response.</param>
+        /// <returns>The deserialized response.</returns>
+        public static GoIndexResponse FromJson(string json) => JsonConvert.DeserializeObject<GoIndexResponse>(json, Converter.Settings);
+    }
+
+    public static class Serialize
+    {
+        /// <summary>Serializes a response back to JSON using <see cref="Converter.Settings"/>.</summary>
+        public static string ToJson(this GoIndexResponse self) => JsonConvert.SerializeObject(self, Converter.Settings);
+    }
+
+    internal static class Converter
+    {
+        /// <summary>
+        /// Serializer settings shared by FromJson and ToJson: metadata properties are ignored,
+        /// date-looking strings are not parsed automatically, and dates are handled by an
+        /// <see cref="IsoDateTimeConverter"/> configured with <see cref="DateTimeStyles.AssumeUniversal"/>.
+        /// </summary>
+        public static readonly JsonSerializerSettings Settings = new JsonSerializerSettings
+        {
+            MetadataPropertyHandling = MetadataPropertyHandling.Ignore,
+            DateParseHandling = DateParseHandling.None,
+            Converters =
+            {
+                new IsoDateTimeConverter { DateTimeStyles = DateTimeStyles.AssumeUniversal }
+            },
+        };
+    }
+}