From 16fcd5a92a241dc60f12f81cb8df56b25e30abb2 Mon Sep 17 00:00:00 2001 From: KoalaBear Date: Sun, 22 May 2022 23:18:14 +0200 Subject: [PATCH] - Add Dropbox.com indexing --- src/OpenDirectoryDownloader/Constants.cs | 1 + .../DirectoryParser.cs | 6 + .../OpenDirectoryIndexer.cs | 11 +- .../Site/Dropbox/DropboxParser.cs | 178 ++++++++++++ .../Site/Dropbox/DropboxResult.cs | 274 ++++++++++++++++++ .../Site/GDIndex/BhadooIndexParser.cs | 4 +- 6 files changed, 470 insertions(+), 4 deletions(-) create mode 100644 src/OpenDirectoryDownloader/Site/Dropbox/DropboxParser.cs create mode 100644 src/OpenDirectoryDownloader/Site/Dropbox/DropboxResult.cs diff --git a/src/OpenDirectoryDownloader/Constants.cs b/src/OpenDirectoryDownloader/Constants.cs index 8ad28420..bb37193b 100644 --- a/src/OpenDirectoryDownloader/Constants.cs +++ b/src/OpenDirectoryDownloader/Constants.cs @@ -4,6 +4,7 @@ public class Constants { public const string GoogleDriveDomain = "drive.google.com"; public const string BlitzfilesTechDomain = "blitzfiles.tech"; + public const string DropboxDomain = "www.dropbox.com"; public const string GoFileIoDomain = "gofile.io"; public const string Parameters_GdIndex_RootId = "GDINDEX_ROOTID"; diff --git a/src/OpenDirectoryDownloader/DirectoryParser.cs b/src/OpenDirectoryDownloader/DirectoryParser.cs index 3123138b..538c8b2b 100644 --- a/src/OpenDirectoryDownloader/DirectoryParser.cs +++ b/src/OpenDirectoryDownloader/DirectoryParser.cs @@ -8,6 +8,7 @@ using OpenDirectoryDownloader.Shared; using OpenDirectoryDownloader.Shared.Models; using OpenDirectoryDownloader.Site.BlitzfilesTech; +using OpenDirectoryDownloader.Site.Dropbox; using OpenDirectoryDownloader.Site.GDIndex; using OpenDirectoryDownloader.Site.GDIndex.Bhadoo; using OpenDirectoryDownloader.Site.GDIndex.GdIndex; @@ -69,6 +70,11 @@ public static async Task ParseHtml(WebDirectory webDirectory, stri return await BlitzfilesTechParser.ParseIndex(httpClient, webDirectory); } + if (webDirectory.Uri.Host == 
Constants.DropboxDomain) + { + return await DropboxParser.ParseIndex(httpClient, webDirectory); + } + if (webDirectory.Uri.Host == Constants.GoFileIoDomain) { return await GoFileIOParser.ParseIndex(httpClient, webDirectory); diff --git a/src/OpenDirectoryDownloader/OpenDirectoryIndexer.cs b/src/OpenDirectoryDownloader/OpenDirectoryIndexer.cs index aaa51f53..ff0b98aa 100644 --- a/src/OpenDirectoryDownloader/OpenDirectoryIndexer.cs +++ b/src/OpenDirectoryDownloader/OpenDirectoryIndexer.cs @@ -379,6 +379,7 @@ public async void StartIndexingAsync() if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain && + Session.Root.Uri.Host != Constants.DropboxDomain && Session.Root.Uri.Host != Constants.GoFileIoDomain && Session.Root.Uri.Host != Constants.MediafireDomain && Session.Root.Uri.Host != Constants.PixeldrainDomain) @@ -450,6 +451,7 @@ public async void StartIndexingAsync() if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain && + Session.Root.Uri.Host != Constants.DropboxDomain && Session.Root.Uri.Host != Constants.GoFileIoDomain && Session.Root.Uri.Host != Constants.MediafireDomain && Session.Root.Uri.Host != Constants.PixeldrainDomain) @@ -733,7 +735,9 @@ private async Task WebDirectoryProcessor(ConcurrentQueue queue, st } else { - if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain || DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, webDirectory.Uri)) + if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain || + Session.Root.Uri.Host == Constants.DropboxDomain || + DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, webDirectory.Uri)) { Logger.Debug($"[{name}] Start download '{webDirectory.Url}'"); Session.TotalHttpRequests++; @@ -1392,7 +1396,10 @@ private void 
AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa { if (!Session.ProcessedUrls.Contains(subdirectory.Url)) { - if (subdirectory.Uri.Host != Constants.GoogleDriveDomain && subdirectory.Uri.Host != Constants.BlitzfilesTechDomain && !DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, subdirectory.Uri)) + if (subdirectory.Uri.Host != Constants.GoogleDriveDomain && + subdirectory.Uri.Host != Constants.BlitzfilesTechDomain && + subdirectory.Uri.Host != Constants.DropboxDomain && + !DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, subdirectory.Uri)) { Logger.Debug($"Removed subdirectory {subdirectory.Uri} from parsed webdirectory because it is not the same host"); } diff --git a/src/OpenDirectoryDownloader/Site/Dropbox/DropboxParser.cs b/src/OpenDirectoryDownloader/Site/Dropbox/DropboxParser.cs new file mode 100644 index 00000000..1ffdeb12 --- /dev/null +++ b/src/OpenDirectoryDownloader/Site/Dropbox/DropboxParser.cs @@ -0,0 +1,178 @@ +using Esprima; +using Esprima.Ast; +using NLog; +using OpenDirectoryDownloader.Shared.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Text.RegularExpressions; +using System.Threading.Tasks; + +namespace OpenDirectoryDownloader.Site.Dropbox; + +public static class DropboxParser +{ + private static readonly Logger Logger = LogManager.GetCurrentClassLogger(); + private static readonly Regex UrlRegex = new Regex(@"\/sh\/(?[^\/]*)\/(?[^\/?]*)(?:\/(?[^?]*))"); + private static readonly Regex PrefetchListingRegex = new Regex(@"window\[""__REGISTER_SHARED_LINK_FOLDER_PRELOAD_HANDLER""\]\.responseReceived\((?"".*)\)\s?}\);"); + private const string Parser = "Dropbox"; + public const string Parameters_CSRFToken = "CSRFTOKEN"; + + public static async Task ParseIndex(HttpClient httpClient, WebDirectory webDirectory) + { + try + { + webDirectory = await ScanAsync(httpClient, webDirectory); + } + catch (Exception ex) + { + 
Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + throw; + } + + return webDirectory; + } + + private static async Task ScanAsync(HttpClient httpClient, WebDirectory webDirectory) + { + Logger.Debug($"Retrieving listings for {webDirectory.Uri}"); + + webDirectory.Parser = Parser; + + try + { + if (!httpClient.DefaultRequestHeaders.UserAgent.Any()) + { + httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Chrome); + } + + HttpResponseMessage httpResponseMessage = await httpClient.GetAsync(webDirectory.Uri); + + CookieContainer cookieContainer = new CookieContainer(); + + if (httpResponseMessage.Headers.Contains("Set-Cookie")) + { + foreach (string cookieHeader in httpResponseMessage.Headers.GetValues("Set-Cookie")) + { + cookieContainer.SetCookies(webDirectory.Uri, cookieHeader); + } + + if (!OpenDirectoryIndexer.Session.Parameters.ContainsKey(Parameters_CSRFToken)) + { + Cookie cookie = cookieContainer.GetCookies(webDirectory.Uri).FirstOrDefault(c => c.Name == "__Host-js_csrf"); + + if (cookie is not null) + { + OpenDirectoryIndexer.Session.Parameters[Parameters_CSRFToken] = cookie.Value; + } + } + } + + string html = await httpResponseMessage.Content.ReadAsStringAsync(); + + Match prefetchListingRegexMatch = PrefetchListingRegex.Match(html); + + if (prefetchListingRegexMatch.Success) + { + string htmlJavascriptString = prefetchListingRegexMatch.Groups["PrefetchListing"].Value; + JavaScriptParser javaScriptParser = new JavaScriptParser(htmlJavascriptString); + Script program = javaScriptParser.ParseScript(); + string decodedJson = (program.Body[0].ChildNodes[0] as Literal).StringValue; + + Match urlRegexMatch = UrlRegex.Match(webDirectory.Uri.ToString()); + bool takedownActive = false; + + 
DropboxResult dropboxResult = DropboxResult.FromJson(decodedJson); + takedownActive = takedownActive || dropboxResult.TakedownRequestType is not null; + + List entries = new List(); + entries.AddRange(dropboxResult.Entries); + + if (dropboxResult.HasMoreEntries) + { + do + { + Dictionary postValues = new Dictionary + { + { "is_xhr", "true" }, + { "t", OpenDirectoryIndexer.Session.Parameters[Parameters_CSRFToken] }, + { "link_key", urlRegexMatch.Groups["LinkKey"].Value }, + { "link_type", "s" }, + { "secure_hash", urlRegexMatch.Groups["SecureHash"].Value }, + { "sub_path", urlRegexMatch.Groups["SubPath"].Value }, + { "voucher", dropboxResult.NextRequestVoucher } + }; + + HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, "https://www.dropbox.com/list_shared_link_folder_entries") { Content = new FormUrlEncodedContent(postValues) }; + httpResponseMessage = await httpClient.SendAsync(httpRequestMessage); + + string response = await httpResponseMessage.Content.ReadAsStringAsync(); + + dropboxResult = DropboxResult.FromJson(response); + takedownActive |= dropboxResult.TakedownRequestType is not null; + + entries.AddRange(dropboxResult.Entries); + } while (dropboxResult?.HasMoreEntries == true); + } + + foreach (Entry entry in entries) + { + if (entry.IsDir || entry.IsSymlink) + { + webDirectory.Subdirectories.Add(new WebDirectory(webDirectory) + { + Parser = Parser, + Url = entry.Href.ToString(), + Name = entry.Filename + }); + } + else + { + webDirectory.Files.Add(new WebFile + { + Url = entry.Href?.ToString(), + FileName = entry.Filename, + FileSize = entry.Bytes + }); + } + } + + if (takedownActive) + { + Logger.Warn("Some entries are not provided because of DMCA/takedown."); + } + } + else + { + throw new Exception("Cannot find prefetch listing"); + } + } + catch (Exception ex) + { + Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}"); + webDirectory.Error = true; + + OpenDirectoryIndexer.Session.Errors++; + + if 
(!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url)) + { + OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url); + } + + //throw; + } + + return webDirectory; + } +} diff --git a/src/OpenDirectoryDownloader/Site/Dropbox/DropboxResult.cs b/src/OpenDirectoryDownloader/Site/Dropbox/DropboxResult.cs new file mode 100644 index 00000000..9192a75d --- /dev/null +++ b/src/OpenDirectoryDownloader/Site/Dropbox/DropboxResult.cs @@ -0,0 +1,274 @@ +// +// +// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do: +// +// using OpenDirectoryDownloader.Site.Dropbox; +// +// var dropboxResult = DropboxResult.FromJson(jsonString); + +namespace OpenDirectoryDownloader.Site.Dropbox +{ + using System; + using System.Collections.Generic; + + using System.Globalization; + using Newtonsoft.Json; + using Newtonsoft.Json.Converters; + + public partial class DropboxResult + { + [JsonProperty("entries")] + public Entry[] Entries { get; set; } + + [JsonProperty("share_tokens")] + public ShareToken[] ShareTokens { get; set; } + + [JsonProperty("shared_link_infos")] + public SharedLinkInfo[] SharedLinkInfos { get; set; } + + [JsonProperty("share_permissions")] + public SharePermission[] SharePermissions { get; set; } + + [JsonProperty("takedown_request_type")] + public object TakedownRequestType { get; set; } + + [JsonProperty("total_num_entries")] + public long TotalNumEntries { get; set; } + + [JsonProperty("has_more_entries")] + public bool HasMoreEntries { get; set; } + + [JsonProperty("next_request_voucher")] + public string NextRequestVoucher { get; set; } + + [JsonProperty("folder")] + public Folder Folder { get; set; } + + [JsonProperty("folder_share_permission")] + public SharePermission FolderSharePermission { get; set; } + + [JsonProperty("folder_share_token")] + public ShareToken FolderShareToken { get; set; } + + [JsonProperty("folder_shared_link_info")] + public SharedLinkInfo FolderSharedLinkInfo { get; set; } + } + + public partial class Entry + { + 
[JsonProperty("bytes")] + public long Bytes { get; set; } + + [JsonProperty("file_id")] + public string FileId { get; set; } + + [JsonProperty("filename")] + public string Filename { get; set; } + + [JsonProperty("href")] + public Uri Href { get; set; } + + [JsonProperty("icon")] + public string Icon { get; set; } + + [JsonProperty("is_dir")] + public bool IsDir { get; set; } + + [JsonProperty("ns_id")] + public long NsId { get; set; } + + [JsonProperty("open_in_app_data")] + public object OpenInAppData { get; set; } + + [JsonProperty("preview")] + public Preview Preview { get; set; } + + [JsonProperty("preview_type")] + public string PreviewType { get; set; } + + [JsonProperty("revision_id")] + public string RevisionId { get; set; } + + [JsonProperty("sjid")] + public long Sjid { get; set; } + + [JsonProperty("sort_key")] + public string[] SortKey { get; set; } + + [JsonProperty("thumbnail_url_tmpl")] + public Uri ThumbnailUrlTmpl { get; set; } + + [JsonProperty("ts")] + public long Ts { get; set; } + + [JsonProperty("is_symlink")] + public bool IsSymlink { get; set; } + } + + public partial class Preview + { + [JsonProperty("content")] + public Content Content { get; set; } + + [JsonProperty("preview_url")] + public Uri PreviewUrl { get; set; } + } + + public partial class Content + { + [JsonProperty(".tag")] + public string Tag { get; set; } + + [JsonProperty("text_url_tmpl")] + public Uri TextUrlTmpl { get; set; } + + [JsonProperty("image_url_tmpl")] + public Uri ImageUrlTmpl { get; set; } + + [JsonProperty("refresh_url")] + public Uri RefreshUrl { get; set; } + + [JsonProperty("placeholder_image_url")] + public Uri PlaceholderImageUrl { get; set; } + + [JsonProperty("autoprint_url")] + public Uri AutoprintUrl { get; set; } + + [JsonProperty("supported_widths")] + public long[] SupportedWidths { get; set; } + } + + public partial class Folder + { + [JsonProperty("_mount_access_perms")] + public string[] MountAccessPerms { get; set; } + + 
[JsonProperty("filename")] + public string Filename { get; set; } + + [JsonProperty("href")] + public Uri Href { get; set; } + + [JsonProperty("is_dir")] + public bool IsDir { get; set; } + + [JsonProperty("open_in_app_data")] + public object OpenInAppData { get; set; } + + [JsonProperty("shared_folder_id")] + public object SharedFolderId { get; set; } + + [JsonProperty("ns_id")] + public long NsId { get; set; } + + [JsonProperty("sort_key")] + public string[] SortKey { get; set; } + + [JsonProperty("folder_id")] + public string FolderId { get; set; } + } + + public partial class SharePermission + { + [JsonProperty("canCopyToDropboxRoles")] + public string[] CanCopyToDropboxRoles { get; set; } + + [JsonProperty("canSyncToDropboxRoles")] + public object[] CanSyncToDropboxRoles { get; set; } + + [JsonProperty("canDownloadRoles")] + public string[] CanDownloadRoles { get; set; } + + [JsonProperty("canRemoveLinkUids")] + public object[] CanRemoveLinkUids { get; set; } + + [JsonProperty("canPrintRoles")] + public string[] CanPrintRoles { get; set; } + + [JsonProperty("canViewContextMenuRoles")] + public string[] CanViewContextMenuRoles { get; set; } + + [JsonProperty("canViewMetadataRoles")] + public object[] CanViewMetadataRoles { get; set; } + + [JsonProperty("isEditFolderLink")] + public bool IsEditFolderLink { get; set; } + + [JsonProperty("syncVarsByRoles")] + public object SyncVarsByRoles { get; set; } + } + + public partial class ShareToken + { + [JsonProperty("itemId")] + public object ItemId { get; set; } + + [JsonProperty("linkType")] + public string LinkType { get; set; } + + [JsonProperty("linkKey")] + public string LinkKey { get; set; } + + [JsonProperty("subPath")] + public string SubPath { get; set; } + + [JsonProperty("secureHash")] + public string SecureHash { get; set; } + + [JsonProperty("rlkey")] + public object Rlkey { get; set; } + } + + public partial class SharedLinkInfo + { + [JsonProperty("displayName")] + public string DisplayName { get; set; 
} + + [JsonProperty("downloadTestUrl")] + public Uri DownloadTestUrl { get; set; } + + [JsonProperty("hasPublicAudienceOrVisibility")] + public bool HasPublicAudienceOrVisibility { get; set; } + + [JsonProperty("ownerName")] + public string OwnerName { get; set; } + + [JsonProperty("ownerTeamLogo")] + public object OwnerTeamLogo { get; set; } + + [JsonProperty("ownerTeamBackground")] + public object OwnerTeamBackground { get; set; } + + [JsonProperty("ownerTeamName")] + public object OwnerTeamName { get; set; } + + [JsonProperty("teamMemberBrandingPolicyEnabled")] + public bool TeamMemberBrandingPolicyEnabled { get; set; } + + [JsonProperty("url")] + public Uri Url { get; set; } + } + + public partial class DropboxResult + { + public static DropboxResult FromJson(string json) => JsonConvert.DeserializeObject(json, Converter.Settings); + } + + public static class Serialize + { + public static string ToJson(this DropboxResult self) => JsonConvert.SerializeObject(self, Converter.Settings); + } + + internal static class Converter + { + public static readonly JsonSerializerSettings Settings = new JsonSerializerSettings + { + MetadataPropertyHandling = MetadataPropertyHandling.Ignore, + DateParseHandling = DateParseHandling.None, + Converters = + { + new IsoDateTimeConverter { DateTimeStyles = DateTimeStyles.AssumeUniversal } + }, + }; + } +} diff --git a/src/OpenDirectoryDownloader/Site/GDIndex/BhadooIndexParser.cs b/src/OpenDirectoryDownloader/Site/GDIndex/BhadooIndexParser.cs index 27936fba..3864478a 100644 --- a/src/OpenDirectoryDownloader/Site/GDIndex/BhadooIndexParser.cs +++ b/src/OpenDirectoryDownloader/Site/GDIndex/BhadooIndexParser.cs @@ -132,7 +132,7 @@ private static async Task DecodeResponse(IHtmlDocument htmlDocument, Htt Script program = javaScriptParser.ParseScript(); IEnumerable javaScriptFunctions = program.ChildNodes.OfType(); FunctionDeclaration readFunctionDeclaration = javaScriptFunctions.FirstOrDefault(f => f.ChildNodes.OfType().Any(i => i.Name == 
"read")); - string readFunction = appJsSource.Substring(readFunctionDeclaration.Range.Start, readFunctionDeclaration.Range.End - readFunctionDeclaration.Range.Start); + string readFunction = appJsSource[readFunctionDeclaration.Range.Start..readFunctionDeclaration.Range.End]; JintEngine = new Engine(); @@ -144,7 +144,7 @@ private static async Task DecodeResponse(IHtmlDocument htmlDocument, Htt JintEngine.SetValue("atob", atob); FunctionDeclaration gdidecodeFunctionDeclaration = javaScriptFunctions.FirstOrDefault(f => f.ChildNodes.OfType().Any(i => i.Name == "gdidecode")); - string gdidecodeFunction = appJsSource.Substring(gdidecodeFunctionDeclaration.Range.Start, gdidecodeFunctionDeclaration.Range.End - gdidecodeFunctionDeclaration.Range.Start); + string gdidecodeFunction = appJsSource[gdidecodeFunctionDeclaration.Range.Start..gdidecodeFunctionDeclaration.Range.End]; JintEngine.Execute(gdidecodeFunction); } }