Skip to content

Commit

Permalink
- Add Dropbox.com indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
KoalaBear84 committed May 22, 2022
1 parent ba9f2f2 commit 16fcd5a
Show file tree
Hide file tree
Showing 6 changed files with 470 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/OpenDirectoryDownloader/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ public class Constants
{
public const string GoogleDriveDomain = "drive.google.com";
public const string BlitzfilesTechDomain = "blitzfiles.tech";
public const string DropboxDomain = "www.dropbox.com";

public const string GoFileIoDomain = "gofile.io";
public const string Parameters_GdIndex_RootId = "GDINDEX_ROOTID";
Expand Down
6 changes: 6 additions & 0 deletions src/OpenDirectoryDownloader/DirectoryParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using OpenDirectoryDownloader.Shared;
using OpenDirectoryDownloader.Shared.Models;
using OpenDirectoryDownloader.Site.BlitzfilesTech;
using OpenDirectoryDownloader.Site.Dropbox;
using OpenDirectoryDownloader.Site.GDIndex;
using OpenDirectoryDownloader.Site.GDIndex.Bhadoo;
using OpenDirectoryDownloader.Site.GDIndex.GdIndex;
Expand Down Expand Up @@ -69,6 +70,11 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri
return await BlitzfilesTechParser.ParseIndex(httpClient, webDirectory);
}

if (webDirectory.Uri.Host == Constants.DropboxDomain)
{
return await DropboxParser.ParseIndex(httpClient, webDirectory);
}

if (webDirectory.Uri.Host == Constants.GoFileIoDomain)
{
return await GoFileIOParser.ParseIndex(httpClient, webDirectory);
Expand Down
11 changes: 9 additions & 2 deletions src/OpenDirectoryDownloader/OpenDirectoryIndexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ public async void StartIndexingAsync()
if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls &&
Session.Root.Uri.Host != Constants.GoogleDriveDomain &&
Session.Root.Uri.Host != Constants.BlitzfilesTechDomain &&
Session.Root.Uri.Host != Constants.DropboxDomain &&
Session.Root.Uri.Host != Constants.GoFileIoDomain &&
Session.Root.Uri.Host != Constants.MediafireDomain &&
Session.Root.Uri.Host != Constants.PixeldrainDomain)
Expand Down Expand Up @@ -450,6 +451,7 @@ public async void StartIndexingAsync()
if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest &&
Session.Root.Uri.Host != Constants.GoogleDriveDomain &&
Session.Root.Uri.Host != Constants.BlitzfilesTechDomain &&
Session.Root.Uri.Host != Constants.DropboxDomain &&
Session.Root.Uri.Host != Constants.GoFileIoDomain &&
Session.Root.Uri.Host != Constants.MediafireDomain &&
Session.Root.Uri.Host != Constants.PixeldrainDomain)
Expand Down Expand Up @@ -733,7 +735,9 @@ private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, st
}
else
{
if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain || DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, webDirectory.Uri))
if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain ||
Session.Root.Uri.Host == Constants.DropboxDomain ||
DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, webDirectory.Uri))
{
Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
Session.TotalHttpRequests++;
Expand Down Expand Up @@ -1392,7 +1396,10 @@ private void AddProcessedWebDirectory(WebDirectory webDirectory, WebDirectory pa
{
if (!Session.ProcessedUrls.Contains(subdirectory.Url))
{
if (subdirectory.Uri.Host != Constants.GoogleDriveDomain && subdirectory.Uri.Host != Constants.BlitzfilesTechDomain && !DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, subdirectory.Uri))
if (subdirectory.Uri.Host != Constants.GoogleDriveDomain &&
subdirectory.Uri.Host != Constants.BlitzfilesTechDomain &&
subdirectory.Uri.Host != Constants.DropboxDomain &&
!DirectoryParser.SameHostAndDirectoryFile(Session.Root.Uri, subdirectory.Uri))
{
Logger.Debug($"Removed subdirectory {subdirectory.Uri} from parsed webdirectory because it is not the same host");
}
Expand Down
178 changes: 178 additions & 0 deletions src/OpenDirectoryDownloader/Site/Dropbox/DropboxParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
using Esprima;
using Esprima.Ast;
using NLog;
using OpenDirectoryDownloader.Shared.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace OpenDirectoryDownloader.Site.Dropbox;

/// <summary>
/// Parser for Dropbox shared folder links. Reads the folder listing that is embedded
/// in the page's JavaScript and, when the listing is paged, retrieves the remaining
/// pages through the <c>list_shared_link_folder_entries</c> endpoint.
/// </summary>
public static class DropboxParser
{
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Splits "/sh/<link key>/<secure hash>[/<sub path>]". The sub path segment is optional,
    // so the root of a shared folder (e.g. "/sh/<key>/<hash>?dl=0") matches as well.
    private static readonly Regex UrlRegex = new Regex(@"\/sh\/(?<LinkKey>[^\/]*)\/(?<SecureHash>[^\/?]*)(?:\/(?<SubPath>[^?]*))?");

    // Captures the JavaScript string literal passed to the preload handler's responseReceived(...) call.
    private static readonly Regex PrefetchListingRegex = new Regex(@"window\[""__REGISTER_SHARED_LINK_FOLDER_PRELOAD_HANDLER""\]\.responseReceived\((?<PrefetchListing>"".*)\)\s?}\);");

    private const string Parser = "Dropbox";
    private const string CsrfCookieName = "__Host-js_csrf";
    private const string ListEntriesUrl = "https://www.dropbox.com/list_shared_link_folder_entries";

    /// <summary>Key under which the CSRF token is stored in the session parameters.</summary>
    public const string Parameters_CSRFToken = "CSRFTOKEN";

    /// <summary>
    /// Fills <paramref name="webDirectory"/> with the subdirectories and files of a Dropbox shared folder.
    /// </summary>
    /// <param name="httpClient">Client used for the page request and any follow-up listing requests.</param>
    /// <param name="webDirectory">Directory whose <c>Uri</c> points at the shared folder.</param>
    /// <returns>The same <paramref name="webDirectory"/> instance, populated with the entries found.</returns>
    /// <remarks>
    /// An exception that escapes the scan is logged, recorded in the session error
    /// statistics and then rethrown.
    /// </remarks>
    public static async Task<WebDirectory> ParseIndex(HttpClient httpClient, WebDirectory webDirectory)
    {
        try
        {
            webDirectory = await ScanAsync(httpClient, webDirectory);
        }
        catch (Exception ex)
        {
            Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}");
            RegisterError(webDirectory);

            throw;
        }

        return webDirectory;
    }

    /// <summary>
    /// Downloads the shared folder page, decodes the embedded listing, follows pagination
    /// and converts all entries to subdirectories and files of <paramref name="webDirectory"/>.
    /// </summary>
    /// <returns>The populated <paramref name="webDirectory"/>; on failure it is flagged with <c>Error = true</c>.</returns>
    private static async Task<WebDirectory> ScanAsync(HttpClient httpClient, WebDirectory webDirectory)
    {
        Logger.Debug($"Retrieving listings for {webDirectory.Uri}");

        webDirectory.Parser = Parser;

        try
        {
            if (!httpClient.DefaultRequestHeaders.UserAgent.Any())
            {
                httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(Constants.UserAgent.Chrome);
            }

            string html;

            using (HttpResponseMessage pageResponse = await httpClient.GetAsync(webDirectory.Uri))
            {
                StoreCsrfToken(pageResponse, webDirectory.Uri);
                html = await pageResponse.Content.ReadAsStringAsync();
            }

            DropboxResult dropboxResult = DropboxResult.FromJson(DecodePrefetchListing(html))
                ?? throw new InvalidOperationException("Prefetch listing is empty");

            bool takedownActive = dropboxResult.TakedownRequestType is not null;

            List<Entry> entries = new List<Entry>();

            if (dropboxResult.Entries is not null)
            {
                entries.AddRange(dropboxResult.Entries);
            }

            if (dropboxResult.HasMoreEntries)
            {
                takedownActive |= await AppendRemainingEntriesAsync(httpClient, webDirectory.Uri, dropboxResult, entries);
            }

            AddEntries(webDirectory, entries);

            if (takedownActive)
            {
                Logger.Warn("Some entries are not provided because of DCMA/takedown.");
            }
        }
        catch (Exception ex)
        {
            // Deliberately not rethrown: the failure is logged and recorded on the directory
            // and in the session statistics, and the (possibly empty) directory is returned.
            Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}");
            RegisterError(webDirectory);
        }

        return webDirectory;
    }

    /// <summary>
    /// Stores the value of the CSRF cookie from <paramref name="response"/> in the session
    /// parameters, unless a token is already stored or the response sets no such cookie.
    /// </summary>
    private static void StoreCsrfToken(HttpResponseMessage response, Uri uri)
    {
        if (!response.Headers.Contains("Set-Cookie") ||
            OpenDirectoryIndexer.Session.Parameters.ContainsKey(Parameters_CSRFToken))
        {
            return;
        }

        CookieContainer cookieContainer = new CookieContainer();

        foreach (string cookieHeader in response.Headers.GetValues("Set-Cookie"))
        {
            cookieContainer.SetCookies(uri, cookieHeader);
        }

        Cookie cookie = cookieContainer.GetCookies(uri).FirstOrDefault(c => c.Name == CsrfCookieName);

        if (cookie is not null)
        {
            OpenDirectoryIndexer.Session.Parameters[Parameters_CSRFToken] = cookie.Value;
        }
    }

    /// <summary>
    /// Extracts the JavaScript string literal holding the first listing page from
    /// <paramref name="html"/> and returns its decoded (unescaped) JSON content.
    /// </summary>
    /// <exception cref="InvalidOperationException">The listing is missing or is not a string literal.</exception>
    private static string DecodePrefetchListing(string html)
    {
        Match prefetchListingRegexMatch = PrefetchListingRegex.Match(html);

        if (!prefetchListingRegexMatch.Success)
        {
            throw new InvalidOperationException("Cannot find prefetch listing");
        }

        // The listing is an escaped JavaScript string; let the JavaScript parser do the unescaping.
        JavaScriptParser javaScriptParser = new JavaScriptParser(prefetchListingRegexMatch.Groups["PrefetchListing"].Value);
        Script program = javaScriptParser.ParseScript();

        if (program.Body[0].ChildNodes[0] is not Literal literal)
        {
            throw new InvalidOperationException("Prefetch listing is not a string literal");
        }

        return literal.StringValue;
    }

    /// <summary>
    /// Requests follow-up listing pages until the server reports no more entries,
    /// appending every received entry to <paramref name="entries"/>.
    /// </summary>
    /// <param name="httpClient">Client used for the listing requests.</param>
    /// <param name="uri">Shared folder URL; supplies the link key, secure hash and sub path.</param>
    /// <param name="dropboxResult">First page result, which supplies the voucher for the next request.</param>
    /// <param name="entries">Collector for the entries of all pages.</param>
    /// <returns><c>true</c> when any follow-up page reports a takedown request type.</returns>
    /// <exception cref="InvalidOperationException">
    /// The URL cannot be split into its parts, no CSRF token is stored, or a page cannot be read.
    /// </exception>
    private static async Task<bool> AppendRemainingEntriesAsync(HttpClient httpClient, Uri uri, DropboxResult dropboxResult, List<Entry> entries)
    {
        Match urlRegexMatch = UrlRegex.Match(uri.ToString());

        if (!urlRegexMatch.Success)
        {
            throw new InvalidOperationException($"Cannot determine link key and secure hash from URL: {uri}");
        }

        if (!OpenDirectoryIndexer.Session.Parameters.TryGetValue(Parameters_CSRFToken, out string csrfToken))
        {
            throw new InvalidOperationException("No CSRF token available, cannot retrieve more entries");
        }

        bool takedownActive = false;

        do
        {
            Dictionary<string, string> postValues = new Dictionary<string, string>
            {
                { "is_xhr", "true" },
                { "t", csrfToken },
                { "link_key", urlRegexMatch.Groups["LinkKey"].Value },
                { "link_type", "s" },
                { "secure_hash", urlRegexMatch.Groups["SecureHash"].Value },
                { "sub_path", urlRegexMatch.Groups["SubPath"].Value },
                { "voucher", dropboxResult.NextRequestVoucher }
            };

            using HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, ListEntriesUrl) { Content = new FormUrlEncodedContent(postValues) };
            using HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage);

            // Fail with the HTTP status instead of a confusing JSON parse error on an error page.
            httpResponseMessage.EnsureSuccessStatusCode();

            string response = await httpResponseMessage.Content.ReadAsStringAsync();

            dropboxResult = DropboxResult.FromJson(response)
                ?? throw new InvalidOperationException("Received an empty listing page");

            takedownActive |= dropboxResult.TakedownRequestType is not null;

            if (dropboxResult.Entries is not null)
            {
                entries.AddRange(dropboxResult.Entries);
            }
        } while (dropboxResult.HasMoreEntries);

        return takedownActive;
    }

    /// <summary>
    /// Adds every entry to <paramref name="webDirectory"/>: directories and symlinks become
    /// subdirectories, everything else becomes a file.
    /// </summary>
    private static void AddEntries(WebDirectory webDirectory, List<Entry> entries)
    {
        foreach (Entry entry in entries)
        {
            if (entry.IsDir || entry.IsSymlink)
            {
                if (entry.Href is null)
                {
                    // A directory without a link cannot be visited; skip it instead of failing the whole listing.
                    Logger.Warn($"Skipping {Parser} directory '{entry.Filename}' because it has no link");
                    continue;
                }

                webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                {
                    Parser = Parser,
                    Url = entry.Href.ToString(),
                    Name = entry.Filename
                });
            }
            else
            {
                webDirectory.Files.Add(new WebFile
                {
                    Url = entry.Href?.ToString(),
                    FileName = entry.Filename,
                    FileSize = entry.Bytes
                });
            }
        }
    }

    /// <summary>
    /// Flags <paramref name="webDirectory"/> as failed and records the failure in the session statistics.
    /// </summary>
    private static void RegisterError(WebDirectory webDirectory)
    {
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }
    }
}
Loading

0 comments on commit 16fcd5a

Please sign in to comment.