Skip to content

Commit

Permalink
- Added support for GoIndex (already gone) and alternatives like goin…
Browse files Browse the repository at this point in the history
…dex-drive, goindex-backup, goindex-theme-acrou, gdindex, go2index
  • Loading branch information
KoalaBear84 committed Jul 9, 2020
1 parent 56d3d6f commit 765dcb0
Show file tree
Hide file tree
Showing 15 changed files with 1,122 additions and 6 deletions.
4 changes: 3 additions & 1 deletion OpenDirectoryDownloader.Shared/Models/Session.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Newtonsoft.Json;
using Newtonsoft.Json;
using Roslyn.Utilities;
using System;
using System.Collections.Generic;
Expand All @@ -17,6 +17,8 @@ public class Session
public int TotalFiles { get; set; }
public long TotalFileSizeEstimated { get; set; }
public int Errors { get; set; }
[JsonIgnore]
public int MaxThreads { get; set; }
public int Skipped { get; set; }
[JsonIgnore]
public bool StopLogging { get; set; }
Expand Down
1 change: 1 addition & 0 deletions OpenDirectoryDownloader/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ public class Constants
public const string BlitzfilesTechDomain = "blitzfiles.tech";
public const string DateTimeFormat = "yyyy-MM-dd HH:mm:ss";
public const string Parameters_Password = "PASSWORD";
public const string Parameters_GdIndex_RootId = "GdIndex_RootId";

public class UserAgent
{
Expand Down
40 changes: 38 additions & 2 deletions OpenDirectoryDownloader/DirectoryParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
using AngleSharp.Html.Parser;
using Newtonsoft.Json;
using NLog;
using OpenDirectoryDownloader.GoogleDrive;
using OpenDirectoryDownloader.Helpers;
using OpenDirectoryDownloader.Models;
using OpenDirectoryDownloader.Shared;
using OpenDirectoryDownloader.Shared.Models;
using OpenDirectoryDownloader.Site.BlitzfilesTech;
using OpenDirectoryDownloader.Site.GoIndex;
using OpenDirectoryDownloader.Site.GoIndex.Bhadoo;
using OpenDirectoryDownloader.Site.GoIndex.GdIndex;
using OpenDirectoryDownloader.Site.GoIndex.Go2Index;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -62,6 +65,32 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri
return await BlitzfilesTechParser.ParseIndex(httpClient, webDirectory);
}

if (htmlDocument.QuerySelector("script[src*=\"goindex-theme-acrou\"]") != null)
{
return await Go2IndexParser.ParseIndex(httpClient, webDirectory);
}

if (htmlDocument.QuerySelector("script[src*=\"Bhadoo-Drive-Index\"]") != null)
{
return await BhadooIndexParser.ParseIndex(httpClient, webDirectory);
}

// goindex, goindex-drive, goindex-backup
if (htmlDocument.QuerySelector("script[src*=\"goindex\"]") != null)
{
return await GoIndexParser.ParseIndex(httpClient, webDirectory);
}

if (htmlDocument.QuerySelector("script[src*=\"gdindex\"]") != null)
{
return await GdIndexParser.ParseIndex(httpClient, webDirectory, html);
}

if (htmlDocument.QuerySelector("script[src*=\"/go2index/\"]") != null)
{
return await Go2IndexParser.ParseIndex(httpClient, webDirectory);
}

htmlDocument.QuerySelectorAll("#sidebar").ToList().ForEach(e => e.Remove());
htmlDocument.QuerySelectorAll("nav").ToList().ForEach(e => e.Remove());

Expand Down Expand Up @@ -95,7 +124,7 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

if (pureTableRows.Any())
{
return ParsePureDirectoryListing(ref baseUrl, parsedWebDirectory, htmlDocument, pureTableRows);
return ParsePureDirectoryListing(ref baseUrl, parsedWebDirectory, htmlDocument, pureTableRows);
}

// Remove it after ParsePureDirectoryListing (.breadcrumb is used in it)
Expand Down Expand Up @@ -178,6 +207,12 @@ public static async Task<WebDirectory> ParseHtml(WebDirectory webDirectory, stri

return parsedWebDirectory;
}
catch (FriendlyException ex)
{
Logger.Error(ex.Message);

parsedWebDirectory.Error = true;
}
catch (Exception ex)
{
Logger.Error(ex);
Expand Down Expand Up @@ -1980,6 +2015,7 @@ private static bool IsValidLink(IElement link)
linkHref?.ToLower().StartsWith("javascript") == false &&
linkHref?.ToLower().StartsWith("mailto:") == false &&
link.TextContent.ToLower() != "parent directory" &&
link.TextContent.ToLower() != "[to parent directory]" &&
link.TextContent.Trim() != "Name" &&
linkHref?.Contains("&expand") == false &&
(!new Regex(@"\?[NMSD]=?[AD]").IsMatch(linkHref) || linkHref.StartsWith("DirectoryList.asp")) &&
Expand Down
2 changes: 1 addition & 1 deletion OpenDirectoryDownloader/Models/DirectoryListingModel01.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ public class DirectoryListingModel01
public List<DirectoryListingModel01> Items { get; set; }

[JsonProperty(PropertyName = "size")]
public long Size{ get; set; }
public long Size { get; set; }
}
}
11 changes: 11 additions & 0 deletions OpenDirectoryDownloader/Models/FriendlyException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using System;

namespace OpenDirectoryDownloader.Models
{
/// <summary>
/// Exception type used for failures that carry a message meant to be reported
/// on its own (the handler in DirectoryParser logs only <see cref="Exception.Message"/>).
/// </summary>
public class FriendlyException : Exception
{
    /// <summary>
    /// Creates the exception with a message and the exception that caused it.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="innerException">The underlying cause.</param>
    public FriendlyException(string message, Exception innerException)
        : base(message, innerException)
    {
    }

    /// <summary>
    /// Creates the exception with a message.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    public FriendlyException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with the default message of <see cref="Exception"/>.
    /// </summary>
    public FriendlyException()
    {
    }
}
}
6 changes: 6 additions & 0 deletions OpenDirectoryDownloader/OpenDirectoryIndexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,12 @@ private bool SameHostAndDirectory(Uri baseUri, Uri checkUri)

private async Task ProcessWebDirectoryAsync(string name, WebDirectory webDirectory, CancellationToken cancellationToken)
{
if (Session.Parameters.ContainsKey(Constants.Parameters_GdIndex_RootId))
{
await Site.GoIndex.GdIndex.GdIndexParser.ParseIndex(HttpClient, webDirectory, string.Empty);
return;
}

HttpResponseMessage httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url, cancellationToken);
string html = null;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using NLog;
using OpenDirectoryDownloader.Models;
using OpenDirectoryDownloader.Shared.Models;
using System;
using System.Net.Http;
Expand All @@ -18,6 +19,11 @@ public static class BlitzfilesTechParser

public static async Task<WebDirectory> ParseIndex(HttpClient httpClient, WebDirectory webDirectory)
{
if (OpenDirectoryIndexer.Session.MaxThreads > 1)
{
throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1");
}

try
{
string driveHash = GetDriveHash(webDirectory);
Expand Down Expand Up @@ -75,8 +81,7 @@ private static string GetDriveHash(WebDirectory webDirectory)
throw new Exception("Error getting drivehash");
}

string driveHash = driveHashRegexMatch.Groups["DriveHash"].Value;
return driveHash;
return driveHashRegexMatch.Groups["DriveHash"].Value;
}

private static async Task<WebDirectory> ScanAsync(HttpClient httpClient, WebDirectory webDirectory)
Expand Down
162 changes: 162 additions & 0 deletions OpenDirectoryDownloader/Site/GoIndex/BhadooIndexParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
using NLog;
using OpenDirectoryDownloader.Models;
using OpenDirectoryDownloader.Shared.Models;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Threading.Tasks;

namespace OpenDirectoryDownloader.Site.GoIndex.Bhadoo
{
/// <summary>
/// Parser for "Bhadoo Drive Index" sites, which are similar to GoIndex.
/// Listings are retrieved by POSTing form values (password, page token, page index)
/// to the directory URL and reading the JSON response page by page.
/// </summary>
public static class BhadooIndexParser
{
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();
    private const string FolderMimeType = "application/vnd.google-apps.folder";
    const string Parser = "BhadooIndex";

    /// <summary>
    /// Entry point: indexes <paramref name="webDirectory"/> using the Bhadoo index API.
    /// </summary>
    /// <remarks>
    /// On the first call (no password parameter stored in the session yet) an empty
    /// password is stored and one probe request is sent before the actual scan.
    /// </remarks>
    /// <param name="httpClient">Client used for all requests.</param>
    /// <param name="webDirectory">Directory to fill with subdirectories and files.</param>
    /// <returns>The scanned directory (the same instance, updated in place).</returns>
    /// <exception cref="FriendlyException">The session is configured with more than one thread.</exception>
    public static async Task<WebDirectory> ParseIndex(HttpClient httpClient, WebDirectory webDirectory)
    {
        if (OpenDirectoryIndexer.Session.MaxThreads > 1)
        {
            throw new FriendlyException($"{Parser} can only scan at maximum of 1 thread, please call with -t 1 or --threads 1");
        }

        try
        {
            if (!OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password))
            {
                Console.WriteLine($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors.");
                Logger.Info($"{Parser} will always be indexed with only 1 thread, else you will run into problems and errors.");
                OpenDirectoryIndexer.Session.MaxThreads = 1;

                Console.WriteLine("Check if password is needed (unsupported currently)...");
                Logger.Info("Check if password is needed (unsupported currently)...");
                OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = "";

                Dictionary<string, string> postValues = new Dictionary<string, string>
                {
                    { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] },
                    { "page_token", string.Empty },
                    { "page_index", "0" },
                    { "q", "" }
                };

                bool probeSucceeded;

                // Request and response own disposable resources (content streams); release them deterministically.
                using (HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, webDirectory.Uri) { Content = new FormUrlEncodedContent(postValues) })
                using (HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage))
                {
                    probeSucceeded = httpResponseMessage.IsSuccessStatusCode;

                    if (probeSucceeded)
                    {
                        string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();

                        // The result is not used; parsing only verifies that the response is readable
                        // JSON before the real scan starts (a parse failure ends up in the catch below).
                        BhadooIndexResponse.FromJson(responseJson);
                    }
                    else
                    {
                        Logger.Error($"Error parsing {Parser} for URL: {webDirectory.Url}, HTTP status code {(int)httpResponseMessage.StatusCode}");
                    }
                }

                if (probeSucceeded)
                {
                    webDirectory = await ScanAsync(httpClient, webDirectory);
                }
                else
                {
                    // Previously a failed probe returned the untouched directory without any error indication.
                    RegisterError(webDirectory);
                }
            }
            else
            {
                webDirectory = await ScanAsync(httpClient, webDirectory);
            }
        }
        catch (Exception ex)
        {
            Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}");
            RegisterError(webDirectory);

            throw;
        }

        return webDirectory;
    }

    /// <summary>
    /// Retrieves all listing pages for <paramref name="webDirectory"/> and adds the
    /// returned entries as subdirectories (folder MIME type) or files.
    /// </summary>
    /// <param name="httpClient">Client used for all requests.</param>
    /// <param name="webDirectory">Directory to fill; its Url gets a trailing slash if missing.</param>
    /// <returns>The same directory instance; flagged with Error when a request or parse failed.</returns>
    private static async Task<WebDirectory> ScanAsync(HttpClient httpClient, WebDirectory webDirectory)
    {
        Logger.Debug($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");

        webDirectory.Parser = Parser;

        try
        {
            if (!webDirectory.Url.EndsWith("/"))
            {
                webDirectory.Url += "/";
            }

            long pageIndex = 0;
            string nextPageToken = string.Empty;

            do
            {
                Logger.Warn($"Retrieving listings for {webDirectory.Uri} with password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}, page {pageIndex + 1}");

                Dictionary<string, string> postValues = new Dictionary<string, string>
                {
                    { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] },
                    { "page_token", nextPageToken },
                    { "page_index", pageIndex.ToString() }
                };

                string responseJson;

                // Dispose request/response per page so a long paged listing does not pile up undisposed messages.
                using (HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, webDirectory.Uri) { Content = new FormUrlEncodedContent(postValues) })
                using (HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage))
                {
                    webDirectory.ParsedSuccesfully = httpResponseMessage.IsSuccessStatusCode;
                    httpResponseMessage.EnsureSuccessStatusCode();

                    responseJson = await httpResponseMessage.Content.ReadAsStringAsync();
                }

                BhadooIndexResponse indexResponse = BhadooIndexResponse.FromJson(responseJson);

                // The response tells which page it was and hands out the token for the next one;
                // an empty token ends the loop.
                nextPageToken = indexResponse.NextPageToken;
                pageIndex = indexResponse.CurPageIndex + 1;

                foreach (File file in indexResponse.Data.Files)
                {
                    if (file.MimeType == FolderMimeType)
                    {
                        webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                        {
                            Parser = Parser,
                            // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
                            Url = $"{webDirectory.Uri}{file.Name}/",
                            Name = file.Name
                        });
                    }
                    else
                    {
                        webDirectory.Files.Add(new WebFile
                        {
                            Url = new Uri(webDirectory.Uri, file.Name).ToString(),
                            FileName = file.Name,
                            FileSize = file.Size
                        });
                    }
                }
            } while (!string.IsNullOrWhiteSpace(nextPageToken));
        }
        catch (Exception ex)
        {
            Logger.Error(ex, $"Error processing {Parser} for URL: {webDirectory.Url}");
            RegisterError(webDirectory);

            // The exception is not rethrown here: the directory is returned flagged with Error,
            // together with whatever entries were collected before the failure.
        }

        return webDirectory;
    }

    /// <summary>
    /// Flags the directory as failed, increments the session error counter and
    /// records the URL in the session's list of URLs with errors (once per URL).
    /// </summary>
    /// <param name="webDirectory">Directory whose processing failed.</param>
    private static void RegisterError(WebDirectory webDirectory)
    {
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }
    }
}
}
Loading

0 comments on commit 765dcb0

Please sign in to comment.