diff --git a/README.md b/README.md index 09ff1aa..517052e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Table of Contents === - [Overview](#overview) -- [Why Build Yet Another Parser?](#why-build-yet-another-parser) + - [Design Considerations](#design-considerations) - [Features](#features) - [Usage](#usage) - [Minimal Example](#minimal-example) @@ -29,12 +29,12 @@ Supports the proposed [RFC9309](https://datatracker.ietf.org/doc/html/rfc9309) s - Host - Crawl-delay -# Why Build Yet Another Parser? - -There are several _robots.txt_ and _sitemap_ parsers that already exist, however they all suffer from their lack of flexibility. +## Design Considerations This library is based upon `HttpClient`, making it very familiar, easy to use and adaptable to your needs. Since you have full control over the `HttpClient`, you are able to configure custom message handlers to intercept outgoing requests and responses. For example, you may want to add custom headers on a request, configure additional logging or set up a retry policy. +Some websites can have very large sitemaps. For this reason, async streaming is supported as the preferred way of parsing sitemaps. + There is also the possibility to extend this library to support protocols other than HTTP, such as FTP. # Features @@ -53,6 +53,8 @@ There is also the possibility to extend this library to support protocols other | Atom 0.3/1.0 feeds | ❌ | 0.8 | | Sitemaps XML format | ✔️ | | | Simple text sitemaps | ✔️ | | +| Async streaming of sitemaps | ✔️ | | +| Cancellation token support | ✔️ | | | Memory management | ✔️ | | # Usage @@ -136,9 +138,11 @@ var robotWebClient = new RobotWebClient(httpClient); var robotsTxt = await robotWebClient.LoadRobotsTxtAsync(); // providing a datetime only retrieves sitemap items modified since this datetime var modifiedSince = new DateTime(2023, 01, 01); -// sitemaps are scanned recursively and combined into single Sitemap object +// sitemaps are iterated asynchronously // even if robots.txt does not contain sitemap directive, looks for a sitemap at {TWebsite.BaseAddress}/sitemap.xml -var sitemap = await robotsTxt.LoadSitemapAsync(modifiedSince); +await foreach(var item in robotsTxt.LoadSitemapAsync(modifiedSince)) +{ +} ``` ## Checking a Rule diff --git a/src/Robots.Txt.Parser/Http/RobotWebClient.cs b/src/Robots.Txt.Parser/Http/RobotWebClient.cs index 228faf1..0125374 100644 --- a/src/Robots.Txt.Parser/Http/RobotWebClient.cs +++ b/src/Robots.Txt.Parser/Http/RobotWebClient.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Net.Http; using System.Net.Mime; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -76,49 +77,48 @@ the 500-599 range. return new RobotsTxt(this, userAgentRules, new Dictionary(), null, new HashSet()); } - var stream = await response.Content.ReadAsStreamAsync(cancellationToken); + await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken); return await new RobotsTxtParser(this).ReadFromStreamAsync(stream, cancellationToken); } - async Task IRobotClient.LoadSitemapsAsync(IEnumerable uris, DateTime? modifiedSince, CancellationToken cancellationToken) + async IAsyncEnumerable IRobotClient.LoadSitemapsAsync(Uri uri, DateTime? modifiedSince, [EnumeratorCancellation] CancellationToken cancellationToken) { - Sitemap? 
sitemap = null; + var request = new HttpRequestMessage(HttpMethod.Get, uri); + request.Headers.Add("Accept", "application/xml,text/plain,text/xml,*/*"); + var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + if (!response.IsSuccessStatusCode) yield break; - foreach (var uri in uris) - { - var request = new HttpRequestMessage(HttpMethod.Get, uri); - request.Headers.Add("Accept", "application/xml,text/plain,text/xml,*/*"); - var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken); - if (!response.IsSuccessStatusCode) return null; - using var stream = await response.Content.ReadAsStreamAsync(cancellationToken); - - var parsedSitemap = response.Content.Headers.ContentType?.MediaType switch - { - MediaTypeNames.Text.Plain => await SimpleTextSitemapParser.ReadFromStreamAsync(stream, cancellationToken), - MediaTypeNames.Text.Xml or MediaTypeNames.Application.Xml or _ - => await SitemapParser.ReadFromStreamAsync(stream, modifiedSince, cancellationToken) - }; - - if (parsedSitemap is null) - { - continue; - } + await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken); - if (sitemap is null) - { - sitemap = parsedSitemap; - continue; - } - - if (parsedSitemap is SitemapIndex sitemapRoot) - { - var sitemaps = await (this as IRobotWebClient).LoadSitemapsAsync(sitemapRoot.SitemapUris, modifiedSince, cancellationToken); - if (sitemaps is not null) sitemap = sitemaps.Combine(sitemaps); - } - - sitemap = sitemap.Combine(parsedSitemap); + switch (response.Content.Headers.ContentType?.MediaType) + { + case MediaTypeNames.Text.Plain: + await foreach (var urlSet in SimpleTextSitemapParser.ReadFromStreamAsync(stream, cancellationToken)) + { + yield return urlSet; + } + yield break; + case MediaTypeNames.Text.Xml or MediaTypeNames.Application.Xml: + default: + var sitemap = await SitemapParser.ReadFromStreamAsync(stream, modifiedSince, cancellationToken); + if (sitemap is SitemapIndex index) + { + await foreach (var location in index.SitemapUris) + { + await foreach (var item in (this as IRobotClient).LoadSitemapsAsync(location, modifiedSince, cancellationToken)) + { + yield return item; + } + } + } + else + { + await foreach (var item in sitemap.UrlSet) + { + yield return item; + } + } + yield break; } - - return sitemap; } } diff --git a/src/Robots.Txt.Parser/IRobotClient.cs b/src/Robots.Txt.Parser/IRobotClient.cs index 3642b95..747fde5 100644 --- a/src/Robots.Txt.Parser/IRobotClient.cs +++ b/src/Robots.Txt.Parser/IRobotClient.cs @@ -19,5 +19,5 @@ public interface IRobotClient /// Thrown if a status code that cannot be handled is returned. Task LoadRobotsTxtAsync(CancellationToken cancellationToken = default); - protected internal Task LoadSitemapsAsync(IEnumerable uris, DateTime? modifiedSince, CancellationToken cancellationToken); + protected internal IAsyncEnumerable LoadSitemapsAsync(Uri uri, DateTime? 
modifiedSince = null, CancellationToken cancellationToken = default); } diff --git a/src/Robots.Txt.Parser/ISitemap.cs b/src/Robots.Txt.Parser/ISitemap.cs index 74a71c0..831a62d 100644 --- a/src/Robots.Txt.Parser/ISitemap.cs +++ b/src/Robots.Txt.Parser/ISitemap.cs @@ -11,7 +11,7 @@ public interface ISitemap /// /// Url set included in the Sitemap /// - HashSet UrlSet { get; } + IAsyncEnumerable UrlSet { get; } } /// @@ -19,27 +19,28 @@ public interface ISitemap /// public class Sitemap : ISitemap { - public Sitemap(HashSet urlSet) + public Sitemap(IAsyncEnumerable urlSet) { UrlSet = urlSet; } /// - public HashSet UrlSet { get; } - - internal Sitemap Combine(Sitemap other) - { - UrlSet.UnionWith(other.UrlSet); - return this; - } + public IAsyncEnumerable UrlSet { get; } } internal class SitemapIndex : Sitemap { - public SitemapIndex(HashSet sitemapUris) : base(new HashSet()) + public SitemapIndex(IAsyncEnumerable sitemapUris) : base(Empty()) { SitemapUris = sitemapUris; } - public HashSet SitemapUris { get; } + public IAsyncEnumerable SitemapUris { get; } + +#pragma warning disable CS1998 + private static async IAsyncEnumerable Empty() +#pragma warning restore CS1998 + { + yield break; + } } \ No newline at end of file diff --git a/src/Robots.Txt.Parser/RobotsTxt.cs b/src/Robots.Txt.Parser/RobotsTxt.cs index 36f11e5..f4bd82f 100644 --- a/src/Robots.Txt.Parser/RobotsTxt.cs +++ b/src/Robots.Txt.Parser/RobotsTxt.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; using System.Threading; -using System.Threading.Tasks; namespace Robots.Txt.Parser; @@ -17,7 +17,7 @@ public interface IRobotsTxt /// Filter to retrieve site maps modified after this date /// Cancellation token /// A sitemap, or null or no sitemap is found - ValueTask LoadSitemapAsync(DateTime? modifiedSince = default, CancellationToken cancellationToken = default); + IAsyncEnumerable LoadSitemapAsync(DateTime? modifiedSince = default, CancellationToken cancellationToken = default); /// /// Retrieves the crawl delay specified for a User-Agent @@ -71,10 +71,17 @@ internal RobotsTxt(IRobotClient client, } /// - public async ValueTask LoadSitemapAsync(DateTime? modifiedSince = default, CancellationToken cancellationToken = default) - => _sitemapUrls.Count != 0 - ? await _client.LoadSitemapsAsync(_sitemapUrls, modifiedSince, cancellationToken) - : await _client.LoadSitemapsAsync(new[] { new Uri(_client.BaseAddress, "/sitemap.xml") }, modifiedSince, cancellationToken); + public async IAsyncEnumerable LoadSitemapAsync(DateTime? modifiedSince = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var urls = _sitemapUrls.Count != 0 ? 
_sitemapUrls.AsEnumerable() : new[] { new Uri(_client.BaseAddress, "/sitemap.xml") }; + foreach (var url in urls) + { + await foreach (var item in _client.LoadSitemapsAsync(url, modifiedSince, cancellationToken)) + { + yield return item; + } + } + } /// public bool TryGetCrawlDelay(ProductToken userAgent, out int crawlDelay) diff --git a/src/Robots.Txt.Parser/RobotsTxtException.cs b/src/Robots.Txt.Parser/RobotsTxtException.cs index d8c0230..9332bfa 100644 --- a/src/Robots.Txt.Parser/RobotsTxtException.cs +++ b/src/Robots.Txt.Parser/RobotsTxtException.cs @@ -1,4 +1,5 @@ using System; +using System.Diagnostics.CodeAnalysis; using System.Runtime.Serialization; namespace Robots.Txt.Parser; @@ -6,6 +7,7 @@ namespace Robots.Txt.Parser; /// /// Exception raised when parsing a robots.txt file /// +[Serializable] public class RobotsTxtException : Exception { internal RobotsTxtException() @@ -20,6 +22,7 @@ internal RobotsTxtException(string? message, Exception? innerException) : base(m { } + [ExcludeFromCodeCoverage] protected RobotsTxtException(SerializationInfo info, StreamingContext context) : base(info, context) { } diff --git a/src/Robots.Txt.Parser/SimpleTextSitemapParser.cs b/src/Robots.Txt.Parser/SimpleTextSitemapParser.cs index 4ad923e..13e0938 100644 --- a/src/Robots.Txt.Parser/SimpleTextSitemapParser.cs +++ b/src/Robots.Txt.Parser/SimpleTextSitemapParser.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; using System.IO; +using System.Runtime.CompilerServices; using System.Threading; -using System.Threading.Tasks; namespace Robots.Txt.Parser; @@ -11,55 +11,55 @@ namespace Robots.Txt.Parser; /// public static class SimpleTextSitemapParser { - private const int MaxLines = 50000; - private const int ByteCount50MiB = 52_428_800; + private const int MaxLines = 50000; + private const int ByteCount50MiB = 52_428_800; - /// - /// Parses a from a - /// - /// Sitemap document stream - /// Cancellation token - /// The parsed - /// Raised when there is an error parsing the Sitemap - public static async Task ReadFromStreamAsync(Stream stream, CancellationToken cancellationToken = default) + /// + /// Parses a from a + /// + /// Sitemap document stream + /// Cancellation token + /// The parsed + /// Raised when there is an error parsing the Sitemap + public static async IAsyncEnumerable ReadFromStreamAsync(Stream stream, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + using var streamReader = new StreamReader(stream); + string? line; + var lineCount = 0; + while (((line = await streamReader.ReadLineAsync(cancellationToken)) is not null) && !cancellationToken.IsCancellationRequested) { - var urlSet = new HashSet(); - try - { - using var streamReader = new StreamReader(stream); - string? line; - var lineCount = 0; - while (((line = await streamReader.ReadLineAsync(cancellationToken)) is not null) && !cancellationToken.IsCancellationRequested) - { - /* - Each text file ... and must be no larger than 50MiB (52,428,800 bytes) - */ - if (stream.Position > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); + /* + Each text file ... 
and must be no larger than 50MiB (52,428,800 bytes) + */ + if (stream.Position > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); - if (string.IsNullOrWhiteSpace(line)) continue; + if (string.IsNullOrWhiteSpace(line)) continue; - lineCount++; + lineCount++; - /* - Each text file can contain a maximum of 50,000 URLs - */ - if (lineCount > MaxLines) throw new SitemapException("Reached line limit"); + /* + Each text file can contain a maximum of 50,000 URLs + */ + if (lineCount > MaxLines) throw new SitemapException("Reached line limit"); - /* - The text file must have one URL per line. The URLs cannot contain embedded new lines. - You must fully specify URLs, including the http. - The text file must use UTF-8 encoding. - The text file should contain no information other than the list of URLs. - The text file should contain no header or footer information. - */ - urlSet.Add(new UrlSetItem(new Uri(line), null, null, null)); - } + /* + The text file must have one URL per line. The URLs cannot contain embedded new lines. + You must fully specify URLs, including the http. + The text file must use UTF-8 encoding. + The text file should contain no information other than the list of URLs. + The text file should contain no header or footer information. + */ + Uri location; + try + { + location = new Uri(line); + } + catch (Exception e) + { + throw new SitemapException("Unable to parse sitemap item", e); + } - return new Sitemap(urlSet); - } - catch (Exception e) when (e is not SitemapException) - { - throw new SitemapException("Unable to parse sitemap", e); - } + yield return new UrlSetItem(location, null, null, null); } + } } diff --git a/src/Robots.Txt.Parser/SitemapException.cs b/src/Robots.Txt.Parser/SitemapException.cs index 550073d..566f79e 100644 --- a/src/Robots.Txt.Parser/SitemapException.cs +++ b/src/Robots.Txt.Parser/SitemapException.cs @@ -1,11 +1,13 @@ using System; +using System.Diagnostics.CodeAnalysis; using System.Runtime.Serialization; namespace Robots.Txt.Parser; /// -/// Exception raised when parsing a Sitemap +/// Exception raised when parsing a sitemap /// +[Serializable] public class SitemapException : Exception { internal SitemapException() @@ -20,6 +22,7 @@ internal SitemapException(string? message, Exception? innerException) : base(mes { } + [ExcludeFromCodeCoverage] protected SitemapException(SerializationInfo info, StreamingContext context) : base(info, context) { } diff --git a/src/Robots.Txt.Parser/SitemapParser.cs b/src/Robots.Txt.Parser/SitemapParser.cs index 56493ac..491e634 100644 --- a/src/Robots.Txt.Parser/SitemapParser.cs +++ b/src/Robots.Txt.Parser/SitemapParser.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using System.Xml; @@ -29,15 +30,15 @@ public static async Task ReadFromStreamAsync(Stream stream, DateTime? 
m { try { - using var reader = XmlReader.Create(stream, new XmlReaderSettings { Async = true }); + var reader = XmlReader.Create(stream, new XmlReaderSettings { Async = true }); await reader.MoveToContentAsync(); return reader switch { XmlReader when reader.NamespaceURI == sitemapNamespace && reader.Name == "urlset" - => await ParseUrlSet(stream, reader, modifiedSince, cancellationToken), + => new Sitemap(ParseUrlSet(reader, () => stream.Position, modifiedSince, cancellationToken)), XmlReader when reader.NamespaceURI == sitemapNamespace && reader.Name == "sitemapindex" - => await ParseSitemapIndex(stream, reader, modifiedSince, cancellationToken), + => new SitemapIndex(ParseSitemapIndex(reader, () => stream.Position, modifiedSince, cancellationToken)), _ => throw new SitemapException("Unable to find root sitemap element") }; } @@ -47,67 +48,111 @@ public static async Task ReadFromStreamAsync(Stream stream, DateTime? m } } - private static async Task ParseSitemapIndex(Stream stream, XmlReader reader, DateTime? modifiedSince, CancellationToken cancellationToken) + private static async IAsyncEnumerable ParseSitemapIndex(XmlReader reader, Func getByteCount, DateTime? modifiedSince, [EnumeratorCancellation] CancellationToken cancellationToken) { - await reader.ReadAsync(); - - var uris = new HashSet(); - while (!reader.EOF && reader.ReadState is ReadState.Interactive && !cancellationToken.IsCancellationRequested) + try { - if (reader.NodeType is not XmlNodeType.Element || reader.Name != "sitemap" || reader.NamespaceURI != sitemapNamespace) + await reader.ReadAsync(); + + while (!reader.EOF && reader.ReadState is ReadState.Interactive && !cancellationToken.IsCancellationRequested) { - await reader.ReadAsync(); - continue; + if (reader.NodeType is not XmlNodeType.Element || reader.Name != "sitemap" || reader.NamespaceURI != sitemapNamespace) + { + await reader.ReadAsync(); + continue; + } + + XElement node; + try + { + node = (XElement)await XNode.ReadFromAsync(reader, cancellationToken); + } + catch (Exception e) + { + throw new SitemapException("Unable to parse sitemap item", e); + } + + if (getByteCount() > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); + + Uri location; + try + { + var lastModifiedString = node.Element(sitemapNamespace + "lastmod")?.Value; + DateTime? lastModified = lastModifiedString is not null ? DateTime.Parse(lastModifiedString) : null; + if (modifiedSince is not null && lastModified is not null && lastModified < modifiedSince) continue; + location = new Uri(node.Element(sitemapNamespace + "loc")!.Value); + } + catch (Exception e) + { + throw new SitemapException("Unable to parse sitemap item", e); + } + + yield return location; } - - var node = (XElement)await XNode.ReadFromAsync(reader, cancellationToken); - - if (stream.Position > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); - - var lastModifiedString = node.Element(sitemapNamespace + "lastmod")?.Value; - DateTime? lastModified = lastModifiedString is not null ? DateTime.Parse(lastModifiedString) : null; - - if (modifiedSince is not null && lastModified is not null && lastModified < modifiedSince) continue; - - var location = new Uri(node.Element(sitemapNamespace + "loc")!.Value); - - uris.Add(location); } - return new SitemapIndex(uris); + finally + { + reader.Dispose(); + } } - private static async Task ParseUrlSet(Stream stream, XmlReader reader, DateTime? 
modifiedSince, CancellationToken cancellationToken) + private static async IAsyncEnumerable ParseUrlSet(XmlReader reader, Func getByteCount, DateTime? modifiedSince, [EnumeratorCancellation] CancellationToken cancellationToken) { - await reader.ReadAsync(); - - var items = new HashSet(); - while (!reader.EOF && reader.ReadState is ReadState.Interactive && !cancellationToken.IsCancellationRequested) + try { - if (reader.NodeType is not XmlNodeType.Element || reader.Name != "url" || reader.NamespaceURI != sitemapNamespace) + await reader.ReadAsync(); + + while (!reader.EOF && reader.ReadState is ReadState.Interactive && !cancellationToken.IsCancellationRequested) { - await reader.ReadAsync(); - continue; + if (reader.NodeType is not XmlNodeType.Element || reader.Name != "url" || reader.NamespaceURI != sitemapNamespace) + { + await reader.ReadAsync(); + continue; + } + + XElement node; + try + { + node = (XElement)await XNode.ReadFromAsync(reader, cancellationToken); + } + catch (Exception e) + { + throw new SitemapException("Unable to parse sitemap item", e); + } + + if (getByteCount() > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); + + Uri location; + DateTime? lastModified; + ChangeFrequency? changeFrequency; + decimal? priority; + + try + { + var lastModifiedString = node.Element(sitemapNamespace + "lastmod")?.Value; + lastModified = lastModifiedString is not null ? DateTime.Parse(lastModifiedString) : null; + + if (modifiedSince is not null && lastModified is not null && lastModified < modifiedSince) continue; + + location = new Uri(node.Element(sitemapNamespace + "loc")!.Value); + var changeFrequencyString = node.Element(sitemapNamespace + "changefreq")?.Value; + var priorityString = node.Element(sitemapNamespace + "priority")?.Value; + changeFrequency = changeFrequencyString is not null + ? Enum.Parse(changeFrequencyString, ignoreCase: true) + : null; + priority = priorityString is not null ? decimal.Parse(priorityString) : null; + } + catch (Exception e) + { + throw new SitemapException("Unable to parse sitemap item", e); + } + + yield return new UrlSetItem(location, lastModified, changeFrequency, priority); } - - var node = (XElement)await XNode.ReadFromAsync(reader, cancellationToken); - - if (stream.Position > ByteCount50MiB) throw new SitemapException("Reached parsing limit"); - - var lastModifiedString = node.Element(sitemapNamespace + "lastmod")?.Value; - DateTime? lastModified = lastModifiedString is not null ? DateTime.Parse(lastModifiedString) : null; - - if (modifiedSince is not null && lastModified is not null && lastModified < modifiedSince) continue; - - var location = new Uri(node.Element(sitemapNamespace + "loc")!.Value); - var changeFrequencyString = node.Element(sitemapNamespace + "changefreq")?.Value; - var priorityString = node.Element(sitemapNamespace + "priority")?.Value; - ChangeFrequency? changeFrequency = changeFrequencyString is not null - ? Enum.Parse(changeFrequencyString, ignoreCase: true) - : null; - decimal? priority = priorityString is not null ? 
decimal.Parse(priorityString) : null; - - items.Add(new UrlSetItem(location, lastModified, changeFrequency, priority)); } - return new Sitemap(items); + finally + { + reader.Dispose(); + } } } diff --git a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtCrawlDelayTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtCrawlDelayTests.cs index 935f005..9fef553 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtCrawlDelayTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtCrawlDelayTests.cs @@ -16,7 +16,7 @@ public async Task NoMatchedRules_CrawlDelayNotSpecified_DefaultCrawlDelay() @"User-agent: AnotherBot Crawl-delay: 10 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -35,7 +35,7 @@ public async Task WildcardUserAgent_CrawlDelayNotSpecified_DefaultCrawlDelay() @"User-agent: * Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -54,7 +54,7 @@ public async Task WildcardUserAgent_CrawlDelaySpecified_ReturnCrawlDelay() @"User-agent: * Crawl-delay: 10 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -73,7 +73,7 @@ public async Task WildcardUserAgent_NonStandardCaseCrawlDelaySpecified_ReturnCra @"User-agent: * crawl-delay: 10 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -95,7 +95,7 @@ public async Task MatchedUserAgent_NoCrawlDelaySpecified_DefaultCrawlDelay() User-agent: SomeBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -117,7 +117,7 @@ public async Task MatchedUserAgent_CrawlDelaySpecified_ReturnCrawlDelay() User-agent: SomeBot Crawl-delay: 5 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -140,7 +140,7 @@ public async Task MatchedMultiLineUserAgent_NoCrawlDelaySpecified_DefaultCrawlDe User-agent: SomeOtherBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -163,7 +163,7 @@ public async Task MatchedMultiLineUserAgent_CrawlDelaySpecified_ReturnCrawlDelay User-agent: SomeOtherBot Crawl-delay: 5 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -188,7 +188,7 @@ public async Task MatchedDuplicateGroupUserAgent_CrawlDelaySpecified_ReturnFirst User-agent: SomeBot Crawl-delay: 5 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await 
_parser.ReadFromStreamAsync(stream); diff --git a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtHostTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtHostTests.cs index 0dd72a5..1b93763 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtHostTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtHostTests.cs @@ -17,7 +17,7 @@ public async Task TryGetHost_HostNotSpecified_ReturnFalse() @"User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://www.github.com")); @@ -40,7 +40,7 @@ public async Task TryGetHost_InvalidHostDirective_ReturnFalse() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://www.github.com")); @@ -63,7 +63,7 @@ public async Task TryGetHost_HostSpecified_ReturnTrue() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://www.github.com")); @@ -86,7 +86,7 @@ public async Task TryGetHost_NonStandardCaseHostSpecified_ReturnTrue() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://www.github.com")); @@ -109,7 +109,7 @@ public async Task TryGetHost_FullyQualifiedHostSpecified_ReturnTrue() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://www.github.com")); diff --git a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtRuleCheckerTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtRuleCheckerTests.cs index a819dde..2dd3977 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtRuleCheckerTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtRuleCheckerTests.cs @@ -16,7 +16,7 @@ public async Task NoMatchedUserAgent_AnyPath_Allow() @"User-agent: AnotherBot Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -38,7 +38,7 @@ public async Task UserAgentWildcard_DisallowAll_RobotsTxtAllowed() User-agent: AnotherBot Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -60,7 +60,7 @@ public async Task UserAgentWildcard_DisallowPath_DisallowOnMatch() User-agent: AnotherBot Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -79,7 +79,7 @@ public async Task UserAgentWildcard_DisallowWildcardPath_DisallowOnMatch() @"User-agent: * Disallow: /some/*/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var 
stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -98,7 +98,7 @@ public async Task UserAgentWildcard_DisallowDoubleWildcardPath_DisallowOnMatch() @"User-agent: * Disallow: /some/**/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -117,7 +117,7 @@ public async Task UserAgentWildcard_TwoPartWildcardPath_DisallowOnMatch() @"User-agent: * Disallow: /some/*/*/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -136,7 +136,7 @@ public async Task UserAgentWildcard_TwoPartWildcardPath_DisallowSubpathMatch() @"User-agent: * Disallow: /some/*/*/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -155,7 +155,7 @@ public async Task UserAgentWildcard_WildcardPathWithEndOfMatch_AllowSubpathMatch @"User-agent: * Disallow: /some/*/*/path$ "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -174,7 +174,7 @@ public async Task UserAgentWildcard_DisallowEndOfMatchPath_DisallowOnExactMatch( @"User-agent: * Disallow: /some/path$ "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -193,7 +193,7 @@ public async Task UserAgentWildcard_DisallowEndOfMatchPath_AllowOnSubPathMatch() @"User-agent: * Disallow: /some/path$ "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -215,7 +215,7 @@ public async Task UserAgentWildcard_DisallowPath_DisallowOnSubpath() User-agent: AnotherBot Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -237,7 +237,7 @@ public async Task UserAgentWildcard_DisallowPath_AllowWhenDoesNotMatch() User-agent: AnotherBot Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -259,7 +259,7 @@ public async Task MatchedUserAgent_NoDisallowRule_AllowAnyPath() User-agent: SomeBot Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -281,7 +281,7 @@ public async Task MatchedUserAgent_DisallowAll_RobotsTxtAllowed() User-agent: SomeBot Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -303,7 +303,7 @@ public async Task 
MatchedUserAgent_DisallowPath_DisallowOnPathMatch() User-agent: SomeBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -325,7 +325,7 @@ public async Task MatchedUserAgent_DisallowPath_DisallowOnSubpathMatch() User-agent: SomeBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -347,7 +347,7 @@ public async Task UserAgentMatch_DisallowPath_AllowWhenPathDoesNotMatch() User-agent: SomeBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -370,7 +370,7 @@ public async Task MultiLineUserAgentMatch_NoDisallowRule_AllowAnyPath() User-agent: SomeOtherBot Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -393,7 +393,7 @@ public async Task MultiLineUserAgentMatch_DisallowPath_DisallowOnPathMatch() User-agent: SomeOtherBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -416,7 +416,7 @@ public async Task MultiLineUserAgentMatch_DisallowPath_DisallowOnSubpathMatch() User-agent: SomeOtherBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -439,7 +439,7 @@ public async Task MultiLineUserAgentMatch_DisallowPath_AllowWhenPathDoesNotMatch User-agent: SomeOtherBot Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -464,7 +464,7 @@ public async Task MultiGroupUserAgentMatch_DisallowPaths_AllRulesRespected() User-agent: SomeBot Disallow: /yet/another/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -488,7 +488,7 @@ public async Task WildcardUserAgent_DisallowAllAndNoAllowPath_DisallowAll() User-agent: AnotherBot Allow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -508,7 +508,7 @@ public async Task WildcardUserAgent_DisallowAllAndAllowPath_DisallowIfNotAllowPa Disallow: / Allow: /some/other/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -528,7 +528,7 @@ public async Task WildcardUserAgent_DisallowAllAndAllowPath_AllowPathMatch() Disallow: / Allow: /some/path "; - var 
stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -548,7 +548,7 @@ public async Task UserAgentWildcard_DisallowAllAndAllowWildcardPath_AllowWildcar Disallow: / Allow: /some/*/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -568,7 +568,7 @@ public async Task WildcardUserAgentRuleMatch_DisallowAllAndAllowPath_AllowSubpat Disallow: / Allow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -588,7 +588,7 @@ public async Task WildcardUserAgent_BothAllowAndDisallowSamePath_PreferAllowRule Allow: /some/path Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -607,7 +607,7 @@ public async Task WildcardUserAgent_DisallowPathWrongCase_Allow() @"User-agent: * Disallow: /SoMe/paTh "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -630,7 +630,7 @@ public async Task MatchedUserAgent_DisallowAllAndAllowPath_DisallowIfNotAllowPat Disallow: / Allow: /some/other/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -653,7 +653,7 @@ public async Task MatchedUserAgent_DisallowAllAndAllowPath_AllowPathMatch() Disallow: / Allow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -676,7 +676,7 @@ public async Task MatchedUserAgent_DisallowAllAndAllowPath_AllowSubpathMatch() Disallow: / Allow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -696,7 +696,7 @@ public async Task MatchedUserAgent_BothAllowAndDisallowSamePath_PreferAllowRule( Allow: /some/path Disallow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -716,7 +716,7 @@ public async Task MatchedUserAgent_BothDisallowAndAllowSamePath_PreferAllowRule( Disallow: /some/path Allow: /some/path "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -735,7 +735,7 @@ public async Task MatchedUserAgent_DisallowPathWrongCase_Allow() @"User-agent: SomeBot Disallow: /SoMe/paTh "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = 
await _parser.ReadFromStreamAsync(stream); diff --git a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtSitemapTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtSitemapTests.cs index de3b002..814e740 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtSitemapTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/RobotTxtSitemapTests.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; @@ -23,16 +22,20 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_LoadSitemapDirective() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(); + + // Act + await robotsTxt.LoadSitemapAsync().ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.Is>(uris => uris.SequenceEqual(new[] { new Uri("https://www.github.com/sitemap.xml") })), + new Uri("https://www.github.com/sitemap.xml"), null, default), Times.Once); } @@ -48,20 +51,24 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectives_LoadMultipleUniqueS User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(); + + // Act + await robotsTxt.LoadSitemapAsync().ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.Is>(uris => uris.SequenceEqual(new[] - { - new Uri("https://www.github.com/sitemap.xml"), - new Uri("https://www.github.com/sitemap-2.xml"), - })), + new Uri("https://www.github.com/sitemap.xml"), + null, + default), Times.Once); + _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( + new Uri("https://www.github.com/sitemap-2.xml"), null, default), Times.Once); } @@ -77,19 +84,20 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectives_RetrieveOneIfDuplic User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(); + + // Act + await robotsTxt.LoadSitemapAsync().ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.Is>(uris => uris.SequenceEqual(new[] - { - new Uri("https://www.github.com/sitemap.xml"), - })), + new Uri("https://www.github.com/sitemap.xml"), null, default), Times.Once); } @@ -104,18 +112,22 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassModifiedDate() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + + 
_robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); var modifiedDate = new DateTime(2023, 01, 01); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(modifiedDate); + + // Act + await robotsTxt.LoadSitemapAsync(modifiedDate).ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.IsAny>(), + It.IsAny(), modifiedDate, default), Times.Once); } @@ -130,19 +142,23 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassCancellationToken( User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); using var cancellationTokenSource = new CancellationTokenSource(); var cancellationToken = cancellationTokenSource.Token; - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(cancellationToken: cancellationToken); + + // Act + await robotsTxt.LoadSitemapAsync(cancellationToken: cancellationToken).ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.IsAny>(), + It.IsAny(), null, cancellationToken), Times.Once); } @@ -155,22 +171,22 @@ public async Task LoadSitemapAsync_NoSitemapDirective_TryLoadDefaultSitemapIfNon @"User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); var baseAddress = new Uri("https://github.com"); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(baseAddress); + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(); + + // Act + await robotsTxt.LoadSitemapAsync().ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.Is>(uris => uris.SequenceEqual(new[] - { - new Uri("https://github.com/sitemap.xml"), - })), + new Uri("https://github.com/sitemap.xml"), null, default), Times.Once); } @@ -183,20 +199,23 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassModifiedDate() @"User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://github.com")); + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); var modifiedDate = new DateTime(2023, 01, 01); - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(modifiedDate); + + // Act + await robotsTxt.LoadSitemapAsync(modifiedDate).ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.IsAny>(), + It.IsAny(), modifiedDate, default), Times.Once); } @@ -209,21 +228,24 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassCancellationToken() @"User-agent: * Disallow: 
/ "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); _robotsClientMock.Setup(callTo => callTo.BaseAddress).Returns(new Uri("https://github.com")); + _robotsClientMock.Setup(callTo => callTo.LoadSitemapsAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Enumerable.Empty().ToAsyncEnumerable()); using var cancellationTokenSource = new CancellationTokenSource(); var cancellationToken = cancellationTokenSource.Token; - // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); - await robotsTxt.LoadSitemapAsync(cancellationToken: cancellationToken); + + // Act + await robotsTxt.LoadSitemapAsync(cancellationToken: cancellationToken).ToListAsync(); // Assert robotsTxt.Should().NotBe(null); _robotsClientMock.Verify(callTo => callTo.LoadSitemapsAsync( - It.IsAny>(), + It.IsAny(), null, cancellationToken), Times.Once); } diff --git a/tests/Robots.Txt.Parser.Tests.Unit/Robots.Txt.Parser.Tests.Unit.csproj b/tests/Robots.Txt.Parser.Tests.Unit/Robots.Txt.Parser.Tests.Unit.csproj index 6b9800a..839e339 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/Robots.Txt.Parser.Tests.Unit.csproj +++ b/tests/Robots.Txt.Parser.Tests.Unit/Robots.Txt.Parser.Tests.Unit.csproj @@ -22,6 +22,7 @@ all + diff --git a/tests/Robots.Txt.Parser.Tests.Unit/RobotsTxtParserTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/RobotsTxtParserTests.cs index 3f19f79..f48810b 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/RobotsTxtParserTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/RobotsTxtParserTests.cs @@ -25,7 +25,7 @@ public async Task ReadFromStreamAsync_EmptyFile_LoadDefault() { // Arrange var file = ""; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -43,7 +43,7 @@ public async Task ReadFromStreamAsync_WithLineComments_CommentsIgnored() User-agent: * Disallow: / "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -60,7 +60,7 @@ public async Task ReadFromStreamAsync_WithEndOfLineComments_CommentsIgnored() @"User-agent: * # This line specifies any user agent Disallow: / # Directs the crawler to ignore the entire website "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); @@ -74,7 +74,7 @@ public async Task ReadFromStreamAsync_Exactly500KiB_DoNotThrow() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("exactly-500kib-robots.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("exactly-500kib-robots.txt").CreateReadStream(); // Act var parse = async () => await _parser.ReadFromStreamAsync(stream); @@ -88,7 +88,7 @@ public async Task ReadFromStreamAsync_Over500KiB_ThrowRobotsTxtException() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("over-500kib-robots.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("over-500kib-robots.txt").CreateReadStream(); // Act var parse = async () => await _parser.ReadFromStreamAsync(stream); @@ -111,7 +111,7 @@ public async Task 
ReadFromStreamAsync_InvalidProductToken_Ignore() User-agent: ValidProductToken Disallow: "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var robotsTxt = await _parser.ReadFromStreamAsync(stream); diff --git a/tests/Robots.Txt.Parser.Tests.Unit/SimpleTextSitemapParserTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/SimpleTextSitemapParserTests.cs index 43b9c4a..9fbab11 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/SimpleTextSitemapParserTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/SimpleTextSitemapParserTests.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Reflection; using System.Text; using System.Threading.Tasks; @@ -17,14 +18,13 @@ public async Task ReadAsStreamAsync_EmptyFile_ReturnEmptySitemap() { // Arrange var file = @""; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act - var sitemap = await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var urlSet = await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert - sitemap.Should().NotBe(null); - sitemap.UrlSet.Should().BeEmpty(); + urlSet.Should().BeEmpty(); } [Fact] @@ -33,10 +33,10 @@ public async Task ReadAsStreamAsync_InvalidFileStructure_ThrowSitemapException() // Arrange var file = @""; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act - var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert await parse.Should().ThrowAsync(); @@ -47,10 +47,10 @@ public async Task ReadAsStreamAsync_Over50000Lines_ThrowSitemapException() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("over-50k-lines-sitemap.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("over-50k-lines-sitemap.txt").CreateReadStream(); // Act - var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert await parse.Should().ThrowAsync(); @@ -61,10 +61,10 @@ public async Task ReadAsStreamAsync_Exactly50000Lines_DoNotThrow() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("exactly-50k-lines-sitemap.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("exactly-50k-lines-sitemap.txt").CreateReadStream(); // Act - var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert await parse.Should().NotThrowAsync(); @@ -75,10 +75,10 @@ public async Task ReadAsStreamAsync_Over50MiB_ThrowSitemapException() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("over-50mib-sitemap.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("over-50mib-sitemap.txt").CreateReadStream(); // Act - var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var parse = async () => 
await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert await parse.Should().ThrowAsync(); @@ -89,10 +89,10 @@ public async Task ReadAsStreamAsync_Exactly50MiB_DoNotThrow() { // Arrange var fileProvider = new EmbeddedFileProvider(Assembly.GetExecutingAssembly()); - var stream = fileProvider.GetFileInfo("exactly-50mib-sitemap.txt").CreateReadStream(); + await using var stream = fileProvider.GetFileInfo("exactly-50mib-sitemap.txt").CreateReadStream(); // Act - var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var parse = async () => await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert await parse.Should().NotThrowAsync(); @@ -104,14 +104,13 @@ public async Task ReadAsStreamAsync_ValidFile_ReturnSitemap() // Arrange var file = @"https://github.com/organisations https://github.com/people"; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act - var sitemap = await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var urlSet = await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert - sitemap.Should().NotBe(null); - sitemap.UrlSet.Should().BeEquivalentTo(new HashSet + urlSet.Should().BeEquivalentTo(new HashSet { new (new Uri("https://github.com/organisations"), null, null, null), new (new Uri("https://github.com/people"), null, null, null), @@ -126,14 +125,13 @@ public async Task ReadAsStreamAsync_ValidFileWithWhitespaceLines_ReturnSitemap() https://github.com/organisations https://github.com/people"; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act - var sitemap = await SimpleTextSitemapParser.ReadFromStreamAsync(stream); + var urlSet = await SimpleTextSitemapParser.ReadFromStreamAsync(stream).ToListAsync(); // Assert - sitemap.Should().NotBe(null); - sitemap.UrlSet.Should().BeEquivalentTo(new HashSet + urlSet.Should().BeEquivalentTo(new HashSet { new (new Uri("https://github.com/organisations"), null, null, null), new (new Uri("https://github.com/people"), null, null, null), diff --git a/tests/Robots.Txt.Parser.Tests.Unit/SitemapParserTests.cs b/tests/Robots.Txt.Parser.Tests.Unit/SitemapParserTests.cs index 59c8cd4..39f11d3 100644 --- a/tests/Robots.Txt.Parser.Tests.Unit/SitemapParserTests.cs +++ b/tests/Robots.Txt.Parser.Tests.Unit/SitemapParserTests.cs @@ -1,5 +1,6 @@ using System; using System.IO; +using System.Linq; using System.Text; using System.Threading.Tasks; using FluentAssertions; @@ -14,7 +15,7 @@ public async Task ReadFromStreamAsync_EmptyFile_ThrowSitemapException() { // Arrange var file = @""; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var parse = async () => await SitemapParser.ReadFromStreamAsync(stream); @@ -35,7 +36,7 @@ public async Task ReadFromStreamAsync_ImproperXmlFormat_ThrowSitemapException() 2023-08-23 "; - var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file)); // Act var parse = async () => await SitemapParser.ReadFromStreamAsync(stream); @@ -45,7 +46,7 @@ public async Task ReadFromStreamAsync_ImproperXmlFormat_ThrowSitemapException() } [Fact] - public async Task ReadFromStreamAsync_SitemapIndexIncorrectLocationFormat_ThrowSitemapException() + public async Task 
     {
         // Arrange
         var file =
@@ -55,17 +56,18 @@
invalid[/]location
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = (SitemapIndex)await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.SitemapUris.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
     }

     [Fact]
-    public async Task ReadFromStreamAsync_SitemapIndexIncorrectDateFormat_ThrowSitemapException()
+    public async Task ParseSitemapIndex_IncorrectDateFormat_ThrowSitemapException()
     {
         // Arrange
         var file =
@@ -76,17 +78,18 @@
not-a-real-date
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = (SitemapIndex)await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.SitemapUris.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
     }

     [Fact]
-    public async Task ReadFromStreamAsync_UrlSetIncorrectLocationFormat_ThrowSitemapException()
+    public async Task ParseUrlSet_IncorrectLocationFormat_ThrowSitemapException()
     {
         // Arrange
         var file =
@@ -96,17 +99,18 @@
invalid[/]location
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.UrlSet.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
     }

     [Fact]
-    public async Task ReadFromStreamAsync_UrlSetIncorrectDateFormat_ThrowSitemapException()
+    public async Task ParseUrlSet_IncorrectDateFormat_ThrowSitemapException()
     {
         // Arrange
         var file =
@@ -117,17 +121,18 @@
not-a-real-date
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.UrlSet.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
     }

     [Fact]
-    public async Task ReadFromStreamAsync_UrlSetIncorrectChangeFrequencyFormat_ThrowSitemapException()
+    public async Task ParseUrlSet_IncorrectChangeFrequencyFormat_ThrowSitemapException()
     {
         // Arrange
         var file =
@@ -138,17 +143,18 @@
1
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.UrlSet.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
     }

     [Fact]
-    public async Task ReadFromStreamAsync_UrlSetIncorrectPriorityFormat_ThrowSitemapException()
+    public async Task ParseUrlSet_IncorrectPriorityFormat_ThrowSitemapException()
     {
         // Arrange
         var file =
@@ -159,10 +165,11 @@
high
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Act
-        var parse = async () => await SitemapParser.ReadFromStreamAsync(stream);
+        var parse = async () => await sitemap.UrlSet.ToListAsync();

         // Assert
         await parse.Should().ThrowExactlyAsync<SitemapException>();
@@ -184,15 +191,17 @@ public async Task ReadFromStreamAsync_SitemapIndexNoModifiedDateFilter_ParseCorrectly()
2023-10-01
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Assert
         var sitemapRoot = sitemap.Should().BeOfType<SitemapIndex>().Subject;
-        sitemap.UrlSet.Should().BeEmpty();
-        sitemapRoot.SitemapUris.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        var sitemapUris = await sitemapRoot.SitemapUris.ToListAsync();
+        urlSet.Should().BeEmpty();
+        sitemapUris.Should().BeEquivalentTo(new[]
         {
             new Uri("https://www.github.com/organisations.xml"),
             new Uri("https://www.github.com/people.xml"),
@@ -215,15 +224,17 @@ public async Task ReadFromStreamAsync_SitemapIndexEarlierModifiedDateFilter_ParseCorrectly()
2023-10-01
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 08, 22));

         // Assert
         var sitemapRoot = sitemap.Should().BeOfType<SitemapIndex>().Subject;
-        sitemap.UrlSet.Should().BeEmpty();
-        sitemapRoot.SitemapUris.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        var sitemapUris = await sitemapRoot.SitemapUris.ToListAsync();
+        urlSet.Should().BeEmpty();
+        sitemapUris.Should().BeEquivalentTo(new[]
         {
             new Uri("https://www.github.com/organisations.xml"),
             new Uri("https://www.github.com/people.xml"),
@@ -246,15 +257,17 @@ public async Task ReadFromStreamAsync_SitemapIndexSameModifiedDateFilter_ParseCorrectly()
2023-10-01
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 08, 23));

         // Assert
         var sitemapRoot = sitemap.Should().BeOfType<SitemapIndex>().Subject;
-        sitemap.UrlSet.Should().BeEmpty();
-        sitemapRoot.SitemapUris.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        var sitemapUris = await sitemapRoot.SitemapUris.ToListAsync();
+        urlSet.Should().BeEmpty();
+        sitemapUris.Should().BeEquivalentTo(new[]
         {
             new Uri("https://www.github.com/organisations.xml"),
             new Uri("https://www.github.com/people.xml"),
@@ -277,15 +290,17 @@ public async Task ReadFromStreamAsync_SitemapIndexExceedsModifiedDateFilter_ParseCorrectly()
2023-10-01
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 08, 24));

         // Assert
         var sitemapRoot = sitemap.Should().BeOfType<SitemapIndex>().Subject;
-        sitemap.UrlSet.Should().BeEmpty();
-        sitemapRoot.SitemapUris.Should().BeEquivalentTo(new[] { new Uri("https://www.github.com/people.xml") });
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        var sitemapUris = await sitemapRoot.SitemapUris.ToListAsync();
+        urlSet.Should().BeEmpty();
+        sitemapUris.Should().BeEquivalentTo(new[] { new Uri("https://www.github.com/people.xml") });
     }

     [Fact]
@@ -302,13 +317,14 @@ public async Task ReadFromStreamAsync_UrlSetLocationOnlyNoModifiedDateFilter_ParseCorrectly()
https://www.github.com/drmathias
https://www.github.com/drmathias/Robots.Txt.Parser
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), null, null, null),
             new UrlSetItem(new Uri("https://www.github.com/drmathias/Robots.Txt.Parser"), null, null, null),
@@ -329,13 +345,14 @@ public async Task ReadFromStreamAsync_UrlSetLocationOnlyModifiedDateFilter_ParseCorrectly()
https://www.github.com/drmathias
https://www.github.com/drmathias/Robots.Txt.Parser
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2024, 01, 01));

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), null, null, null),
             new UrlSetItem(new Uri("https://www.github.com/drmathias/Robots.Txt.Parser"), null, null, null),
@@ -362,13 +379,14 @@ public async Task ReadFromStreamAsync_UrlSetAllPropertiesNoFilter_ParseCorrectly()
0.5
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream);

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), new DateTime(2023, 06, 01), ChangeFrequency.Daily, 0.8m),
             new UrlSetItem(new Uri("https://www.github.com/drmathias/Robots.Txt.Parser"), new DateTime(2023, 05, 12), ChangeFrequency.Monthly, 0.5m),
@@ -395,13 +413,14 @@ public async Task ReadFromStreamAsync_UrlSetAllPropertiesEarlierModifiedDateFilter_ParseCorrectly()
0.5
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 01, 01));

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), new DateTime(2023, 06, 01), ChangeFrequency.Daily, 0.8m),
             new UrlSetItem(new Uri("https://www.github.com/drmathias/Robots.Txt.Parser"), new DateTime(2023, 05, 12), ChangeFrequency.Monthly, 0.5m),
@@ -428,13 +447,14 @@ public async Task ReadFromStreamAsync_UrlSetAllPropertiesEqualModifiedDateFilter_ParseCorrectly()
0.5
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 05, 12));

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), new DateTime(2023, 06, 01), ChangeFrequency.Daily, 0.8m),
             new UrlSetItem(new Uri("https://www.github.com/drmathias/Robots.Txt.Parser"), new DateTime(2023, 05, 12), ChangeFrequency.Monthly, 0.5m),
@@ -461,13 +481,14 @@ public async Task ReadFromStreamAsync_UrlSetAllPropertiesLaterModifiedDateFilter_ParseCorrectly()
0.5
";
-        var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));
+        await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(file));

         // Act
         var sitemap = await SitemapParser.ReadFromStreamAsync(stream, new DateTime(2023, 05, 13));

         // Assert
-        sitemap.UrlSet.Should().BeEquivalentTo(new[]
+        var urlSet = await sitemap.UrlSet.ToListAsync();
+        urlSet.Should().BeEquivalentTo(new[]
         {
             new UrlSetItem(new Uri("https://www.github.com/drmathias"), new DateTime(2023, 06, 01), ChangeFrequency.Daily, 0.8m),
         });
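Taken together, the `SitemapParserTests` changes show where validation now happens: `ReadFromStreamAsync` only classifies the document, while url set entries and sitemap index locations are parsed, and any `SitemapException` is thrown, when the async sequences are enumerated. A minimal sketch of that consumption pattern, with member names taken from the diff above and exact signatures assumed for illustration:

```csharp
using System;
using System.IO;
using System.Threading.Tasks;
using Robots.Txt.Parser; // assumed namespace for SitemapParser, SitemapIndex and related types

public static class SitemapStreamingExample
{
    public static async Task RunAsync(Stream stream, DateTime? modifiedSince)
    {
        // Reading the stream only determines whether this is a sitemap index
        // or a url set; items are parsed lazily during enumeration
        var sitemap = await SitemapParser.ReadFromStreamAsync(stream, modifiedSince);

        if (sitemap is SitemapIndex index)
        {
            // A sitemap index exposes the locations of child sitemaps as an async sequence
            await foreach (var uri in index.SitemapUris)
            {
                Console.WriteLine($"child sitemap: {uri}");
            }
        }
        else
        {
            // A url set yields items one at a time; malformed entries surface as
            // a SitemapException here, not during ReadFromStreamAsync
            await foreach (var item in sitemap.UrlSet)
            {
                Console.WriteLine(item);
            }
        }
    }
}
```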