Skip to content

Commit

Permalink
add: exclude archive.org & archive-it.org by default
Browse files Browse the repository at this point in the history
  • Loading branch information
CorentinB committed Aug 21, 2024
1 parent f3ae9cf commit 1257ff4
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion internal/pkg/crawl/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,10 @@ func GenerateCrawlConfig(config *config.Config) (*Crawl, error) {
c.DomainsCrawl = config.DomainsCrawl
c.DisableAssetsCapture = config.DisableAssetsCapture
c.DisabledHTMLTags = config.DisableHTMLTag
c.ExcludedHosts = config.ExcludeHosts

// Always exclude archive.org and archive-it.org, in addition to any hosts listed in the config
c.ExcludedHosts = utils.DedupeStrings(append(config.ExcludeHosts, "archive.org", "archive-it.org"))

c.IncludedHosts = config.IncludeHosts
c.CaptureAlternatePages = config.CaptureAlternatePages
c.ExcludedStrings = config.ExcludeString
Expand Down

0 comments on commit 1257ff4

Please sign in to comment.