Skip to content

Commit

Permalink
Cosmetic rules (#168)
Browse files Browse the repository at this point in the history
* Add cosmetic rules store with tests

* Add support for cosmetic hiding rules

* Refactor cosmetic rule handling, remove cosmetic rules from ignoreLineRegex

* Rewrite cosmetic rule store to triestore for better wildcard support, change ignoreLineRegexp to match hosts comments

* Optimize CSS injection by batching selectors for improved performance

* Use htmlrewrite package to replace head contents

* Sanitize CSS selector when adding cosmetic rules

* Refactor CSS selector sanitization and improve error handling
  • Loading branch information
AitakattaSora authored Dec 20, 2024
1 parent 61525ec commit 3761c04
Show file tree
Hide file tree
Showing 8 changed files with 644 additions and 14 deletions.
10 changes: 9 additions & 1 deletion internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/anfragment/zen/internal/certgen"
"github.com/anfragment/zen/internal/certstore"
"github.com/anfragment/zen/internal/cfg"
"github.com/anfragment/zen/internal/cosmetic"
cosmeticTrieStore "github.com/anfragment/zen/internal/cosmetic/triestore"
"github.com/anfragment/zen/internal/filter"
"github.com/anfragment/zen/internal/jsrule"
"github.com/anfragment/zen/internal/logger"
Expand Down Expand Up @@ -172,9 +174,15 @@ func (a *App) StartProxy() (err error) {
return fmt.Errorf("create scriptlets injector: %v", err)
}

cosmeticRulesStore := cosmeticTrieStore.NewTrieStore()
cosmeticRulesInjector, err := cosmetic.NewInjector(cosmeticRulesStore)
if err != nil {
return fmt.Errorf("create cosmetic rules injector: %v", err)
}

jsRuleInjector := jsrule.NewInjector()

filter, err := filter.NewFilter(a.config, ruleMatcher, exceptionRuleMatcher, scriptletInjector, jsRuleInjector, a.eventsHandler)
filter, err := filter.NewFilter(a.config, ruleMatcher, exceptionRuleMatcher, scriptletInjector, cosmeticRulesInjector, jsRuleInjector, a.eventsHandler)
if err != nil {
return fmt.Errorf("create filter: %v", err)
}
Expand Down
55 changes: 55 additions & 0 deletions internal/cosmetic/addrule.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package cosmetic

import (
"errors"
"fmt"
"net"
"regexp"
"strings"
)

// RuleRegex recognizes cosmetic rules of the form "hostnames##selector" or
// "##selector". Capture group 1 is the optional hostname part (it may not
// contain '#' or '$'); capture group 2 is the non-empty selector.
var RuleRegex = regexp.MustCompile(`^(?:([^#$]+?)##|##)(.+)$`)

// errUnsupportedSyntax is returned for rules that RuleRegex does not match.
var errUnsupportedSyntax = errors.New("unsupported syntax")

// AddRule parses a cosmetic rule ("hostnames##selector" or "##selector"),
// sanitizes its selector and registers it in the injector's store.
//
// A rule without a hostname part is stored with a nil hostname list. For a
// rule with a comma-separated hostname list, the selector is stored for the
// hostnames exactly as written and, additionally, for a "*."-prefixed variant
// of every hostname that is neither an IP address nor already starts with
// "*.".
//
// It returns errUnsupportedSyntax when the rule does not match RuleRegex, a
// wrapped error when the selector fails sanitization, and an error when the
// hostname list contains an empty entry. Nothing is added to the store when
// an error is returned.
func (inj *Injector) AddRule(rule string) error {
	match := RuleRegex.FindStringSubmatch(rule)
	if match == nil {
		return errUnsupportedSyntax
	}
	rawHostnames, selector := match[1], match[2]

	sanitizedSelector, err := sanitizeCSSSelector(selector)
	if err != nil {
		return fmt.Errorf("failed to sanitize selector: %w", err)
	}

	if len(rawHostnames) == 0 {
		inj.store.Add(nil, sanitizedSelector)
		return nil
	}

	hostnames := strings.Split(rawHostnames, ",")
	subdomainHostnames := make([]string, 0, len(hostnames))
	// Validate every hostname before touching the store so that a bad entry
	// does not leave the rule partially registered.
	for _, hostname := range hostnames {
		if len(hostname) == 0 {
			return errors.New("empty hostnames are not allowed")
		}

		if net.ParseIP(hostname) == nil && !strings.HasPrefix(hostname, "*.") {
			subdomainHostnames = append(subdomainHostnames, "*."+hostname)
		}
	}

	inj.store.Add(hostnames, sanitizedSelector)
	// An empty hostname list must never reach the store here: this method
	// uses a nil list to register a rule without a hostname restriction, so
	// a rule consisting solely of IPs or "*." hostnames could otherwise be
	// registered as unrestricted.
	// NOTE(review): presumably the store treats nil and empty lists alike -
	// confirm against the Store implementation.
	if len(subdomainHostnames) > 0 {
		inj.store.Add(subdomainHostnames, sanitizedSelector)
	}

	return nil
}
77 changes: 77 additions & 0 deletions internal/cosmetic/injector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package cosmetic

import (
"bytes"
"errors"
"fmt"
"log"
"net/http"
"strings"

"github.com/anfragment/zen/internal/htmlrewrite"
"github.com/anfragment/zen/internal/logger"
)

// styleOpeningTag is the markup that opens the injected stylesheet element.
var styleOpeningTag = []byte("<style>")

// styleClosingTag is the markup that closes the injected stylesheet element.
var styleClosingTag = []byte("</style>")

// Injector adds cosmetic (element-hiding) CSS to HTTP responses. Rules are
// registered with AddRule and applied to a response with Inject.
type Injector struct {
// store holds the CSS selectors registered through AddRule and returns the
// ones that apply to a given hostname.
store Store
}

// Store is the selector storage an Injector depends on.
type Store interface {
// Add registers selector for the given hostnames. AddRule passes a nil
// slice for rules written without a hostname part and may pass
// "*."-prefixed hostnames; how these are interpreted is up to the
// implementation.
Add(hostnames []string, selector string)
// Get returns the selectors to apply to hostname.
Get(hostname string) []string
}

// NewInjector builds an Injector backed by store.
// It returns an error when store is nil.
func NewInjector(store Store) (*Injector, error) {
	if store == nil {
		return nil, errors.New("store is nil")
	}

	inj := new(Injector)
	inj.store = store
	return inj, nil
}

// Inject appends a <style> element hiding the elements matched by the
// cosmetic rules stored for the request's hostname to the head contents of
// res. The response is left untouched when the store has no selectors for
// that hostname. It always returns nil.
func (inj *Injector) Inject(req *http.Request, res *http.Response) error {
	host := req.URL.Hostname()
	rules := inj.store.Get(host)
	log.Printf("got %d cosmetic rules for %q", len(rules), logger.Redacted(host))
	if len(rules) == 0 {
		return nil
	}

	// Assemble "<style>...</style>" once, up front; the callback below only
	// reads it.
	css := generateBatchedCSS(rules)
	styleElement := make([]byte, 0, len(styleOpeningTag)+len(css)+len(styleClosingTag))
	styleElement = append(styleElement, styleOpeningTag...)
	styleElement = append(styleElement, css...)
	styleElement = append(styleElement, styleClosingTag...)

	// NOTE(review): anything ReplaceHeadContents may return is not inspected
	// here - confirm whether it can report a failure.
	htmlrewrite.ReplaceHeadContents(res, func(head []byte) []byte {
		// Join into a fresh slice so the matched head bytes are never
		// written to in place.
		return bytes.Join([][]byte{head, styleElement}, nil)
	})

	return nil
}

// generateBatchedCSS renders selectors as CSS rules that hide the matched
// elements. Selectors are grouped into comma-separated lists of at most
// batchSize entries, each list sharing a single "display:none!important"
// declaration block. It returns an empty string when selectors is empty.
func generateBatchedCSS(selectors []string) string {
	const batchSize = 100

	var css strings.Builder
	for remaining := selectors; len(remaining) > 0; {
		n := len(remaining)
		if n > batchSize {
			n = batchSize
		}

		// Format straight into the builder instead of materializing an
		// intermediate string with Sprintf for every batch.
		fmt.Fprintf(&css, "%s{display:none!important;}", strings.Join(remaining[:n], ","))
		remaining = remaining[n:]
	}

	return css.String()
}
168 changes: 168 additions & 0 deletions internal/cosmetic/sanitizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package cosmetic

import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)

// sanitizeCSSSelector validates a CSS selector taken from a filter rule and
// returns it with CSS Unicode escapes decoded.
//
// The selector is rejected when:
//   - its raw or decoded text contains "</style" in any letter case, which
//     could terminate the <style> element the selector is later embedded in;
//   - its quotes or brackets are unbalanced;
//   - validateSelector finds a disallowed sequence outside of quotes.
func sanitizeCSSSelector(selectorInput string) (string, error) {
	selector := decodeUnicodeEscapes(selectorInput)

	// HTML matches the end tag case-insensitively and accepts whitespace or
	// '/' before the closing '>', so the "</style" prefix alone is rejected.
	// The decoded form is checked as well because escapes such as "\3c" can
	// spell the tag without it appearing literally in the input, and quoted
	// text is not inspected by validateSelector.
	for _, candidate := range []string{selectorInput, selector} {
		if strings.Contains(strings.ToLower(candidate), "</style") {
			return "", errors.New("selector contains '</style>' which is not allowed")
		}
	}

	if !hasBalancedQuotesAndBrackets(selector) {
		return "", errors.New("selector has unbalanced quotes or brackets")
	}

	if err := validateSelector(selector); err != nil {
		return "", fmt.Errorf("sanitize selector: %w", err)
	}

	return selector, nil
}

// unicodeEscapeRegex matches a CSS Unicode escape: a backslash, one to six
// hexadecimal digits and at most one trailing whitespace character, which
// belongs to the escape. It is compiled once because decodeUnicodeEscapes
// runs for every cosmetic rule that is added.
var unicodeEscapeRegex = regexp.MustCompile(`\\([0-9A-Fa-f]{1,6})(\s)?`)

// decodeUnicodeEscapes replaces CSS Unicode escapes (for example "\3c " or
// "\00003C") in s with the characters they denote. An escape that denotes
// NUL, a surrogate or a value above U+10FFFF decodes to U+FFFD. All other
// text is returned unchanged.
func decodeUnicodeEscapes(s string) string {
	return unicodeEscapeRegex.ReplaceAllStringFunc(s, func(match string) string {
		// match is the backslash, the hex digits and, optionally, a single
		// ASCII whitespace byte; strip everything but the digits.
		hexDigits := match[1:]
		switch hexDigits[len(hexDigits)-1] {
		case ' ', '\t', '\n', '\f', '\r':
			hexDigits = hexDigits[:len(hexDigits)-1]
		}

		r, err := strconv.ParseInt(hexDigits, 16, 32)
		if err != nil {
			return match
		}
		if r == 0 {
			// Never emit a raw NUL into the generated stylesheet.
			return "\uFFFD"
		}
		// The conversion itself yields U+FFFD for surrogates and for
		// values beyond the Unicode range.
		return string(rune(r))
	})
}

// hasBalancedQuotesAndBrackets reports whether every quote in s is closed and
// every (, [ and { outside of quotes is closed by its own counterpart in
// properly nested order. A character following a backslash is ignored, and a
// trailing backslash makes the selector unbalanced.
func hasBalancedQuotesAndBrackets(s string) bool {
	var (
		pendingClosers []rune // closing brackets still owed, innermost last
		openQuote      rune   // quote character currently open, 0 when none
		afterBackslash bool   // previous character was an unescaped backslash
	)

	for _, r := range s {
		switch {
		case afterBackslash:
			// Escaped character: carries no structural meaning.
			afterBackslash = false
		case r == '\\':
			afterBackslash = true
		case openQuote != 0:
			// Inside a quoted string only the matching quote matters.
			if r == openQuote {
				openQuote = 0
			}
		case r == '\'' || r == '"':
			openQuote = r
		case r == '(':
			pendingClosers = append(pendingClosers, ')')
		case r == '[':
			pendingClosers = append(pendingClosers, ']')
		case r == '{':
			pendingClosers = append(pendingClosers, '}')
		case r == ')' || r == ']' || r == '}':
			top := len(pendingClosers) - 1
			if top < 0 || pendingClosers[top] != r {
				return false
			}
			pendingClosers = pendingClosers[:top]
		}
	}

	return openQuote == 0 && len(pendingClosers) == 0 && !afterBackslash
}

// validateSelector scans s for sequences that are not allowed in a selector:
// the comment delimiters "/*" and "*/" and the characters '{', '}', ';' and
// '@'. Text inside single or double quotes and any character preceded by a
// backslash are not inspected. It returns nil when nothing is found and an
// error describing the first offending sequence otherwise.
func validateSelector(s string) error {
	chars := []rune(s)

	var openQuote rune // quote character currently open, 0 when none
	skipNext := false  // previous character was an unescaped backslash

	for idx, ch := range chars {
		if skipNext {
			skipNext = false
			continue
		}

		switch {
		case ch == '\\':
			skipNext = true
		case openQuote != 0:
			// Quoted text is skipped until the matching quote.
			if ch == openQuote {
				openQuote = 0
			}
		case ch == '\'' || ch == '"':
			openQuote = ch
		default:
			// Unquoted, unescaped character: look for dangerous sequences.
			followedBy := func(want rune) bool {
				return idx+1 < len(chars) && chars[idx+1] == want
			}

			switch ch {
			case '/':
				if followedBy('*') {
					return errors.New("found '/*' outside of quotes")
				}
			case '*':
				if followedBy('/') {
					return errors.New("found '*/' outside of quotes")
				}
			case '{', '}', ';', '@':
				return fmt.Errorf("found dangerous character '%c' outside of quotes", ch)
			}
		}
	}

	return nil
}
Loading

0 comments on commit 3761c04

Please sign in to comment.