-
Notifications
You must be signed in to change notification settings - Fork 68
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add cosmetic rules store with tests * Add support for cosmetic hiding rules * Refactor cosmetic rule handling, remove cosmetic rules from ignoreLineRegex * Rewrite cosmetic rule store to triestore for better wildcards support, change ignoreLineRegexp to match hosts comments * Optimize CSS injection by batching selectors for improved performance * Use htmlrewrite package to replace head contents * Sanitize CSS selector when adding cosmetic rules * Refactor CSS selector sanitization and improve error handling
- Loading branch information
1 parent
61525ec
commit 3761c04
Showing
8 changed files
with
644 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package cosmetic | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"net" | ||
"regexp" | ||
"strings" | ||
) | ||
|
||
// RuleRegex recognizes cosmetic rules of the form "[hostnames]##selector".
// Capture group 1 holds the optional text before "##" (it may not contain
// '#' or '$'); capture group 2 holds the non-empty text after "##".
var RuleRegex = regexp.MustCompile(`^(?:([^#$]+?)##|##)(.+)$`)

// errUnsupportedSyntax is the sentinel returned for rules that RuleRegex does
// not match.
var errUnsupportedSyntax = errors.New("unsupported syntax")
|
||
func (inj *Injector) AddRule(rule string) error { | ||
|
||
var rawHostnames string | ||
var selector string | ||
|
||
if match := RuleRegex.FindStringSubmatch(rule); match != nil { | ||
rawHostnames = match[1] | ||
selector = match[2] | ||
} else { | ||
return errUnsupportedSyntax | ||
} | ||
|
||
sanitizedSelector, err := sanitizeCSSSelector(selector) | ||
if err != nil { | ||
return fmt.Errorf("failed to sanitize selector: %w", err) | ||
} | ||
|
||
if len(rawHostnames) == 0 { | ||
inj.store.Add(nil, sanitizedSelector) | ||
return nil | ||
} | ||
|
||
hostnames := strings.Split(rawHostnames, ",") | ||
subdomainHostnames := make([]string, 0, len(hostnames)) | ||
for _, hostname := range hostnames { | ||
if len(hostname) == 0 { | ||
return errors.New("empty hostnames are not allowed") | ||
} | ||
|
||
if net.ParseIP(hostname) == nil && !strings.HasPrefix(hostname, "*.") { | ||
subdomainHostnames = append(subdomainHostnames, "*."+hostname) | ||
} | ||
} | ||
inj.store.Add(hostnames, sanitizedSelector) | ||
inj.store.Add(subdomainHostnames, sanitizedSelector) | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package cosmetic | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"log" | ||
"net/http" | ||
"strings" | ||
|
||
"github.com/anfragment/zen/internal/htmlrewrite" | ||
"github.com/anfragment/zen/internal/logger" | ||
) | ||
|
||
// styleOpeningTag and styleClosingTag delimit the <style> element that wraps
// the generated CSS.
var styleOpeningTag = []byte("<style>")
var styleClosingTag = []byte("</style>")
|
||
// Injector adds cosmetic (element-hiding) CSS to HTTP responses, using the
// selectors held by its Store.
type Injector struct {
	// store maps hostnames to the CSS selectors registered for them.
	store Store
}

// Store is the selector storage an Injector depends on.
type Store interface {
	// Add registers selector for the given hostnames.
	Add(hostnames []string, selector string)
	// Get returns the selectors that apply to hostname.
	Get(hostname string) []string
}

// NewInjector builds an Injector backed by store. It returns an error when
// store is nil.
func NewInjector(store Store) (*Injector, error) {
	if store == nil {
		return nil, errors.New("store is nil")
	}

	inj := new(Injector)
	inj.store = store
	return inj, nil
}
|
||
func (inj *Injector) Inject(req *http.Request, res *http.Response) error { | ||
hostname := req.URL.Hostname() | ||
selectors := inj.store.Get(hostname) | ||
log.Printf("got %d cosmetic rules for %q", len(selectors), logger.Redacted(hostname)) | ||
if len(selectors) == 0 { | ||
return nil | ||
} | ||
|
||
var ruleInjection bytes.Buffer | ||
ruleInjection.Write(styleOpeningTag) | ||
css := generateBatchedCSS(selectors) | ||
ruleInjection.WriteString(css) | ||
ruleInjection.Write(styleClosingTag) | ||
|
||
htmlrewrite.ReplaceHeadContents(res, func(match []byte) []byte { | ||
return bytes.Join([][]byte{match, ruleInjection.Bytes()}, nil) | ||
}) | ||
|
||
return nil | ||
} | ||
|
||
// generateBatchedCSS renders selectors as CSS rules that hide the matching
// elements. Selectors are grouped into comma-separated batches of at most
// batchSize per rule, each followed by "{display:none!important;}".
// It returns an empty string when selectors is empty.
func generateBatchedCSS(selectors []string) string {
	const batchSize = 100

	var builder strings.Builder
	for start := 0; start < len(selectors); start += batchSize {
		end := start + batchSize
		if end > len(selectors) {
			end = len(selectors)
		}

		// Format straight into the builder instead of building a temporary
		// string with Sprintf first.
		fmt.Fprintf(&builder, "%s{display:none!important;}", strings.Join(selectors[start:end], ","))
	}

	return builder.String()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
package cosmetic | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
) | ||
|
||
// sanitizeCSSSelector validates and sanitizes a CSS selector. | ||
func sanitizeCSSSelector(selectorInput string) (string, error) { | ||
if strings.Contains(selectorInput, "</style>") { | ||
return "", errors.New("selector contains '</style>' which is not allowed") | ||
} | ||
|
||
selector := decodeUnicodeEscapes(selectorInput) | ||
if !hasBalancedQuotesAndBrackets(selector) { | ||
return "", errors.New("selector has unbalanced quotes or brackets") | ||
} | ||
|
||
if err := validateSelector(selector); err != nil { | ||
return "", fmt.Errorf("sanitize selector: %w", err) | ||
} | ||
|
||
return selector, nil | ||
} | ||
|
||
// cssUnicodeEscapeRegex matches a CSS Unicode escape: a backslash, one to six
// hex digits and an optional single trailing whitespace character.
var cssUnicodeEscapeRegex = regexp.MustCompile(`\\([0-9A-Fa-f]{1,6})(\s)?`)

// decodeUnicodeEscapes replaces CSS Unicode escapes in s with the characters
// they denote. The optional whitespace character that terminates an escape is
// consumed together with it. Escapes denoting zero, a surrogate, or a value
// above U+10FFFF decode to U+FFFD, the replacement character. Text that is
// not a Unicode escape is returned unchanged.
func decodeUnicodeEscapes(s string) string {
	return cssUnicodeEscapeRegex.ReplaceAllStringFunc(s, func(match string) string {
		// match is the backslash, the hex digits and possibly one whitespace
		// character; hex digits are never whitespace, so trimming is safe.
		hexDigits := strings.TrimRight(match[1:], " \t\n\f\r")
		r, err := strconv.ParseInt(hexDigits, 16, 32)
		if err != nil {
			return match
		}
		// Never emit a raw NUL or an invalid code point into the stylesheet.
		if r == 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF) {
			return "\uFFFD"
		}
		return string(rune(r))
	})
}
|
||
// hasBalancedQuotesAndBrackets reports whether every quote in s is closed and
// every (, [ and { outside of quotes is closed by its matching counterpart in
// the right order. A backslash makes the following character literal; a
// trailing backslash with nothing after it counts as unbalanced.
func hasBalancedQuotesAndBrackets(s string) bool {
	var (
		open           []rune // opening brackets still waiting for a closer
		quote          rune   // active quote character, 0 when outside quotes
		afterBackslash bool   // previous character was an unescaped backslash
	)

	for _, r := range s {
		switch {
		case afterBackslash:
			// Escaped character: ignore whatever it is.
			afterBackslash = false
		case r == '\\':
			afterBackslash = true
		case quote != 0:
			// Inside a quoted string only the matching quote matters.
			if r == quote {
				quote = 0
			}
		case r == '\'' || r == '"':
			quote = r
		case r == '(' || r == '[' || r == '{':
			open = append(open, r)
		case r == ')' || r == ']' || r == '}':
			if len(open) == 0 {
				return false
			}
			var want rune
			switch r {
			case ')':
				want = '('
			case ']':
				want = '['
			default:
				want = '{'
			}
			top := len(open) - 1
			if open[top] != want {
				return false
			}
			open = open[:top]
		}
	}

	return quote == 0 && len(open) == 0 && !afterBackslash
}
|
||
// validateSelector scans s for sequences that must not appear in a selector
// outside of quoted strings: the comment delimiters "/*" and "*/" and the
// characters '{', '}', ';' and '@'. Characters preceded by a backslash and
// characters inside single or double quotes are not checked. It returns an
// error describing the first offending sequence, or nil when none is found.
func validateSelector(s string) error {
	var (
		quote    rune // active quote character, 0 when outside quotes
		skipNext bool // previous character was an unescaped backslash
	)
	chars := []rune(s)

	for idx, ch := range chars {
		switch {
		case skipNext:
			skipNext = false
			continue
		case ch == '\\':
			skipNext = true
			continue
		case quote != 0:
			if ch == quote {
				quote = 0
			}
			continue
		case ch == '\'' || ch == '"':
			quote = ch
			continue
		}

		// From here on ch is unescaped and outside of quotes.
		hasNext := idx+1 < len(chars)
		switch ch {
		case '/':
			if hasNext && chars[idx+1] == '*' {
				return errors.New("found '/*' outside of quotes")
			}
		case '*':
			if hasNext && chars[idx+1] == '/' {
				return errors.New("found '*/' outside of quotes")
			}
		case '{', '}', ';', '@':
			return fmt.Errorf("found dangerous character '%c' outside of quotes", ch)
		}
	}

	return nil
}
Oops, something went wrong.