grafana · cyriltovena · Jan 16, 2025 · Jan 15, 2025 · Jan 16, 2025 · Jan 16, 2025
@@ -421,6 +421,9 @@ func (l equalFilter) String() string {
 }
 
 func newEqualFilter(match []byte, caseInsensitive bool) MatcherFilterer { // wraps match and the case flag in an equalFilter
+	if caseInsensitive {
+		match = bytes.ToLower(match) // lower-case the pattern once at construction; bytes.ToLower returns a copy, so the caller's slice is not modified
+	}
 	return equalFilter{match, caseInsensitive} // NOTE(review): presumably equalFilter.Filter relies on the pattern already being lowercase when caseInsensitive is set; confirm against its implementation
 }
 
@@ -441,7 +444,7 @@ func contains(line, substr []byte, caseInsensitive bool) bool {
 }
 
 // containsLower verifies if substr is a substring of line, with case insensitive comparison.
-// substr is expected to be in lowercase.
+// substr MUST be in lowercase before calling this function.
 func containsLower(line, substr []byte) bool {
 	if len(substr) == 0 {
 		return true
@@ -458,7 +461,11 @@ func containsLower(line, substr []byte) bool {
 	for i <= maxIndex {
 		// Find potential first byte match
 		c := line[i]
-		if c != firstByte && c+'a'-'A' != firstByte && c != firstByte+'a'-'A' {
+		// Fast path for ASCII - if c is uppercase letter, convert to lowercase
+		if c >= 'A' && c <= 'Z' {
+			c += 'a' - 'A'
+		}
+		if c != firstByte {
 			i++
 			continue
 		}
@@ -472,9 +479,13 @@ func containsLower(line, substr []byte) bool {
 			c := line[linePos]
 			s := substr[substrPos]
 
-			// Fast ASCII comparison
+			// Fast path for ASCII
 			if c < utf8.RuneSelf && s < utf8.RuneSelf {
-				if c != s && c+'a'-'A' != s && c != s+'a'-'A' {
+				// Convert line char to lowercase if needed
+				if c >= 'A' && c <= 'Z' {
+					c += 'a' - 'A'
+				}
+				if c != s {
 					matched = false
 					break
 				}
@@ -485,13 +496,28 @@ func containsLower(line, substr []byte) bool {
 
 			// Slower Unicode path only when needed
 			lr, lineSize := utf8.DecodeRune(line[linePos:])
-			mr, substrSize := utf8.DecodeRune(substr[substrPos:])
+			if lr == utf8.RuneError && lineSize == 1 {
+				// Invalid UTF-8, treat as raw bytes
+				if c >= 'A' && c <= 'Z' {
+					c += 'a' - 'A'
+				}
+				if c != s {
+					matched = false
+					break
+				}
+				linePos++
+				substrPos++
+				continue
+			}
 
-			if lr == utf8.RuneError || mr == utf8.RuneError {
+			mr, substrSize := utf8.DecodeRune(substr[substrPos:])
+			if mr == utf8.RuneError && substrSize == 1 {
+				// Invalid UTF-8 in pattern (shouldn't happen as substr should be valid)
 				matched = false
 				break
 			}
 
+			// Compare line rune converted to lowercase with pattern (which is already lowercase)
 			if unicode.ToLower(lr) != mr {
 				matched = false
 				break

@@ -220,87 +220,158 @@ func Test_rune(t *testing.T) {
 	require.True(t, newContainsFilter([]byte("foo"), true).Filter([]byte("foo")))
 }
 
-func BenchmarkContainsLower(b *testing.B) {
-	cases := []struct {
-		name     string
-		line     string
-		substr   string
-		expected bool
-	}{
-		{
-			name:     "short_line_no_match",
-			line:     "this is a short log line",
-			substr:   "missing",
-			expected: false,
-		},
-		{
-			name:     "short_line_with_match",
-			line:     "this is a short log line",
-			substr:   "SHORT",
-			expected: true,
-		},
-		{
-			name:     "long_line_no_match",
-			line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
-			substr:   "nonexistent",
-			expected: false,
-		},
-		{
-			name:     "long_line_match_start",
-			line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
-			substr:   "2023",
-			expected: true,
-		},
-		{
-			name:     "long_line_match_middle",
-			line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
-			substr:   "LEVELS",
-			expected: true,
-		},
-		{
-			name:     "long_line_match_end",
-			line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
-			substr:   "status",
-			expected: true,
-		},
-		{
-			name:     "short_unicode_line_no_match",
-			line:     "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
-			substr:   "missing",
-			expected: false,
-		},
-		{
-			name:     "short_unicode_line_with_match",
-			line:     "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
-			substr:   "EMOJIS",
-			expected: true,
-		},
-		{
-			name:     "long_unicode_line_no_match",
-			line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
-			substr:   "nonexistent",
-			expected: false,
-		},
-		{
-			name:     "long_unicode_line_match_start",
-			line:     "2023-06-14T12:34:56.789Z 🚀[МИКРОСервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
-			substr:   "микросервис",
-			expected: true,
-		},
-		{
-			name:     "long_unicode_line_match_middle",
-			line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
-			substr:   "UNICODE",
-			expected: true,
-		},
-		{
-			name:     "long_unicode_line_match_end",
-			line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
-			substr:   "τέλος",
-			expected: true,
-		},
+var cases = []struct { // fixtures ranged over by both Test_containsLower and BenchmarkContainsLower
+	name     string // subtest / sub-benchmark name passed to Run
+	line     string // haystack passed to containsLower by Test_containsLower
+	substr   string // needle passed to containsLower; every fixture keeps it lowercase, as containsLower requires
+	expected bool   // result Test_containsLower requires containsLower to return
+}{
+	{
+		name:     "short_line_no_match",
+		line:     "this is a short log line",
+		substr:   "missing",
+		expected: false,
+	},
+	{
+		name:     "short_line_no_match_special_chars",
+		line:     "this contains a \\ character",
+		substr:   "|",
+		expected: false,
+	},
+	{
+		name:     "short_line_no_match_special_chars_match",
+		line:     "this contains a | character",
+		substr:   "|",
+		expected: true,
+	},
+	{
+		name:     "short_line_with_match",
+		line:     "this is a shorT log line",
+		substr:   "short",
+		expected: true,
+	},
+	{
+		name:     "long_line_no_match",
+		line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
+		substr:   "nonexistent",
+		expected: false,
+	},
+	{
+		name:     "long_line_match_start",
+		line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
+		substr:   "2023",
+		expected: true,
+	},
+	{
+		name:     "long_line_match_middle",
+		line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, leVelS and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
+		substr:   "levels",
+		expected: true,
+	},
+	{
+		name:     "long_line_match_end",
+		line:     "2023-06-14T12:34:56.789Z INFO  [service_name] This is a much longer log line with timestamps, levels and other information that typically appears in production logs. RequestID=123456 UserID=789 Action=GetUser Duration=123ms Status=200",
+		substr:   "status",
+		expected: true,
+	},
+	{
+		name:     "short_unicode_line_no_match",
+		line:     "🌟 Unicode line with emojis 🎉 and special chars ñ é ß",
+		substr:   "missing",
+		expected: false,
+	},
+	{
+		name:     "short_unicode_line_with_match",
+		line:     "🌟 Unicode line with eMojiS 🎉 and special chars ñ é ß",
+		substr:   "emojis",
+		expected: true,
+	},
+	{
+		name:     "long_unicode_line_no_match",
+		line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
+		substr:   "nonexistent",
+		expected: false,
+	},
+	{
+		name:     "long_unicode_line_match_start",
+		line:     "2023-06-14T12:34:56.789Z 🚀[МИКРОСервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
+		substr:   "микросервис",
+		expected: true,
+	},
+	{
+		name:     "long_unicode_line_match_middle",
+		line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
+		substr:   "unicode",
+		expected: true,
+	},
+	{
+		name:     "long_unicode_line_match_end",
+		line:     "2023-06-14T12:34:56.789Z 🚀 [микросервис] Длинное сообщение с Unicode символами 统一码 が大好き! エラー分析: システムは正常に動作しています。RequestID=123456 状態=良好 Résultat=Succès ß=γ 🎯 τέλος",
+		substr:   "τέλος",
+		expected: true,
+	},
+	{
+		name:     "utf8_case_insensitive_match_middle",
+		line:     "ΣΑΣ ΓΕΙΑ ΚΟΣΜΕ", // uppercase Greek; the middle word "ΓΕΙΑ" is the uppercase form of the needle
+		substr:   "γεια",           // "hello" in Greek lowercase
+		expected: true,
+	},
+	{
+		name:     "utf8_case_insensitive_no_match",
+		line:     "ΣΑΣ ΚΟΣΜΕ", // uppercase Greek that does not contain "ΓΕΙΑ"
+		substr:   "γεια",      // "hello" in Greek lowercase
+		expected: false,
+	},
+	{
+		name:     "empty_substr",
+		line:     "any line",
+		substr:   "",
+		expected: true,
+	},
+	{
+		name:     "empty_line",
+		line:     "",
+		substr:   "something",
+		expected: false,
+	},
+	{
+		name:     "both_empty",
+		line:     "",
+		substr:   "",
+		expected: true,
+	},
+	{
+		name:     "substr_longer_than_line",
+		line:     "short",
+		substr:   "longer than line",
+		expected: false,
+	},
+	{
+		name:     "invalid_utf8_in_line",
+		line:     string([]byte{0xFF, 0xFE, 0xFD}),
+		substr:   "test",
+		expected: false,
+	},
+	{
+		name:     "partial_utf8_match",
+		line:     "Hello 世界", // "Hello World" with CJK characters
+		substr:   "世",        // only the first character of "世界"
+		expected: true,
+	},
+}
+
+func Test_containsLower(t *testing.T) { // table-driven check of containsLower against every entry in cases
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) { // one named subtest per fixture
+			line := []byte(c.line)
+			substr := []byte(c.substr) // used as-is: the fixtures already store the needle in lowercase
+			m := containsLower(line, substr)
+			require.Equal(t, c.expected, m, "line: %s substr: %s", c.line, c.substr) // failure message echoes both inputs
+		})
 	}
+}
 
+func BenchmarkContainsLower(b *testing.B) {
 	var m bool
 	for _, c := range cases {
 		b.Run(c.name, func(b *testing.B) {

@@ -586,6 +586,20 @@ func Test_FilterMatcher(t *testing.T) {
 			},
 			[]linecheck{{"counter=1", false}, {"counter=0", false}, {"counter=-1", true}, {"counter=-2", true}},
 		},
+		{
+			`{app="foo"} |~ "\\|"`,
+			[]*labels.Matcher{
+				mustNewMatcher(labels.MatchEqual, "app", "foo"),
+			},
+			[]linecheck{{"\\", false}, {"|", true}},
+		},
+		{
+			`{app="foo"} |~ "(?i)\\|"`,
+			[]*labels.Matcher{
+				mustNewMatcher(labels.MatchEqual, "app", "foo"),
+			},
+			[]linecheck{{"\\", false}, {"|", true}},
+		},
 	} {
 		t.Run(tt.q, func(t *testing.T) {
 			t.Parallel()