Skip to content

Commit

Permalink
Ability to sanitise metric identifiers (#50)
Browse files Browse the repository at this point in the history
* Sanitize metric identifiers

* fix linter issues

* cleanup m3 repo

* Add sanitise functor

* Address feedback

* Add benchmarks

* cleanup comments

* sanitise fn comments

* better benchmarks

* s/generation/allocation

* typo fix

* feedback: range check in hot path

* feedback: no ptr receiver for sanitiser

* add unit tests for sanitised scopes

* cleanup unexported types

* relocate test vars

* feedback: immutabletags rename

* sanitise fullyQualifiedName

* optimise sanitisation

* fix typo

* range check optimise
  • Loading branch information
prateek authored and xichen2020 committed Jul 27, 2017
1 parent 88e9c75 commit ae42cdc
Show file tree
Hide file tree
Showing 7 changed files with 485 additions and 53 deletions.
51 changes: 23 additions & 28 deletions m3/reporter_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ package m3
import (
"testing"
"time"

customtransport "github.com/uber-go/tally/m3/customtransports"
m3thrift "github.com/uber-go/tally/m3/thrift"

"github.com/apache/thrift/lib/go/thrift"
)

const (
Expand All @@ -41,10 +36,13 @@ const (
)

func BenchmarkNewMetric(b *testing.B) {
protocolFactory := thrift.NewTCompactProtocolFactory()
resourcePool := newResourcePool(protocolFactory)
benchReporter := &reporter{resourcePool: resourcePool}

r, _ := NewReporter(Options{
HostPorts: []string{"127.0.0.1:9052"},
Service: "test-service",
CommonTags: defaultCommonTags,
})
defer r.Close()
benchReporter := r.(*reporter)
b.ResetTimer()

for n := 0; n < b.N; n++ {
Expand All @@ -53,9 +51,13 @@ func BenchmarkNewMetric(b *testing.B) {
}

func BenchmarkCalulateSize(b *testing.B) {
protocolFactory := thrift.NewTCompactProtocolFactory()
resourcePool := newResourcePool(protocolFactory)
benchReporter := &reporter{resourcePool: resourcePool}
r, _ := NewReporter(Options{
HostPorts: []string{"127.0.0.1:9052"},
Service: "test-service",
CommonTags: defaultCommonTags,
})
defer r.Close()
benchReporter := r.(*reporter)

val := int64(123456)
met := benchReporter.newMetric("foo", nil, counterType)
Expand All @@ -69,26 +71,19 @@ func BenchmarkCalulateSize(b *testing.B) {
}

func BenchmarkTimer(b *testing.B) {
protocolFactory := thrift.NewTCompactProtocolFactory()
resourcePool := newResourcePool(protocolFactory)
tags := resourcePool.getTagList()
batch := resourcePool.getBatch()
batch.CommonTags = tags
batch.Metrics = []*m3thrift.Metric{}
proto := resourcePool.getProto()
batch.Write(proto)
calc := proto.Transport().(*customtransport.TCalcTransport)
calc.ResetCount()
benchReporter := &reporter{
calc: calc,
calcProto: proto,
resourcePool: resourcePool,
metCh: make(chan sizedMetric, DefaultMaxQueueSize),
}
r, _ := NewReporter(Options{
HostPorts: []string{"127.0.0.1:9052"},
Service: "test-service",
CommonTags: defaultCommonTags,
})
defer r.Close()
benchReporter := r.(*reporter)
benchReporter.metCh = make(chan sizedMetric, DefaultMaxQueueSize)
// Close the met ch to end consume metrics loop
defer close(benchReporter.metCh)

go func() {
resourcePool := benchReporter.resourcePool
// Blindly consume metrics
for met := range benchReporter.metCh {
resourcePool.releaseShallowMetric(met.m)
Expand Down
44 changes: 44 additions & 0 deletions m3/sanitise.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package m3

import (
"github.com/uber-go/tally"
)

var (
// DefaultSanitiserOpts are the options for the default M3 Sanitiser.
//
// Names and values may contain alphanumeric characters plus '_', '-' and
// '.'; keys may contain alphanumeric characters plus '_' and '-' (no '.').
// ReplacementCharacter is set to tally.DefaultReplacementCharacter, which a
// sanitiser built via tally.NewSanitiser substitutes for any other character.
DefaultSanitiserOpts = tally.SanitiseOptions{
// Valid characters for names: alphanumerics, '.', '-', '_'.
NameCharacters: tally.ValidCharacters{
Ranges: tally.AlphanumericRange,
Characters: tally.UnderscoreDashDotCharacters,
},
// Valid characters for keys: alphanumerics, '-', '_' (dots are not allowed).
KeyCharacters: tally.ValidCharacters{
Ranges: tally.AlphanumericRange,
Characters: tally.UnderscoreDashCharacters,
},
// Valid characters for values: alphanumerics, '.', '-', '_'.
ValueCharacters: tally.ValidCharacters{
Ranges: tally.AlphanumericRange,
Characters: tally.UnderscoreDashDotCharacters,
},
ReplacementCharacter: tally.DefaultReplacementCharacter,
}
)
172 changes: 172 additions & 0 deletions sanitise.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package tally

import (
"bytes"
)

var (
	// DefaultReplacementCharacter is the rune substituted, by default, for
	// any character that is not considered valid.
	DefaultReplacementCharacter = '_'

	// AlphanumericRange holds the inclusive ranges 'a'-'z', 'A'-'Z' and
	// '0'-'9'.
	AlphanumericRange = []SanitiseRange{
		{'a', 'z'},
		{'A', 'Z'},
		{'0', '9'},
	}

	// UnderscoreDashCharacters holds the dash and underscore characters.
	UnderscoreDashCharacters = []rune{'-', '_'}

	// UnderscoreDashDotCharacters holds the dot, dash and underscore
	// characters.
	UnderscoreDashDotCharacters = []rune{'.', '-', '_'}
)

// SanitiseFn is a function that takes an input string and returns the
// sanitised version of it.
type SanitiseFn func(string) string

// SanitiseRange is a range of characters (inclusive on both ends): element 0
// is the lower bound and element 1 is the upper bound.
type SanitiseRange [2]rune

// ValidCharacters is a collection of valid characters. A character is
// considered valid if it falls inside any entry of Ranges or is equal to any
// entry of Characters.
type ValidCharacters struct {
// Ranges lists the inclusive character ranges that are valid.
Ranges []SanitiseRange
// Characters lists the individual characters that are valid.
Characters []rune
}

// SanitiseOptions are the set of configurable options for sanitisation.
// NewSanitiser turns each ValidCharacters field into the matching Sanitiser
// method, all sharing the same ReplacementCharacter.
type SanitiseOptions struct {
// NameCharacters are the characters left untouched by Sanitiser.Name.
NameCharacters ValidCharacters
// KeyCharacters are the characters left untouched by Sanitiser.Key.
KeyCharacters ValidCharacters
// ValueCharacters are the characters left untouched by Sanitiser.Value.
ValueCharacters ValidCharacters
// ReplacementCharacter is written in place of every invalid character.
ReplacementCharacter rune
}

// Sanitiser sanitises the provided input based on the function executed:
// names, keys and values each have their own sanitisation method.
type Sanitiser interface {
// Name returns the sanitised form of the provided name `n`.
Name(n string) string

// Key returns the sanitised form of the provided key `k`.
Key(k string) string

// Value returns the sanitised form of the provided value `v`.
Value(v string) string
}

// NewSanitiser builds a Sanitiser from opts. The name, key and value
// sanitisation functions are derived from the corresponding ValidCharacters
// in opts, and all of them use opts.ReplacementCharacter as the substitute
// for invalid characters.
func NewSanitiser(opts SanitiseOptions) Sanitiser {
	replacement := opts.ReplacementCharacter

	var s sanitiser
	s.nameFn = opts.NameCharacters.sanitiseFn(replacement)
	s.keyFn = opts.KeyCharacters.sanitiseFn(replacement)
	s.valueFn = opts.ValueCharacters.sanitiseFn(replacement)
	return s
}

// NoOpSanitiseFn is the identity sanitisation function: it hands back its
// argument exactly as received.
func NoOpSanitiseFn(v string) string {
	return v
}

// NewNoOpSanitiser builds a Sanitiser whose Name, Key and Value methods all
// delegate to NoOpSanitiseFn, i.e. every input is returned un-touched.
func NewNoOpSanitiser() Sanitiser {
	noop := SanitiseFn(NoOpSanitiseFn)
	return sanitiser{nameFn: noop, keyFn: noop, valueFn: noop}
}

// sanitiser implements Sanitiser by delegating each method to a dedicated
// SanitiseFn.
type sanitiser struct {
	nameFn, keyFn, valueFn SanitiseFn
}

// Name returns the result of applying the name sanitisation function to n.
func (s sanitiser) Name(n string) string { return s.nameFn(n) }

// Key returns the result of applying the key sanitisation function to k.
func (s sanitiser) Key(k string) string { return s.keyFn(k) }

// Value returns the result of applying the value sanitisation function to v.
func (s sanitiser) Value(v string) string { return s.valueFn(v) }

// sanitiseFn returns a SanitiseFn that keeps every character accepted by c
// and writes repChar in place of every other character.
//
// The returned function consults c.Ranges and c.Characters on each call. An
// input consisting solely of valid characters is returned as-is; a buffer is
// only created once the first invalid character is encountered.
func (c *ValidCharacters) sanitiseFn(repChar rune) SanitiseFn {
	return func(value string) string {
		// out stays nil for as long as the output is identical to the input.
		var out *bytes.Buffer

		for pos, r := range value {
			// a character is accepted if it lies within one of the inclusive
			// ranges or matches one of the individually listed characters
			accepted := false
			for _, bounds := range c.Ranges {
				if bounds[0] <= r && r <= bounds[1] {
					accepted = true
					break
				}
			}
			if !accepted {
				for _, allowed := range c.Characters {
					if allowed == r {
						accepted = true
						break
					}
				}
			}

			switch {
			case accepted && out == nil:
				// output still matches the input, nothing needs copying yet
			case accepted:
				// output has already diverged from the input, keep appending
				out.WriteRune(r)
			default:
				if out == nil {
					// first invalid character: initialise the buffer and
					// backfill it with the valid prefix seen so far
					out = bytes.NewBuffer(make([]byte, 0, len(value)))
					if pos > 0 {
						out.WriteString(value[:pos])
					}
				}
				out.WriteRune(repChar)
			}
		}

		// the buffer was never initialised, so the input needed no changes
		if out == nil {
			return value
		}
		return out.String()
	}
}
60 changes: 60 additions & 0 deletions sanitise_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package tally

import (
"testing"

"github.com/stretchr/testify/require"
)

// newTestSanitiser returns a SanitiseFn that accepts alphanumeric characters
// plus '-' and '_', and replaces everything else with
// DefaultReplacementCharacter.
func newTestSanitiser() SanitiseFn {
	valid := ValidCharacters{
		Ranges:     AlphanumericRange,
		Characters: UnderscoreDashCharacters,
	}
	return valid.sanitiseFn(DefaultReplacementCharacter)
}

// TestSanitiseIdentifierAllValidCharacters checks that a string made up of
// every valid character passes through the test sanitiser unchanged.
func TestSanitiseIdentifierAllValidCharacters(t *testing.T) {
	const input = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"

	sanitise := newTestSanitiser()
	got := sanitise(input)
	require.Equal(t, input, got)
}

// TestSanitiseTestCases runs the test sanitiser over a table of inputs and
// checks each result against the expected output.
func TestSanitiseTestCases(t *testing.T) {
	sanitise := newTestSanitiser()

	cases := []struct {
		in   string
		want string
	}{
		// fully valid input is returned as-is
		{in: "abcdef0AxS-s_Z", want: "abcdef0AxS-s_Z"},
		// a single invalid character in the middle
		{in: "a:b", want: "a_b"},
		// consecutive invalid characters are each replaced
		{in: "a! b", want: "a__b"},
		// an invalid character at the start
		{in: "?bZ", want: "_bZ"},
	}

	for i := range cases {
		require.Equal(t, cases[i].want, sanitise(cases[i].in))
	}
}
Loading

0 comments on commit ae42cdc

Please sign in to comment.