Skip to content

Commit

Permalink
stats: Measure percentiles, max etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
varungandhi-src committed Apr 26, 2024
1 parent f6c9fe6 commit 4ac738a
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 7 deletions.
76 changes: 69 additions & 7 deletions cmd/scip/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"path/filepath"

"github.com/hhatto/gocloc"
"github.com/montanaflynn/stats"
"github.com/urfave/cli/v2"
"google.golang.org/protobuf/proto"

"github.com/sourcegraph/sourcegraph/lib/errors"

Expand Down Expand Up @@ -59,35 +61,95 @@ func statsMain(flags statsFlags) error {
return nil
}

// Stats summarizes a distribution of values: selected percentiles plus the
// mean, standard deviation, maximum and sum. Every figure is stored as an
// int32, so fractional parts are not kept.
type Stats struct {
// Percentiles holds the 50th, 90th, 95th, 99th and 99.9th percentile
// values. Unlike the other fields, this one carries no JSON tag.
Percentiles struct {
Fifty int32 `json:"50"`
Ninety int32 `json:"90"`
NinetyFive int32 `json:"95"`
NinetyNine int32 `json:"99"`
NinetyNinePointNine int32 `json:"99.9"`
}
Mean int32 `json:"mean"`
Stddev int32 `json:"stddev"`
Max int32 `json:"max"`
Sum int32 `json:"sum"`
// Comment is free-form text describing the statistic (for example its
// unit). NewStats leaves it empty.
Comment string `json:"comment"`
}

// NewStats computes summary statistics (percentiles, mean, standard
// deviation, max and sum) over values and returns them truncated to int32.
//
// An empty input yields the zero Stats. The stats package reports NaN together
// with an error for empty data, and converting NaN to an integer type gives an
// implementation-dependent result, so that case is handled up front.
//
// The Comment field is left empty for the caller to fill in.
func NewStats(values []float64) Stats {
	s := Stats{}
	if len(values) == 0 {
		return s
	}

	s.Percentiles.Fifty = percentile(values, 50)
	s.Percentiles.Ninety = percentile(values, 90)
	s.Percentiles.NinetyFive = percentile(values, 95)
	s.Percentiles.NinetyNine = percentile(values, 99)
	s.Percentiles.NinetyNinePointNine = percentile(values, 99.9)

	// Errors are deliberately ignored below: the failure these calls are
	// expected to report is empty input, which was ruled out above.
	mean, _ := stats.Mean(values)
	s.Mean = int32(mean)
	stddev, _ := stats.StandardDeviation(values)
	s.Stddev = int32(stddev)
	max, _ := stats.Max(values)
	s.Max = int32(max)

	// The sum is the one figure that can plausibly exceed the int32 range
	// (e.g. total bytes across a very large index). Saturate instead of
	// relying on an out-of-range float-to-int conversion, whose result is
	// implementation-dependent.
	const maxInt32 = 1<<31 - 1
	sum, _ := stats.Sum(values)
	if sum > maxInt32 {
		s.Sum = maxInt32
	} else {
		s.Sum = int32(sum)
	}
	return s
}

type indexStatistics struct {
Documents int32 `json:"documents"`
LinesOfCode int32 `json:"linesOfCode"`
Occurrences int32 `json:"occurrences"`
Definitions int32 `json:"definitions"`
Documents int32 `json:"documents"`
DocumentSizes Stats `json:"documentSizes"`
LinesOfCode int32 `json:"linesOfCode"`
Occurrences int32 `json:"occurrences"`
OccurrenceCounts Stats `json:"occurrenceCounts"`
Definitions int32 `json:"definitions"`
DefinitionCounts Stats `json:"definitionCounts"`
}

// countStatistics gathers summary statistics for index: document, occurrence
// and definition totals, the per-document distribution of each, and the
// project's lines of code.
//
// customProjectRoot is forwarded to countLinesOfCode. A failure to count lines
// of code is logged and LinesOfCode is reported as 0; it does not abort the
// rest of the computation. The returned error is currently always nil.
func countStatistics(index *scip.Index, customProjectRoot string) (*indexStatistics, error) {
	var linesOfCode int32
	loc, err := countLinesOfCode(index, customProjectRoot)
	if err != nil {
		// Keep this a non-fatal error so that we can measure other index stats
		// even if the project is not cloned locally (e.g. if it's a huge
		// project like Chromium or the Linux kernel).
		log.Printf("Couldn't count lines of code: %s", err)
	} else {
		linesOfCode = loc.Total.Code
	}

	result := &indexStatistics{
		Documents:   int32(len(index.Documents)),
		LinesOfCode: linesOfCode,
	}

	documentSizes := make([]float64, 0, len(index.Documents))
	occurrenceCounts := make([]float64, 0, len(index.Documents))
	definitionCounts := make([]float64, 0, len(index.Documents))
	for _, document := range index.Documents {
		// proto.Size reports the wire-format size without serializing the
		// document, so nothing is allocated and there is no marshal error
		// to drop.
		documentSizes = append(documentSizes, float64(proto.Size(document)))

		// Occurrences are counted once per document here; the loop below
		// only looks for definitions.
		result.Occurrences += int32(len(document.Occurrences))
		occurrenceCounts = append(occurrenceCounts, float64(len(document.Occurrences)))

		var definitions int32
		for _, occurrence := range document.Occurrences {
			if scip.SymbolRole_Definition.Matches(occurrence) {
				definitions++
			}
		}
		result.Definitions += definitions
		definitionCounts = append(definitionCounts, float64(definitions))
	}

	result.DocumentSizes = NewStats(documentSizes)
	result.DocumentSizes.Comment = "sizes are in bytes"
	result.OccurrenceCounts = NewStats(occurrenceCounts)
	result.DefinitionCounts = NewStats(definitionCounts)
	result.DefinitionCounts.Comment = "counted using occurrences"
	return result, nil
}

// percentile returns the percent-th percentile of buf, truncated to int32.
//
// It returns 0 when stats.Percentile reports an error (for example for empty
// input), rather than converting the accompanying NaN result to an integer,
// which would give an implementation-dependent value.
func percentile(buf []float64, percent float64) int32 {
	res, err := stats.Percentile(buf, percent)
	if err != nil {
		return 0
	}
	return int32(res)
}

func countLinesOfCode(index *scip.Index, customProjectRoot string) (*gocloc.Result, error) {
var localSource string
root, err := url.Parse(index.Metadata.ProjectRoot)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/hexops/gotextdiff v1.0.3
github.com/hhatto/gocloc v0.4.2
github.com/k0kubun/pp/v3 v3.1.0
github.com/montanaflynn/stats v0.7.1
github.com/pseudomuto/protoc-gen-doc v1.5.1
github.com/smacker/go-tree-sitter v0.0.0-20220209044044-0d3022e933c3
github.com/sourcegraph/sourcegraph/lib v0.0.0-20220511160847-5a43d3ea24eb
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
Expand Down

0 comments on commit 4ac738a

Please sign in to comment.