forked from janisz/junit2jira
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add cli for checking CI test flakiness
- Loading branch information
Showing
6 changed files
with
574 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
/flakechecker | ||
/junit2jira | ||
.idea | ||
# Binaries for programs and plugins | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package main | ||
|
||
import ( | ||
"cloud.google.com/go/bigquery" | ||
"context" | ||
"github.com/pkg/errors" | ||
log "github.com/sirupsen/logrus" | ||
"google.golang.org/api/iterator" | ||
"time" | ||
) | ||
|
||
// BigQuery connection settings and the query used to fetch failure statistics.
const (
	// projectID is the Google Cloud project the BigQuery client is created for.
	projectID = "acs-san-stackroxci"

	// queryTimeout is the deadline applied to the context of one query read.
	queryTimeout = 1 * time.Minute

	// queryStrGetFailureRatio selects the statistics columns of the rows that
	// match the named parameters @jobName, @filteredName and @classname.
	queryStrGetFailureRatio = `
SELECT
JobName,
FilteredName,
Classname,
TotalAll,
FailRatio
FROM
` + "`acs-san-stackroxci.ci_metrics.stackrox_tests_recent_flaky_tests`" + `
WHERE
JobName = @jobName
AND FilteredName = @filteredName
AND Classname = @classname
`
)
|
||
type biqQueryClient interface { | ||
GetRatioForTest(flakeTestConfig *flakeCheckerRecord, testName string) (int, int, error) | ||
} | ||
|
||
type biqQueryClientImpl struct { | ||
client *bigquery.Client | ||
} | ||
|
||
func getNewBigQueryClient() (biqQueryClient, error) { | ||
ctx := context.Background() | ||
|
||
client, err := bigquery.NewClient(ctx, projectID) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "creating BigQuery client") | ||
} | ||
|
||
return &biqQueryClientImpl{client: client}, nil | ||
} | ||
|
||
func (c *biqQueryClientImpl) GetRatioForTest(flakeTestRec *flakeCheckerRecord, testName string) (int, int, error) { | ||
query := c.client.Query(queryStrGetFailureRatio) | ||
query.Parameters = []bigquery.QueryParameter{ | ||
{Name: "jobName", Value: flakeTestRec.config.RatioJobName}, | ||
{Name: "filteredName", Value: testName}, | ||
{Name: "classname", Value: flakeTestRec.config.Classname}, | ||
} | ||
|
||
ctx, cancelBigQueryRequest := context.WithTimeout(context.Background(), queryTimeout) | ||
defer cancelBigQueryRequest() | ||
|
||
resIter, err := query.Read(ctx) | ||
if err != nil { | ||
return 0, 0, errors.Wrap(err, "query data from BigQuery") | ||
} | ||
|
||
// We need only first record. No need to loop over iterator. | ||
var record recentFlakyTestsRecord | ||
if errNext := resIter.Next(&record); errNext != nil { | ||
return 0, 0, errors.Wrap(errNext, "read BigQuery record") | ||
} | ||
|
||
if errNext := resIter.Next(&record); !errors.Is(errNext, iterator.Done) { | ||
log.Warnf("Expected to find one row in DB, but got more for query params: %v - query: %s", query.Parameters, queryStrGetFailureRatio) | ||
} | ||
|
||
return record.TotalAll, record.FailRatio, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"github.com/pkg/errors" | ||
"io" | ||
"os" | ||
"regexp" | ||
) | ||
|
||
// flakeCheckerRecordConfig is one entry of the flakechecker configuration
// file, describing which failed tests may be tolerated as flakes.
type flakeCheckerRecordConfig struct {
	// MatchJobName is the name (regex) of the job that should be evaluated by
	// flakechecker, e.g. a branch job should be evaluated but main should not.
	MatchJobName string `json:"match_job_name"`

	// RatioJobName is the job name used for ratio calculation, e.g. runs of
	// the main branch are taken as the base for evaluating the flake ratio.
	RatioJobName string `json:"ratio_job_name"`

	// TestNameRegex is the regex used to match test names. Some test names
	// contain detailed information (e.g. version 4.4.4) while the ratio should
	// apply to the whole group (e.g. 4.4.z); a regex allows such grouping.
	TestNameRegex string `json:"test_name_regex"`

	// Classname is the class name of the test, which makes it possible to
	// isolate a single flaky test from the rest of its suite.
	Classname string `json:"classname"`

	// RatioThreshold is the failure percentage allowed for this test. It is
	// usually derived from historical executions collected in the DB.
	RatioThreshold int `json:"ratio_threshold"`
}
|
||
type flakeCheckerRecord struct { | ||
config *flakeCheckerRecordConfig | ||
regexMatchJobName *regexp.Regexp | ||
regexTestNameRegex *regexp.Regexp | ||
} | ||
|
||
func newFlakeCheckerRecord(config *flakeCheckerRecordConfig) (*flakeCheckerRecord, error) { | ||
regexMatchJobName, err := regexp.Compile(fmt.Sprintf("^%s$", config.MatchJobName)) | ||
if err != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("invalid flake config match job regex: %v", config.MatchJobName)) | ||
} | ||
|
||
regexTestNameRegex, err := regexp.Compile(fmt.Sprintf("^%s$", config.TestNameRegex)) | ||
if err != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("invalid flake config test name regex: %v", config.TestNameRegex)) | ||
} | ||
|
||
return &flakeCheckerRecord{ | ||
config: config, | ||
regexMatchJobName: regexMatchJobName, | ||
regexTestNameRegex: regexTestNameRegex, | ||
}, nil | ||
} | ||
|
||
// newFlakeCheckerRecordMust - is primarily used in tests. | ||
func newFlakeCheckerRecordMust(config *flakeCheckerRecordConfig) *flakeCheckerRecord { | ||
flakeRecord, err := newFlakeCheckerRecord(config) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
return flakeRecord | ||
} | ||
|
||
func (r *flakeCheckerRecord) matchJobName(jobName string) (bool, error) { | ||
return r.regexMatchJobName.MatchString(jobName), nil | ||
} | ||
|
||
func (r *flakeCheckerRecord) matchTestName(testName string) (bool, error) { | ||
return r.regexTestNameRegex.MatchString(testName), nil | ||
} | ||
|
||
func (r *flakeCheckerRecord) matchClassname(classname string) (bool, error) { | ||
return classname == r.config.Classname, nil | ||
} | ||
|
||
func loadFlakeConfigFile(fileName string) ([]*flakeCheckerRecord, error) { | ||
jsonConfigFile, err := os.Open(fileName) | ||
if err != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("open flake config file: %s", fileName)) | ||
} | ||
defer jsonConfigFile.Close() | ||
|
||
jsonConfigFileData, err := io.ReadAll(jsonConfigFile) | ||
if err != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("read flake config file: %s", fileName)) | ||
} | ||
|
||
flakeConfigs := make([]*flakeCheckerRecordConfig, 0) | ||
err = json.Unmarshal(jsonConfigFileData, &flakeConfigs) | ||
if err != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("parse flake config file: %s", fileName)) | ||
} | ||
|
||
flakeRecords := make([]*flakeCheckerRecord, 0, len(flakeConfigs)) | ||
for _, flakeConfig := range flakeConfigs { | ||
flakeRecord, errFlakeRecord := newFlakeCheckerRecord(flakeConfig) | ||
if errFlakeRecord != nil { | ||
return nil, errors.Wrap(err, fmt.Sprintf("create record from config: %v", flakeConfig)) | ||
} | ||
|
||
flakeRecords = append(flakeRecords, flakeRecord) | ||
} | ||
|
||
return flakeRecords, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package main | ||
|
||
import ( | ||
_ "embed" | ||
"flag" | ||
"fmt" | ||
"github.com/carlmjohnson/versioninfo" | ||
junit "github.com/joshdk/go-junit" | ||
"github.com/pkg/errors" | ||
log "github.com/sirupsen/logrus" | ||
"github.com/stackrox/junit2jira/pkg/testcase" | ||
"os" | ||
) | ||
|
||
// totalRunsLimit is the minimum number of recorded runs a test must have
// before its failure ratio is evaluated.
const totalRunsLimit = 30

// Descriptions attached to the errors returned while checking failed tests.
const (
	errDescNoMatch        = "there is no match in allowed flake tests"
	errDescAboveThreshold = "allowed flake ratio for test is above threshold"
	errDescShortHistory   = "total runs for test is under history count threshold"
	errDescGetRatio       = "get ratio for test failed"
)
|
||
// flakeCheckerParams holds the command-line settings of the flakechecker CLI.
type flakeCheckerParams struct {
	// junitReportsDir is the directory containing the jUnit report XML files
	// (flag -junit-reports-dir).
	junitReportsDir string
	// configFile is the path of the config file with the defined failure
	// ratios (flag -config-file).
	configFile string

	// jobName is the name of the CI job (flag -job-name).
	jobName string
	// orchestrator is the orchestrator name, if any (flag -orchestrator).
	orchestrator string

	// dryRun is set by the -dry-run flag.
	dryRun bool
}
|
||
func main() { | ||
var debug bool | ||
var err error | ||
|
||
p := flakeCheckerParams{} | ||
flag.StringVar(&p.junitReportsDir, "junit-reports-dir", os.Getenv("ARTIFACT_DIR"), "Dir that contains jUnit reports XML files") | ||
flag.StringVar(&p.configFile, "config-file", "", "Config file with defined failure ratios") | ||
|
||
flag.StringVar(&p.jobName, "job-name", "", "Name of CI job.") | ||
flag.StringVar(&p.orchestrator, "orchestrator", "", "orchestrator name (such as GKE or OpenShift), if any.") | ||
|
||
flag.BoolVar(&p.dryRun, "dry-run", false, "When set to true issues will NOT be created.") | ||
flag.BoolVar(&debug, "debug", false, "Enable debug log level") | ||
versioninfo.AddFlag(flag.CommandLine) | ||
flag.Parse() | ||
|
||
if debug { | ||
log.SetLevel(log.DebugLevel) | ||
} | ||
|
||
err = p.run() | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
} | ||
|
||
// recentFlakyTestsRecord receives one row of the failure-ratio query; its
// field names mirror the columns selected by that query.
type recentFlakyTestsRecord struct {
	// Identification of the test the row belongs to.
	JobName, FilteredName, Classname string

	// Statistics returned to the caller: total run count and failure ratio.
	TotalAll, FailRatio int
}
|
||
func (p *flakeCheckerParams) checkFailedTests(bqClient biqQueryClient, failedTests []testcase.TestCase, flakeCheckerRecs []*flakeCheckerRecord) error { | ||
for _, failedTest := range failedTests { | ||
found := false | ||
log.Infof("Checking failed test: %q / %q / %q", p.jobName, failedTest.Name, failedTest.Classname) | ||
for _, flakeCheckerRec := range flakeCheckerRecs { | ||
match, err := flakeCheckerRec.matchJobName(p.jobName) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if !match { | ||
continue | ||
} | ||
|
||
match, err = flakeCheckerRec.matchTestName(failedTest.Name) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if !match { | ||
continue | ||
} | ||
|
||
match, err = flakeCheckerRec.matchClassname(failedTest.Classname) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if !match { | ||
continue | ||
} | ||
|
||
found = true | ||
log.Infof("Match found: %q / %q / %q", flakeCheckerRec.config.MatchJobName, flakeCheckerRec.config.TestNameRegex, flakeCheckerRec.config.Classname) | ||
totalRuns, failRatio, err := bqClient.GetRatioForTest(flakeCheckerRec, failedTest.Name) | ||
if err != nil { | ||
return errors.Wrap(err, errDescGetRatio) | ||
} | ||
|
||
if totalRuns < totalRunsLimit { | ||
return errors.Wrap(fmt.Errorf("%d", totalRuns), errDescShortHistory) | ||
} | ||
|
||
if failRatio > flakeCheckerRec.config.RatioThreshold { | ||
return errors.Wrap(fmt.Errorf("(%d > %d)", failRatio, flakeCheckerRec.config.RatioThreshold), errDescAboveThreshold) | ||
} | ||
|
||
log.Infof("Ratio is below threshold: (%d <= %d)", failRatio, flakeCheckerRec.config.RatioThreshold) | ||
} | ||
|
||
if !found { | ||
return errors.Wrap(errors.New(failedTest.Name), errDescNoMatch) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (p *flakeCheckerParams) run() error { | ||
testSuites, err := junit.IngestDir(p.junitReportsDir) | ||
if err != nil { | ||
log.Fatalf("could not read files: %s", err) | ||
} | ||
|
||
failedTests, err := testcase.GetFailedTests(testSuites) | ||
if err != nil { | ||
return errors.Wrap(err, "could not find failed tests") | ||
} | ||
|
||
if len(failedTests) == 0 { | ||
log.Info("No failed tests to process") | ||
return nil | ||
} | ||
|
||
log.Infof("Found %d failed tests", len(failedTests)) | ||
|
||
flakeConfigs, err := loadFlakeConfigFile(p.configFile) | ||
if err != nil { | ||
log.Fatalf("unable to load config file (%s): %s", p.configFile, err) | ||
} | ||
|
||
bqClient, err := getNewBigQueryClient() | ||
if err != nil { | ||
log.Fatalf("unable to create BigQuery client: %s", err) | ||
} | ||
|
||
if err = p.checkFailedTests(bqClient, failedTests, flakeConfigs); err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
log.Info("All failed tests are within allowed flake thresholds") | ||
|
||
return nil | ||
} |
Oops, something went wrong.