Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/worker-fail-fast'
Browse files Browse the repository at this point in the history
* origin/worker-fail-fast:
  Update CHANGELOG for #38
  Update README.md
  tests: Add fail-fast tests
  tests: Enhance failing_suite
  reporter: Add report for builds that failed fast
  queue: Enable fast failure
  • Loading branch information
Kostas Pelelis committed Oct 5, 2020
2 parents efebb24 + 7a552dd commit 36bd4ba
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 18 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Breaking changes are prefixed with a "[BREAKING]" label.

## master (unreleased)

### Added

- Builds can be configured to terminate after a specified number of failures,
using the `--fail-fast` option.


## 0.3.0 (2020-10-05)

### Added
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ OPTIONS:
Exits with a non-zero status code if there were any failures.
--report-timeout N Fail if build is not finished after N seconds. Only applicable if --report is enabled (default: 3600).
--max-requeues N Retry failed examples up to N times before considering them legit failures (default: 3).
--fail-fast N Abort build with a non-zero status code after N failed examples.
-h, --help Show this message.
-v, --version Print the version and exit.
```
Expand Down Expand Up @@ -133,6 +134,16 @@ final report.
Flaky tests are also detected and printed as such in the final report. They are
also emitted to Sentry (see [Sentry integration](#sentry-integration)).

### Fail-fast

In order to prevent large suites running for a long time with a lot of
failures, a threshold can be set to control the number of failed examples that
will render the build unsuccessful. This is on par with RSpec's
[--fail-fast](https://relishapp.com/rspec/rspec-core/docs/command-line/fail-fast-option).

This feature is disabled by default, and can be controlled via the
`--fail-fast` command line option.

### Worker failures

It's not uncommon for CI processes to encounter unrecoverable failures for
Expand Down
8 changes: 8 additions & 0 deletions bin/rspecq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ require "rspecq"
DEFAULT_REDIS_HOST = "127.0.0.1"
DEFAULT_REPORT_TIMEOUT = 3600 # 1 hour
DEFAULT_MAX_REQUEUES = 3
DEFAULT_FAIL_FAST = 0

def env_set?(var)
["1", "true"].include?(ENV[var])
Expand Down Expand Up @@ -83,6 +84,11 @@ OptionParser.new do |o|
opts[:max_requeues] = v
end

o.on("--fail-fast N", Integer, "Abort build with a non-zero status code " \
"after N failed examples." ) do |v|
opts[:fail_fast] = v
end

o.on_tail("-h", "--help", "Show this message.") do
puts o
exit
Expand All @@ -103,6 +109,7 @@ opts[:report] ||= env_set?("RSPECQ_REPORT")
opts[:report_timeout] ||= Integer(ENV["RSPECQ_REPORT_TIMEOUT"] || DEFAULT_REPORT_TIMEOUT)
opts[:max_requeues] ||= Integer(ENV["RSPECQ_MAX_REQUEUES"] || DEFAULT_MAX_REQUEUES)
opts[:redis_url] ||= ENV["RSPECQ_REDIS_URL"]
opts[:fail_fast] ||= Integer(ENV["RSPECQ_FAIL_FAST"] || DEFAULT_FAIL_FAST)

raise OptionParser::MissingArgument.new(:build) if opts[:build].nil?
raise OptionParser::MissingArgument.new(:worker) if !opts[:report] && opts[:worker].nil?
Expand Down Expand Up @@ -134,5 +141,6 @@ else
worker.populate_timings = opts[:timings]
worker.file_split_threshold = opts[:file_split_threshold]
worker.max_requeues = opts[:max_requeues]
worker.fail_fast = opts[:fail_fast]
worker.work
end
34 changes: 32 additions & 2 deletions lib/rspecq/queue.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ def initialize(build_id, worker_id, redis_opts)
end

# NOTE: jobs will be processed from head to tail (lpop)
def publish(jobs)
def publish(jobs, fail_fast = 0)
@redis.multi do
@redis.hset(key_queue_config, 'fail_fast', fail_fast)
@redis.rpush(key_queue_unprocessed, jobs)
@redis.set(key_queue_status, STATUS_READY)
end.first
Expand Down Expand Up @@ -232,7 +233,9 @@ def unprocessed_jobs
# after being retried). Must be called after the build is complete,
# otherwise an exception will be raised.
def flaky_jobs
raise "Queue is not yet exhausted" if !exhausted?
if !exhausted? && !build_failed_fast?
raise "Queue is not yet exhausted"
end

requeued = @redis.hkeys(key_requeues)

Expand All @@ -241,11 +244,38 @@ def flaky_jobs
requeued - @redis.hkeys(key_failures)
end

# The failure threshold at which the build is aborted early (fail-fast).
#
# The value is read from the 'fail_fast' field of the queue config hash in
# Redis and cached in @fail_fast after the first lookup.
#
# Returns an Integer (0 means the feature is disabled), or nil while the
# Queue has not been published yet.
def fail_fast
  if published?
    @fail_fast ||= begin
      raw = @redis.hget(key_queue_config, 'fail_fast')
      Integer(raw)
    end
  end
end

# Returns true once the combined number of entries in the failures and
# errors hashes has reached the fail-fast threshold (count >= threshold),
# meaning the run is unsuccessful and the build should be terminated.
#
# Always returns false when the feature is disabled (threshold of 0) or when
# no threshold is available yet (fail_fast returns nil).
def build_failed_fast?
  # Read the threshold once so every check below sees the same value and
  # fail_fast is not evaluated repeatedly.
  threshold = fail_fast
  return false if threshold.nil? || threshold.zero?

  # Both hash lengths are requested inside a single MULTI block.
  counts = @redis.multi do
    @redis.hlen(key_failures)
    @redis.hlen(key_errors)
  end

  counts.sum >= threshold
end

# Redis key under which the queue status is stored; `publish` sets it to
# STATUS_READY.
#
# redis: STRING [STATUS_INITIALIZING, STATUS_READY]
def key_queue_status
key("queue", "status")
end

# Redis key of the hash holding queue configuration. `publish` writes the
# 'fail_fast' field into it.
#
# redis: HASH<config_key => config_value>
def key_queue_config
key("queue", "config")
end

# redis: LIST<job>
def key_queue_unprocessed
key("queue", "unprocessed")
Expand Down
9 changes: 8 additions & 1 deletion lib/rspecq/reporter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def report
puts failure_formatted(rspec_output)
end

if !@queue.exhausted?
unless @queue.exhausted? || @queue.build_failed_fast?
sleep 1
next
end
Expand Down Expand Up @@ -83,6 +83,13 @@ def summary(failures, errors, flaky_jobs, duration)
end

summary = ""
if @queue.build_failed_fast?
summary << "\n\n"
summary << "The limit of #{@queue.fail_fast} failures has been reached\n"
summary << "Aborting..."
summary << "\n"
end

summary << failed_examples_section if !failures.empty?

errors.each { |_job, msg| summary << msg }
Expand Down
13 changes: 11 additions & 2 deletions lib/rspecq/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,19 @@ class Worker
# Defaults to 3
attr_accessor :max_requeues

# Stop the execution after N failed tests. Do not stop at any point when
# set to 0.
#
# Defaults to 0
attr_accessor :fail_fast

attr_reader :queue

def initialize(build_id:, worker_id:, redis_opts:)
@build_id = build_id
@worker_id = worker_id
@queue = Queue.new(build_id, worker_id, redis_opts)
@fail_fast = 0
@files_or_dirs_to_run = "spec"
@populate_timings = false
@file_split_threshold = 999999
Expand All @@ -69,6 +76,8 @@ def work
# to `requeue_lost_job` inside the work loop
update_heartbeat

return if queue.build_failed_fast?

lost = queue.requeue_lost_job
puts "Requeued lost job: #{lost}" if lost

Expand Down Expand Up @@ -121,7 +130,7 @@ def try_publish_queue!(queue)

timings = queue.timings
if timings.empty?
q_size = queue.publish(files_to_run.shuffle)
q_size = queue.publish(files_to_run.shuffle, fail_fast)
log_event(
"No timings found! Published queue in random order (size=#{q_size})",
"warning"
Expand Down Expand Up @@ -160,7 +169,7 @@ def try_publish_queue!(queue)
# sort jobs based on their timings (slowest to be processed first)
jobs = jobs.sort_by { |_j, t| -t }.map(&:first)

puts "Published queue (size=#{queue.publish(jobs)})"
puts "Published queue (size=#{queue.publish(jobs, fail_fast)})"
end

private
Expand Down
4 changes: 0 additions & 4 deletions test/sample_suites/failing_suite/spec/bar_spec.rb

This file was deleted.

4 changes: 4 additions & 0 deletions test/sample_suites/failing_suite/spec/fail_1_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Fixture for the failing sample suite: one passing and one failing example.
RSpec.describe do
# Passes: false equals false.
it { expect(false).to eq false }
# Always fails: 1 never equals 2.
it { expect(1).to eq 2 }
end
4 changes: 4 additions & 0 deletions test/sample_suites/failing_suite/spec/fail_2_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Second fixture for the failing sample suite: one passing and one failing
# example.
RSpec.describe do
# Passes: false equals false.
it { expect(false).to eq false }
# Always fails: 1 never equals 2.
it { expect(1).to eq 2 }
end
3 changes: 0 additions & 3 deletions test/sample_suites/failing_suite/spec/foo_spec.rb

This file was deleted.

3 changes: 3 additions & 0 deletions test/sample_suites/failing_suite/spec/success_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Fixture for the failing sample suite containing a single passing example.
RSpec.describe do
it { expect(true).to eq true }
end
34 changes: 29 additions & 5 deletions test/test_e2e.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@ def test_suite_with_legit_failures
queue = exec_build("failing_suite")

refute queue.build_successful?
assert queue.fail_fast.zero?
refute queue.build_failed_fast?

assert_empty queue.unprocessed_jobs
assert_processed_jobs [
"./spec/foo_spec.rb",
"./spec/bar_spec.rb",
"./spec/bar_spec.rb[1:2]",
"./spec/fail_1_spec.rb",
"./spec/fail_1_spec.rb[1:2]",
"./spec/fail_2_spec.rb",
"./spec/fail_2_spec.rb[1:2]",
"./spec/success_spec.rb",
], queue

assert_equal 3 + 3, queue.example_count
assert_equal 3 + 3 + 5, queue.example_count

assert_equal({ "./spec/bar_spec.rb[1:2]" => "3" }, queue.requeued_jobs)
assert_equal({
"./spec/fail_1_spec.rb[1:2]" => "3",
"./spec/fail_2_spec.rb[1:2]" => "3",
}, queue.requeued_jobs)
end

def test_passing_suite
Expand Down Expand Up @@ -104,4 +112,20 @@ def test_spec_file_splitting
"./spec/fast_spec.rb",
], queue)
end

# Runs the failing suite with a fail-fast threshold of 1 and checks that the
# build is reported as failed fast with part of the queue left unprocessed.
def test_suite_with_failures_and_fail_fast
  queue = exec_build("failing_suite", "--fail-fast 1")

  assert_equal 1, queue.fail_fast
  assert queue.build_failed_fast?
  refute queue.build_successful?

  # Recorded failures plus errors must add up to exactly the threshold.
  expected = queue.fail_fast
  recorded = queue.example_failures.length + queue.non_example_errors.length
  assert_equal expected, recorded

  # 1 <= unprocessed_jobs <= 2
  # NOTE(review): the exact count presumably depends on which jobs (the
  # passing one or a failing one after its requeues) ran before the abort.
  remaining = queue.unprocessed_jobs.length
  assert_includes [1, 2], remaining

  finished = queue.processed_jobs.length
  assert_includes [2, 3], finished
end
end
2 changes: 1 addition & 1 deletion test/test_helpers/assertions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def assert_queue_well_formed(queue, msg=nil)
queue.send(:key_worker_heartbeats), 0, -1, withscores: true)

assert queue.published?
assert queue.exhausted?
assert (queue.build_failed_fast? || queue.exhausted?)
assert_operator heartbeats.size, :>=, 0
assert heartbeats.all? { |hb| Time.at(hb.last) <= Time.now }
end
Expand Down

0 comments on commit 36bd4ba

Please sign in to comment.