Skip to content

Commit

Permalink
feat: add failed resque jobs data for zabbix
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhidkovDenis committed Feb 15, 2017
1 parent 9518bfc commit 64b383d
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 10 deletions.
4 changes: 4 additions & 0 deletions app/controllers/resque/queues/status_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@ def show
age(params['queue'])
when 'size'
size(params['queue'])
when 'failures_count'
Resque.queues_info.failures_count_for_queue(params['queue'])
when 'threshold_size'
Resque.queues_info.threshold_size(params.fetch('queue'))
when 'threshold_age'
Resque.queues_info.threshold_age(params.fetch('queue'))
when /^threshold_failures_per_(?<period>\w+)$/
Resque.queues_info.threshold_failures_count(params.fetch('queue'), $LAST_MATCH_INFO['period'])
else
0
end.to_s
Expand Down
1 change: 1 addition & 0 deletions lib/resque/integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ module Integration
autoload :LogsRotator, 'resque/integration/logs_rotator'
autoload :QueuesInfo, 'resque/integration/queues_info'
autoload :Extensions, 'resque/integration/extensions'
autoload :FailureBackends, 'resque/integration/failure_backends'

extend ActiveSupport::Concern

Expand Down
5 changes: 4 additions & 1 deletion lib/resque/integration/engine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ class Engine < Rails::Engine
require 'resque/failure'
require 'resque/failure/redis'

Resque::Failure::MultipleWithRetrySuppression.classes = [Resque::Failure::Redis]
Resque::Failure::MultipleWithRetrySuppression.classes = [
Resque::Failure::Redis,
Resque::Integration::FailureBackends::QueuesTotals
]

if Resque.config.failure_notifier.enabled?
require 'resque_failed_job_mailer'
Expand Down
7 changes: 7 additions & 0 deletions lib/resque/integration/failure_backends.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module Resque
  module Integration
    # Namespace for the failure backends shipped with this integration.
    # Backends are registered for lazy loading, so nothing is required
    # until the constant is first referenced.
    module FailureBackends
      autoload(:QueuesTotals, 'resque/integration/failure_backends/queues_totals')
    end
  end
end
37 changes: 37 additions & 0 deletions lib/resque/integration/failure_backends/queues_totals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module Resque
  module Integration
    module FailureBackends
      # Failure backend that keeps a running total of failed jobs per queue.
      #
      # Totals live in a single Redis hash (field = queue name, value = counter).
      # NOTE(review): `queue` is not defined here; it is presumably the reader
      # inherited from Resque::Failure::Base - confirm against the resque gem.
      class QueuesTotals < ::Resque::Failure::Base
        # Redis hash that stores one counter field per queue.
        REDIS_COUNTER_KEY = 'resque:integration:failure_backends:queues_totals'.freeze
        # Once a queue's counter reaches this value it is restarted from 1.
        MAX_COUNTER_VALUE = 10_000_000

        private_constant :REDIS_COUNTER_KEY

        # Records one more failure for the current queue.
        # When the incremented counter reaches MAX_COUNTER_VALUE it is reset to 1.
        def save
          total = Resque.redis.hincrby(REDIS_COUNTER_KEY, queue, 1)
          return if total < MAX_COUNTER_VALUE

          Resque.redis.hset(REDIS_COUNTER_KEY, queue, 1)
        end

        # Names of all queues that currently have a counter field.
        def self.queues
          Resque.redis.hkeys(REDIS_COUNTER_KEY)
        end

        # Failure total for +queue+, or the sum over every queue when +queue+ is nil.
        # A queue without a counter yields 0. +_class_name+ is accepted but ignored.
        def self.count(queue = nil, _class_name = nil)
          return Resque.redis.hget(REDIS_COUNTER_KEY, queue).to_i unless queue.nil?

          Resque.redis.hvals(REDIS_COUNTER_KEY).inject(0) { |total, value| total + value.to_i }
        end

        # Drops the counter of +queue+, or the whole hash when +queue+ is nil.
        def self.clear(queue = nil)
          queue.nil? ? Resque.redis.del(REDIS_COUNTER_KEY) : Resque.redis.hdel(REDIS_COUNTER_KEY, queue)
        end
      end
    end
  end
end
8 changes: 8 additions & 0 deletions lib/resque/integration/queues_info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ def size_overall
@size.overall
end

# Total number of recorded failures for +queue+, as reported by the
# QueuesTotals failure backend.
def failures_count_for_queue(queue)
  totals_backend = Resque::Integration::FailureBackends::QueuesTotals
  totals_backend.count(queue)
end

# Size threshold for +queue+; delegates to the config object's #max_size.
def threshold_size(queue)
  @config.max_size(queue)
end
Expand All @@ -35,6 +39,10 @@ def threshold_age(queue)
@config.max_age(queue)
end

# Failures-count threshold for +queue+ over +period+ (the specs use '5m' and '1h');
# delegates to the config object's #max_failures_count.
def threshold_failures_count(queue, period)
  @config.max_failures_count(queue, period)
end

# Returns whatever the config object exposes through #data.
def data
  @config.data
end
Expand Down
17 changes: 11 additions & 6 deletions lib/resque/integration/queues_info/config.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'yaml'

module Resque
module Integration
class QueuesInfo
Expand All @@ -16,6 +18,10 @@ def max_size(queue)
threshold(queue, 'max_size')
end

# Looks up the failures-count threshold of +queue+ for +period+.
# The setting name is "max_failures_count_per_<period>", e.g.
# "max_failures_count_per_5m"; the lookup itself is done by #threshold.
def max_failures_count(queue, period)
  setting_name = "max_failures_count_per_#{period}"
  threshold(queue, setting_name)
end

def data
@data ||= @queues.map do |k, v|
{
Expand All @@ -37,19 +43,18 @@ def load_config(path)
end

# Expands comma-separated queue lists used as config keys into one entry per queue.
#
# A key such as "first, third\n" contributes its settings to both "first" and
# "third". When a queue appears under several keys, the settings are merged in
# order of appearance, so later keys override earlier ones for the same setting.
#
# @param config [Hash] raw settings keyed by queue name or comma-separated list;
#   a key without settings (nil value) is treated as an empty settings hash
# @return [Hash{String => Hash}] a new hash keyed by single queue name;
#   +config+ and its nested hashes are left untouched
def expand_config(config)
  config.each_with_object({}) do |(key, settings), expanded_config|
    key.to_s.split(',').each do |queue|
      # strip drops surrounding blanks as well as the trailing newline that a
      # folded YAML scalar leaves at the end of the key.
      (expanded_config[queue.strip] ||= {}).merge!(settings || {})
    end
  end
end
end
end
Expand Down
14 changes: 13 additions & 1 deletion spec/fixtures/resque_queues.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
# Thresholds used for any queue that has no explicit setting below.
defaults:
  max_age: 10
  max_size: 10
  max_failures_count_per_5m: 5
  max_failures_count_per_1h: 60

queues:
  # Explicit mapping key holding a folded scalar: it loads as the single
  # string "first, third\n", i.e. one settings block shared by both queues.
  ?
    >
      first,
      third
  :
    max_age: 20
    max_size: 100
    max_failures_count_per_5m: 15
    max_failures_count_per_1h: 90

  # "third" is listed again on its own and overrides only these two values.
  third:
    max_age: 30
    max_failures_count_per_1h: 70
105 changes: 105 additions & 0 deletions spec/resque/integration/failure_backends/queues_totals_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
require 'spec_helper'

# Specs for the per-queue failure counter backend.
describe Resque::Integration::FailureBackends::QueuesTotals do
  # Collaborators are plain doubles: the examples only call #save and the
  # class-level readers, none of which touch them.
  let(:failure) { double('UnbelievableError') }
  let(:worker) { double('Worker') }
  let(:payload) { double('Payload') }

  describe '#save' do
    let(:queue) { 'images' }
    let(:backend) { described_class.new(failure, worker, queue, payload) }

    # Lower the limit so the overflow branch is reached after three saves.
    before { stub_const('Resque::Integration::FailureBackends::QueuesTotals::MAX_COUNTER_VALUE', 3) }

    it 'increments failures count for specified queue' do
      expect { 2.times { backend.save } }.to change { described_class.count(queue) }.from(0).to(2)
    end

    context 'when counter overflows' do
      it 'resets failures count for specified queue to 1' do
        expect { 3.times { backend.save } }.to change { described_class.count(queue) }.from(0).to(1)
      end
    end
  end

  describe '.count' do
    let(:images_queue) { 'images' }
    let(:products_queue) { 'products' }
    let(:images_failure_backend) { described_class.new(failure, worker, images_queue, payload) }
    let(:products_failure_backend) { described_class.new(failure, worker, products_queue, payload) }

    # Two failures for images, three for products: five overall.
    before do
      2.times { images_failure_backend.save }
      3.times { products_failure_backend.save }
    end

    context 'with specified queue' do
      it 'returns failures count for specified queue' do
        expect(described_class.count(images_queue)).to eq(2)
        expect(described_class.count(products_queue)).to eq(3)
      end
    end

    context 'with queue which has no failures' do
      it 'returns 0' do
        expect(described_class.count('not_failed')).to eq(0)
      end
    end

    context 'without specified queue' do
      it 'returns aggregated failures count from all queues' do
        expect(described_class.count).to eq(5)
      end
    end
  end

  describe '.queues' do
    context 'when has failures data' do
      let(:images_queue) { 'images' }
      let(:products_queue) { 'products' }

      before do
        described_class.new(failure, worker, images_queue, payload).save
        described_class.new(failure, worker, products_queue, payload).save
      end

      it 'returns names of failed queues' do
        expect(described_class.queues).to match_array([images_queue, products_queue])
      end
    end

    context 'when does not have failures data' do
      it { expect(described_class.queues).to be_empty }
    end
  end

  describe '.clear' do
    let(:images_queue) { 'images' }
    let(:products_queue) { 'products' }

    # One failure per queue, so the overall total starts at 2.
    before do
      described_class.new(failure, worker, images_queue, payload).save
      described_class.new(failure, worker, products_queue, payload).save
    end

    context 'with specified queue' do
      it 'deletes counter data for specified queue' do
        expect { described_class.clear(products_queue) }.to change { described_class.count }.from(2).to(1)
        expect(described_class.count(images_queue)).to eq(1)
        expect(described_class.count(products_queue)).to eq(0)
      end
    end

    context 'without specified queue' do
      it 'deletes counter data for all queues' do
        expect { described_class.clear }.to change { described_class.count }.from(2).to(0)
        expect(described_class.count(images_queue)).to eq(0)
        expect(described_class.count(products_queue)).to eq(0)
      end
    end
  end
end
51 changes: 49 additions & 2 deletions spec/resque/integration/queues_info_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@
end
end

describe '#age_threshold' do
describe '#threshold_age' do
context 'when queue defined in config' do
let(:queue_name) { 'first' }

Expand All @@ -236,7 +236,7 @@
end
end

describe '#size_threshold' do
describe '#threshold_size' do
context 'when queue defined in config' do
let(:queue_name) { 'first' }

Expand All @@ -253,4 +253,51 @@
end
end
end

# Expected values mirror spec/fixtures/resque_queues.yml.
describe '#threshold_failures_count' do
  context 'when queue is defined in config' do
    let(:queue_name) { 'first' }

    it 'returns failures count threshold for specified queue and time period' do
      expect(queue_info.threshold_failures_count(queue_name, '5m')).to eq(15)
      expect(queue_info.threshold_failures_count(queue_name, '1h')).to eq(90)
    end
  end

  context 'when queue is not defined in config' do
    let(:queue_name) { 'second' }

    # "second" has no entry in the fixture, so the values under `defaults` apply.
    it 'returns default failures count threshold for specified time period' do
      expect(queue_info.threshold_failures_count(queue_name, '5m')).to eq(5)
      expect(queue_info.threshold_failures_count(queue_name, '1h')).to eq(60)
    end
  end
end

describe '#failures_count_for_queue' do
  # The backend is stubbed: this example only checks that the call is
  # forwarded with the queue name and that the result is passed back.
  before do
    allow(Resque::Integration::FailureBackends::QueuesTotals)
      .to receive(:count).with('first').and_return(14)
  end

  it 'returns total failures count for specified queue' do
    expect(queue_info.failures_count_for_queue('first')).to eq(14)
  end
end

# The fixture lists "third" twice: once in the shared "first, third" key and
# once on its own. The stand-alone entry overrides max_age and the 1h failures
# threshold; everything else comes from the shared entry.
describe 'configuration merging' do
  let(:first_queue_name) { 'first' }
  let(:third_queue_name) { 'third' }

  it 'merges configs for queue in order of appearance' do
    expect(queue_info.threshold_age(first_queue_name)).to eq(20)
    expect(queue_info.threshold_size(first_queue_name)).to eq(100)
    expect(queue_info.threshold_failures_count(first_queue_name, '5m')).to eq(15)
    expect(queue_info.threshold_failures_count(first_queue_name, '1h')).to eq(90)

    expect(queue_info.threshold_age(third_queue_name)).to eq(30)
    expect(queue_info.threshold_size(third_queue_name)).to eq(100)
    expect(queue_info.threshold_failures_count(third_queue_name, '5m')).to eq(15)
    expect(queue_info.threshold_failures_count(third_queue_name, '1h')).to eq(70)
  end
end
end

0 comments on commit 64b383d

Please sign in to comment.