From d94de7bf8847e3535d70180b8457cb5eea614dbe Mon Sep 17 00:00:00 2001
From: bjoerrrn <91031217+bjoerrrn@users.noreply.github.com>
Date: Mon, 19 Aug 2024 10:59:31 +0200
Subject: [PATCH] Update storj-system-health.sh

* fixed some issues with the latest release
---
 storj-system-health.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/storj-system-health.sh b/storj-system-health.sh
index 6dfb5a8..ac57116 100644
--- a/storj-system-health.sh
+++ b/storj-system-health.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# v1.11
+# v1.11.1
 #
 # storj-system-health.sh - storagenode health checks and notifications to discord / by email
 # by dusselmann, https://github.com/dusselmann/storj-system-health.sh
@@ -533,12 +533,12 @@ audit_difference=0
 
 # select error messages in detail (partially extracted text log)
 [[ "$VERBOSE" == "true" ]] && INFO="$(echo "$LOG1H" 2>&1 | grep '[[:blank:]]*INFO')"
-AUDS="$(echo "$LOG1H" 2>&1 | grep -E '[[:blank:]]*GET_AUDIT' | grep 'failed')"
-AUDS=$(echo "$AUDS" 2>&1 | grep -v 'read: connection timed out')
+AUDS="$(echo "$LOG1H" 2>&1 | grep -E 'GET_AUDIT' | grep 'failed')"
+AUDS=$(echo "$AUDS" 2>&1 | grep -v -e 'connection timed out' -e 'connection reset by peer')
 FATS="$(echo "$LOG1H" 2>&1 | grep '[[:blank:]]*FATAL' | grep -v '[[:blank:]]*INFO')"
 ERRS="$(echo "$LOG1H" 2>&1 | grep '[[:blank:]]*ERROR' | grep -v -e '[[:blank:]]*INFO' -e '[[:blank:]]*FATAL' -e 'collector' -e 'piecestore' -e 'pieces error: filestore error: context canceled' -e 'piecedeleter' -e 'emptying trash failed' -e 'service ping satellite failed' -e 'timeout: no recent network activity' -e 'connection reset by peer' -e 'context canceled' -e 'tcp connector failed' -e 'node rate limited by id' -e 'manager closed: read tcp' -e 'connection timed out')"
-DREPS="$(echo "$LOG1H" 2>&1 | grep -E '[[:blank:]]*GET_REPAIR' | grep 'failed')"
-DREPS=$(echo "$DREPS" 2>&1 | grep -v 'connection timed out')
+DREPS="$(echo "$LOG1H" 2>&1 | grep -E 'GET_REPAIR' | grep 'failed')"
+DREPS=$(echo "$DREPS" 2>&1 | grep -v -e 'connection timed out' -e 'connection reset by peer')
 
 # added "severe" errors in order to recognize e.g. docker issues, connectivity issues etc.
 SEVERE="$(echo "$LOG1H" 2>&1 | grep -i -e 'error:' -e 'fatal:' -e 'unexpected shutdown' -e 'fatal error' -e 'transport endpoint is not connected' -e 'Unable to read the disk' -e 'software caused connection abort' | grep -v -e 'emptying trash failed' -e '[[:blank:]]*INFO' -e '[[:blank:]]*FATAL' -e 'collector' -e 'piecestore' -e 'pieces error: filestore error: context canceled' -e 'piecedeleter' -e 'emptying trash failed' -e 'service ping satellite failed' -e 'timeout: no recent network activity' -e 'failed to settle orders for satellite' -e 'rpc client' -e 'manager closed: read tcp' -e 'connection timed out')"
@@ -549,7 +549,7 @@ SEVERE="$(echo "$LOG1H" 2>&1 | grep -i -e 'error:' -e 'fatal:' -e 'unexpected sh
 # count errors
 [[ "$VERBOSE" == "true" ]] && tmp_info="$(echo "$INFO" 2>&1 | grep '[[:blank:]]*INFO' -c)"
 tmp_fatal_errors="$(echo "$FATS" 2>&1 | grep '[[:blank:]]*FATAL' -c)"
-tmp_audits_failed="$(echo "$AUDS" 2>&1 | grep -E '[[:blank:]]*GET_AUDIT' | grep 'failed' -c)"
+tmp_audits_failed="$(echo "$AUDS" 2>&1 | grep -E 'GET_AUDIT' | grep 'failed' -c)"
 tmp_reps_failed="$(echo "$DREPS" 2>&1 | grep 'failed' -c)"
 tmp_rest_of_errors="$(echo "$ERRS" 2>&1 | grep '[[:blank:]]*ERROR' -c)"
 tmp_io_errors="$(echo "$ERRS" 2>&1 | grep '[[:blank:]]*ERROR' | grep -e 'timeout' -e 'connection reset' -e 'tcp connector failed' -e 'node rate limited by id' -c)"