VaKeR CYBER ARMY
Logo of a company Server : Apache
System : Linux host44.registrar-servers.com 4.18.0-513.18.1.lve.2.el8.x86_64 #1 SMP Sat Mar 30 15:36:11 UTC 2024 x86_64
User : vapecompany ( 2719)
PHP Version : 7.4.33
Disable Function : NONE
Directory :  /lib64/nagios/plugins/nccustom/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : //lib64/nagios/plugins/nccustom/check_smart_logs.sh
#!/bin/bash
#####################################################
# check smartctl output for errors in selftest logs #
#                                                   #
#           Created by Bogdan Kukharskiy            #
#                    Namecheap                      #
#####################################################
# This script is for checking if there are errors or failures in smart selftest logs, with the possibility to exclude drives from check (by serial number)
#
# Usage: "check_smart_logs.sh [-h] [-c path_to_smartd.conf] [-x exclude drives (can be as list separated by ',')] [-s include short test] [-p NVME wear-out percent threshold] [-l allowed NVME error log entries] [-v verbose output]"
#
# Returns the nagios native status codes:
# Nagios Status
# 0 = OK (All drives have completed smartctl selftest without errors)
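# 1 = WARNING (one or more drives have no self-tests logged, or an NVME drive's wear-out percentage exceeds the threshold)
# 2 = CRITICAL (one or more drives have failed self-tests, or an NVME drive fails its health check or exceeds the allowed error log entries)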
# 3 = UNKNOWN (Wrong usage)
#
# Now with NVME support
#

# Default settings; several of them are overridden by command-line options.
declare -a EXCLUDE_ARRAY=() #serial numbers of drives to skip (filled by -x)
RXSAS="Background long" #default regexp pattern to grep tests (SAS disks)
RXATA="Extended offline" #default regexp pattern to grep tests (ATA disks)
OKSAS="Completed|test in progress" #default regexp pattern for passed selftest (SAS disks) or test in progress
OKATA="Completed without error|routine in progress" #default regexp pattern for passed selftest (ATA disks) or test in progress
NVME_HEALTH="PASSED" #default regexp pattern for passed selftest (NVME disks)
NVME_PERCENT=98 #default percent of NVME usage (wear-out), it can be more than 100, up to 255
ERR_LOG_ENTRIES_param=0 #default number of tolerated NVME error log entries (set by -l); explicit so it is never inherited from the environment
verbose=0
LOGTYPE="selftest" #default type of the log, in some cases it is needed to use 'xselftest'

## USAGE MESSAGE
# Prints the command-line help text to stdout, one array element per output line.
usage() {
    local -a help_lines=(
        "usage: $0 options"
        ""
        "Now with NVME support"
        ""
        "This script is for checking if there are errors or failures in smart selftest logs, with the possibility to exclude drives from check (by serial number)"
        "OPTIONS:"
        "   -h Show this message"
        "   -x exclude drives by serial number (can be as list separated by ',')"
        "   -c path_to_smartd.conf, default '/etc/smartmontools/smartd.conf' and '/etc/smartd.conf'"
        "   -s include short tests"
        "   -p percent of NVME usage (wear-out), it can be more than 100, up to 255 (default 98)"
        "   -l quantity ERR LOG ENTRIES"
        "   -v be more verbose"
        ""
    )
    printf '%s\n' "${help_lines[@]}"
}

# Checks whether a value exists in a list.
# Arguments: every argument except the last is a list element; the LAST
#            argument is the value to look for.
# Outputs:   "y" on stdout when the value is found, "n" otherwise.
# Returns:   0 when found, 1 when not found.
function contains() {
    local value=${!#}   # indirect expansion: the last positional parameter
    local i             # keep the loop counter out of the caller's scope
    for ((i = 1; i < $#; i++)); do
        # Quoted right-hand side: literal string comparison, no globbing
        if [[ "${!i}" == "${value}" ]]; then
            echo "y"
            return 0
        fi
    done
    echo "n"
    return 1
}

## FETCH ARGUMENTS
# Parse command-line options into the global settings.
# Options taking a value: -x (serial list), -c (smartd.conf path),
# -p (NVME wear-out threshold), -l (allowed NVME error log entries).
while getopts "hvsx:c:p:l:" OPTION; do
        case "${OPTION}" in
                h)
                        usage
                        exit 3
                        ;;
                x)
                        # 'IFS=,' is a prefix assignment, so it only affects this
                        # read; the global IFS is untouched and needs no reset.
                        IFS=, read -r -a EXCLUDE_ARRAY <<<"${OPTARG}"
                        ;;
                c)
                        SMARTDPATH=${OPTARG}
                        ;;
                p)
                        NVME_PERCENT=${OPTARG}
                        ;;
                l)
                        ERR_LOG_ENTRIES_param=${OPTARG}
                        ;;
                s)
                        # Extend the grep patterns so short self-tests are matched too
                        RXSAS="${RXSAS}\|Background short"
                        RXATA="${RXATA}\|Short offline"
                        ;;
                v)
                        verbose=1
                        ;;
                \?)
                        echo "No reasonable options found!"
                        exit 3
                        ;;
        esac
done

## CHECK ARGUMENTS
# Validate the parsed options; every failure exits with 3 (Nagios UNKNOWN).

# No -c given: prefer /etc/smartmontools/smartd.conf, fall back to /etc/smartd.conf
if [[ -z ${SMARTDPATH} ]]; then
        if [ -f /etc/smartmontools/smartd.conf ]; then
                SMARTDPATH="/etc/smartmontools/smartd.conf"
        else
                SMARTDPATH="/etc/smartd.conf"
        fi
fi

if [[ ! -e ${SMARTDPATH} ]]; then
        echo "Error! File '${SMARTDPATH}' does not exists"
        exit 3
fi

if [[ ! -f ${SMARTDPATH} ]]; then
        echo "Error! '${SMARTDPATH}' is not a file"
        exit 3
fi

if [[ ! -r ${SMARTDPATH} ]]; then
        echo "Error! File '${SMARTDPATH}' is not readable"
        exit 3
fi

# Require plain decimal digits before any arithmetic comparison: [[ -lt/-gt ]]
# evaluates its operands as arithmetic expressions, so a value such as '12abc'
# would raise an error that is treated as "false" and slip through the check.
# The '10#' prefix forces base 10 so a leading zero is not read as octal.
if ! [[ ${NVME_PERCENT} =~ ^[0-9]+$ ]] || (( 10#${NVME_PERCENT} < 1 || 10#${NVME_PERCENT} > 255 )); then
        echo "Error! Threshold for percent of NVME usage (wear-out) should be an integer from 1 to 255; ${NVME_PERCENT}"
        exit 3
fi
NVME_PERCENT=$((10#${NVME_PERCENT}))

# The -l value is optional; validate it only when it was supplied.
if [[ -n ${ERR_LOG_ENTRIES_param} ]]; then
        if ! [[ ${ERR_LOG_ENTRIES_param} =~ ^[0-9]+$ ]]; then
                echo "Error! Number of allowed NVME error log entries should be a non-negative integer; ${ERR_LOG_ENTRIES_param}"
                exit 3
        fi
        ERR_LOG_ENTRIES_param=$((10#${ERR_LOG_ENTRIES_param}))
fi

## MAIN ROUTINE

#echo "-= DEBUG start =-"
#while read FLINE; do 
#        DRIVE=$(echo ${FLINE} | cut -d' ' -f1,2,3)
#        if [[ ${DRIVE} == *"nvme"* ]]; then
#               DRIVE=$(echo ${DRIVE} | cut -d' ' -f2)
#               echo "NVME drive found:${DRIVE}:"
#               echo -n "FOR ${DRIVE} : "
#               SERIAL=$(sudo smartctl -i ${DRIVE} | grep -i "Serial number" | awk '{print$3}')
#               echo ${SERIAL}
#       fi
#done < ${SMARTDPATH}
#echo "EA: ${EXCLUDE_ARRAY[*]}"
#echo "RXATA: ${RXATA}"
#echo "RXSAS: ${RXSAS}"
#echo "-= DEBUG end =-"

declare -a WARNING_ARRAY
declare -a CRITICAL_ARRAY

# Walk the smartd configuration line by line; each remaining line is treated as
# one drive entry. Serials of problematic drives are collected in WARNING_ARRAY
# and CRITICAL_ARRAY. '|| [[ -n ${FLINE} ]]' keeps a final line that has no
# trailing newline.
while read -r FLINE || [[ -n ${FLINE} ]]; do
        # Blank lines and '#' comment lines do not name a device; skip them
        # instead of querying smartctl with garbage and logging an empty serial.
        if [[ -z ${FLINE} || ${FLINE} == "#"* ]]; then
                continue
        fi

        # First three space-separated fields are passed to smartctl as the
        # device specification; for lines mentioning nvme only field 2 is used.
        DRIVELINE=$(cut -d' ' -f1,2,3 <<<"${FLINE}")
        if [[ ${DRIVELINE} == *"nvme"* ]]; then
                DRIVELINE=$(cut -d' ' -f2 <<<"${DRIVELINE}")
        fi
        # NOTE: ${DRIVELINE} is intentionally unquoted below so that its fields
        # are passed to smartctl as separate arguments.
        SERIAL=$(sudo smartctl -i ${DRIVELINE} | grep -i "Serial number" | awk '{print$3}')

        if [[ $(contains "${EXCLUDE_ARRAY[@]}" "${SERIAL}") == "n" ]]; then  #checking that drive serial number is NOT in excluded array
                if [[ ${DRIVELINE} == *"nvme"* ]]; then
                # NVME device found
                        # Query the full report once and extract both values from it
                        NVME_REPORT=$(sudo smartctl -a ${DRIVELINE})
                        PERCENT_USED=$(grep -i "Percentage Used" <<<"${NVME_REPORT}" | awk '{print$3}' | cut -d"%" -f1)
                        OVERALL_HEALTH=$(sudo smartctl -H ${DRIVELINE} | grep "SMART overall-health self-assessment test result" | cut -d":" -f2 | tr -d " ")
                        ERR_LOG_ENTRIES=$(grep -i "Error Information Log Entries" <<<"${NVME_REPORT}" | awk '{print$5}')
                        # Keep digits only: a counter printed with thousands separators
                        # (e.g. "1,089") would otherwise be an invalid arithmetic operand.
                        # NOTE(review): assumes smartctl may group digits this way - confirm.
                        ERR_LOG_ENTRIES=${ERR_LOG_ENTRIES//[^0-9]/}
                        if [[ ${verbose} == 1 ]]; then
                                echo "FOR ${DRIVELINE} |Serial: ${SERIAL}| Health-check: ${OVERALL_HEALTH}| Wear-out,%: ${PERCENT_USED}| ErrLogEntries: ${ERR_LOG_ENTRIES}"
                        fi
                        # Missing values count as 0, which is how empty operands were evaluated before
                        if [[ ${ERR_LOG_ENTRIES:-0} -gt ${ERR_LOG_ENTRIES_param:-0} ]] || ! [[ ${OVERALL_HEALTH} =~ ${NVME_HEALTH} ]]; then
                                CRITICAL_ARRAY+=("${SERIAL}")  #adding serial to critical array
                        elif [[ ${PERCENT_USED} -gt ${NVME_PERCENT} ]]; then
                                WARNING_ARRAY+=("${SERIAL}")  #adding serial to warning array
                        fi
                # Non-NVME devices
                else
                        #let's check if the device supports simple selftest log;
                        #the log is fetched once and reused for both pattern searches
                        SELFTEST_LOG=$(sudo smartctl -l selftest ${DRIVELINE})
                        if grep -q -i "SMART Self-test Log not supported" <<<"${SELFTEST_LOG}"; then
                            LOGTYPE="xselftest"
                            SELFTEST_LOG=$(sudo smartctl -l "${LOGTYPE}" ${DRIVELINE})
                        else
                            LOGTYPE="selftest"
                        fi
                        # First matching line for each pattern
                        RESULTSTRATA=$(grep -e "${RXATA}" <<<"${SELFTEST_LOG}" | head -n 1)
                        RESULTSTRSAS=$(grep -e "${RXSAS}" <<<"${SELFTEST_LOG}" | head -n 1)
                        if [[ ${verbose} == 1 ]]; then
                                echo "FOR ${DRIVELINE} |${SERIAL}| : ${RESULTSTRATA} ${RESULTSTRSAS}"
                        fi
                        if [[ -z ${RESULTSTRATA} ]] && [[ -z ${RESULTSTRSAS} ]] ; then  #no acceptable results were found
                                WARNING_ARRAY+=("${SERIAL}")  #adding serial to warning array
                        elif ! [[ ${RESULTSTRATA} =~ ${OKATA} ]] && ! [[ ${RESULTSTRSAS} =~ ${OKSAS} ]]; then
                                CRITICAL_ARRAY+=("${SERIAL}")  #adding serial to critical array
                        fi
                fi
        fi
done < "${SMARTDPATH}"

# Report the aggregated result and exit with the matching Nagios status code.
# CRITICAL findings take precedence over WARNING ones.
if (( ${#CRITICAL_ARRAY[@]} > 0 )); then
        echo "CRITICAL! Following drives have failed SMART selftest: ${CRITICAL_ARRAY[*]}"
        exit 2
fi

if (( ${#WARNING_ARRAY[@]} > 0 )); then
        echo "WARNING! Following drives have no SMART selftest logged or NVME devices close to end of usage: ${WARNING_ARRAY[*]}"
        exit 1
fi

# Nothing was flagged: report OK and, when applicable, list the excluded serials.
echo "OK! All drives passed SMART selftest succesfully"
(( ${#EXCLUDE_ARRAY[@]} == 0 )) || echo "Drives excluded: ${EXCLUDE_ARRAY[*]}"
exit 0

VaKeR 2022