Sunday, September 11, 2011

How to check LVM health in AIX

What to check for LVM

We can check the state and the utilisation of each PV and VG: whether the state is open, closed, stale or syncd, and whether the utilisation has already maxed out.

As LVM consists of many components such as PP, PV, LV, VG and so on, the best way is to script the checks. For details of what the acronyms mean, please refer to AIX LVM concept and Disk structure.

Here is my newbie demo script for checking LVM health. Gurus out there, please help me along with suggestions if you think I can do better. :D

#!/bin/ksh
# FILENAME   : checkLVM.ksh
# AUTHOR     : Victor Kwan
# EMAIL     : victorkk [AT] gmail [DOT] com
# PURPOSE    : To check the health of PV, VG and LV
#            : and alert sys admin if threshold is breached.
# DATE       : Feb 2011
# USAGE      : checkLVM.ksh <PV_THRESHOLD> <EMAIL>
#            :   PV_THRESHOLD - used-space percentage at or above which a
#            :                  physical volume is reported
#            :   EMAIL        - recipient of the report mail
#

#
# Parameter setup
# OUTPUTFILE holds the raw lspv/lsvg listings and NOTIFICATION_MSG holds the
# report body; both are created in the current working directory.
# isFOUND becomes 1 as soon as any check reports a finding (it decides whether
# a mail is sent); isERROR is the per-check flag that each check resets.
OUTPUTFILE="checkLVM.PV.$(hostname).$(date '+%d%b%Y').output"
NOTIFICATION_MSG="checkLVM.PV.$(hostname).$(date '+%d%b%Y').message"
isFOUND=0
isERROR=0

# Exactly two arguments are required. A wrong invocation is reported on
# stderr and ends with a non-zero status so callers can detect it.
if [ $# -ne 2 ]
then
        printf 'Usage: \n\t%s <PV_THRESHOLD> <EMAIL>\n\n' "$0" >&2
        exit 1
fi

PV_THRESHOLD=$1
EMAIL="$2"

# The threshold is later compared numerically against a percentage, so reject
# anything that is not made of digits (and an optional decimal point).
case "$PV_THRESHOLD" in
        ''|*[!0-9.]*)
                printf '%s: PV threshold must be a number, got "%s"\n' "$0" "$PV_THRESHOLD" >&2
                exit 1
                ;;
esac

#
# Extract PV Information from ODM
# For every row of the `lspv` summary, append a blank line, the summary row,
# a separator and the detailed `lspv <disk>` listing to $OUTPUTFILE. The
# checks that follow locate values by their line offset from the "PV STATE"
# line, so exactly three lines must precede each detailed listing.

# Start from an empty file: everything below appends, and a file left behind
# by an interrupted run would otherwise make every PV show up twice.
: > "$OUTPUTFILE"

# Only the first column of the summary row is the disk name; the rest of the
# row must not be handed to lspv as extra arguments.
lspv | while read -r PV_DISK PV_REST; do
        {
                printf '\n%s %s\n' "$PV_DISK" "$PV_REST"
                # The separator is passed as data: a format string that starts
                # with "-" is taken as an option by builtin printf.
                printf '%s\n' '--------------------------'
                lspv "$PV_DISK"
        } >> "$OUTPUTFILE"
done

#
# Check for PV Errors
# Starts the report in $NOTIFICATION_MSG and lists every physical volume whose
# "PV STATE" value does not contain "active". Values are read from $OUTPUTFILE
# relative to the "PV STATE" line number N reported by grep -n:
#   N-2  "PHYSICAL VOLUME: <pv> VOLUME GROUP: <vg>"  -> fields 3 and 6
#   N+3  total PPs line, size in Mb after "(" in field 4
#   N+5  used PPs line, size in Mb after "(" in field 4
# Sets isFOUND and isERROR to 1 when at least one such PV is found.
grep -n "PV STATE" "$OUTPUTFILE" > "$OUTPUTFILE.PV"

{
        printf '\n\n\n%s\n' '------------------------------'
        printf ' Check for PV errors\n'
        printf '%s\n' '------------------------------'
} > "$NOTIFICATION_MSG"

# The loop reads from a redirection rather than a pipeline so that the flag
# assignments are made in the current shell whatever shell runs the script.
while read -r PVLINE
do
        # PVLINE has the form "<line number>:PV STATE:   <state>".
        PV_LINE=${PVLINE%%:*}
        PV_STATUS=$(printf '%s\n' "$PVLINE" |
                awk -F: '{ sub(/^[ \t]+/, "", $3); sub(/[ \t]+$/, "", $3); print $3 }')

        # Any state containing "active" is considered healthy.
        case "$PV_STATUS" in
                *active*) continue ;;
        esac

        PV=$(awk -v n="$PV_LINE" 'NR == n - 2 { print $3; exit }' "$OUTPUTFILE")
        VG=$(awk -v n="$PV_LINE" 'NR == n - 2 { print $6; exit }' "$OUTPUTFILE")
        PV_SIZE=$(awk -v n="$PV_LINE" \
                'NR == n + 3 { split($4, part, "("); print part[2]; exit }' "$OUTPUTFILE")
        PV_USED=$(awk -v n="$PV_LINE" \
                'NR == n + 5 { split($4, part, "("); print part[2]; exit }' "$OUTPUTFILE")

        {
                printf 'Volume Group: %s\n' "$VG"
                printf 'Physical Volume: %s\n' "$PV"
                printf 'Status     | Size (Mb) | Used (Mb)\n'
                # Missing sizes are shown as 0 instead of breaking the %d conversions.
                printf '%-10s | %-9d | %-8d\n\n' "$PV_STATUS" "${PV_SIZE:-0}" "${PV_USED:-0}"
        } >> "$NOTIFICATION_MSG"
        isFOUND=1
        isERROR=1
done < "$OUTPUTFILE.PV"

if [ "$isERROR" = 0 ]
then
        printf 'All Physical Volumes are clean.\n' >> "$NOTIFICATION_MSG"
fi

# Check for PV full
# Appends to $NOTIFICATION_MSG every physical volume whose used space, as a
# percentage of its total size, is at or above $PV_THRESHOLD. Values are read
# from $OUTPUTFILE relative to the "PV STATE" line number N reported by grep -n:
#   N-2  "PHYSICAL VOLUME: <pv> VOLUME GROUP: <vg>"  -> fields 3 and 6
#   N+3  total PPs line, size in Mb after "(" in field 4
#   N+5  used PPs line, size in Mb after "(" in field 4
# Sets isFOUND and isERROR to 1 when at least one PV breaches the threshold.

# pv_usage_pct USED SIZE
# Prints USED / SIZE * 100 with eight decimals. Prints 0.00000000 when SIZE
# is missing or not positive, so a division by zero can never happen.
pv_usage_pct() {
        awk -v used="$1" -v size="$2" 'BEGIN {
                if (size + 0 <= 0) { print "0.00000000"; exit }
                printf "%.8f\n", used / size * 100
        }'
}

# pv_over_threshold PCT THRESHOLD
# Returns 0 when PCT >= THRESHOLD, 1 otherwise. The comparison is done in awk
# because the percentage is a decimal and the shell's -ge only takes integers.
pv_over_threshold() {
        awk -v pct="$1" -v thr="$2" 'BEGIN { exit !(pct + 0 >= thr + 0) }'
}

grep -n "PV STATE" "$OUTPUTFILE" > "$OUTPUTFILE.PV"
isERROR=0

{
        printf '\n\n\n%s\n' '------------------------------'
        printf ' Check for PV utilisation\n'
        printf ' PV Threshold: %s \n' "$PV_THRESHOLD"
        printf '%s\n' '------------------------------'
} >> "$NOTIFICATION_MSG"

# The loop reads from a redirection rather than a pipeline so that the flag
# assignments are made in the current shell whatever shell runs the script.
while read -r PVLINE
do
        # PVLINE has the form "<line number>:PV STATE:   <state>".
        PV_LINE=${PVLINE%%:*}
        PV_STATUS=$(printf '%s\n' "$PVLINE" |
                awk -F: '{ sub(/^[ \t]+/, "", $3); sub(/[ \t]+$/, "", $3); print $3 }')
        PV=$(awk -v n="$PV_LINE" 'NR == n - 2 { print $3; exit }' "$OUTPUTFILE")
        VG=$(awk -v n="$PV_LINE" 'NR == n - 2 { print $6; exit }' "$OUTPUTFILE")
        PV_SIZE=$(awk -v n="$PV_LINE" \
                'NR == n + 3 { split($4, part, "("); print part[2]; exit }' "$OUTPUTFILE")
        PV_USED=$(awk -v n="$PV_LINE" \
                'NR == n + 5 { split($4, part, "("); print part[2]; exit }' "$OUTPUTFILE")
        PV_PERCENTAGE=$(pv_usage_pct "$PV_USED" "$PV_SIZE")

        if pv_over_threshold "$PV_PERCENTAGE" "$PV_THRESHOLD"
        then
                {
                        printf 'Volume Group: %s\n' "$VG"
                        printf 'Physical Volume: %s\n' "$PV"
                        printf 'Status     | Size (Mb) | Used (%%)\n'
                        printf '%-10s | %-9d | %-5.2f\n\n' "$PV_STATUS" "${PV_SIZE:-0}" "$PV_PERCENTAGE"
                } >> "$NOTIFICATION_MSG"
                isFOUND=1
                isERROR=1
        fi
done < "$OUTPUTFILE.PV"

if [ "$isERROR" = 0 ]
then
        printf 'All Physical Volume within threshold.\n' >> "$NOTIFICATION_MSG"
fi

# The PV listings are no longer needed; $OUTPUTFILE is rebuilt below with
# the volume group listings.
for WORKFILE in $OUTPUTFILE $OUTPUTFILE.PV
do
        rm $WORKFILE
done

# Extract VG Information from ODM
# For each volume group named by `lsvg`, append a "Listing <vg>:" banner, the
# `lsvg <vg>` summary and the `lsvg -l <vg>` logical volume table.
lsvg | while read VGNAME
do
        {
                print "\nListing $VGNAME:\n"
                lsvg $VGNAME
                lsvg -l $VGNAME
        } >> $OUTPUTFILE
        # Disabled: per-LV detail listing.
        #lsvg -l $VGNAME | egrep -v "^$VGNAME:" | egrep -v "^LV NAME" | while read LV JUNK
        #do
        #       lslv $LV >> $OUTPUTFILE
        #done
done

# Check for VG errors in ODM
# Appends to $NOTIFICATION_MSG a report for every volume group that has stale
# PPs, stale PVs, or logical volumes whose `lsvg -l` row does not show
# "open/syncd". Values are read from $OUTPUTFILE relative to the "VG STATE"
# line number N reported by grep -n:
#   N-1  volume group name (field 3)
#   N+1  total PP size in Mb, after "(" in field 7
#   N+3  used PP size in Mb, after "(" in field 6
#   N+5  total PVs (field 3)
#   N+6  stale PVs (field 3) and stale PPs (field 6)
#   N+7  active PVs (field 3)
# Sets isFOUND and isERROR to 1 when at least one volume group is reported.

# lvm_field LINE FIELD
# Prints whitespace-separated field FIELD of line LINE of $OUTPUTFILE.
lvm_field() {
        awk -v n="$1" -v f="$2" 'NR == n { print $f; exit }' "$OUTPUTFILE"
}

# lvm_paren_field LINE FIELD
# Same as lvm_field, but prints only what follows "(" inside the field.
lvm_paren_field() {
        awk -v n="$1" -v f="$2" 'NR == n { split($f, part, "("); print part[2]; exit }' "$OUTPUTFILE"
}

grep -n "VG STATE" "$OUTPUTFILE" > "$OUTPUTFILE.VG"
isERROR=0

{
        printf '\n\n\n%s\n' '------------------------------'
        printf ' Check for VG errors\n'
        printf '%s\n' '------------------------------'
} >> "$NOTIFICATION_MSG"

# The loop reads from a redirection rather than a pipeline so that the flag
# assignments are made in the current shell whatever shell runs the script.
while read -r VGLINE
do
        # VGLINE has the form "<line number>:VG STATE:   <state>   PP SIZE: ...".
        VG_LINE=${VGLINE%%:*}
        VG_STATUS=$(printf '%s\n' "$VGLINE" | awk '{ print $3 }')
        VG=$(lvm_field $((VG_LINE - 1)) 3)
        VG_TOTALPP=$(lvm_paren_field $((VG_LINE + 1)) 7)
        VG_USEDPP=$(lvm_paren_field $((VG_LINE + 3)) 6)
        VG_TOTALPV=$(lvm_field $((VG_LINE + 5)) 3)
        VG_STALEPV=$(lvm_field $((VG_LINE + 6)) 3)
        VG_STALEPP=$(lvm_field $((VG_LINE + 6)) 6)
        VG_ACTIVEPV=$(lvm_field $((VG_LINE + 7)) 3)

        # Member PVs: `lsvg -p` prints two header lines before the PV rows.
        VG_PVS=$(lsvg -p "$VG")
        PV_NAME=$(printf '%s\n' "$VG_PVS" | awk 'NR > 2 { print $1 }' | xargs)
        PV_STALENAME=$(printf '%s\n' "$VG_PVS" | awk 'NR > 2 && !/active/ { print $1 }' | xargs)
        PV_NAME=${PV_NAME:-NA}

        # Member LVs: `lsvg -l` also prints two header lines before the LV rows.
        VG_LVS=$(lsvg -l "$VG")
        LV_NUMOFMEMBERS=$(printf '%s\n' "$VG_LVS" | awk 'NR > 2 { c++ } END { print c + 0 }')
        LV_NUMOFPROBLEM=$(printf '%s\n' "$VG_LVS" |
                awk 'NR > 2 && !/open\/syncd/ { c++ } END { print c + 0 }')
        LV_NUMOFOPEN=$((LV_NUMOFMEMBERS - LV_NUMOFPROBLEM))
        LV_PROBLEMNAME=$(printf '%s\n' "$VG_LVS" |
                awk 'NR > 2 && !/open\/syncd/ { print $1 }' | xargs)
        LV_NAME=$(printf '%s\n' "$VG_LVS" | awk 'NR > 2 { print $1 }' | xargs)
        LV_NAME=${LV_NAME:-NA}

        # Nothing stale and every LV open/syncd: this volume group is clean.
        # Missing counters are treated as 0 so the tests never see an empty operand.
        if [ "${VG_STALEPP:-0}" -lt 1 ] && [ "${VG_STALEPV:-0}" -lt 1 ] &&
                [ "$LV_NUMOFPROBLEM" -lt 1 ]
        then
                continue
        fi

        {
                printf 'Volume Group: %s\nVolume Group Status: %s\n\n' "$VG" "$VG_STATUS"

                printf 'Total PP Size (Mb) | Used PP Size (Mb) | Stale PP\n'
                printf '%-18d | %-17d | %-5d \n\n' \
                        "${VG_TOTALPP:-0}" "${VG_USEDPP:-0}" "${VG_STALEPP:-0}"

                printf 'Total PV | Active PV | All PV members\n'
                printf '%-8d | %-9d | %-s\n\n' "${VG_TOTALPV:-0}" "${VG_ACTIVEPV:-0}" "$PV_NAME"

                printf 'Total LV | Open LV | All LV members\n'
                printf '%-8d | %-7d | %-s\n\n' "$LV_NUMOFMEMBERS" "$LV_NUMOFOPEN" "$LV_NAME"

                if [ "${VG_STALEPV:-0}" -ge 1 ]
                then
                        printf 'Status of PV with problems:\n'
                        for i in $PV_STALENAME
                        do
                                THIS_PV=$(lspv "$i" | awk '/PV STATE/ { print $1, $2, $3 }')
                                printf '%s (%s) \n' "$i" "$THIS_PV"
                        done
                fi
                printf '\n'

                if [ "$LV_NUMOFPROBLEM" -ge 1 ]
                then
                        LV_FLAG=0
                        for i in $LV_PROBLEMNAME
                        do
                                LV_INFO=$(lslv "$i")
                                THIS_LV=$(printf '%s\n' "$LV_INFO" |
                                        awk '/LV STATE/ { print $4, $5, $6 }')
                                THIS_STATE=$(printf '%s\n' "$LV_INFO" |
                                        awk '/LV STATE/ { print $6 }')
                                THIS_DUMP=$(printf '%s\n' "$LV_INFO" |
                                        awk '/TYPE/ && $2 ~ /sysdump/ { c++ } END { print c + 0 }')

                                # An LV is listed when its state is anything other than
                                # closed/syncd, or when it is a closed/syncd sysdump LV.
                                # The original also had a third branch for boot LVs that
                                # could never run: it required a state other than
                                # closed/syncd, which the first branch had already taken.
                                # NOTE(review): confirm whether closed boot/sysdump LVs
                                # were meant to be exempt rather than reported.
                                if [ "$THIS_STATE" != "closed/syncd" ] || [ "$THIS_DUMP" -eq 1 ]
                                then
                                        if [ "$LV_FLAG" -eq 0 ]
                                        then
                                                printf 'Status of LV with problems:\n'
                                                LV_FLAG=1
                                        fi
                                        printf '%s (%s) \n' "$i" "$THIS_LV"
                                fi
                        done
                fi
                printf '\n\n'
        } >> "$NOTIFICATION_MSG"
        isFOUND=1
        isERROR=1
done < "$OUTPUTFILE.VG"

if [ "$isERROR" = 0 ]
then
        printf 'All Volume Groups are clean.\n' >> "$NOTIFICATION_MSG"
fi

# Mail the report only when one of the checks flagged something.
case $isFOUND in
1)
        MAIL_SUBJECT="[$(hostname)] LVM Errors"
        cat $NOTIFICATION_MSG | mailx -s "$MAIL_SUBJECT" $EMAIL
        ;;
esac

# Remove the work files and the report body.
for WORKFILE in $OUTPUTFILE $OUTPUTFILE.VG $NOTIFICATION_MSG
do
        rm $WORKFILE
done

No comments: