What to check for LVM
We can check the state and the utilisation of the PVs and VGs: whether the state is open, closed, stale or syncd, and whether the utilisation has already maxed out. As LVM consists of many components, such as PP, PV, LV, VG and so on, the best way is to script the checks. For details of what the acronyms mean, please refer to AIX LVM concept and Disk structure.
My newbie demo script for checking LVM health. Gurus out there can really help me along: if you think I can do better, suggestions are welcome. :D
#!/bin/ksh
#
# FILENAME : checkLVM.ksh
# AUTHOR   : Victor Kwan
# EMAIL    : victorkk [AT] gmail [DOT] com
# PURPOSE  : To check the health of PV, VG and LV
#          : and alert sys admin if threshold is breached.
# DATE     : Feb 2011
#
# USAGE    : checkLVM.ksh PV_THRESHOLD EMAIL
#   PV_THRESHOLD  percentage of used space at or above which a PV is reported
#   EMAIL         recipient handed to mailx when anything was reported
#
# The script captures `lspv <pv>` and `lsvg <vg>` output into a work file and
# then picks values out of it by fixed line offsets relative to the
# "PV STATE" / "VG STATE" line, so it depends on the exact layout those
# commands print.  NOTE(review): offsets are taken unchanged from the original
# script -- confirm against the lspv/lsvg output of the target AIX level.
#

#
# Parameter setup
HOST=`hostname`
STAMP=`date '+%d%b%Y'`
OUTPUTFILE="checkLVM.PV.$HOST.$STAMP.output"
NOTIFICATION_MSG="checkLVM.PV.$HOST.$STAMP.message"
RULE="------------------------------"
isFOUND=0       # set to 1 as soon as any check reports something -> mail is sent
isERROR=0       # per-section flag, reset before each check

# Print the usage text on stderr and stop with a failure status.
usage() {
  printf "Usage: \n\t%s PV_THRESHOLD EMAIL\n\n" "$0" >&2
  exit 1
}

if [ $# -ne 2 ]; then
  usage
fi

PV_THRESHOLD=$1
EMAIL="$2"

# The threshold is compared numerically later on; reject anything else early.
case "$PV_THRESHOLD" in
  '' | *[!0-9.]*) usage ;;
esac

# Remove the work files on every exit path, including interrupts.
trap 'rm -f "$OUTPUTFILE" "$OUTPUTFILE.PV" "$OUTPUTFILE.VG" "$NOTIFICATION_MSG"' EXIT
trap 'exit 1' HUP INT TERM

# Print line number $2 of file $1 (prints nothing for a line number below 1).
nth_line() {
  [ "$2" -ge 1 ] 2>/dev/null || return 0
  sed -n "${2}p" "$1"
}

# Print what follows "(" in whitespace-separated field $3 of line $2 of
# file $1, e.g. the field "(69888" yields "69888".
paren_value() {
  nth_line "$1" "$2" | awk -v f="$3" '{ split($f, part, "("); print part[2] }'
}

# Print the three-line banner that introduces a section of the message.
# $1 is the section title; further arguments are printed as extra title lines.
section_banner() {
  printf "\n\n\n%s\n" "$RULE"
  while [ $# -gt 0 ]; do
    printf "%s\n" "$1"
    shift
  done
  printf "%s\n" "$RULE"
}

# Decode one `grep -n "PV STATE"` line ($1) and set PV_LINE, PV_STATUS, PV,
# VG, PV_SIZE and PV_USED from the matching lspv record in $OUTPUTFILE.
load_pv_record() {
  PV_LINE=${1%%:*}
  PV_STATUS=`printf '%s\n' "$1" | awk -F: '{ split($3, word, " "); print word[1] }'`
  # The "PHYSICAL VOLUME: ... VOLUME GROUP: ..." line sits two lines above.
  PV_HEADER=`nth_line "$OUTPUTFILE" $((PV_LINE - 2))`
  PV=`printf '%s\n' "$PV_HEADER" | awk '{print $3}'`
  VG=`printf '%s\n' "$PV_HEADER" | awk '{print $6}'`
  PV_SIZE=`paren_value "$OUTPUTFILE" $((PV_LINE + 3)) 4`
  PV_USED=`paren_value "$OUTPUTFILE" $((PV_LINE + 5)) 4`
  # Empty values would break the numeric printf/compare further down.
  PV_SIZE=${PV_SIZE:-0}
  PV_USED=${PV_USED:-0}
}

#
# Extract PV Information from ODM
# Start from an empty file so leftovers of an aborted run are not re-parsed.
: > "$OUTPUTFILE"
# Only the first column of each `lspv` row is the PV name; the rest is ignored.
lspv | while read -r PV JUNK; do
  printf "\n%s\n" "$PV" >> "$OUTPUTFILE"
  printf "%s\n" "--------------------------" >> "$OUTPUTFILE"
  lspv "$PV" >> "$OUTPUTFILE"
done

#
# Check for PV Errors
grep -n "PV STATE" "$OUTPUTFILE" > "$OUTPUTFILE.PV"
section_banner " Check for PV errors" > "$NOTIFICATION_MSG"

# Redirect into the loop (no pipeline) so the flags survive in any shell.
while read -r PVLINE; do
  load_pv_record "$PVLINE"
  # Any state that does not contain "active" counts as an error.
  case "$PV_STATUS" in
    *active*) continue ;;
  esac
  {
    printf "Volume Group: %s\n" "$VG"
    printf "Physical Volume: %s\n" "$PV"
    printf "Status | Size (Mb) | Used (Mb)\n"
    printf "%-10s | %-9d | %-8d\n\n" "$PV_STATUS" "$PV_SIZE" "$PV_USED"
  } >> "$NOTIFICATION_MSG"
  isFOUND=1
  isERROR=1
done < "$OUTPUTFILE.PV"

if [ "$isERROR" -eq 0 ]; then
  printf "All Physical Volumes are clean.\n" >> "$NOTIFICATION_MSG"
fi

# Check for PV full
isERROR=0
section_banner " Check for PV utilisation" " PV Threshold: $PV_THRESHOLD " >> "$NOTIFICATION_MSG"

while read -r PVLINE; do
  load_pv_record "$PVLINE"
  # Percentage is fractional, so both the maths and the comparison are done
  # in awk; a size of 0 is treated as 0% instead of dividing by zero.
  PV_PERCENTAGE=`awk -v used="$PV_USED" -v size="$PV_SIZE" \
    'BEGIN { if (size + 0 > 0) printf "%.8f\n", used / size * 100; else print 0 }'`
  if awk -v pct="$PV_PERCENTAGE" -v limit="$PV_THRESHOLD" \
    'BEGIN { exit !(pct + 0 >= limit + 0) }'; then
    {
      printf "Volume Group: %s\n" "$VG"
      printf "Physical Volume: %s\n" "$PV"
      printf "Status | Size (Mb) | Used (%%)\n"
      printf "%-10s | %-9d | %-5.2f\n\n" "$PV_STATUS" "$PV_SIZE" "$PV_PERCENTAGE"
    } >> "$NOTIFICATION_MSG"
    isFOUND=1
    isERROR=1
  fi
done < "$OUTPUTFILE.PV"

if [ "$isERROR" -eq 0 ]; then
  printf "All Physical Volume within threshold.\n" >> "$NOTIFICATION_MSG"
fi

# Extract VG Information from ODM
: > "$OUTPUTFILE"
rm -f "$OUTPUTFILE.PV"
lsvg | while read -r VG; do
  printf "\nListing %s:\n\n" "$VG" >> "$OUTPUTFILE"
  lsvg "$VG" >> "$OUTPUTFILE"
  lsvg -l "$VG" >> "$OUTPUTFILE"
done

# Check for VG errors in ODM
grep -n "VG STATE" "$OUTPUTFILE" > "$OUTPUTFILE.VG"
isERROR=0
section_banner " Check for VG errors" >> "$NOTIFICATION_MSG"

while read -r VGLINE; do
  VG_LINE=${VGLINE%%:*}
  VG_STATUS=`printf '%s\n' "$VGLINE" | awk '{print $3}'`
  # The "VOLUME GROUP: <name>" line is directly above the "VG STATE" line.
  VG=`nth_line "$OUTPUTFILE" $((VG_LINE - 1)) | awk '{print $3}'`
  VG_TOTALPP=`paren_value "$OUTPUTFILE" $((VG_LINE + 1)) 7`
  VG_USEDPP=`paren_value "$OUTPUTFILE" $((VG_LINE + 3)) 6`
  VG_TOTALPV=`nth_line "$OUTPUTFILE" $((VG_LINE + 5)) | awk '{print $3}'`
  VG_STALEROW=`nth_line "$OUTPUTFILE" $((VG_LINE + 6))`
  VG_STALEPV=`printf '%s\n' "$VG_STALEROW" | awk '{print $3}'`
  VG_STALEPP=`printf '%s\n' "$VG_STALEROW" | awk '{print $6}'`
  VG_ACTIVEPV=`nth_line "$OUTPUTFILE" $((VG_LINE + 7)) | awk '{print $3}'`

  # Missing figures default to 0 so the numeric tests and %d formats hold.
  VG_TOTALPP=${VG_TOTALPP:-0}
  VG_USEDPP=${VG_USEDPP:-0}
  VG_TOTALPV=${VG_TOTALPV:-0}
  VG_STALEPV=${VG_STALEPV:-0}
  VG_STALEPP=${VG_STALEPP:-0}
  VG_ACTIVEPV=${VG_ACTIVEPV:-0}

  # `lsvg -p` / `lsvg -l` print two heading lines before the member rows.
  PV_TABLE=`lsvg -p "$VG"`
  PV_NAME=`printf '%s\n' "$PV_TABLE" |
    awk 'NR > 2 { printf "%s%s", sep, $1; sep = " " }'`
  PV_STALENAME=`printf '%s\n' "$PV_TABLE" |
    awk 'NR > 2 && !/active/ { printf "%s%s", sep, $1; sep = " " }'`
  if [ -z "$PV_NAME" ]; then
    PV_NAME="NA"
  fi

  LV_TABLE=`lsvg -l "$VG"`
  LV_NUMOFMEMBERS=`printf '%s\n' "$LV_TABLE" |
    awk 'NR > 2 { n++ } END { print n + 0 }'`
  LV_NUMOFPROBLEM=`printf '%s\n' "$LV_TABLE" |
    awk 'NR > 2 && !/open\/syncd/ { n++ } END { print n + 0 }'`
  LV_NUMOFOPEN=$((LV_NUMOFMEMBERS - LV_NUMOFPROBLEM))
  LV_PROBLEMNAME=`printf '%s\n' "$LV_TABLE" |
    awk 'NR > 2 && !/open\/syncd/ { printf "%s%s", sep, $1; sep = " " }'`
  LV_NAME=`printf '%s\n' "$LV_TABLE" |
    awk 'NR > 2 { printf "%s%s", sep, $1; sep = " " }'`
  if [ -z "$LV_NAME" ]; then
    LV_NAME="NA"
  fi

  # A VG is reported when it has stale PPs, stale PVs, or any LV whose
  # `lsvg -l` state is not open/syncd.
  if [ "$VG_STALEPP" -ge 1 ] || [ "$VG_STALEPV" -ge 1 ] ||
    [ "$LV_NUMOFPROBLEM" -ge 1 ]; then
    {
      printf "Volume Group: %s\nVolume Group Status: %s\n\n" "$VG" "$VG_STATUS"
      printf "Total PP Size (Mb) | Used PP Size (Mb) | Stale PP\n"
      printf "%-18d | %-17d | %-5d \n\n" "$VG_TOTALPP" "$VG_USEDPP" "$VG_STALEPP"
      printf "Total PV | Active PV | All PV members\n"
      printf "%-8d | %-9d | %-s\n\n" "$VG_TOTALPV" "$VG_ACTIVEPV" "$PV_NAME"
      printf "Total LV | Open LV | All LV members\n"
      printf "%-8d | %-7d | %-s\n\n" "$LV_NUMOFMEMBERS" "$LV_NUMOFOPEN" "$LV_NAME"
    } >> "$NOTIFICATION_MSG"

    if [ "$VG_STALEPV" -ge 1 ]; then
      printf "Status of PV with problems:\n" >> "$NOTIFICATION_MSG"
      # Word splitting is intended: PV_STALENAME is a space-separated list.
      for i in $PV_STALENAME; do
        THIS_PV=`lspv "$i" | grep "PV STATE" | awk '{print $1,$2,$3}'`
        printf "%s (%s) \n" "$i" "$THIS_PV" >> "$NOTIFICATION_MSG"
      done
    fi
    printf "\n" >> "$NOTIFICATION_MSG"

    if [ "$LV_NUMOFPROBLEM" -ge 1 ]; then
      LV_FLAG=0     # becomes 1 once the "Status of LV" heading was written
      for i in $LV_PROBLEMNAME; do
        LV_INFO=`lslv "$i"`
        LV_STATEROW=`printf '%s\n' "$LV_INFO" | grep "LV STATE"`
        THIS_LV=`printf '%s\n' "$LV_STATEROW" | awk '{print $4,$5,$6}'`
        THIS_STATE=`printf '%s\n' "$LV_STATEROW" | awk '{print $6}'`
        THIS_DUMP=`printf '%s\n' "$LV_INFO" | grep "TYPE" |
          awk '{print $2}' | grep -c "sysdump"`
        # Listed when the state is anything but closed/syncd, or when it is
        # closed/syncd and the LV type is sysdump.  The original third branch
        # (boot LV not closed/syncd) could never be reached after the first
        # test, so it is folded away without changing what gets reported.
        if [ "$THIS_STATE" != "closed/syncd" ] || [ "$THIS_DUMP" = "1" ]; then
          if [ "$LV_FLAG" -eq 0 ]; then
            printf "Status of LV with problems:\n" >> "$NOTIFICATION_MSG"
            LV_FLAG=1
          fi
          printf "%s (%s) \n" "$i" "$THIS_LV" >> "$NOTIFICATION_MSG"
        fi
      done
    fi
    printf "\n\n" >> "$NOTIFICATION_MSG"
    isFOUND=1
    isERROR=1
  fi
done < "$OUTPUTFILE.VG"

if [ "$isERROR" -eq 0 ]; then
  printf "All Volume Groups are clean.\n" >> "$NOTIFICATION_MSG"
fi

# Mail the report only when at least one check found something.
if [ "$isFOUND" -eq 1 ]; then
  mailx -s "[$HOST] LVM Errors" "$EMAIL" < "$NOTIFICATION_MSG"
fi
No comments:
Post a Comment