Difference between revisions of "Nagios check mk"

From Proxmox VE
Jump to: navigation, search
m
(Updated for use in Proxmox VE 5&6; based on https://github.com/Adorfer/check_mk-1/blob/patch-1/qemu_kvm/checks/qemu and https://github.com/Adorfer/check_mk-1/blob/patch-1/qemu_kvm/checks/qemu)
 
Line 1: Line 1:
This is a check for the Nagios check addon "check_mk" (http://mathias-kettner.de/check_mk.html). The idea is that only VM's that are running while doing the inventory are getting monitored. If a VM is added or removed do a:
+
This is a check for the Nagios check addon "check_mk" (http://mathias-kettner.de/check_mk.html). The idea is that only VM's that are running while doing the inventory are getting monitored. If a VM is added or removed do "Full Scan"
  
check_mk -II tcp proxmoxservername
+
The check also collects performance data for %CPU load caused by the VM and % of total memory consumption of the VM as well as it's reserved virtual memory size (VSZ) of the related kvm process.
 
 
The check also collects performance data for %CPU load caused by the VM and % of memory consumption of the VM.
 
 
===Client-Check===
 
===Client-Check===
 
/usr/lib/check_mk_agent/plugins/mh_qemu
 
/usr/lib/check_mk_agent/plugins/mh_qemu
 
<pre>
 
<pre>
#!/bin/sh
+
#!/bin/bash
 +
#/usr/lib/check_mk_agent/plugins
  
# check_mk check f. LSI Controller
+
# based upon 'qemu' from
 +
# 12/2010 Matthias Henze
 +
# Lizenz: GPL v2
 
#
 
#
# 10/2010 Matthias Henze
+
# updated for libvirtd (virsh) by
# Lizenz: GPL v2
+
# Jonathan Mills 09/2011
 
+
#
# sampel output
+
# updated by
#       101 oracle              stopped    1024              8.00 0       
+
# Christian Burmeister 05/2015
#       102 server              running    3072              50.00 2634     
+
# updated by
#       103 monitoring          running    2048              32.00 5139     
+
# adorfer 01/2017 for proxmox 4 pve
#       104 nagios              running    1024              32.00 9030
+
# updated by proxmox 07/2019 for proxmox ve 5&6
  
 
if which qm >/dev/null ; then
 
if which qm >/dev/null ; then
    echo '<<<qemu>>>'
+
        echo '<<<qemu>>>'
    qm list | grep -v VMID | while read L
+
        qm list | grep -v VMID | while read L
    do
+
        do
        PID=$(echo $L | awk -- '{print $6}')
+
                if [[ ! -z $L ]]; then
        if [ $PID -gt 0 ]; then
+
                       
            DATA=$(top -p $PID -n 1 -b | tail -n 2 | head -n 1 | awk -- '{print $9" "$10}')
+
                        ID=$(echo $L | awk '{print $1}')
        else
+
                        XNAME=$(echo $L | awk '{$1=$NF=$(NF-1)=$(NF-2)=$(NF-3)="";print $0}')
            DATA=""
+
                        NAME=`echo $XNAME | sed 's/ /_/g'`
        fi
+
                        STATE=$(echo $L | awk '{print $(NF-3)}')
        echo $L" "$DATA
+
                        PID=$(ps aux | grep kvm | grep "id $ID" | head -1 | tail -1| awk '{print $2}')
    done
+
                        if [[ ! -z $PID ]] && [ "$PID" -gt "0" ]; then
 +
                                PS=$(ps aux | grep kvm | grep $PID | head -1|tail -1)
 +
                                MEM=$(echo $PS|awk -- '{print $5}')
 +
                                MEM=$(echo $MEM / 1024 | bc)
 +
                                DATA=$(top -p $PID -n 1 -b | tail -1)
 +
                                PCPU=$(echo $DATA | awk -- '{print $9}'|tr , .)
 +
                                PMEM=$(echo $DATA | awk -- '{print $10}'|tr , .)
 +
                                MCPU=$(echo $PS | sed 's/.*maxcpus=\([^ ]*\)\ .*/\1/' )
 +
                                RCPU=$(echo "scale=1; $PCPU / $MCPU"| bc)
 +
                        else
 +
                                MEM=""
 +
                                RCPU=""
 +
                                PMEM=""
 +
                        fi
 +
                        echo $ID" "$NAME" "$STATE" "$MEM" "$RCPU" "$PMEM
 +
                fi
 +
        done
 
fi
 
fi
 +
 
</pre>
 
</pre>
  
 
===Plugin===
 
===Plugin===
/omd/versions/0.44/share/check_mk/checks/qemu
+
/omd/versions/default/share/check_mk/checks/qemu
  
 
The path asumes the use of OMD (Open source Monitoring Distribution).
 
The path asumes the use of OMD (Open source Monitoring Distribution).
Line 43: Line 61:
 
# -*- encoding: utf-8; py-indent-offset: 4 -*-
 
# -*- encoding: utf-8; py-indent-offset: 4 -*-
  
# check_mk check f. LSI Controlle
+
# based upon 'qemu' from
#
 
 
# 12/2010 Matthias Henze
 
# 12/2010 Matthias Henze
 
# Lizenz: GPL v2
 
# Lizenz: GPL v2
 +
#
 +
# updated for libvirtd (virsh) by
 +
# Jonathan Mills 09/2011
 +
#
 +
# updated by
 +
# Christian Burmeister 05/2015
 +
 +
# updated by Proxmox 07/2019
  
  
 
# Example output from agent:
 
# Example output from agent:
#<<<qemu>>>
+
# <<<qemu>>>
#     VMID NAME                STATUS    MEM(MB)    BOOTDISK(GB) PID      CUP  RAM
+
# 4 i-4B9008BE running 2048 4.0 2.7
#      101 oracle              stopped    1024              8.00 0        0    0
+
# 5 i-44F608B6 running 2048 0.0 0.7
#      102 server              running    3072              50.00 2634      0    0
+
 
#       103 monitoring          running   2048             32.00 5139      0    0
 
#      104 nagios              running    1024              32.00 9030      0    0
 
  
  
Line 64: Line 87:
 
     for line in info:
 
     for line in info:
 
         if line[2] == "running":  # only VM's running while inventory are monitored !
 
         if line[2] == "running":  # only VM's running while inventory are monitored !
             vm = line[0]
+
             vm = line[1] # we want to capture hostname, not vm id here
 +
 
 +
    # Fix annoying OpenStack misnaming of VMs
 +
    name = vm.split('-')
 +
    if name[0] == "instance":
 +
name[0] = "i"
 +
    vm = '-'.join(name)
 +
    ##
 +
 
 
             inventory.append( (vm, None) )
 
             inventory.append( (vm, None) )
 
     return inventory
 
     return inventory
  
 
# check
 
# check
def check_qemu(item, param, info):
+
def check_qemu(name, param, info):
 +
 
 
     for line in info:
 
     for line in info:
 
         perfdata = []
 
         perfdata = []
        if line[0] == item:
+
 
             name = line[1]
+
vm = line[1]
 +
host = vm.split('-')
 +
if host[0] == "instance":
 +
    host[0] = "i"
 +
vm = '-'.join(host)
 +
 
 +
        if vm == name:
 +
             item = line[0]
 
             status = line[2]
 
             status = line[2]
             ram = line[4]
+
             assigned_mem = line[3]
            infotext = "%s  (id: %s, name: %s ram: %s MB)" % (status, item, name, ram)
+
 
 +
    infotext = "%s  (id: %s" % (status, item)
 +
 
 
             if status == "running":
 
             if status == "running":
                 perfdata.append( ( "CPU%", int(round(float(line[6]))) ) )
+
# 4 i-4B9008BE running 2048 4.0 2.7
                perfdata.append( ( "RAM%", int(round(float(line[7]))) ) )
+
                 if len(line) == 6:
                return (0, "OK - status is " + infotext, perfdata)
+
    current_cpu = int(round(float(line[4])))
 +
    infotext += ", CPU: %s%%" % (current_cpu)
 +
                    perfdata.append( ( "cpu_%", current_cpu ) )
 +
 
 +
    current_mem = int(round(float(line[5])))
 +
    infotext += ", Memory: (Virtual SiZe: %s MB, used: %s%%" % (assigned_mem ,current_mem)
 +
                    perfdata.append( ( "memory_current_%", current_mem ) )
 +
    perfdata.append( ( "memory__assigned_MB", assigned_mem ) )
 +
 
 +
 
 +
infotext += "))"
 +
 
 +
warn = 80
 +
if current_cpu > warn or current_mem > warn:
 +
return (1, "WARN - status is " + infotext, perfdata)
 +
else:
 +
                return (0, "OK - status is " + infotext, perfdata)
 
             else:
 
             else:
 +
infotext += ")"
 
                 return (2, "CRITICAL - status is " + infotext, perfdata)
 
                 return (2, "CRITICAL - status is " + infotext, perfdata)
     return (3, "UNKNOWN - VM %s not found in agent output" % item)  
+
 
 +
     return (3, "UNKNOWN - VM not found in agent output")
  
 
# declare the check to Check_MK
 
# declare the check to Check_MK
 
check_info['qemu'] = \
 
check_info['qemu'] = \
        (check_qemu, "QEMU VM %s", 1, inventory_qemu)
+
(check_qemu, "VM %s", 1, inventory_qemu)
 +
 
 
</pre>
 
</pre>
  
[[Category: Archive]]
+
[[Category: HOWTO]]

Latest revision as of 11:21, 29 July 2019

This is a check for the Nagios add-on "check_mk" (http://mathias-kettner.de/check_mk.html). Only VMs that are running at the time of the inventory are monitored. If a VM is added or removed, run a "Full Scan" to update the inventory.

The check also collects performance data: the CPU load (in %) caused by the VM, the VM's share of total memory (in %), and the reserved virtual memory size (VSZ) of the related kvm process.

Client-Check

/usr/lib/check_mk_agent/plugins/mh_qemu

#!/bin/bash
#/usr/lib/check_mk_agent/plugins

# based upon 'qemu' from
# 12/2010 Matthias Henze
# Lizenz: GPL v2
#
# updated for libvirtd (virsh) by
# Jonathan Mills 09/2011
#
# updated by
# Christian Burmeister 05/2015
# updated by
# adorfer 01/2017 for proxmox 4 pve
# updated by proxmox 07/2019 for proxmox ve 5&6

if which qm >/dev/null ; then
        echo '<<<qemu>>>'
        # One output line per VM: "<id> <name> <state> <vsz MB> <cpu %> <mem %>".
        # The last three columns are left empty when no kvm process is found.
        # The header row of "qm list" (the line containing VMID) is dropped.
        qm list | grep -v VMID | while read -r L
        do
                if [[ -n $L ]]; then
                        ID=$(echo "$L" | awk '{print $1}')
                        # The name may contain blanks: clear the first and the last four
                        # columns, then join whatever remains with underscores.
                        XNAME=$(echo "$L" | awk '{$1=$NF=$(NF-1)=$(NF-2)=$(NF-3)="";print $0}')
                        # $XNAME is deliberately unquoted so the shell trims the padding.
                        NAME=$(echo $XNAME | sed 's/ /_/g')
                        STATE=$(echo "$L" | awk '{print $(NF-3)}')
                        # -w forces a whole-word match on the id, so VM 100 can no longer
                        # pick up the kvm process that belongs to VM 1000.
                        PID=$(ps aux | grep kvm | grep -w -- "-id $ID" | head -1 | awk '{print $2}')
                        if [[ -n $PID ]] && [ "$PID" -gt 0 ]; then
                                # Query the process by PID instead of grepping the PID digits
                                # out of the full process list (which could hit another line).
                                ARGS=$(ps -p "$PID" -o args=)
                                # VSZ is reported in KiB; convert to MiB.
                                MEM=$(ps -p "$PID" -o vsz= | tr -d ' ')
                                MEM=$(echo "$MEM / 1024" | bc)
                                # Last line of a single batch-mode top run is the process row;
                                # columns 9 and 10 are %CPU and %MEM. A decimal comma (locale)
                                # is turned into a decimal point.
                                DATA=$(top -p "$PID" -n 1 -b | tail -1)
                                PCPU=$(echo "$DATA" | awk -- '{print $9}' | tr , .)
                                PMEM=$(echo "$DATA" | awk -- '{print $10}' | tr , .)
                                # top reports CPU relative to one core, so divide by the
                                # maxcpus value from the kvm command line. Fall back to 1
                                # when the option is missing, instead of feeding garbage to bc.
                                MCPU=$(echo "$ARGS" | sed -n 's/.*maxcpus=\([0-9][0-9]*\).*/\1/p')
                                if [[ -z $MCPU ]] || [ "$MCPU" -le 0 ]; then
                                        MCPU=1
                                fi
                                RCPU=$(echo "scale=1; $PCPU / $MCPU" | bc)
                        else
                                MEM=""
                                RCPU=""
                                PMEM=""
                        fi
                        echo "$ID $NAME $STATE $MEM $RCPU $PMEM"
                fi
        done
fi

Plugin

/omd/versions/default/share/check_mk/checks/qemu

The path assumes the use of OMD (Open source Monitoring Distribution).

#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-

# based upon 'qemu' from
# 12/2010 Matthias Henze
# Lizenz: GPL v2
#
# updated for libvirtd (virsh) by
# Jonathan Mills 09/2011
#
# updated by
# Christian Burmeister 05/2015

# updated by Proxmox 07/2019


# Example output from agent:
# <<<qemu>>>
# 4 i-4B9008BE running 2048 4.0 2.7
# 5 i-44F608B6 running 2048 0.0 0.7




# inventory
def inventory_qemu(checkname, info):
    """Build the inventory of VMs that are running right now.

    checkname -- name of the check; not used here.
    info      -- agent section, one list of whitespace-split fields per
                 line: [id, name, status, ...].

    Returns a list of (item, params) tuples, where item is the VM name
    (not its numeric id) and params is always None. VMs that are not in
    state "running" are left out, so they never become services.
    """
    inventory = []
    for line in info:
        # Truncated lines (fewer than id/name/status) cannot be classified.
        if len(line) < 3 or line[2] != "running":
            continue

        # OpenStack names its VMs "instance-XXXX"; shorten the prefix to "i".
        parts = line[1].split('-')
        if parts[0] == "instance":
            parts[0] = "i"

        inventory.append(('-'.join(parts), None))
    return inventory

# check
def check_qemu(name, param, info):
    """Report the state of the VM called *name*.

    name  -- item to check: the VM name as produced by the inventory
             (an "instance-" prefix is shortened to "i-").
    param -- check parameters; not used here.
    info  -- agent section, one list of whitespace-split fields per line:
             [id, name, status] or, when the agent found the kvm process,
             [id, name, status, vsz_mb, cpu_percent, mem_percent].

    Returns (state, text, perfdata) for a VM that was found:
      0 OK       -- running and both CPU and memory are at or below 80 %,
                    or running without metrics in the agent output
      1 WARN     -- running and CPU or memory is above 80 %
      2 CRITICAL -- any status other than "running"
    and (3, text) UNKNOWN when no line matches *name*.
    """
    warn = 80  # percent, applied to both CPU and memory usage

    for line in info:
        # Need at least id, name and status to say anything about a VM.
        if len(line) < 3:
            continue

        # Apply the same renaming as the inventory so the names compare equal.
        host = line[1].split('-')
        if host[0] == "instance":
            host[0] = "i"
        if '-'.join(host) != name:
            continue

        item = line[0]
        status = line[2]
        perfdata = []
        infotext = "%s  (id: %s" % (status, item)

        # A stopped VM only has three fields, so do not touch line[3:] here.
        if status != "running":
            return (2, "CRITICAL - status is " + infotext + ")", perfdata)

        # Running, but the agent could not collect the metrics: report the
        # state without performance data instead of crashing.
        if len(line) != 6:
            return (0, "OK - status is " + infotext + ")", perfdata)

        # Example: 4 i-4B9008BE running 2048 4.0 2.7
        assigned_mem = line[3]
        current_cpu = int(round(float(line[4])))
        current_mem = int(round(float(line[5])))

        infotext += ", CPU: %s%%" % current_cpu
        infotext += ", Memory: (Virtual SiZe: %s MB, used: %s%%))" % (
            assigned_mem, current_mem)

        perfdata.append(("cpu_%", current_cpu))
        perfdata.append(("memory_current_%", current_mem))
        perfdata.append(("memory__assigned_MB", assigned_mem))

        if current_cpu > warn or current_mem > warn:
            return (1, "WARN - status is " + infotext, perfdata)
        return (0, "OK - status is " + infotext, perfdata)

    return (3, "UNKNOWN - VM not found in agent output")

# declare the check to Check_MK
# Tuple layout presumably follows the legacy Check_MK convention:
# (check function, service description template, perfdata flag,
#  inventory function) -- TODO confirm against the Check_MK version in use.
check_info['qemu'] = (check_qemu, "VM %s", 1, inventory_qemu)