[pve-devel] [PATCH qemu-server v4 1/3] add qmeventd

Dominik Csapak d.csapak at proxmox.com
Mon Nov 5 13:59:59 CET 2018


this adds a program that can listen to qemu qmp events on a given socket
and if a shutdown event followed by a disconnected socket occurs,
executes qm cleanup with arguments that indicate if the
vm was closed gracefully and whether the guest initiated it

this is useful if we want to cleanup after the qemu process exited,
e.g. tap devices, vgpus, etc.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
 Makefile         |  19 ++-
 debian/control   |   3 +
 debian/rules     |   2 +-
 qmeventd.c       | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qmeventd.h       |  55 +++++++
 qmeventd.service |   9 ++
 6 files changed, 517 insertions(+), 4 deletions(-)
 create mode 100644 qmeventd.c
 create mode 100644 qmeventd.h
 create mode 100644 qmeventd.service

diff --git a/Makefile b/Makefile
index c531d04..9af6df6 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@ VERSION=5.0
 PACKAGE=qemu-server
 PKGREL=38
 
-CFLAGS= -O2 -Werror -Wall -Wtype-limits -Wl,-z,relro 
+CFLAGS=-O2 -Werror -Wall -Wextra -Wpedantic -Wtype-limits -Wl,-z,relro -std=gnu11
+JSON_CFLAGS=$(shell pkg-config --cflags json-c)
+JSON_LIBS=$(shell pkg-config --libs json-c)
 
 DESTDIR=
 PREFIX=/usr
@@ -10,11 +12,13 @@ BINDIR=${PREFIX}/bin
 SBINDIR=${PREFIX}/sbin
 BINDIR=${PREFIX}/bin
 LIBDIR=${PREFIX}/lib/${PACKAGE}
+SERVICEDIR=/lib/systemd/system
 VARLIBDIR=/var/lib/${PACKAGE}
 MANDIR=${PREFIX}/share/man
 DOCDIR=${PREFIX}/share/doc
 MAN1DIR=${MANDIR}/man1/
 MAN5DIR=${MANDIR}/man5/
+MAN8DIR=${MANDIR}/man8/
 BASHCOMPLDIR=${PREFIX}/share/bash-completion/completions/
 export PERLDIR=${PREFIX}/share/perl5
 PERLINCDIR=${PERLDIR}/asm-x86_64
@@ -36,6 +40,9 @@ all:
 dinstall: deb
 	dpkg -i ${DEB}
 
+qmeventd: qmeventd.c
+	$(CC) $(CFLAGS) ${JSON_CFLAGS} -o $@ $< ${JSON_LIBS} 
+
 qm.bash-completion:
 	PVE_GENERATING_DOCS=1 perl -I. -T -e "use PVE::CLI::qm; PVE::CLI::qm->generate_bash_completions();" >$@.tmp
 	mv $@.tmp $@
@@ -44,15 +51,17 @@ qmrestore.bash-completion:
 	PVE_GENERATING_DOCS=1 perl -I. -T -e "use PVE::CLI::qmrestore; PVE::CLI::qmrestore->generate_bash_completions();" >$@.tmp
 	mv $@.tmp $@
 
-PKGSOURCES=qm qm.1 qmrestore qmrestore.1 qmextract qm.conf.5 qm.bash-completion qmrestore.bash-completion
+PKGSOURCES=qm qm.1 qmrestore qmrestore.1 qmextract qm.conf.5 qm.bash-completion qmrestore.bash-completion qmeventd qmeventd.8
 
 .PHONY: install
 install: ${PKGSOURCES}
 	install -d ${DESTDIR}/${SBINDIR}
 	install -d ${DESTDIR}${LIBDIR}
 	install -d ${DESTDIR}${VARLIBDIR}
+	install -d ${DESTDIR}${SERVICEDIR}
 	install -d ${DESTDIR}/${MAN1DIR}
 	install -d ${DESTDIR}/${MAN5DIR}
+	install -d ${DESTDIR}/${MAN8DIR}
 	install -d ${DESTDIR}/usr/share/man/man5
 	install -d ${DESTDIR}/usr/share/${PACKAGE}
 	install -m 0644 pve-usb.cfg ${DESTDIR}/usr/share/${PACKAGE}
@@ -63,6 +72,8 @@ install: ${PKGSOURCES}
 	make -C PVE install
 	install -m 0755 qm ${DESTDIR}${SBINDIR}
 	install -m 0755 qmrestore ${DESTDIR}${SBINDIR}
+	install -m 0755 qmeventd ${DESTDIR}${SBINDIR}
+	install -m 0644 qmeventd.service ${DESTDIR}${SERVICEDIR}
 	install -m 0755 pve-bridge ${DESTDIR}${VARLIBDIR}/pve-bridge
 	install -m 0755 pve-bridge-hotplug ${DESTDIR}${VARLIBDIR}/pve-bridge-hotplug
 	install -m 0755 pve-bridgedown ${DESTDIR}${VARLIBDIR}/pve-bridgedown
@@ -70,6 +81,8 @@ install: ${PKGSOURCES}
 	install -m 0755 qmextract ${DESTDIR}${LIBDIR}
 	install -m 0644 qm.1 ${DESTDIR}/${MAN1DIR}
 	gzip -9 -n -f ${DESTDIR}/${MAN1DIR}/qm.1
+	install -m 0644 qmeventd.8 ${DESTDIR}/${MAN8DIR}
+	gzip -9 -n -f ${DESTDIR}/${MAN8DIR}/qmeventd.8
 	install -m 0644 qmrestore.1 ${DESTDIR}/${MAN1DIR}
 	gzip -9 -n -f ${DESTDIR}/${MAN1DIR}/qmrestore.1
 	install -m 0644 qm.conf.5 ${DESTDIR}/${MAN5DIR}
@@ -97,7 +110,7 @@ upload: ${DEB}
 .PHONY: clean
 clean:
 	make cleanup-docgen
-	rm -rf build *.deb *.buildinfo *.changes
+	rm -rf build *.deb *.buildinfo *.changes qmeventd
 	find . -name '*~' -exec rm {} ';'
 
 
diff --git a/debian/control b/debian/control
index f3b9ca0..7fc27ed 100644
--- a/debian/control
+++ b/debian/control
@@ -3,7 +3,9 @@ Section: admin
 Priority: optional
 Maintainer: Proxmox Support Team <support at proxmox.com>
 Build-Depends: debhelper (>= 7.0.50~),
+               docutils,
                libio-multiplex-perl,
+               libjson-c-dev,
                libpve-common-perl,
                libpve-guest-common-perl (>= 2.0-18),
                lintian,
@@ -19,6 +21,7 @@ Depends: dbus,
          genisoimage,
          libc6 (>= 2.7-18),
          libio-multiplex-perl,
+         libjson-c3,
          libjson-perl,
          libjson-xs-perl,
          libnet-ssleay-perl,
diff --git a/debian/rules b/debian/rules
index 955dd78..97112b4 100755
--- a/debian/rules
+++ b/debian/rules
@@ -10,4 +10,4 @@
 #export DH_VERBOSE=1
 
 %:
-	dh $@
+	dh $@ --with systemd
diff --git a/qmeventd.c b/qmeventd.c
new file mode 100644
index 0000000..9cfe5ff
--- /dev/null
+++ b/qmeventd.c
@@ -0,0 +1,433 @@
+/*
+
+    Copyright (C) 2018 Proxmox Server Solutions GmbH
+
+    Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 dated June, 1991.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+    02111-1307, USA.
+
+    Author: Dominik Csapak <d.csapak at proxmox.com>
+
+    qmevend listens on a given socket, and waits for qemu processes
+    to connect
+
+    it then waits for shutdown events followed by the closing of the socket,
+    it then calls /usr/sbin/qm cleanup with following arguments
+
+    /usr/sbin/qm cleanup VMID <graceful> <guest>
+
+    parameter explanation:
+
+    graceful:
+    1|0 depending if it saw a shutdown event before the socket closed
+
+    guest:
+    1|0 depending if the shutdown was requested from the guest
+
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <json.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "qmeventd.h"
+
+static int verbose = 0;
+static int epoll_fd = 0;
+static const char *progname;
+/*
+ * Helper functions
+ */
+
+static void
+usage()
+{
+    fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
+    fprintf(stderr, "  -f       run in foreground (default: false)\n");
+    fprintf(stderr, "  -v       verbose (default: false)\n");
+    fprintf(stderr, "  PATH     use PATH for socket\n");
+}
+
+static pid_t
+get_pid_from_fd(int fd)
+{
+    struct ucred credentials = { .pid = 0, .uid = 0, .gid = 0 };
+    socklen_t len = sizeof(struct ucred);
+    log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &credentials, &len), "getsockopt");
+    return credentials.pid;
+}
+
+/*
+ * reads the vmid from /proc/<pid>/cmdline
+ * after the '-id' argument
+ */
+static unsigned long
+get_vmid_from_pid(pid_t pid)
+{
+    char filename[32] = { 0 };
+    int len = snprintf(filename, sizeof(filename), "/proc/%d/cmdline", pid);
+    if (len < 0) {
+	fprintf(stderr, "error during snprintf for %d: %s\n", pid,
+		strerror(errno));
+	return 0;
+    }
+    if ((size_t)len >= sizeof(filename)) {
+	fprintf(stderr, "error: pid %d too long\n", pid);
+	return 0;
+    }
+    FILE *fp = fopen(filename, "re");
+    if (fp == NULL) {
+	fprintf(stderr, "error opening %s: %s\n", filename, strerror(errno));
+	return 0;
+    }
+
+    unsigned long vmid = 0;
+    ssize_t rc = 0;
+    char *buf = NULL;
+    size_t buflen = 0;
+    while ((rc = getdelim(&buf, &buflen, '\0', fp)) >= 0) {
+	if (!strcmp(buf, "-id")) {
+	    break;
+	}
+    }
+
+    if (rc < 0) {
+	goto err;
+    }
+
+    if (getdelim(&buf, &buflen, '\0', fp) >= 0) {
+	if (buf[0] == '-' || buf[0] == '\0') {
+	    fprintf(stderr, "invalid vmid %s\n", buf);
+	    goto ret;
+	}
+
+	errno = 0;
+	char *endptr;
+	vmid = strtoul(buf, &endptr, 10);
+	if (errno != 0) {
+	    vmid = 0;
+	    goto err;
+	} else if (*endptr != '\0') {
+	    fprintf(stderr, "invalid vmid %s\n", buf);
+	    vmid = 0;
+	}
+
+	goto ret;
+    }
+
+err:
+    fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
+
+ret:
+    free(buf);
+    fclose(fp);
+    return vmid;
+}
+
+static int must_write(int fd, const char *buf, size_t len)
+{
+    ssize_t wlen;
+    do {
+	wlen = write(fd, buf, len);
+    } while (wlen != (ssize_t)len && errno == EINTR);
+
+    return (wlen == (ssize_t)len);
+}
+
+static void reap_child()
+{
+    pid_t pid;
+    do {
+	pid = waitpid(-1, NULL, WNOHANG);
+    } while (pid > 0);
+}
+
+/*
+ * qmp handling functions
+ */
+
+void
+handle_qmp_handshake(struct Client *client)
+{
+    VERBOSE_PRINT("%s: got QMP handshake\n", client->vmid);
+    const char *qmp_answer = "{\"execute\":\"qmp_capabilities\"}\n";
+    if (!must_write(client->fd, qmp_answer, sizeof(qmp_answer) - 1)) {
+	fprintf(stderr, "%s: cannot complete handshake\n", client->vmid);
+	cleanup_client(client);
+    }
+}
+
+void
+handle_qmp_event(struct Client *client, struct json_object *obj)
+{
+    struct json_object *event;
+    if (!json_object_object_get_ex(obj, "event", &event)) {
+	return;
+    }
+    VERBOSE_PRINT("%s: got QMP event: %s\n", client->vmid,
+		  json_object_get_string(event));
+    // event, check if shutdown and get guest parameter
+    if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
+	client->graceful = 1;
+	struct json_object *data;
+	struct json_object *guest;
+	if (json_object_object_get_ex(obj, "data", &data) &&
+	    json_object_object_get_ex(data, "guest", &guest))
+	{
+	    client->guest = (unsigned short)json_object_get_boolean(guest);
+	}
+    }
+}
+
+/*
+ * client management functions
+ */
+
+void
+add_new_client(int client_fd)
+{
+    struct Client *client = calloc(sizeof(struct Client), 1);
+    client->fd = client_fd;
+    client->pid = get_pid_from_fd(client_fd);
+    unsigned long vmid = get_vmid_from_pid(client->pid);
+    int res = snprintf(client->vmid, sizeof(client->vmid), "%lu", vmid);
+    if (vmid == 0 || res < 0 || res >= (int)sizeof(client->vmid)) {
+	fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
+	goto err;
+    }
+
+    struct epoll_event ev;
+    ev.events = EPOLLIN;
+    ev.data.ptr = client;
+    res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
+    if (res < 0) {
+	perror("epoll_ctl client add");
+	goto err;
+    }
+
+    VERBOSE_PRINT("added new client, pid: %d, vmid: %s\n", client->pid,
+		client->vmid);
+
+    return;
+err:
+    (void)close(client_fd);
+    free(client);
+}
+
+void
+cleanup_client(struct Client *client)
+{
+    VERBOSE_PRINT("%s: client exited, status: graceful: %d, guest: %d\n",
+		  client->vmid, client->graceful, client->guest);
+    log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
+    (void)close(client->fd);
+
+    unsigned short graceful = client->graceful;
+    unsigned short guest = client->guest;
+    char vmid[sizeof(client->vmid)];
+    strncpy(vmid, client->vmid, sizeof(vmid));
+    free(client);
+    VERBOSE_PRINT("%s: executing cleanup\n", vmid);
+
+    int pid = fork();
+    if (pid < 0) {
+	fprintf(stderr, "fork failed: %s\n", strerror(errno));
+	return;
+    }
+    if (pid == 0) {
+	char *script = "/usr/sbin/qm";
+	char **args = malloc((size_t)(6) * sizeof(*args));
+
+	args[0] = script;
+	args[1] = "cleanup";
+	args[2] = vmid;
+
+	char shutdown_args[4] = {
+	    graceful ? '1' : '0', 0,
+	    guest    ? '1' : '0', 0,
+	};
+
+	args[3] = &shutdown_args[0];
+	args[4] = &shutdown_args[2];
+	args[5] = NULL;
+
+	execvp(script, args);
+	perror("execvp");
+	_exit(1);
+    }
+}
+
+void
+handle_client(struct Client *client)
+{
+    VERBOSE_PRINT("%s: entering handle\n", client->vmid);
+    ssize_t len;
+    do {
+	len = read(client->fd, (client->buf+client->buflen),
+		   sizeof(client->buf) - client->buflen);
+    } while (len < 0 && errno == EINTR);
+
+    if (len < 0) {
+	if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
+	    log_neg((int)len, "read");
+	    cleanup_client(client);
+	}
+	return;
+    } else if (len == 0) {
+	VERBOSE_PRINT("%s: got EOF\n", client->vmid);
+	cleanup_client(client);
+	return;
+    }
+
+    VERBOSE_PRINT("%s: read %ld bytes\n", client->vmid, len);
+    client->buflen += len;
+
+    struct json_tokener *tok = json_tokener_new();
+    struct json_object *jobj = NULL;
+    enum json_tokener_error jerr = json_tokener_success;
+    while (jerr == json_tokener_success && client->buflen != 0) {
+	jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
+	jerr = json_tokener_get_error(tok);
+	unsigned int offset = (unsigned int)tok->char_offset;
+	switch (jerr) {
+	    case json_tokener_success:
+		// move rest from buffer to front
+		memmove(client->buf, client->buf + offset, client->buflen - offset);
+		client->buflen -= offset;
+		if (json_object_is_type(jobj, json_type_object)) {
+		    struct json_object *obj;
+		    if (json_object_object_get_ex(jobj, "QMP", &obj)) {
+			handle_qmp_handshake(client);
+		    } else if (json_object_object_get_ex(jobj, "event", &obj)) {
+			handle_qmp_event(client, jobj);
+		    } // else ignore message
+		}
+		break;
+	    case json_tokener_continue:
+		if (client->buflen >= sizeof(client->buf)) {
+		    VERBOSE_PRINT("%s, msg too large, discarding buffer\n",
+				  client->vmid);
+		    memset(client->buf, 0, sizeof(client->buf));
+		    client->buflen = 0;
+		} // else we have enough space try again after next read
+		break;
+	    default:
+		VERBOSE_PRINT("%s: parse error: %d, discarding buffer\n",
+			      client->vmid, jerr);
+		memset(client->buf, 0, client->buflen);
+		client->buflen = 0;
+		break;
+	}
+	json_object_put(jobj);
+    }
+    json_tokener_free(tok);
+}
+
+
+int
+main(int argc, char *argv[])
+{
+    int opt;
+    int daemonize = 1;
+    char *socket_path = NULL;
+    progname = argv[0];
+
+    while ((opt = getopt(argc, argv, "hfv")) != -1) {
+	switch (opt) {
+	    case 'f':
+		daemonize = 0;
+		break;
+	    case 'v':
+		verbose = 1;
+		break;
+	    case 'h':
+	    default:
+		usage();
+		exit(EXIT_FAILURE);
+	}
+    }
+
+    if (optind >= argc) {
+	usage();
+	exit(EXIT_FAILURE);
+    }
+
+    signal(SIGCHLD, reap_child);
+
+    socket_path = argv[optind];
+
+    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    bail_neg(sock, "socket");
+
+    struct sockaddr_un addr;
+    memset(&addr, 0, sizeof(addr));
+    addr.sun_family = AF_UNIX;
+    strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
+
+    unlink(socket_path);
+    bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
+
+    struct epoll_event ev, events[1];
+    epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+    bail_neg(epoll_fd, "epoll_create1");
+
+    ev.events = EPOLLIN;
+    ev.data.fd = sock;
+    bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
+
+    bail_neg(listen(sock, 10), "listen");
+
+    if (daemonize) {
+	bail_neg(daemon(0, 1), "daemon");
+    }
+
+    int should_exit = 0;
+    int nevents, n;
+
+    while(!should_exit) {
+	nevents = epoll_wait(epoll_fd, events, 1, -1);
+	if (nevents < 0 && errno == EINTR) {
+	    // signal happened, try again
+	    continue;
+	}
+	bail_neg(nevents, "epoll_wait");
+
+	for (n = 0; n < nevents; n++) {
+	    if (events[n].data.fd == sock) {
+
+		int conn_sock = accept4(sock, NULL, NULL,
+					SOCK_NONBLOCK | SOCK_CLOEXEC);
+		log_neg(conn_sock, "accept");
+
+		add_new_client(conn_sock);
+	    } else {
+		handle_client((struct Client *)events[n].data.ptr);
+	    }
+	}
+    }
+}
diff --git a/qmeventd.h b/qmeventd.h
new file mode 100644
index 0000000..0c2ffc1
--- /dev/null
+++ b/qmeventd.h
@@ -0,0 +1,55 @@
+/*
+
+    Copyright (C) 2018 Proxmox Server Solutions GmbH
+
+    Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 dated June, 1991.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+    02111-1307, USA.
+
+    Author: Dominik Csapak <d.csapak at proxmox.com>
+*/
+
+#define VERBOSE_PRINT(...) do { if (verbose) { printf(__VA_ARGS__); } } while (0)
+
+static inline void log_neg(int errval, const char *msg)
+{
+    if (errval < 0) {
+	perror(msg);
+    }
+}
+
+static inline void bail_neg(int errval, const char *msg)
+{
+    if (errval < 0) {
+	perror(msg);
+	exit(EXIT_FAILURE);
+    }
+}
+
+struct Client {
+    char buf[4096];
+    char vmid[16];
+    int fd;
+    pid_t pid;
+    unsigned int buflen;
+    unsigned short graceful;
+    unsigned short guest;
+};
+
+void handle_qmp_handshake(struct Client *client);
+void handle_qmp_event(struct Client *client, struct json_object *obj);
+void handle_client(struct Client *client);
+void add_new_client(int client_fd);
+void cleanup_client(struct Client *client);
diff --git a/qmeventd.service b/qmeventd.service
new file mode 100644
index 0000000..a2462fc
--- /dev/null
+++ b/qmeventd.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=PVE Qemu Event Daemon
+
+[Service]
+ExecStart=/usr/sbin/qmeventd /var/run/qemu-server/event.socket
+Type=forking
+
+[Install]
+WantedBy=multi-user.target
-- 
2.11.0





More information about the pve-devel mailing list