[pve-devel] [PATCH qemu-server v4 1/3] add qmeventd

Wolfgang Bumiller w.bumiller at proxmox.com
Wed Nov 7 12:54:36 CET 2018


On Mon, Nov 05, 2018 at 01:59:59PM +0100, Dominik Csapak wrote:
> this adds a program that can listen to qemu qmp events on a given socket
> and if a shutdown event followed by a disconnected socket occurs,
> executes qm cleanup with arguments that indicate if the
> vm was closed gracefully and whether the guest initiated it
> 
> this is useful if we want to cleanup after the qemu process exited,
> e.g. tap devices, vgpus, etc.
> 
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
> 
> diff --git a/Makefile b/Makefile
> index c531d04..9af6df6 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -36,6 +40,9 @@ all:
>  dinstall: deb
>  	dpkg -i ${DEB}
>  
> +qmeventd: qmeventd.c
> +	$(CC) $(CFLAGS) ${JSON_CFLAGS} -o $@ $< ${JSON_LIBS} 

^ Trailing space

> +
>  qm.bash-completion:
>  	PVE_GENERATING_DOCS=1 perl -I. -T -e "use PVE::CLI::qm; PVE::CLI::qm->generate_bash_completions();" >$@.tmp
>  	mv $@.tmp $@
> diff --git a/debian/control b/debian/control
> index f3b9ca0..7fc27ed 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -19,6 +21,7 @@ Depends: dbus,
>           genisoimage,
>           libc6 (>= 2.7-18),
>           libio-multiplex-perl,
> +         libjson-c3,

Shouldn't ${shlibs:Depends} already do this?

>           libjson-perl,
>           libjson-xs-perl,
>           libnet-ssleay-perl,
> diff --git a/qmeventd.c b/qmeventd.c
> new file mode 100644
> index 0000000..9cfe5ff
> --- /dev/null
> +++ b/qmeventd.c
> @@ -0,0 +1,433 @@
> +/*
> +
> +    Copyright (C) 2018 Proxmox Server Solutions GmbH
> +
> +    Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
> +
> +    This program is free software; you can redistribute it and/or modify
> +    it under the terms of the GNU General Public License as published by
> +    the Free Software Foundation; version 2 dated June, 1991.
> +
> +    This program is distributed in the hope that it will be useful,
> +    but WITHOUT ANY WARRANTY; without even the implied warranty of
> +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +    GNU General Public License for more details.
> +
> +    You should have received a copy of the GNU General Public License
> +    along with this program; if not, write to the Free Software
> +    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
> +    02111-1307, USA.
> +
> +    Author: Dominik Csapak <d.csapak at proxmox.com>
> +
> +    qmevend listens on a given socket, and waits for qemu processes
> +    to connect
> +
> +    it then waits for shutdown events followed by the closing of the socket,
> +    it then calls /usr/sbin/qm cleanup with following arguments
> +
> +    /usr/sbin/qm cleanup VMID <graceful> <guest>
> +
> +    parameter explanation:
> +
> +    graceful:
> +    1|0 depending if it saw a shutdown event before the socket closed
> +
> +    guest:
> +    1|0 depending if the shutdown was requested from the guest
> +
> +*/
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <json.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <sys/epoll.h>
> +#include <sys/socket.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/un.h>
> +#include <unistd.h>
> +
> +#include "qmeventd.h"
> +
> +static int verbose = 0;
> +static int epoll_fd = 0;
> +static const char *progname;
> +/*
> + * Helper functions
> + */
> +
> +static void
> +usage()
> +{
> +    fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
> +    fprintf(stderr, "  -f       run in foreground (default: false)\n");
> +    fprintf(stderr, "  -v       verbose (default: false)\n");
> +    fprintf(stderr, "  PATH     use PATH for socket\n");
> +}
> +
> +static pid_t
> +get_pid_from_fd(int fd)
> +{
> +    struct ucred credentials = { .pid = 0, .uid = 0, .gid = 0 };
> +    socklen_t len = sizeof(struct ucred);
> +    log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &credentials, &len), "getsockopt");
> +    return credentials.pid;
> +}
> +
> +/*
> + * reads the vmid from /proc/<pid>/cmdline
> + * after the '-id' argument
> + */
> +static unsigned long
> +get_vmid_from_pid(pid_t pid)
> +{
> +    char filename[32] = { 0 };
> +    int len = snprintf(filename, sizeof(filename), "/proc/%d/cmdline", pid);
> +    if (len < 0) {
> +	fprintf(stderr, "error during snprintf for %d: %s\n", pid,
> +		strerror(errno));
> +	return 0;
> +    }
> +    if ((size_t)len >= sizeof(filename)) {
> +	fprintf(stderr, "error: pid %d too long\n", pid);
> +	return 0;
> +    }
> +    FILE *fp = fopen(filename, "re");
> +    if (fp == NULL) {
> +	fprintf(stderr, "error opening %s: %s\n", filename, strerror(errno));
> +	return 0;
> +    }
> +
> +    unsigned long vmid = 0;
> +    ssize_t rc = 0;
> +    char *buf = NULL;
> +    size_t buflen = 0;
> +    while ((rc = getdelim(&buf, &buflen, '\0', fp)) >= 0) {
> +	if (!strcmp(buf, "-id")) {
> +	    break;
> +	}
> +    }
> +
> +    if (rc < 0) {
> +	goto err;
> +    }
> +
> +    if (getdelim(&buf, &buflen, '\0', fp) >= 0) {
> +	if (buf[0] == '-' || buf[0] == '\0') {
> +	    fprintf(stderr, "invalid vmid %s\n", buf);
> +	    goto ret;
> +	}
> +
> +	errno = 0;
> +	char *endptr;

I'd prefer this NULL initialized. I don't trust that errors really
always update this to be a valid pointer...

> +	vmid = strtoul(buf, &endptr, 10);
> +	if (errno != 0) {
> +	    vmid = 0;
> +	    goto err;
> +	} else if (*endptr != '\0') {
> +	    fprintf(stderr, "invalid vmid %s\n", buf);
> +	    vmid = 0;
> +	}
> +
> +	goto ret;
> +    }
> +
> +err:
> +    fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
> +
> +ret:
> +    free(buf);
> +    fclose(fp);
> +    return vmid;
> +}
> +
> +static int must_write(int fd, const char *buf, size_t len)

int -> bool (via <stdbool.h>)
And newline before the function name

> +{
> +    ssize_t wlen;
> +    do {
> +	wlen = write(fd, buf, len);
> +    } while (wlen != (ssize_t)len && errno == EINTR);

Use wlen < 0; successful calls don't touch errno, so it's theoretically
possible for this loop to re-write already written data (or would be if
it was writing to a disk - it most likely won't happen here, but
still...).

> +
> +    return (wlen == (ssize_t)len);
> +}
> +
> +static void reap_child()

Style issue - but I actually realized that it should be possible to just
set the signal to be _explicitly_ ignored (signal(SIGCLD, SIG_IGN)) to
not produce zombies (this is different to the implicit ignore default
as it is equivalent to calling sigaction(2) with SA_NOCLDWAIT (which
we could do explicitly instead of the old signal() call if we want, up
to you).

> +{
> +    pid_t pid;
> +    do {
> +	pid = waitpid(-1, NULL, WNOHANG);
> +    } while (pid > 0);
> +}
> +
> +/*
> + * qmp handling functions
> + */
> +
> +void
> +handle_qmp_handshake(struct Client *client)
> +{
> +    VERBOSE_PRINT("%s: got QMP handshake\n", client->vmid);
> +    const char *qmp_answer = "{\"execute\":\"qmp_capabilities\"}\n";

static const char[]

You should not use a pointer with sizeof(), I'm surprised this works
(assuming it did for you, I didn't test it ;-) )?
Perhaps qemu doesn't need the handshake? ;-)

> +    if (!must_write(client->fd, qmp_answer, sizeof(qmp_answer) - 1)) {
> +	fprintf(stderr, "%s: cannot complete handshake\n", client->vmid);
> +	cleanup_client(client);
> +    }
> +}
> +
> +void
> +handle_qmp_event(struct Client *client, struct json_object *obj)
> +{
> +    struct json_object *event;
> +    if (!json_object_object_get_ex(obj, "event", &event)) {
> +	return;
> +    }
> +    VERBOSE_PRINT("%s: got QMP event: %s\n", client->vmid,
> +		  json_object_get_string(event));
> +    // event, check if shutdown and get guest parameter
> +    if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
> +	client->graceful = 1;
> +	struct json_object *data;
> +	struct json_object *guest;
> +	if (json_object_object_get_ex(obj, "data", &data) &&
> +	    json_object_object_get_ex(data, "guest", &guest))
> +	{
> +	    client->guest = (unsigned short)json_object_get_boolean(guest);
> +	}
> +    }
> +}
> +
> +/*
> + * client management functions
> + */
> +
> +void
> +add_new_client(int client_fd)
> +{
> +    struct Client *client = calloc(sizeof(struct Client), 1);
> +    client->fd = client_fd;
> +    client->pid = get_pid_from_fd(client_fd);

While falling through to get_vmid_from_pid(0) in an error case certainly
won't find an `-id` parameter in /proc/0/cmdline, or the file at all for
that matter, I'd like to have either a comment stating this, or
explicit error handling, rather than implicit snowballing ;-)

> +    unsigned long vmid = get_vmid_from_pid(client->pid);
> +    int res = snprintf(client->vmid, sizeof(client->vmid), "%lu", vmid);
> +    if (vmid == 0 || res < 0 || res >= (int)sizeof(client->vmid)) {
> +	fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
> +	goto err;
> +    }
> +
> +    struct epoll_event ev;
> +    ev.events = EPOLLIN;
> +    ev.data.ptr = client;
> +    res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
> +    if (res < 0) {
> +	perror("epoll_ctl client add");
> +	goto err;
> +    }
> +
> +    VERBOSE_PRINT("added new client, pid: %d, vmid: %s\n", client->pid,
> +		client->vmid);
> +
> +    return;
> +err:
> +    (void)close(client_fd);
> +    free(client);
> +}
> +
> +void
> +cleanup_client(struct Client *client)
> +{
> +    VERBOSE_PRINT("%s: client exited, status: graceful: %d, guest: %d\n",
> +		  client->vmid, client->graceful, client->guest);
> +    log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
> +    (void)close(client->fd);
> +
> +    unsigned short graceful = client->graceful;
> +    unsigned short guest = client->guest;
> +    char vmid[sizeof(client->vmid)];
> +    strncpy(vmid, client->vmid, sizeof(vmid));
> +    free(client);
> +    VERBOSE_PRINT("%s: executing cleanup\n", vmid);
> +
> +    int pid = fork();
> +    if (pid < 0) {
> +	fprintf(stderr, "fork failed: %s\n", strerror(errno));
> +	return;
> +    }
> +    if (pid == 0) {
> +	char *script = "/usr/sbin/qm";
> +	char **args = malloc((size_t)(6) * sizeof(*args));

I suppose you could consider putting this on the stack
   char *args[] = {
       ...,
       NULL
   };

> +
> +	args[0] = script;
> +	args[1] = "cleanup";
> +	args[2] = vmid;
> +
> +	char shutdown_args[4] = {
> +	    graceful ? '1' : '0', 0,
> +	    guest    ? '1' : '0', 0,
> +	};
> +
> +	args[3] = &shutdown_args[0];
> +	args[4] = &shutdown_args[2];
> +	args[5] = NULL;
> +
> +	execvp(script, args);
> +	perror("execvp");
> +	_exit(1);
> +    }
> +}
> +
> +void
> +handle_client(struct Client *client)
> +{
> +    VERBOSE_PRINT("%s: entering handle\n", client->vmid);
> +    ssize_t len;
> +    do {
> +	len = read(client->fd, (client->buf+client->buflen),
> +		   sizeof(client->buf) - client->buflen);
> +    } while (len < 0 && errno == EINTR);
> +
> +    if (len < 0) {
> +	if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
> +	    log_neg((int)len, "read");
> +	    cleanup_client(client);
> +	}
> +	return;
> +    } else if (len == 0) {
> +	VERBOSE_PRINT("%s: got EOF\n", client->vmid);
> +	cleanup_client(client);
> +	return;
> +    }
> +
> +    VERBOSE_PRINT("%s: read %ld bytes\n", client->vmid, len);
> +    client->buflen += len;
> +
> +    struct json_tokener *tok = json_tokener_new();
> +    struct json_object *jobj = NULL;
> +    enum json_tokener_error jerr = json_tokener_success;
> +    while (jerr == json_tokener_success && client->buflen != 0) {
> +	jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
> +	jerr = json_tokener_get_error(tok);
> +	unsigned int offset = (unsigned int)tok->char_offset;
> +	switch (jerr) {
> +	    case json_tokener_success:
> +		// move rest from buffer to front
> +		memmove(client->buf, client->buf + offset, client->buflen - offset);
> +		client->buflen -= offset;
> +		if (json_object_is_type(jobj, json_type_object)) {
> +		    struct json_object *obj;
> +		    if (json_object_object_get_ex(jobj, "QMP", &obj)) {
> +			handle_qmp_handshake(client);
> +		    } else if (json_object_object_get_ex(jobj, "event", &obj)) {
> +			handle_qmp_event(client, jobj);
> +		    } // else ignore message
> +		}
> +		break;
> +	    case json_tokener_continue:
> +		if (client->buflen >= sizeof(client->buf)) {
> +		    VERBOSE_PRINT("%s, msg too large, discarding buffer\n",
> +				  client->vmid);
> +		    memset(client->buf, 0, sizeof(client->buf));
> +		    client->buflen = 0;
> +		} // else we have enough space try again after next read
> +		break;
> +	    default:
> +		VERBOSE_PRINT("%s: parse error: %d, discarding buffer\n",
> +			      client->vmid, jerr);
> +		memset(client->buf, 0, client->buflen);
> +		client->buflen = 0;
> +		break;
> +	}
> +	json_object_put(jobj);
> +    }
> +    json_tokener_free(tok);
> +}
> +
> +
> +int
> +main(int argc, char *argv[])
> +{
> +    int opt;
> +    int daemonize = 1;
> +    char *socket_path = NULL;
> +    progname = argv[0];
> +
> +    while ((opt = getopt(argc, argv, "hfv")) != -1) {
> +	switch (opt) {
> +	    case 'f':
> +		daemonize = 0;
> +		break;
> +	    case 'v':
> +		verbose = 1;
> +		break;
> +	    case 'h':

Something often overlooked:
    $ ls --wrong &>/dev/null ; echo $?
    2
    $ ls --help &>/dev/null ; echo $?
    0

When the user passes -h explicitly we should not exit with failure.

PS: strongly encouraged, but optional, is explicit handling of the long
version `--help` ;-)

PPS: And I don't mean like:
    $ grep -h
    Usage: grep [OPTION]... PATTERN [FILE]...
    Try 'grep --help' for more information.
    ^~~~~~~~~~~~~~~~~

> +	    default:
> +		usage();
> +		exit(EXIT_FAILURE);
> +	}
> +    }
> +
> +    if (optind >= argc) {
> +	usage();
> +	exit(EXIT_FAILURE);
> +    }
> +
> +    signal(SIGCHLD, reap_child);
> +
> +    socket_path = argv[optind];
> +
> +    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
> +    bail_neg(sock, "socket");
> +
> +    struct sockaddr_un addr;
> +    memset(&addr, 0, sizeof(addr));
> +    addr.sun_family = AF_UNIX;
> +    strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
> +
> +    unlink(socket_path);
> +    bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
> +
> +    struct epoll_event ev, events[1];
> +    epoll_fd = epoll_create1(EPOLL_CLOEXEC);
> +    bail_neg(epoll_fd, "epoll_create1");
> +
> +    ev.events = EPOLLIN;
> +    ev.data.fd = sock;
> +    bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
> +
> +    bail_neg(listen(sock, 10), "listen");
> +
> +    if (daemonize) {
> +	bail_neg(daemon(0, 1), "daemon");
> +    }
> +
> +    int should_exit = 0;
> +    int nevents, n;

I'd move 'n' down...

> +
> +    while(!should_exit) {

I don't see should_exit written anywhere, so `for (;;)` ?

> +	nevents = epoll_wait(epoll_fd, events, 1, -1);
> +	if (nevents < 0 && errno == EINTR) {
> +	    // signal happened, try again
> +	    continue;
> +	}
> +	bail_neg(nevents, "epoll_wait");
> +
> +	for (n = 0; n < nevents; n++) {

... for (int n ...

> +	    if (events[n].data.fd == sock) {
> +
> +		int conn_sock = accept4(sock, NULL, NULL,
> +					SOCK_NONBLOCK | SOCK_CLOEXEC);
> +		log_neg(conn_sock, "accept");

On error this still falls through to add_new_client().

> +
> +		add_new_client(conn_sock);
> +	    } else {
> +		handle_client((struct Client *)events[n].data.ptr);
> +	    }
> +	}
> +    }
> +}
> diff --git a/qmeventd.h b/qmeventd.h
> new file mode 100644
> index 0000000..0c2ffc1
> --- /dev/null
> +++ b/qmeventd.h
> @@ -0,0 +1,55 @@
> +/*
> +
> +    Copyright (C) 2018 Proxmox Server Solutions GmbH
> +
> +    Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
> +
> +    This program is free software; you can redistribute it and/or modify
> +    it under the terms of the GNU General Public License as published by
> +    the Free Software Foundation; version 2 dated June, 1991.
> +
> +    This program is distributed in the hope that it will be useful,
> +    but WITHOUT ANY WARRANTY; without even the implied warranty of
> +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +    GNU General Public License for more details.
> +
> +    You should have received a copy of the GNU General Public License
> +    along with this program; if not, write to the Free Software
> +    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
> +    02111-1307, USA.
> +
> +    Author: Dominik Csapak <d.csapak at proxmox.com>
> +*/
> +
> +#define VERBOSE_PRINT(...) do { if (verbose) { printf(__VA_ARGS__); } } while (0)
> +
> +static inline void log_neg(int errval, const char *msg)
> +{
> +    if (errval < 0) {
> +	perror(msg);
> +    }
> +}
> +
> +static inline void bail_neg(int errval, const char *msg)
> +{
> +    if (errval < 0) {
> +	perror(msg);
> +	exit(EXIT_FAILURE);
> +    }
> +}
> +
> +struct Client {
> +    char buf[4096];
> +    char vmid[16];
> +    int fd;
> +    pid_t pid;
> +    unsigned int buflen;
> +    unsigned short graceful;
> +    unsigned short guest;
> +};
> +
> +void handle_qmp_handshake(struct Client *client);
> +void handle_qmp_event(struct Client *client, struct json_object *obj);
> +void handle_client(struct Client *client);
> +void add_new_client(int client_fd);
> +void cleanup_client(struct Client *client);
> diff --git a/qmeventd.service b/qmeventd.service
> new file mode 100644
> index 0000000..a2462fc
> --- /dev/null
> +++ b/qmeventd.service
> @@ -0,0 +1,9 @@
> +[Unit]
> +Description=PVE Qemu Event Daemon

Should we explicitly order this Before=pve-guests.service? While it's not
strictly required to start guests, I think it would still make sense to
have the service available early.

> +
> +[Service]
> +ExecStart=/usr/sbin/qmeventd /var/run/qemu-server/event.socket
> +Type=forking
> +
> +[Install]
> +WantedBy=multi-user.target
> -- 
> 2.11.0




More information about the pve-devel mailing list