[pve-devel] [PATCH common 4/4] added: enter_systemd_scope

Wolfgang Bumiller w.bumiller at proxmox.com
Fri Jun 3 11:09:24 CEST 2016


This essentially performs the task of systemd-run while
also waiting for the job to finish.

With the systemd-run version in jessie we run into a race
condition where the executed process can start forking child
processes before the systemd daemon is done setting up the
scope's cgroups, causing the children to NOT be included in
the cgroups. This means the child processes (in our case
qemu) will not adhere to the limits we want to apply to it
via cgroups.

enter_systemd_scope() performs the setup task of systemd-run
and waits for the job to finish; after this we can spawn the
qemu process without systemd-run.
---
 debian/control   |  2 +-
 src/PVE/Tools.pm | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/debian/control b/debian/control
index c00ca5e..9374fd8 100644
--- a/debian/control
+++ b/debian/control
@@ -7,6 +7,6 @@ Standards-Version: 3.8.4
 
 Package: libpve-common-perl
 Architecture: all
-Depends: ${perl:Depends} ${misc:Depends}, libclone-perl, libdevel-cycle-perl, libwww-perl, libjson-perl, liblinux-inotify2-perl, libio-stringy-perl, liburi-perl, libstring-shellquote-perl, libnet-ip-perl, libfilesys-df-perl
+Depends: ${perl:Depends} ${misc:Depends}, libclone-perl, libdevel-cycle-perl, libwww-perl, libjson-perl, liblinux-inotify2-perl, libio-stringy-perl, liburi-perl, libstring-shellquote-perl, libnet-ip-perl, libfilesys-df-perl, libnet-dbus-perl
 Description: Proxmox VE base library
  This package contains the base library used by other Proxmox VE components.
diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm
index 608f5f8..b49a68d 100644
--- a/src/PVE/Tools.pm
+++ b/src/PVE/Tools.pm
@@ -22,6 +22,9 @@ use Digest::SHA;
 use Text::ParseWords;
 use String::ShellQuote;
 use Time::HiRes qw(usleep gettimeofday tv_interval alarm);
+use Net::DBus qw(dbus_uint32 dbus_uint64);
+use Net::DBus::Callback;
+use Net::DBus::Reactor;
 
 # avoid warning when parsing long hex values with hex()
 no warnings 'portable'; # Support for 64-bit ints required
@@ -1345,4 +1348,75 @@ sub mkdirat($$$) {
     return syscall(258, $dirfd, $name, $mode) == 0;
 }
 
+# Put the current process ($$) into a new transient systemd scope named
+# "$unit.scope" and block until its JobRemoved signal arrives; dies on failure.
+# NOTE: This runs the dbus main loop and must not be used while another dbus
+# main loop is in use. Polling the job status instead would not let us tell
+# success from failure. %extra: timeout, Slice, KillMode, CPUShares, CPUQuota.
+# Note that the description is mandatory for security reasons.
+sub enter_systemd_scope {
+    my ($unit, $description, %extra) = @_;
+    die "missing description\n" if !defined($description); # NOTE(review): only checked here, never added to $properties - confirm intent
+
+    my $timeout = delete $extra{timeout}; # removed so the loop below does not treat it as a unit property
+
+    $unit .= '.scope';
+    my $properties = [ [PIDs => [dbus_uint32($$)]] ]; # our own PID is the scope's initial member
+
+    foreach my $key (keys %extra) {
+	if ($key eq 'Slice' || $key eq 'KillMode') { # passed through unchanged
+	    push @$properties, [$key, $extra{$key}];
+	} elsif ($key eq 'CPUShares') {
+	    push @$properties, [$key, dbus_uint64($extra{$key})];
+	} elsif ($key eq 'CPUQuota') { # sent under a different property name, scaled by 10000 (presumably percent -> usec per second; confirm)
+	    push @$properties, ['CPUQuotaPerSecUSec',
+	                        dbus_uint64($extra{$key} * 10000)];
+	} else { # reject anything we have no encoding rule for
+	    die "Don't know how to encode $key for systemd scope\n";
+	}
+    }
+
+    my $job; # set by StartTransientUnit below, compared in the JobRemoved handler
+    my $done = 0; # set to 1 only when our job is removed with result 'done'
+
+    my $bus = Net::DBus->system();
+    my $reactor = Net::DBus::Reactor->main();
+
+    my $service = $bus->get_service('org.freedesktop.systemd1');
+    my $if = $service->get_object('/org/freedesktop/systemd1', 'org.freedesktop.systemd1.Manager');
+    # Connect to the JobRemoved signal since we want to wait for it to finish
+    my $sigid;
+    my $timer;
+    my $cleanup = sub { # disconnects the signal handler and timer; safe to call more than once
+	my ($no_shutdown) = @_; # true: skip the reactor shutdown at the end
+	$if->disconnect_from_signal('JobRemoved', $sigid) if defined($if);
+	$if = undef; # makes a repeated call skip the disconnect above
+	$sigid = undef;
+	$reactor->remove_timeout($timer) if defined($timer);
+	$timer = undef;
+	return if $no_shutdown;
+	$reactor->shutdown();
+    };
+
+    $sigid = $if->connect_to_signal('JobRemoved', sub {
+	my ($id, $removed_job, $signaled_unit, $result) = @_;
+	return if $signaled_unit ne $unit || $removed_job ne $job; # ignore signals for other units/jobs
+	$cleanup->(0); # also shuts the reactor down
+	die "systemd job failed\n" if $result ne 'done';
+	$done = 1;
+    });
+
+    my $on_timeout = sub { # timer callback: tear everything down and fail
+	$cleanup->(0);
+	die "systemd job timed out\n";
+    };
+
+    $timer = $reactor->add_timeout($timeout * 1000, Net::DBus::Callback->new(method => $on_timeout)) # presumably seconds -> milliseconds; confirm
+	if defined($timeout); # no timer is installed when no timeout was given
+    $job = $if->StartTransientUnit($unit, 'fail', $properties, []);
+    $reactor->run(); # presumably returns once $cleanup has called shutdown(); confirm
+    $cleanup->(1); # final teardown without another reactor shutdown
+    die "systemd job never completed\n" if !$done;
+}
+
 1;
-- 
2.1.4




More information about the pve-devel mailing list