[pve-devel] [PATCH container] added quota flag to mountpoints

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Feb 9 11:03:03 CET 2016


quotactl(2) requires a path to the device node to work, which
means we need to expose the device nodes to the container.
Luckily it doesn't need r/w access to the device. Also, loop
devices will no longer detach from their images, since they
remain mounted in the monitor's mount namespace (which is
unshared from the host via lxc.monitor.unshare to prevent
accidental unmounts).

Note that quota manipulation currently does not work with
unprivileged containers.
---
And we're back to setting up loop devices... But at least this time we
enforce the 'autoclear' flag on them. Also, we don't need to give r/w
access to them via the devices cgroup for the desired effect (quota control).

In order to not have to set up and then use loop devices at two separate
stages we let the prestart hook write a file with the _actually_ used
device list to the container's /.pve-devices which is then used by the
autodev hook to create the device nodes. This obviously doesn't work
with an r/o root filesystem, so if we ever want to support such a setup
we might have to mount a temporary tmpfs to pass on information between
hooks. Btw. we don't bother with enabling quota support for read-only
mountpoints (you can't exceed the quota when you can't write, can you?).

Note that ever since lxc.monitor.unshare was added we also silently
fixed the loop-device problem another way (as noted in the commit
message). Also note that a privileged container can still mknod
/dev/loop-control but the default cgroup.devices permissions won't
allow it to be *used*. Unprivileged containers have no way of getting
a /dev/loop-control device node.

Naturally only known ext filesystems will be using the quota flags
as we need to pass mount parameters and neither bind mounts nor ZFS
mounts support quotas via quotactl(2). (Do we want to somehow support
ZFS quotas?)

 src/Makefile              |  3 +-
 src/PVE/LXC.pm            | 70 +++++++++++++++++++++++++++++++++++++----------
 src/lxc-pve-autodev-hook  | 44 +++++++++++++++++++++++++++++
 src/lxc-pve-prestart-hook | 24 +++++++++++++++-
 src/lxc-pve.conf          |  1 +
 5 files changed, 125 insertions(+), 17 deletions(-)
 create mode 100755 src/lxc-pve-autodev-hook

diff --git a/src/Makefile b/src/Makefile
index c9ca2ac..4e3b0b6 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -40,7 +40,7 @@ pct.conf.5.pod: gen-pct-conf-pod.pl PVE/LXC.pm
 	mv $@.tmp $@
 
 .PHONY: install
-install: pct lxc-pve.conf lxc-pve-prestart-hook lxc-pve-poststop-hook lxcnetaddbr pct.1.pod pct.1.gz pct.conf.5.pod pct.conf.5.gz pve-update-lxc-config pct.bash-completion
+install: pct lxc-pve.conf lxc-pve-prestart-hook lxc-pve-autodev-hook lxc-pve-poststop-hook lxcnetaddbr pct.1.pod pct.1.gz pct.conf.5.pod pct.conf.5.gz pve-update-lxc-config pct.bash-completion
 	perl -I. -T -e "use PVE::CLI::pct; PVE::CLI::pct->verify_api();"
 	install -d ${SBINDIR}
 	install -m 0755 pct ${SBINDIR}
@@ -49,6 +49,7 @@ install: pct lxc-pve.conf lxc-pve-prestart-hook lxc-pve-poststop-hook lxcnetaddb
 	install -m 0755 lxcnetaddbr ${LXC_SCRIPT_DIR}
 	install -d ${LXC_HOOK_DIR}
 	install -m 0755 lxc-pve-prestart-hook ${LXC_HOOK_DIR}
+	install -m 0755 lxc-pve-autodev-hook ${LXC_HOOK_DIR}
 	install -m 0755 lxc-pve-poststop-hook ${LXC_HOOK_DIR}
 	install -d ${LXC_COMMON_CONFIG_DIR}
 	install -m 0644 lxc-pve.conf ${LXC_COMMON_CONFIG_DIR}/01-pve.conf
diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 6a3489a..611906b 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -66,6 +66,12 @@ my $rootfs_desc = {
 	description => 'Read-only mountpoint (not supported with bind mounts)',
 	optional => 1,
     },
+    quota => {
+	type => 'boolean',
+	format_description => '[0|1]',
+	description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
+	optional => 1,
+    },
 };
 
 PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
@@ -1115,6 +1121,11 @@ sub update_lxc_config {
 	die "implement me (ostype $ostype)";
     }
 
+    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
+    # cannot be exposed to the container with r/w access (cgroup perms).
+    # When this is enabled mounts will still remain in the monitor's namespace
+    # after the container unmounted them and thus will not detach from their
+    # files while the container is running!
     $raw .= "lxc.monitor.unshare = 1\n";
 
     # Should we read them from /etc/subuid?
@@ -2187,6 +2198,24 @@ sub query_loopdev {
     return $found;
 }
 
+sub with_loopdev {
+    my ($func, $file) = @_;
+    my $found;
+    my $parser = sub {
+	my $line = shift;
+	if ($line =~ m@^(/dev/loop\d+)$@) {
+	    $found = $1;
+	}
+    };
+    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
+    die "failed to setup loop device for $file\n" if !$found;
+    eval { &$func($found); };
+    my $err = $@;
+    PVE::Tools::run_command(['losetup', '-d', $found]);
+    die $err if $err;
+    return $found;
+}
+
 # use $rootdir = undef to just return the corresponding mount path
 sub mountpoint_mount {
     my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;
@@ -2194,6 +2223,8 @@ sub mountpoint_mount {
     my $volid = $mountpoint->{volume};
     my $mount = $mountpoint->{mp};
     my $type = $mountpoint->{type};
+    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
+    my $mounted_dev;
     
     return if !$volid || !$mount;
 
@@ -2247,36 +2278,44 @@ sub mountpoint_mount {
 			die "read-only bind mounts not supported\n";
 		    }
 		    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
+		    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
 		}
 	    }
-	    return wantarray ? ($path, 0) : $path;
+	    return wantarray ? ($path, 0, $mounted_dev) : $path;
 	} elsif ($format eq 'raw' || $format eq 'iso') {
+	    my $domount = sub {
+		my ($path) = @_;
+		if ($mount_path) {
+		    if ($format eq 'iso') {
+			PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
+		    } elsif ($isBase || defined($snapname)) {
+			PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
+		    } else {
+			if ($quota) {
+			    push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
+			}
+			PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
+		    }
+		}
+	    };
 	    my $use_loopdev = 0;
 	    if ($scfg->{path}) {
-		push @extra_opts, '-o', 'loop';
+		$mounted_dev = with_loopdev($domount, $path);
 		$use_loopdev = 1;
 	    } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
 		     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
-		# do nothing
+		$mounted_dev = $path;
+		&$domount($path);
 	    } else {
 		die "unsupported storage type '$scfg->{type}'\n";
 	    }
-	    if ($mount_path) {
-		if ($format eq 'iso') {
-		    PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
-		} elsif ($isBase || defined($snapname)) {
-		    PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
-		} else {
-		    PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
-		}
-	    }
-	    return wantarray ? ($path, $use_loopdev) : $path;
+	    return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
 	} else {
 	    die "unsupported image format '$format'\n";
 	}
     } elsif ($type eq 'device') {
 	PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
-	return wantarray ? ($volid, 0) : $volid;
+	return wantarray ? ($volid, 0, $volid) : $volid;
     } elsif ($type eq 'bind') {
 	if ($mountpoint->{ro}) {
 	    die "read-only bind mounts not supported\n";
@@ -2287,7 +2326,8 @@ sub mountpoint_mount {
 	die "directory '$volid' does not exist\n" if ! -d $volid;
 	&$check_mount_path($volid);
 	PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
-	return wantarray ? ($volid, 0) : $volid;
+	warn "cannot enable quota control for bind mounts\n" if $quota;
+	return wantarray ? ($volid, 0, undef) : $volid;
     }
     
     die "unsupported storage";
diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
new file mode 100755
index 0000000..59f8f4c
--- /dev/null
+++ b/src/lxc-pve-autodev-hook
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+
+package lxc_pve_autodev_hook;
+
+use strict;
+use warnings;
+
+exit 0 if $ENV{LXC_NAME} && $ENV{LXC_NAME} !~ /^\d+$/;
+
+use PVE::Tools;
+
+my $root = $ENV{LXC_ROOTFS_MOUNT};
+exit 0 if !$root;
+
+open my $fd, '<', "$root/.pve-devices";
+if (!$fd) {
+    exit 0 if $!{ENOENT};
+    die "failed to open device list: $!\n";
+}
+
+unlink $fd;
+
+while (defined(my $line = <$fd>)) {
+    if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) {
+	warn "invalid .pve-devices entry: $line\n";
+    }
+    my ($type, $major, $minor, $dev) = ($1, $2, $3, $4);
+
+    # Don't break out of $root/dev/
+    if ($dev =~ /\.\./) {
+	warn "skipping illegal device node entry: $dev\n";
+	next;
+    }
+
+    # Never expose /dev/loop-control
+    if ($major == 10 && $minor == 237) {
+	warn "skipping illegal device entry (loop-control) for: $dev\n";
+	next;
+    }
+
+    PVE::Tools::run_command(['mknod', '-m', '666', "$root/dev/$dev",
+                             $type, $major, $minor]);
+}
+close $fd;
diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index 4ec549a..aef8059 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -74,17 +74,39 @@ __PACKAGE__->register_method ({
 
 	my $rootdir = $param->{rootfs};
 
+	my $devlist_file = "$rootdir/.pve-devices";
+	my $devices = [];
+
 	my $setup_mountpoint = sub {
 	    my ($ms, $mountpoint) = @_;
 
 	    #return if $ms eq 'rootfs';
-	    PVE::LXC::mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
+	    my (undef, undef, $dev) = PVE::LXC::mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
+	    push @$devices, $dev if $dev && $mountpoint->{quota};
 	};
 
 	PVE::LXC::foreach_mountpoint($conf, $setup_mountpoint);
 
 	my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
 	$lxc_setup->pre_start_hook();
+
+	unlink $devlist_file;
+	if (@$devices) {
+	    # O_EXCL implies O_NOFOLLOW
+	    sysopen my $devlist, $devlist_file, O_WRONLY | O_CREAT | O_EXCL;
+	    if (!$devlist) {
+		warn "failed to create .pve-devices";
+	    } else {
+		foreach my $dev (@$devices) {
+		    my ($mode, $rdev) = (stat($dev))[2,6];
+		    next if !$mode || !S_ISBLK($mode) || !$rdev;
+		    my $major = int($rdev / 0x100);
+		    my $minor = $rdev % 0x100;
+		    print {$devlist} "b:$major:$minor:$dev\n";
+		}
+		close $devlist;
+	    }
+	}
 	return undef;
     }});
 
diff --git a/src/lxc-pve.conf b/src/lxc-pve.conf
index 07a1818..3635b44 100644
--- a/src/lxc-pve.conf
+++ b/src/lxc-pve.conf
@@ -1,3 +1,4 @@
 lxc.hook.pre-start = /usr/share/lxc/hooks/lxc-pve-prestart-hook
+lxc.hook.autodev = /usr/share/lxc/hooks/lxc-pve-autodev-hook
 lxc.hook.post-stop = /usr/share/lxc/hooks/lxc-pve-poststop-hook
 x
-- 
2.1.4





More information about the pve-devel mailing list