[pve-devel] [PATCH v2 container 1/1] Add device passthrough

Filip Schauer f.schauer at proxmox.com
Thu Nov 2 15:28:22 CET 2023


On 30/10/2023 14:34, Wolfgang Bumiller wrote:
> On Tue, Oct 24, 2023 at 02:55:53PM +0200, Filip Schauer wrote:
>> Add a dev[n] argument to the container config to pass devices through to
>> a container. A device can be passed by its path. Alternatively a mapped
>> USB device can be passed through with usbmapping=<name>.
>>
>> Signed-off-by: Filip Schauer<f.schauer at proxmox.com>
>> ---
>>   src/PVE/LXC.pm        | 34 +++++++++++++++++++++++-
>>   src/PVE/LXC/Config.pm | 60 +++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 93 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
>> index c9b5ba7..a3ddb62 100644
>> --- a/src/PVE/LXC.pm
>> +++ b/src/PVE/LXC.pm
>> @@ -5,7 +5,8 @@ use warnings;
>>   
>>   use Cwd qw();
>>   use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
>> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
>> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>> +use File::Basename;
>>   use File::Path;
>>   use File::Spec;
>>   use IO::Poll qw(POLLIN POLLHUP);
>> @@ -639,6 +640,37 @@ sub update_lxc_config {
>>   	$raw .= "lxc.mount.auto = sys:mixed\n";
>>       }
>>   
>> +    # Clear passthrough directory from previous run
>> +    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
>> +    File::Path::rmtree($passthrough_dir);
> I think we need to make a few changes here.
>
> First: we don't necessarily need this directory.
> Having a device list would certainly be nice, but it makes more sense to
> just have a file we can easily parse (possibly even just a json hash),
> like the `devices` file we already create in the pre-start hook, except
> prepared *for* the pre-start hook, which *should* be able to just
> `mknod` the devices right into the container's `/dev` on startup.


Device nodes created with mknod in the container's /dev directory during
the pre-start hook are not visible in the container once it is fully
started. Meanwhile, creating a device node at a different path inside the
container works fine. It seems that LXC mounts over the /dev directory.
This can be solved by calling mknod in lxc-pve-autodev-hook, but this does
not work with unprivileged containers that lack the mknod capability.

So are bind mounts our only option without modifying LXC,
or am I overlooking something?


> We'd also avoid "lingering" device nodes with potentially harmful
> uid/permissions in /var, which is certainly better from a security POV.
>
> But note that we do need the `lxc.cgroup2.*` entries before starting the
> container in order to ensure the devices cgroup has the right
> permissions.
>
>> +
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
>> +	my ($key, $sanitized_path) = @_;
>> +
>> +	my $absolute_path = "/$sanitized_path";
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +	die "Could not find major and minor ids of device $absolute_path.\n"
>> +	    unless ($mode && $rdev);
>> +
>> +	my $major = PVE::Tools::dev_t_major($rdev);
>> +	my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	my $passthrough_device_path = "$passthrough_dir/$sanitized_path";
>> +	File::Path::make_path(dirname($passthrough_device_path));
>> +	PVE::Tools::run_command([
>> +	    '/usr/bin/mknod',
>> +	    '-m', '0660',
>> +	    $passthrough_device_path,
>> +	    $device_type_char,
>> +	    $major,
>> +	    $minor
>> +	]);
> It's probably worth adding a helper for the mknod syscall to
> `PVE::Tools`, there are a bunch of syscalls in there already.
>
>> +	chown 100000, 100000, $passthrough_device_path if ($unprivileged);
> ^ This isn't necessarily the correct id. Users may have custom id
> mappings.
> `PVE::LXC::parse_id_maps($conf)` returns the mapping alongside the root
> uid and gid. (See for example `sub mount_all` for how it's used.)
>
>> +
>> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
>> +	$raw .= "lxc.mount.entry = $passthrough_device_path $sanitized_path none bind,create=file\n";
>> +    });
>> +
>>       # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>>       # cannot be exposed to the container with r/w access (cgroup perms).
>>       # When this is enabled mounts will still remain in the monitor's namespace
>> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
>> index 56e1f10..edd813e 100644
>> --- a/src/PVE/LXC/Config.pm
>> +++ b/src/PVE/LXC/Config.pm
>> @@ -29,6 +29,7 @@ mkdir $lockdir;
>>   mkdir "/etc/pve/nodes/$nodename/lxc";
>>   my $MAX_MOUNT_POINTS = 256;
>>   my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
>> +my $MAX_DEVICES = 256;
>>   
>>   # BEGIN implemented abstract methods from PVE::AbstractConfig
>>   
>> @@ -908,6 +909,49 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
>>       }
>>   }
>>   
>> +PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string);
>> +sub verify_lxc_dev_string {
>> +    my ($dev, $noerr) = @_;
>> +
>> +    if (
>> +	$dev =~m@/\.\.?/@  ||
>> +	$dev =~m@/\.\.?$@  ||
>> +	$dev !~ m!^/dev/!
>> +    ) {
>> +	return undef if $noerr;
>> +	die "$dev is not a valid device path\n";
>> +    }
>> +
>> +    return $dev;
>> +}
>> +
>> +my $dev_desc = {
>> +    path => {
>> +	optional => 1,
>> +	type => 'string',
>> +	default_key => 1,
>> +	format => 'pve-lxc-dev-string',
>> +	format_description => 'Path',
>> +	description => 'Device to pass through to the container',
>> +	verbose_description => 'Path to the device to pass through to the container'
>> +    },
>> +    usbmapping => {
>> +	optional => 1,
>> +	type => 'string',
>> +	format => 'pve-configid',
>> +	format_description => 'mapping-id',
>> +	description => 'The ID of a cluster wide USB mapping.'
>> +    }
>> +};
>> +
>> +for (my $i = 0; $i < $MAX_DEVICES; $i++) {
>> +    $confdesc->{"dev$i"} = {
>> +	optional => 1,
>> +	type => 'string', format => $dev_desc,
>> +	description => "Device to pass through to the container",
>> +    }
>> +}
>> +
>>   sub parse_pct_config {
>>       my ($filename, $raw, $strict) = @_;
>>   
>> @@ -1255,6 +1299,22 @@ sub parse_volume {
>>       return;
>>   }
>>   
>> +sub parse_device {
>> +    my ($class, $device_string, $noerr) = @_;
>> +
>> +    my $res;
>> +    eval { $res = PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
>> +    if ($@) {
>> +	return undef if $noerr;
>> +	die $@;
>> +    }
>> +
>> +    die "Either path or usbmapping has to be defined"
>> +	unless (defined($res->{path}) || defined($res->{usbmapping}));
>> +
>> +    return $res;
>> +}
>> +
>>   sub print_volume {
>>       my ($class, $key, $volume) = @_;
>>   
>> -- 
>> 2.39.2
>>
>>
>>
>> _______________________________________________
>> pve-devel mailing list
>> pve-devel at lists.proxmox.com
>> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
>>
>>





More information about the pve-devel mailing list