[pve-devel] [PATCH v3 qemu-server 2/3] fix #3075: add TPM v1.2 and v2.0 support via swtpm

Stefan Reiter s.reiter at proxmox.com
Mon Oct 4 17:29:20 CEST 2021


Starts an instance of swtpm per VM in its systemd scope. It will
terminate by itself if the VM exits, or be terminated manually if
startup fails.

Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.

It is migrated 'offline'. The important part here is the creation of the
target volume; the actual data transfer happens via the QEMU device
state migration process.

Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.

For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of the disk state ("snapshot" semantics).

The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 PVE/API2/Qemu.pm         |   5 ++
 PVE/QemuMigrate.pm       |  14 +++-
 PVE/QemuServer.pm        | 137 +++++++++++++++++++++++++++++++++++++--
 PVE/QemuServer/Drive.pm  |  63 ++++++++++++++----
 PVE/VZDump/QemuServer.pm |  43 ++++++++++--
 5 files changed, 238 insertions(+), 24 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index a8fbd9d..6228125 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -184,6 +184,11 @@ my $create_disks = sub {
 	    my $volid;
 	    if ($ds eq 'efidisk0') {
 		($volid, $size) = PVE::QemuServer::create_efidisk($storecfg, $storeid, $vmid, $fmt, $arch);
+	    } elsif ($ds eq 'tpmstate0') {
+		# swtpm can only use raw volumes, and uses a fixed size
+		$size = PVE::Tools::convert_size(PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE, 'b' => 'kb');
+		$volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
+		    "raw", undef, $size);
 	    } else {
 		$volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $size);
 	    }
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 4f5bfa4..ae3eaf1 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -488,6 +488,7 @@ sub scan_local_volumes {
 
 	    $local_volumes->{$volid}->{ref} = $attr->{referenced_in_config} ? 'config' : 'snapshot';
 	    $local_volumes->{$volid}->{ref} = 'storage' if $attr->{is_unused};
+	    $local_volumes->{$volid}->{ref} = 'generated' if $attr->{is_tpmstate};
 
 	    $local_volumes->{$volid}->{is_vmstate} = $attr->{is_vmstate} ? 1 : 0;
 
@@ -587,6 +588,9 @@ sub scan_local_volumes {
 		$local_volumes->{$volid}->{migration_mode} = 'online';
 	    } elsif ($self->{running} && $ref eq 'generated') {
 		# offline migrate the cloud-init ISO and don't regenerate on VM start
+		#
+		# tpmstate will also be offline migrated first, and in case of
+		# live migration then updated by QEMU/swtpm if necessary
 		$local_volumes->{$volid}->{migration_mode} = 'offline';
 	    } else {
 		$local_volumes->{$volid}->{migration_mode} = 'offline';
@@ -648,7 +652,9 @@ sub config_update_local_disksizes {
 
     PVE::QemuConfig->foreach_volume($conf, sub {
 	my ($key, $drive) = @_;
-	return if $key eq 'efidisk0'; # skip efidisk, will be handled later
+	# skip special disks, will be handled later
+	return if $key eq 'efidisk0';
+	return if $key eq 'tpmstate0';
 
 	my $volid = $drive->{file};
 	return if !defined($local_volumes->{$volid}); # only update sizes for local volumes
@@ -665,6 +671,12 @@ sub config_update_local_disksizes {
     if (defined($conf->{efidisk0})) {
 	PVE::QemuServer::update_efidisk_size($conf);
     }
+
+    # TPM state might have an irregular filesize, to avoid problems on transfer
+    # we always assume the static size of 4M to allocate on the target
+    if (defined($conf->{tpmstate0})) {
+	PVE::QemuServer::update_tpmstate_size($conf);
+    }
 }
 
 sub filter_local_volumes {
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index e8047e8..0ca5e00 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1143,7 +1143,8 @@ PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
 sub verify_bootdev {
     my ($dev, $noerr) = @_;
 
-    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && $dev !~ m/^efidisk/;
+    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
+    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;
 
     my $check = sub {
 	my ($base) = @_;
@@ -2966,6 +2967,90 @@ sub audio_devs {
     return $devs;
 }
 
+sub get_tpm_paths {
+    my ($vmid) = @_;
+    return {
+	socket => "/var/run/qemu-server/$vmid.swtpm",
+	pid => "/var/run/qemu-server/$vmid.swtpm.pid",
+    };
+}
+
+sub add_tpm_device {
+    my ($vmid, $devices, $conf) = @_;
+
+    return if !$conf->{tpmstate0};
+
+    my $paths = get_tpm_paths($vmid);
+
+    push @$devices, "-chardev", "socket,id=tpmchar,path=$paths->{socket}";
+    push @$devices, "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar";
+    push @$devices, "-device", "tpm-tis,tpmdev=tpmdev";
+}
+
+sub start_swtpm {
+    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;
+
+    return if !$tpmdrive;
+
+    my $state;
+    my $tpm = parse_drive("tpmstate0", $tpmdrive);
+    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+    if ($storeid) {
+	$state = PVE::Storage::map_volume($storecfg, $tpm->{file});
+    } else {
+	$state = $tpm->{file};
+    }
+
+    my $paths = get_tpm_paths($vmid);
+
+    # during migration, we will get state from remote
+    #
+    if (!$migration) {
+	# run swtpm_setup to create a new TPM state if it doesn't exist yet
+	my $setup_cmd = [
+	    "swtpm_setup",
+	    "--tpmstate",
+	    "file://$state",
+	    "--createek",
+	    "--create-ek-cert",
+	    "--create-platform-cert",
+	    "--lock-nvram",
+	    "--config",
+	    "/etc/swtpm_setup.conf", # do not use XDG configs
+	    "--runas",
+	    "0", # force creation as root, error if not possible
+	    "--not-overwrite", # ignore existing state, do not modify
+	];
+
+	push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+	# TPM 2.0 supports ECC crypto, use if possible
+	push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';
+
+	run_command($setup_cmd, outfunc => sub {
+	    print "swtpm_setup: $1\n";
+	});
+    }
+
+    my $emulator_cmd = [
+	"swtpm",
+	"socket",
+	"--tpmstate",
+	"backend-uri=file://$state,mode=0600",
+	"--ctrl",
+	"type=unixio,path=$paths->{socket},mode=0600",
+	"--pid",
+	"file=$paths->{pid}",
+	"--terminate", # terminate on QEMU disconnect
+	"--daemon",
+    ];
+    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+    run_command($emulator_cmd, outfunc => sub { print $1; });
+
+    # return untainted PID of swtpm daemon so it can be killed on error
+    file_read_firstline($paths->{pid}) =~ m/(\d+)/;
+    return $1;
+}
+
 sub vga_conf_has_spice {
     my ($vga) = @_;
 
@@ -3467,6 +3552,8 @@ sub config_to_command {
 	push @$devices, @$audio_devs;
     }
 
+    add_tpm_device($vmid, $devices, $conf);
+
     my $sockets = 1;
     $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
     $sockets = $conf->{sockets} if  $conf->{sockets};
@@ -3663,6 +3750,8 @@ sub config_to_command {
 
 	# ignore efidisk here, already added in bios/fw handling code above
 	return if $drive->{interface} eq 'efidisk';
+	# similar for TPM
+	return if $drive->{interface} eq 'tpmstate';
 
 	$use_virtio = 1 if $ds =~ m/^virtio/;
 
@@ -4524,6 +4613,9 @@ sub foreach_volid {
 	$volhash->{$volid}->{is_vmstate} //= 0;
 	$volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate';
 
+	$volhash->{$volid}->{is_tpmstate} //= 0;
+	$volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
+
 	$volhash->{$volid}->{is_unused} //= 0;
 	$volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
 
@@ -4721,7 +4813,7 @@ sub vmconfig_hotplug_pending {
 		vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
 				    $vmid, $opt, $value, $arch, $machine_type);
 	    } elsif (is_valid_drivename($opt)) {
-		die "skip\n" if $opt eq 'efidisk0';
+		die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
 		# some changes can be done without hotplug
 		my $drive = parse_drive($opt, $value);
 		if (drive_is_cloudinit($drive)) {
@@ -5341,8 +5433,17 @@ sub vm_start_nolock {
 	PVE::Tools::run_fork sub {
 	    PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
 
+	    my $tpmpid;
+	    if (my $tpm = $conf->{tpmstate0}) {
+		# start the TPM emulator so QEMU can connect on start
+		$tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
+	    }
+
 	    my $exitcode = run_command($cmd, %run_params);
-	    die "QEMU exited with code $exitcode\n" if $exitcode;
+	    if ($exitcode) {
+		kill 'TERM', $tpmpid if $tpmpid;
+		die "QEMU exited with code $exitcode\n";
+	    }
 	};
     };
 
@@ -5542,6 +5643,14 @@ sub vm_stop_cleanup {
 	if (!$keepActive) {
 	    my $vollist = get_vm_volumes($conf);
 	    PVE::Storage::deactivate_volumes($storecfg, $vollist);
+
+	    if (my $tpmdrive = $conf->{tpmstate0}) {
+		my $tpm = parse_drive("tpmstate0", $tpmdrive);
+		my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+		if ($storeid) {
+		    PVE::Storage::unmap_volume($storecfg, $tpm->{file});
+		}
+	    }
 	}
 
 	foreach my $ext (qw(mon qmp pid vnc qga)) {
@@ -6079,7 +6188,7 @@ sub restore_update_config_line {
 	$net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
 	$netstr = print_net($net);
 	$res .= "$id: $netstr\n";
-    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) {
+    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
 	my $virtdev = $1;
 	my $value = $3;
 	my $di = parse_drive($virtdev, $value);
@@ -6397,8 +6506,8 @@ sub restore_proxmox_backup_archive {
 	    my $volid = $d->{volid};
 	    my $path = PVE::Storage::path($storecfg, $volid);
 
-	    # for live-restore we only want to preload the efidisk
-	    next if $options->{live} && $virtdev ne 'efidisk0';
+	    # for live-restore we only want to preload the efidisk and TPM state
+	    next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
 
 	    my $pbs_restore_cmd = [
 		'/usr/bin/pbs-restore',
@@ -6473,7 +6582,9 @@ sub restore_proxmox_backup_archive {
 	my $conf = PVE::QemuConfig->load_config($vmid);
 	die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
 
-	delete $devinfo->{'drive-efidisk0'}; # this special drive is already restored before start
+	# these special drives are already restored before start
+	delete $devinfo->{'drive-efidisk0'};
+	delete $devinfo->{'drive-tpmstate0-backup'};
 	pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
 
 	PVE::QemuConfig->remove_lock($vmid, "create");
@@ -7307,6 +7418,8 @@ sub clone_disk {
 	    $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
 	} elsif ($drivename eq 'efidisk0') {
 	    $size = get_efivars_size($conf);
+	} elsif ($drivename eq 'tpmstate0') {
+	    $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
 	} else {
 	    ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
 	}
@@ -7347,6 +7460,8 @@ sub clone_disk {
 	    }
 	} else {
 
+	    die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';
+
 	    my $kvmver = get_running_qemu_version ($vmid);
 	    if (!min_version($kvmver, 2, 7)) {
 		die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
@@ -7417,6 +7532,14 @@ sub update_efidisk_size {
     return;
 }
 
+sub update_tpmstate_size {
+    my ($conf) = @_;
+
+    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
+    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
+    $conf->{tpmstate0} = print_drive($disk);
+}
+
 sub create_efidisk($$$$$) {
     my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_;
 
diff --git a/PVE/QemuServer/Drive.pm b/PVE/QemuServer/Drive.pm
index 5110190..32c7377 100644
--- a/PVE/QemuServer/Drive.pm
+++ b/PVE/QemuServer/Drive.pm
@@ -306,16 +306,6 @@ my $virtiodesc = {
 };
 PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
 
-my $alldrive_fmt = {
-    %drivedesc_base,
-    %iothread_fmt,
-    %model_fmt,
-    %queues_fmt,
-    %scsiblock_fmt,
-    %ssd_fmt,
-    %wwn_fmt,
-};
-
 my $efidisk_fmt = {
     volume => { alias => 'file' },
     file => {
@@ -345,6 +335,55 @@ my $efidisk_desc = {
 
 PVE::JSONSchema::register_standard_option("pve-qm-efidisk", $efidisk_desc);
 
+my %tpmversion_fmt = (
+    version => {
+	type => 'string',
+	enum => [qw(v1.2 v2.0)],
+	description => "The TPM interface version. v2.0 is newer and should be "
+		     . "preferred. Note that this cannot be changed later on.",
+	optional => 1,
+	default => 'v2.0',
+    },
+);
+my $tpmstate_fmt = {
+    volume => { alias => 'file' },
+    file => {
+	type => 'string',
+	format => 'pve-volume-id-or-qm-path',
+	default_key => 1,
+	format_description => 'volume',
+	description => "The drive's backing volume.",
+    },
+    size => {
+	type => 'string',
+	format => 'disk-size',
+	format_description => 'DiskSize',
+	description => "Disk size. This is purely informational and has no effect.",
+	optional => 1,
+    },
+    %tpmversion_fmt,
+};
+my $tpmstate_desc = {
+    optional => 1,
+    type => 'string', format => $tpmstate_fmt,
+    description => "Configure a Disk for storing TPM state. " .
+	$ALLOCATION_SYNTAX_DESC . " Note that SIZE_IN_GiB is ignored here " .
+	"and that the default size of 4 MiB will always be used instead. The " .
+	"format is also fixed to 'raw'.",
+};
+use constant TPMSTATE_DISK_SIZE => 4 * 1024 * 1024;
+
+my $alldrive_fmt = {
+    %drivedesc_base,
+    %iothread_fmt,
+    %model_fmt,
+    %queues_fmt,
+    %scsiblock_fmt,
+    %ssd_fmt,
+    %wwn_fmt,
+    %tpmversion_fmt,
+};
+
 my $unused_fmt = {
     volume => { alias => 'file' },
     file => {
@@ -379,6 +418,7 @@ for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++)  {
 }
 
 $drivedesc_hash->{efidisk0} = $efidisk_desc;
+$drivedesc_hash->{tpmstate0} = $tpmstate_desc;
 
 for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
     $drivedesc_hash->{"unused$i"} = $unuseddesc;
@@ -390,7 +430,8 @@ sub valid_drive_names {
             (map { "scsi$_" } (0 .. ($MAX_SCSI_DISKS - 1))),
             (map { "virtio$_" } (0 .. ($MAX_VIRTIO_DISKS - 1))),
             (map { "sata$_" } (0 .. ($MAX_SATA_DISKS - 1))),
-            'efidisk0');
+            'efidisk0',
+            'tpmstate0');
 }
 
 sub is_valid_drivename {
diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm
index 44b705f..b133694 100644
--- a/PVE/VZDump/QemuServer.pm
+++ b/PVE/VZDump/QemuServer.pm
@@ -86,11 +86,10 @@ sub prepare {
 	if (!$volume->{included}) {
 	    $self->loginfo("exclude disk '$name' '$volid' ($volume->{reason})");
 	    next;
-	} elsif ($self->{vm_was_running} && $volume_config->{iothread}) {
-	    if (!PVE::QemuServer::Machine::runs_at_least_qemu_version($vmid, 4, 0, 1)) {
-		die "disk '$name' '$volid' (iothread=on) can't use backup feature with running QEMU " .
-		    "version < 4.0.1! Either set backup=no for this drive or upgrade QEMU and restart VM\n";
-	    }
+	} elsif ($self->{vm_was_running} && $volume_config->{iothread} &&
+		 !PVE::QemuServer::Machine::runs_at_least_qemu_version($vmid, 4, 0, 1)) {
+	    die "disk '$name' '$volid' (iothread=on) can't use backup feature with running QEMU " .
+		"version < 4.0.1! Either set backup=no for this drive or upgrade QEMU and restart VM\n";
 	} else {
 	    my $log = "include disk '$name' '$volid'";
 	    if (defined(my $size = $volume_config->{size})) {
@@ -131,6 +130,12 @@ sub prepare {
 	    qmdevice => "drive-$ds",
 	};
 
+	if ($ds eq 'tpmstate0') {
+	    # TPM drive only exists for backup, which is reflected in the name
+	    $diskinfo->{qmdevice} = 'drive-tpmstate0-backup';
+	    $task->{tpmpath} = $path;
+	}
+
 	if (-b $path) {
 	    $diskinfo->{type} = 'block';
 	} else {
@@ -425,6 +430,28 @@ my $query_backup_status_loop = sub {
     };
 };
 
+my $attach_tpmstate_drive = sub {
+    my ($self, $task, $vmid) = @_;
+
+    return if !$task->{tpmpath};
+
+    # unconditionally try to remove the tpmstate-named drive - it only exists
+    # for backing up, and avoids errors if left over from some previous event
+    eval { PVE::QemuServer::qemu_drivedel($vmid, "tpmstate0-backup"); };
+
+    $self->loginfo('attaching TPM drive to QEMU for backup');
+
+    my $drive = "file=$task->{tpmpath},if=none,read-only=on,id=drive-tpmstate0-backup";
+    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");
+    die "attaching TPM drive failed\n" if $ret !~ m/OK/s;
+};
+
+my $detach_tpmstate_drive = sub {
+    my ($task, $vmid) = @_;
+    return if !$task->{tpmpath} || !PVE::QemuServer::check_running($vmid);
+    eval { PVE::QemuServer::qemu_drivedel($vmid, "tpmstate0-backup"); };
+};
+
 sub archive_pbs {
     my ($self, $task, $vmid) = @_;
 
@@ -501,6 +528,8 @@ sub archive_pbs {
 	    $master_keyfile = undef; # skip rest of master key handling below
 	}
 
+	$attach_tpmstate_drive->($self, $task, $vmid);
+
 	my $fs_frozen = $self->qga_fs_freeze($task, $vmid);
 
 	my $params = {
@@ -673,6 +702,8 @@ sub archive_vma {
 	    die "interrupted by signal\n";
 	};
 
+	$attach_tpmstate_drive->($self, $task, $vmid);
+
 	my $outfh;
 	if ($opts->{stdout}) {
 	    $outfh = $opts->{stdout};
@@ -876,6 +907,8 @@ sub snapshot {
 sub cleanup {
     my ($self, $task, $vmid) = @_;
 
+    $detach_tpmstate_drive->($task, $vmid);
+
     if ($self->{qmeventd_fh}) {
 	close($self->{qmeventd_fh});
     }
-- 
2.30.2






More information about the pve-devel mailing list