[pve-devel] [RFC PATCH qemu-server 5/7] memory: explicit dimm configuration

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Dec 20 12:49:24 CET 2016


---
 PVE/QemuServer.pm        |  14 +++
 PVE/QemuServer/Memory.pm | 237 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 229 insertions(+), 22 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 1ca6b79..3724249 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -232,6 +232,14 @@ my $confdesc = {
 	minimum => 16,
 	default => 512,
     },
+    dimms => {
+	optional => 1,
+	type => 'string',
+	pattern => '(?:\d+@\d+(?:x\d+)?(?:[ ;]\d+@\d+(?:x\d+)?)*)|none',
+	description => 'When memory hotplug is available and dimms are explicitly specified,'
+	 .' the "memory" option becomes the static memory and the defined DIMMs are used for hotplugging.'
+	 .' This can be "none" to enable later dimm hotplugging with no initial ones added at startup.',
+    },
     balloon => {
         optional => 1,
         type => 'integer',
@@ -4104,6 +4112,9 @@ sub vmconfig_hotplug_pending {
 	    } elsif ($opt =~ m/^memory$/) {
 		die "skip\n" if !$hotplug_features->{memory};
 		PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
+	    } elsif ($opt =~ m/^dimms$/) {
+		die "skip\n" if !$hotplug_features->{memory};
+		PVE::QemuServer::Memory::qemu_dimm_hotplug($vmid, $conf, $defaults, $opt);
 	    } elsif ($opt eq 'cpuunits') {
 		cgroups_write("cpu", $vmid, "cpu.shares", $defaults->{cpuunits});
 	    } elsif ($opt eq 'cpulimit') {
@@ -4169,6 +4180,9 @@ sub vmconfig_hotplug_pending {
 	    } elsif ($opt =~ m/^memory$/) { #dimms
 		die "skip\n" if !$hotplug_features->{memory};
 		$value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
+	    } elsif ($opt =~ m/^dimms$/) {
+		die "skip\n" if !$hotplug_features->{memory};
+		$value = PVE::QemuServer::Memory::qemu_dimm_hotplug($vmid, $conf, $defaults, $opt, $value);
 	    } elsif ($opt eq 'cpuunits') {
 		cgroups_write("cpu", $vmid, "cpu.shares", $conf->{pending}->{$opt});
 	    } elsif ($opt eq 'cpulimit') {
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index 740e3ae..002eadd 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -152,6 +152,9 @@ sub qemu_memory_hotplug {
 
     return $value if !PVE::QemuServer::check_running($vmid);
 
+    die "static memory cannot be hot-plugged if dimm-hotplugging is used\n"
+	if defined($conf->{dimms});
+
     my $sockets = 1;
     $sockets = $conf->{sockets} if $conf->{sockets};
 
@@ -219,6 +222,165 @@ sub qemu_memory_hotplug {
     }
 }
 
+# Expand a dimm list into [size, node] pairs, one per dimm ('none' => empty list).
+sub parse_dimmlist {
+    my ($list) = @_;
+    return if $list eq 'none';
+    return map {
+	/^(\d+)@(\d+)(?:x(\d+))?$/ or die "bad dimm element: $_\n";
+	([$1, $2]) x ($3//1);
+    } PVE::Tools::split_list($list);
+}
+
+# Serialize the per-node, per-size dimm id lists back into the 'dimms' property
+# format ("SIZE@NODE[xCOUNT]" joined by ';', or 'none' when no dimm is left).
+# With $write set the value is also stored in $conf->{dimms} and written out.
+my $qemu_update_dimm_config  = sub {
+    my ($vmid, $conf, $cur_sizes_by_node, $write) = @_;
+    my @parts;
+    foreach my $node (sort {$a <=> $b} keys %$cur_sizes_by_node) {
+	my $ids_by_size = $cur_sizes_by_node->{$node};
+	foreach my $dimm_size (sort {$a <=> $b} keys %$ids_by_size) {
+	    my $count = scalar(@{$ids_by_size->{$dimm_size}});
+	    next if !$count;
+	    push @parts, "${dimm_size}\@${node}" . ($count != 1 ? "x$count" : '');
+	}
+    }
+    my $value = @parts ? join(';', @parts) : 'none';
+    if ($write) {
+	$conf->{dimms} = $value;
+	PVE::QemuConfig->write_config($vmid, $conf);
+    }
+    return $value;
+};
+
+# Hotplug handler for the 'dimms' property: plugs/unplugs pc-dimm devices until
+# the running VM matches $value, writing the config after every single change.
+# Returns the resulting dimm list string; plug/unplug errors are only warned.
+sub qemu_dimm_hotplug {
+    my ($vmid, $conf, $defaults, $opt, $value) = @_;
+
+    return $value if !PVE::QemuServer::check_running($vmid);
+
+    if (!defined($value)) {
+	die "dimms need to be unplugged before deleting the dimms property\n"
+	    if $conf->{dimms} && $conf->{dimms} ne 'none';
+	return undef;
+    }
+
+    my %allnodes;
+    my %allids;
+    my %allsizes;
+
+    my $qemudimms = qemu_dimm_list($vmid);
+
+    my $cur_sizes_by_node = {};
+    foreach my $id (keys %$qemudimms) {
+	my $dimm = $qemudimms->{$id};
+	my $node = $dimm->{node};
+	my $size = $dimm->{size};
+	my $megs = int($size/(1024*1024));
+	next if ($megs*1024*1024) != $size; # sanity check
+	$allids{$id} = 1;
+	$allnodes{$node} = 1;
+	$allsizes{$megs} = 1;
+	push @{$cur_sizes_by_node->{$node}->{$megs}}, $id;
+    }
+
+    my @new_dimms = parse_dimmlist($value);
+    foreach my $dimm (@new_dimms) {
+	my ($dimm_size, $numanode) = @$dimm;
+	$allnodes{$numanode} = 1;
+	$allsizes{$dimm_size} = 1;
+    }
+
+    # count requested dimms per node and size, check against $MAX_MEM and the slot limit
+    my $totalmem = 0;
+    my $totalslots = 0;
+    my $dimm_size_count_per_node = {};
+    foreach my $dimm (@new_dimms) {
+	my ($dimm_size, $numanode) = @$dimm;
+	next if !$dimm_size;
+	$totalmem += $dimm_size;
+	++$totalslots;
+	my $dimm_size_count = ($dimm_size_count_per_node->{$numanode} //= {});
+	++($dimm_size_count->{$dimm_size}//=0);
+    }
+    die "you cannot add more memory than $MAX_MEM MB!\n"
+	if $totalmem > $MAX_MEM;
+    die "cannot use more than 64 dimm slots ($totalslots requested)\n"
+	if $totalslots > 64;
+
+    my $id = 0;
+    eval {
+	my $numa_hostmap = $conf->{hugepages} ? get_numa_guest_to_host_map($conf) : undef;
+	foreach my $numanode (keys %allnodes) {
+	    my $sizecounts = ($dimm_size_count_per_node->{$numanode}//{});
+	    my $cur_sizes = ($cur_sizes_by_node->{$numanode} //= {});
+	    foreach my $dimm_size (sort {$a <=> $b} keys %allsizes) {
+		my $count = $sizecounts->{$dimm_size} // 0;
+		my $cur_ids = ($cur_sizes->{$dimm_size}//=[]);
+		while ($count > scalar(@$cur_ids)) {
+		    ++$id while $allids{"dimm$id"}; # advance to the next unused dimm id
+		    my $name = "dimm$id";
+		    eval {
+			qemu_add_dimm($vmid, $conf, $numa_hostmap, $numanode, $dimm_size, $name);
+			PVE::QemuServer::vm_mon_cmd($vmid, "device_add",
+			    driver => "pc-dimm",
+			    id => "$name",
+			    memdev => "mem-$name",
+			    node => $numanode
+			);
+		    };
+		    if (my $err = $@) {
+			eval { PVE::QemuServer::qemu_objectdel($vmid, "mem-$name"); }; # best-effort cleanup
+			die $err;
+		    }
+
+		    # update ids for value building below
+		    $allids{$name} = 1;
+		    push @$cur_ids, $name;
+		    &$qemu_update_dimm_config($vmid, $conf, $cur_sizes_by_node, 1);
+		}
+		my $index = scalar(@$cur_ids);
+		while ($count < scalar(@$cur_ids) && $index-- > 0) {
+		    my $name = $cur_ids->[$index];
+		    my $dimm = $qemudimms->{$name};
+		    die "bad dimm name: $name\n" if !$dimm; # sanity check
+		    if (!$dimm->{hotpluggable}) {
+			warn "skipping non-hotpluggable dimm $name\n";
+			next;
+		    }
+		    eval {
+			PVE::QemuServer::qemu_devicedel($vmid, "$name");
+			sleep 1;
+			foreach(1..3) {
+			    my $dimm_list = qemu_dimm_list($vmid);
+			    last if !$dimm_list->{$name};
+			    sleep 3;
+			}
+			PVE::QemuServer::qemu_objectdel($vmid, "mem-$name");
+		    };
+		    if (my $err = $@) {
+			warn "failed to delete dimm $name, trying another: ($err)\n";
+			# Try another...
+			next;
+		    }
+
+		    $allids{$name} = 0;
+		    my ($cur_id) = ($name =~ /^dimm(\d+)$/);
+		    $id = $cur_id if $cur_id < $id; # allow the freed id to be reused
+		    # update ids for value building below
+		    splice @$cur_ids, $index, 1;
+		    &$qemu_update_dimm_config($vmid, $conf, $cur_sizes_by_node, 1);
+		}
+	    }
+	}
+    };
+    warn $@ if $@;
+    return &$qemu_update_dimm_config($vmid, $conf, $cur_sizes_by_node, 0);
+}
+
 sub qemu_dimm_list {
     my ($vmid) = @_;
 
@@ -234,6 +396,9 @@ sub qemu_dimm_list {
 
 sub config {
     my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+
+    my $dimmlist = $conf->{dimms};
+    my @dimms = parse_dimmlist($dimmlist) if $dimmlist;
     
     my $memory = $conf->{memory} || $defaults->{memory};
     my $static_memory = 0;
@@ -241,17 +406,20 @@ sub config {
     if ($hotplug_features->{memory}) {
 	die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
 	die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
-	my $sockets = 1;
-	$sockets = $conf->{sockets} if $conf->{sockets};
 
-	$static_memory = $STATICMEM;
-	$static_memory *= $sockets if ($conf->{hugepages} && $conf->{hugepages} eq '1024');
+	if ($dimmlist) {
+	    $static_memory = $memory;
+	} else {
+	    $static_memory = $STATICMEM;
+	    $static_memory *= $sockets if $conf->{hugepages} && $conf->{hugepages} eq '1024';
+	}
 
 	die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
 	push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";
 
     } else {
-
+	# without hotplugging dimms are simply added to the memory size...
+	$memory += $_->[0] foreach @dimms;
 	$static_memory = $memory;
 	push @$cmd, '-m', $static_memory;
     }
@@ -323,20 +491,33 @@ sub config {
     }
 
     if ($hotplug_features->{memory}) {
-	foreach_dimm($conf, $vmid, $memory, $sockets, sub {
-	    my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
+	if ($dimmlist) {
+	    my $id = 0;
+	    foreach my $dimm (@dimms) {
+		my ($dimm_size, $numanode) = @$dimm;
+		my $name = "dimm$id";
+		$id++;
+		next if !$dimm_size; # size 0 is used for empty slots during migration
+		my $mem_object = print_mem_object($conf, "mem-$name", $dimm_size);
+		push @$cmd, '-object', $mem_object;
+		push @$cmd, '-device', "pc-dimm,id=$name,memdev=mem-$name,node=$numanode";
+	    }
+	} else {
+	    foreach_dimm($conf, $vmid, $memory, $sockets, sub {
+		my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
 
-	    my $mem_object = print_mem_object($conf, "mem-$name", $dimm_size);
+		my $mem_object = print_mem_object($conf, "mem-$name", $dimm_size);
 
-	    push @$cmd, "-object" , $mem_object;
-	    push @$cmd, "-device", "pc-dimm,id=$name,memdev=mem-$name,node=$numanode";
+		push @$cmd, '-object' , $mem_object;
+		push @$cmd, '-device', "pc-dimm,id=$name,memdev=mem-$name,node=$numanode";
 
-	    #if dimm_memory is not aligned to dimm map
-	    if($current_size > $memory) {
-	         $conf->{memory} = $current_size;
-	         PVE::QemuConfig->write_config($vmid, $conf);
-	    }
-	});
+		#if dimm_memory is not aligned to dimm map
+		if($current_size > $memory) {
+		     $conf->{memory} = $current_size;
+		     PVE::QemuConfig->write_config($vmid, $conf);
+		}
+	    });
+	}
     }
 }
 
@@ -491,14 +672,26 @@ sub hugepages_topology {
     if ($hotplug_features->{memory}) {
 	my $numa_hostmap = get_numa_guest_to_host_map($conf);
 
-	foreach_dimm($conf, undef, $memory, $sockets, sub {
-	    my ($conf, undef, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
+	if (defined(my $dimmlist = $conf->{dimms})) {
+	    my @dimms = parse_dimmlist($dimmlist);
+	    my $id = 0;
+	    foreach my $dimm (@dimms) {
+		my ($dimm_size, $numanode) = @$dimm;
+		next if !$dimm_size;
+		$numanode = $numa_hostmap->{$numanode};
+		my $hugepages_size = hugepages_size($conf, $dimm_size);
+		$hugepages_topology->{$hugepages_size}->{$numanode} += hugepages_nr($dimm_size, $hugepages_size);
+	    }
+	} else {
+	    foreach_dimm($conf, undef, $memory, $sockets, sub {
+		my ($conf, undef, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
 
-	    $numanode = $numa_hostmap->{$numanode};
+		$numanode = $numa_hostmap->{$numanode};
 
-	    my $hugepages_size = hugepages_size($conf, $dimm_size);
-	    $hugepages_topology->{$hugepages_size}->{$numanode} += hugepages_nr($dimm_size, $hugepages_size);
-	});
+		my $hugepages_size = hugepages_size($conf, $dimm_size);
+		$hugepages_topology->{$hugepages_size}->{$numanode} += hugepages_nr($dimm_size, $hugepages_size);
+	    });
+	}
     }
 
     return $hugepages_topology;
-- 
2.1.4





More information about the pve-devel mailing list