[pve-devel] Using zstd for backup

Oliver Jaksch proxmox-devel at com-in.de
Mon Nov 6 12:49:04 CET 2017


Some users, including me, have been discussing the possibility of extending the backup routines to support more compression programs:
https://forum.proxmox.com/threads/suggestion-add-lrzip-to-backup-compression-options.29496

I've made a small set of patches to be able to use zstd (https://github.com/facebook/zstd/).
Debian/stretch ships zstd, but that version is too old; I used zstd 1.3.x, which allows controlling the number of threads (via -T), similar to what pigz offers.

The patches apply cleanly on my PVE 5.1 system (and on another fresh system) and work fine for backing up and restoring LXC containers and QEMU VMs.
So please have a look at these patches; maybe they can become official at some point.
Thanks, over and out
-
Oliver


--- /usr/share/pve-manager.orig/js/pvemanagerlib.js     2017-10-27 12:25:26.000000000 +0200
+++ /usr/share/pve-manager/js/pvemanagerlib.js  2017-11-04 17:30:37.626121574 +0100
@@ -6044,7 +6044,8 @@
     comboItems: [
                 ['0', PVE.Utils.noneText],
                 ['lzo', 'LZO (' + gettext('fast') + ')'],
-                ['gzip', 'GZIP (' + gettext('good') + ')']
+                ['gzip', 'GZIP (' + gettext('good') + ')'],
+                ['zstd', 'ZSTD (' + gettext('best') + ')']
     ]
 });
 Ext.define('PVE.form.PoolSelector', {
--- /usr/share/perl5/PVE.org/QemuServer.pm      2017-11-04 17:00:48.000000000 +0100
+++ /usr/share/perl5/PVE/QemuServer.pm  2017-11-05 16:37:08.438200797 +0100
@@ -5222,6 +5222,9 @@
     } elsif ($archive =~ m/.tar.lzo$/) {
        $format = 'tar' if !$format;
        $comp = 'lzop';
+    } elsif ($archive =~ m/.tar.zst$/) {
+       $format = 'tar' if !$format;
+       $comp = 'zstd';
     } elsif ($archive =~ m/\.vma$/) {
        $format = 'vma' if !$format;
     } elsif ($archive =~ m/\.vma\.gz$/) {
@@ -5230,6 +5233,9 @@
     } elsif ($archive =~ m/\.vma\.lzo$/) {
        $format = 'vma' if !$format;
        $comp = 'lzop';
+    } elsif ($archive =~ m/\.vma\.zst$/) {
+       $format = 'vma' if !$format;
+       $comp = 'zstd';
     } else {
        $format = 'vma' if !$format; # default
     }
@@ -5472,6 +5478,8 @@
            $uncomp = "zcat $qarchive|";
        } elsif ($comp eq 'lzop') {
            $uncomp = "lzop -d -c $qarchive|";
+       } elsif ($comp eq 'zstd') {
+           $uncomp = "zstd -d -c -q $qarchive|";
        } else {
            die "unknown compression method '$comp'\n";
        }
@@ -5746,10 +5754,17 @@
     $tocmd .= ' --prealloc' if $opts->{prealloc};
     $tocmd .= ' --info' if $opts->{info};
 
-    # tar option "xf" does not autodetect compression when read from STDIN,
-    # so we pipe to zcat
-    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
-       PVE::Tools::shellquote("--to-command=$tocmd");
+    my $cmd;
+    if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/ || $archive =~ m/.tar.lzo$/) {
+       # tar option "xf" does not autodetect compression when read from STDIN,
+       # so we pipe to zcat
+       $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
+           PVE::Tools::shellquote("--to-command=$tocmd");
+    } elsif ($archive =~ m/.tar.zst$/) {
+       $cmd = "zcat -f|tar -I zstd -xf " . PVE::Tools::shellquote($archive) . " " .
+           PVE::Tools::shellquote("--to-command=$tocmd");
+    }
+    warn "#################### $cmd\n";
 
     my $tmpdir = "/var/tmp/vzdumptmp$$";
     mkpath $tmpdir;
@@ -6483,7 +6498,7 @@
     my $res = [];
     foreach my $id (keys %$data) {
        foreach my $item (@{$data->{$id}}) {
-           next if $item->{format} !~ m/^vma\.(gz|lzo)$/;
+           next if $item->{format} !~ m/^vma\.(gz|lzo|zst)$/;
            push @$res, $item->{volid} if defined($item->{volid});
        }
     }
--- /usr/share/perl5/PVE.org/Storage.pm 2017-10-17 15:02:17.000000000 +0200
+++ /usr/share/perl5/PVE/Storage.pm     2017-11-05 16:25:18.312236261 +0100
@@ -492,7 +492,7 @@
        } elsif ($path =~ m!^$privatedir/(\d+)$!) {
            my $vmid = $1;
            return ('rootdir', "$sid:rootdir/$vmid");
-       } elsif ($path =~ m!^$backupdir/([^/]+\.(tar|tar\.gz|tar\.lzo|tgz|vma|vma\.gz|vma\.lzo))$!) {
+       } elsif ($path =~ m!^$backupdir/([^/]+\.(tar|tar\.gz|tar\.lzo|tgz|vma|vma\.gz|vma\.lzo|tar\.zst|vma\.zst))$!) {
            my $name = $1;
            return ('iso', "$sid:backup/$name");
        }
@@ -778,7 +778,7 @@
                    $info = { volid => "$sid:vztmpl/$1", format => "t$2" };
 
                } elsif ($tt eq 'backup') {
-                   next if $fn !~ m!/([^/]+\.(tar|tar\.gz|tar\.lzo|tgz|vma|vma\.gz|vma\.lzo))$!;
+                   next if $fn !~ m!/([^/]+\.(tar|tar\.gz|tar\.lzo|tar\.zst|tgz|vma|vma\.gz|vma\.lzo|vma\.zst))$!;
 
                    $info = { volid => "$sid:backup/$1", format => $2 };
                }
@@ -1295,8 +1295,14 @@
 
     die "ERROR: file '$archive' does not exist\n" if ! -f $archive;
 
-    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive) ||
-       die "unable to open file '$archive'\n";
+    my ($pid, $fh);
+    if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/ || $archive =~ m/.tar.lzo$/) {
+       $pid = open($fh, '-|', 'tar', 'tf', $archive) ||
+          die "unable to open file '$archive'\n";
+    } elsif ($archive =~ m/.tar.zst$/) {
+       $pid = open($fh, '-|', "tar -I zstd -tf '$archive'") ||
+          die "unable to open file '$archive'\n";
+    }
 
     my $file;
     while (defined($file = <$fh>)) {
@@ -1319,7 +1325,11 @@
        $raw .= "$output\n";
     };
 
-    PVE::Tools::run_command(['tar', '-xpOf', $archive, $file, '--occurrence'], outfunc => $out);
+    if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/ || $archive =~ m/.tar.lzo$/) {
+       PVE::Tools::run_command(['tar', '-xpOf', $archive, $file, '--occurrence'], outfunc => $out);
+    } elsif ($archive =~ m/.tar.zst$/) {
+       PVE::Tools::run_command(['tar', '-I zstd', '-xpOf', $archive, $file, '--occurrence'], outfunc => $out);
+    }
 
     return wantarray ? ($raw, $file) : $raw;
 }
@@ -1341,6 +1351,8 @@
            $uncomp = ["zcat", $archive];
        } elsif ($comp eq 'lzo') {
            $uncomp = ["lzop", "-d", "-c", $archive];
+       } elsif ($comp eq 'zst') {
+           $uncomp = ["zstd", "-d", $archive];
        } else {
            die "unknown compression method '$comp'\n";
        }
@@ -1352,7 +1364,7 @@
        my $errstring;
        my $err = sub {
            my $output = shift;
-           if ($output =~ m/lzop: Broken pipe: <stdout>/ || $output =~ m/gzip: stdout: Broken pipe/) {
+           if ($output =~ m/lzop: Broken pipe: <stdout>/ || $output =~ m/gzip: stdout: Broken pipe/ || $output =~ m/zstd: stdout: Broken pipe/) {
                $broken_pipe = 1;
            } elsif (!defined ($errstring) && $output !~ m/^\s*$/) {
                $errstring = "Failed to extract config from VMA archive: $output\n";
@@ -1386,9 +1398,9 @@
 
     my $archive = abs_filesystem_path($cfg, $volid);
 
-    if ($volid =~ /vzdump-(lxc|openvz)-\d+-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|(tar(\.(gz|lzo))?))$/) {
+    if ($volid =~ /vzdump-(lxc|openvz)-\d+-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|(tar(\.(gz|lzo|zst))?))$/) {
        return extract_vzdump_config_tar($archive, qr!^(\./etc/vzdump/(pct|vps)\.conf)$!);
-    } elsif ($volid =~ /vzdump-qemu-\d+-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|((tar|vma)(\.(gz|lzo))?))$/) {
+    } elsif ($volid =~ /vzdump-qemu-\d+-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|((tar|vma)(\.(gz|lzo|zst))?))$/) {
        my $format;
        my $comp;
        if ($7 eq 'tgz') {
--- /usr/share/perl5/PVE.org/VZDump.pm  2017-10-25 11:15:46.000000000 +0200
+++ /usr/share/perl5/PVE/VZDump.pm      2017-11-05 12:45:04.945033895 +0100
@@ -56,7 +56,7 @@
        type => 'string',
        description => "Compress dump file.",
        optional => 1,
-       enum => ['0', '1', 'gzip', 'lzo'],
+       enum => ['0', '1', 'gzip', 'lzo', 'zstd'],
        default => '0',
     },
     pigz=> {
@@ -66,6 +66,13 @@
        optional => 1,
        default => 0,
     },
+    zstd=> {
+       type => "integer",
+       description => "Use zstd instead of lzo/gzip/pigz when N>0.".
+           " N=1 uses half of cores, N>1 uses N as thread count.",
+       optional => 1,
+       default => 0,
+    },
     quiet => {
        type => 'boolean',
        description => "Be quiet.",
@@ -716,6 +723,15 @@
        } else {
            return ('gzip', 'gz');
        }
+    } elsif ($opt_compress eq 'zstd') {
+       if ($opts->{zstd} > 0) {
+           my $zstd_threads = $opts->{zstd};
+           if ($zstd_threads == 1) {
+               my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
+               $zstd_threads = int(($cpuinfo->{cpus} + 1)/2);
+           }
+           return ("zstd -T${zstd_threads}", 'zst');
+       }
     } else {
        die "internal error - unknown compression option '$opt_compress'";
     }
@@ -727,7 +743,7 @@
     my $bklist = [];
     foreach my $fn (<$dir/${bkname}-*>) {
        next if $exclude_fn && $fn eq $exclude_fn;
-       if ($fn =~ m!/(${bkname}-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|((tar|vma)(\.(gz|lzo))?)))$!) {
+       if ($fn =~ m!/(${bkname}-(\d{4})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})\.(tgz|((tar|vma)(\.(gz|lzo|zst))?)))$!) {
            $fn = "$dir/$1"; # untaint
            my $t = timelocal ($7, $6, $5, $4, $3 - 1, $2 - 1900);
            push @$bklist, [$fn, $t];
@@ -985,7 +1001,7 @@
                debugmsg ('info', "delete old backup '$d->[0]'", $logfd);
                unlink $d->[0];
                my $logfn = $d->[0];
-               $logfn =~ s/\.(tgz|((tar|vma)(\.(gz|lzo))?))$/\.log/;
+               $logfn =~ s/\.(tgz|((tar|vma)(\.(gz|lzo|zst))?))$/\.log/;
                unlink $logfn;
            }
        }
--- /usr/share/perl5/PVE/LXC.org/Create.pm      2017-10-17 15:11:19.000000000 +0200
+++ /usr/share/perl5/PVE/LXC/Create.pm  2017-11-05 16:43:07.635271185 +0100
@@ -73,9 +73,16 @@
        $archive_fh->fcntl(Fcntl::F_SETFD(), $flags & ~(Fcntl::FD_CLOEXEC()));
     }
 
-    my $cmd = [@$userns_cmd, 'tar', 'xpf', $tar_input_file, '--totals',
+    my $cmd;
+    if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/ || $archive =~ m/.tar.lzo$/) {
+       $cmd = [@$userns_cmd, 'tar', 'xpf', $tar_input_file, '--totals',
                @PVE::Storage::Plugin::COMMON_TAR_FLAGS,
                '-C', $rootdir];
+    } elsif ($archive =~ m/.tar.zst$/) {
+       $cmd = [@$userns_cmd, 'tar', '-I zstd', '-xpf', $tar_input_file, '--totals',
+               @PVE::Storage::Plugin::COMMON_TAR_FLAGS,
+               '-C', $rootdir];
+    }
 
     # skip-old-files doesn't have anything to do with time (old/new), but is
     # simply -k (annoyingly also called --keep-old-files) without the 'treat
--- /usr/share/perl5/PVE/Storage.org/Plugin.pm  2017-10-17 15:02:17.000000000 +0200
+++ /usr/share/perl5/PVE/Storage/Plugin.pm      2017-11-04 17:02:13.000000000 +0100
@@ -388,7 +388,7 @@
        return ('vztmpl', $1);
     } elsif ($volname =~ m!^rootdir/(\d+)$!) {
        return ('rootdir', $1, $1);
-    } elsif ($volname =~ m!^backup/([^/]+(\.(tar|tar\.gz|tar\.lzo|tgz|vma|vma\.gz|vma\.lzo)))$!) {
+    } elsif ($volname =~ m!^backup/([^/]+(\.(tar|tar\.gz|tar\.lzo|tar\.zst|tgz|vma|vma\.gz|vma\.lzo|vma\.zst)))$!) {
        my $fn = $1;
        if ($fn =~ m/^vzdump-(openvz|lxc|qemu)-(\d+)-.+/) {
            return ('backup', $fn, $2);




More information about the pve-devel mailing list