[pve-devel] [PATCH pve-manager 05/18] PVE::Replication - use new calendar events instead of interval

Fabian Grünbichler f.gruenbichler at proxmox.com
Mon May 29 12:13:38 CEST 2017


On Mon, May 29, 2017 at 11:59:35AM +0200, Dietmar Maurer wrote:
> comment inline
> > 
> > On Tue, May 23, 2017 at 09:08:44AM +0200, Dietmar Maurer wrote:
> > > And implement a retry algorithm after failure:
> > > 
> > >   $next_sync = $state->{last_try} + 5*60*$fail_count;
> > > 
> > > and limit to 3 failures.
> > > 
> > > Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
> > > ---
> > >  PVE/API2/Replication.pm |  2 +-
> > >  PVE/CLI/pvesr.pm        | 28 ++++++++++++++++++++--------
> > >  PVE/Replication.pm      | 35 +++++++++++++++++++++--------------
> > >  3 files changed, 42 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/PVE/API2/Replication.pm b/PVE/API2/Replication.pm
> > > index c400a93c..977d3ec1 100644
> > > --- a/PVE/API2/Replication.pm
> > > +++ b/PVE/API2/Replication.pm
> > > @@ -82,7 +82,7 @@ __PACKAGE__->register_method ({
> > >  	    my $vmid = $d->{guest};
> > >  	    next if !$rpcenv->check($authuser, "/vms/$vmid", [ 'VM.Audit' ]);
> > >  	    $d->{id} = $id;
> > > -	    foreach my $k (qw(last_sync fail_count error duration)) {
> > > +	    foreach my $k (qw(last_sync last_try fail_count error duration)) {
> > >  		$d->{$k} = $state->{$k} if defined($state->{$k});
> > >  	    }
> > >  	    if ($state->{pid} && $state->{ptime}) {
> > > diff --git a/PVE/CLI/pvesr.pm b/PVE/CLI/pvesr.pm
> > > index 38116f7b..115bc2c1 100644
> > > --- a/PVE/CLI/pvesr.pm
> > > +++ b/PVE/CLI/pvesr.pm
> > > @@ -97,14 +97,14 @@ my $print_job_list = sub {
> > >  
> > >      my $format = "%-20s %10s %-20s %10s %5s %8s\n";
> > >  
> > > -    printf($format, "JobID", "GuestID", "Target", "Interval", "Rate", "Enabled");
> > > +    printf($format, "JobID", "GuestID", "Target", "Schedule", "Rate", "Enabled");
> > >  
> > >      foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
> > >  	my $plugin = PVE::ReplicationConfig->lookup($job->{type});
> > >  	my $tid = $plugin->get_unique_target_id($job);
> > >  
> > >  	printf($format, $job->{id}, $job->{guest}, $tid,
> > > -	       defined($job->{interval}) ? $job->{interval} : '-',
> > > +	       defined($job->{schedule}) ? $job->{schedule} : '*/15',
> > >  	       defined($job->{rate}) ? $job->{rate} : '-',
> > >  	       $job->{disable} ? 'no' : 'yes'
> > >  	    );
> > > @@ -114,21 +114,33 @@ my $print_job_list = sub {
> > >  my $print_job_status = sub {
> > >      my ($list) = @_;
> > >  
> > > -    my $format = "%-20s %10s %-20s %20s %10s %10s %s\n";
> > > +    my $format = "%-20s %10s %-20s %20s %20s %10s %10s %s\n";
> > >  
> > > -    printf($format, "JobID", "GuestID", "Target", "LastSync", "Duration", "FailCount", "State");
> > > +    printf($format, "JobID", "GuestID", "Target", "LastSync", "NextSync", "Duration", "FailCount", "State");
> > >  
> > >      foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
> > >  	my $plugin = PVE::ReplicationConfig->lookup($job->{type});
> > >  	my $tid = $plugin->get_unique_target_id($job);
> > >  
> > > -	my $timestr = $job->{last_sync} ?
> > > -	    strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync})) : '-';
> > > +	my $timestr = '-';
> > > +	if ($job->{last_sync}) {
> > > +	    $timestr = strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync}));
> > > +	}
> > > +
> > > +	my $nextstr = '-';
> > > +	if (my $next = $job->{next_sync}) {
> > > +	    my $now = time();
> > > +	    if ($next > $now) {
> > > +		$nextstr = strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{next_sync}));
> > > +	    } else {
> > > +		$nextstr = 'now';
> > > +	    }
> > > +	}
> > >  
> > >  	my $state = $job->{pid} ? "SYNCING" : $job->{error} // 'OK';
> > >  
> > >  	printf($format, $job->{id}, $job->{guest}, $tid,
> > > -	       $timestr, $job->{duration} // '-',
> > > +	       $timestr, $nextstr, $job->{duration} // '-',
> > >  	       $job->{fail_count}, $state);
> > >      }
> > >  };
> > > @@ -136,7 +148,7 @@ my $print_job_status = sub {
> > >  our $cmddef = {
> > >      status => [ 'PVE::API2::Replication', 'status', [], { node => $nodename }, $print_job_status ],
> > >  
> > > -    jobs => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, $print_job_list ],
> > > +    list => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, $print_job_list ],
> > >      read => [ 'PVE::API2::ReplicationConfig', 'read' , ['id'], {},
> > > 	     sub { my $res = shift; print to_json($res, { pretty => 1, canonical => 1}); }],
> > >      update => [ 'PVE::API2::ReplicationConfig', 'update' , ['id'], {} ],
> > > diff --git a/PVE/Replication.pm b/PVE/Replication.pm
> > > index ff4bbeb4..d878b44a 100644
> > > --- a/PVE/Replication.pm
> > > +++ b/PVE/Replication.pm
> > > @@ -9,6 +9,7 @@ use Time::HiRes qw(gettimeofday tv_interval);
> > >  use PVE::INotify;
> > >  use PVE::ProcFSTools;
> > >  use PVE::Tools;
> > > +use PVE::CalendarEvent;
> > >  use PVE::Cluster;
> > >  use PVE::QemuConfig;
> > >  use PVE::QemuServer;
> > > @@ -46,7 +47,8 @@ my $get_job_state = sub {
> > >      $state = {} if !$state;
> > >  
> > >      $state->{last_iteration} //= 0;
> > > -    $state->{last_sync} //= 0;
> > > +    $state->{last_try} //= 0; # last sync start time
> > > +    $state->{last_sync} //= 0; # last successful sync start time
> > >      $state->{fail_count} //= 0;
> > >  
> > >      return $state;
> > > @@ -93,10 +95,23 @@ sub job_status {
> > >  
> > >  	next if $jobcfg->{disable};
> > >  
> > > -	$jobcfg->{state} = $get_job_state->($stateobj, $jobcfg);
> > > +	my $state = $get_job_state->($stateobj, $jobcfg);
> > > +	$jobcfg->{state} = $state;
> > >  	$jobcfg->{id} = $jobid;
> > >  	$jobcfg->{vmtype} = $vms->{ids}->{$vmid}->{type};
> > >  
> > > +	my $next_sync = 0;
> > > +	if (my $fail_count = $state->{fail_count}) {
> > > +	    if ($fail_count < 3) {
> > > +		$next_sync = $state->{last_try} + 5*60*$fail_count;
> > 
> > both the 3 and the 5*60 constants seem awfully arbitrary.. maybe we
> > could make them configurable? e.g., for a sync job that syncs once or
> > twice a day, a window of 0+5+10+15=30 minutes of not being able to sync
> > should probably not put that job into an error state?
> 
> yes, but I guess we can add such things later.
>  

sounds good to me
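
for the record, a rough sketch of what I had in mind - 'retry_interval'
and 'max_failures' are made-up option names that don't exist in the job
config schema (yet), defaulting to the values hard-coded in this patch:

    # sketch: configurable retry back-off instead of hard-coded 5*60 / 3
    my $calc_next_try = sub {
	my ($jobcfg, $state) = @_;

	# hypothetical job options, falling back to the patch defaults
	my $retry_interval = $jobcfg->{retry_interval} // 5*60;
	my $max_failures = $jobcfg->{max_failures} // 3;

	my $fail_count = $state->{fail_count} // 0;

	# give up - the job stays in its error state
	return undef if $fail_count >= $max_failures;

	# linear back-off: retry_interval, 2*retry_interval, ...
	return $state->{last_try} + $retry_interval * $fail_count;
    };

the defaults keep the behaviour from this patch, and jobs that only
sync once or twice a day could simply bump 'max_failures'.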

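and for anyone following along, my understanding is that the regular
(non-failure) case just feeds the schedule into the new calendar event
code - a sketch assuming the parse_calendar_event/compute_next_event
helpers from pve-common's PVE::CalendarEvent, not copied from the
actual series:

    use PVE::CalendarEvent;

    # same fallback as $print_job_list uses for display
    my $schedule = $jobcfg->{schedule} // '*/15';

    my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule);
    # next scheduled run after the last (attempted) sync
    my $next_sync = PVE::CalendarEvent::compute_next_event($calspec, $state->{last_try});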