[pve-devel] [RFC v3 pve-ha-manager 4/4] Adding error state behaviour

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Sep 2 17:52:33 CEST 2015


Previously there was no way for a service to leave the error state.
Now disabling the service manually moves it to the safe 'stopped' state.

A service should only be disabled and re-enabled after the cause of
the error has been found and fixed.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Env/PVE2.pm | 10 ++++++++++
 src/PVE/HA/Manager.pm  | 19 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 2df80ab..d508922 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -471,6 +471,16 @@ sub exec_resource_agent {
 
 	return 0;
 
+    } elsif ($cmd eq 'error') {
+
+
+	if($running) {
+	    $self->log("err", "service $sid is in an error state while running");
+	} else {
+	    $self->log("info", "service $sid is not running and in an error state");
+	}
+	return 0;
+
     }
 
     die "implement me (cmd '$cmd')";
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 44d9cc8..746c1da 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -349,7 +349,7 @@ sub manage {
 
 	    } elsif ($last_state eq 'error') {
 
-		# fixme: 
+		$self->next_state_error($sid, $cd, $sd, $lrm_res);
 
 	    } else {
 
@@ -573,4 +573,21 @@ sub next_state_started {
     $haenv->log('err', "service '$sid' - unknown state '$cd->{state}' in service configuration");
 }
 
+sub next_state_error {
+    my ($self, $sid, $cd, $sd, $lrm_res) = @_;
+    # Pick the follow-up state for service $sid while it is in state 'error'.
+    my $ns = $self->{ns};
+    # Service config ($cd) says 'disabled': switch the service to 'stopped'.
+    if ($cd->{state} eq 'disabled') {
+	&$change_service_state($self, $sid, 'stopped');
+	return;
+    }
+    # node_is_offline_delayed() reports the service's node offline: fence it.
+    if ($ns->node_is_offline_delayed($sd->{node}, $fence_delay)) {
+	&$change_service_state($self, $sid, 'fence');
+	return;
+    }
+    # Neither condition matched: no state change is requested here.
+}
+
 1;
-- 
2.1.4




More information about the pve-devel mailing list