[pve-devel] [RFC ha-manager v2 2/3] implement 'stopped' state

Thomas Lamprecht t.lamprecht at proxmox.com
Tue Nov 15 11:13:56 CET 2016


In the 'stopped' state we do not start a service, but we still recover
it to another node when its current node gets fenced.
This can be practical for templates or cold standby guests.

With this we remove some unnecessary state transitions to 'started'
in cases where we would go directly to 'request_stop' anyway.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Manager.pm                              | 24 +++++++++++++++++-----
 src/PVE/HA/Sim/TestHardware.pm                     |  4 ++--
 src/test/test-basic1/log.expect                    |  1 -
 .../test-relocate-policy-default-group/log.expect  |  1 -
 src/test/test-relocate-policy1/log.expect          |  1 -
 src/test/test-relocate-to-inactive-node/log.expect |  1 -
 src/test/test-resource-failure1/log.expect         |  1 -
 src/test/test-resource-failure2/log.expect         |  1 -
 src/test/test-resource-failure5/log.expect         |  1 -
 src/test/test-resource-failure6/log.expect         |  1 -
 10 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index e58fc0b..9d333fb 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -290,7 +290,8 @@ my $recover_fenced_service = sub {
 
 	# $sd *is normally read-only*, fencing is the exception
 	$cd->{node} = $sd->{node} = $recovery_node;
-	&$change_service_state($self, $sid, 'started', node => $recovery_node);
+	my $new_state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+	&$change_service_state($self, $sid, $new_state, node => $recovery_node);
     } else {
 	# no possible node found, cannot recover
 	$haenv->log('err', "recovering service '$sid' from fenced node " .
@@ -379,9 +380,11 @@ sub manage {
     # add new service
     foreach my $sid (sort keys %$sc) {
 	next if $ss->{$sid}; # already there
-	$haenv->log('info', "adding new service '$sid' on node '$sc->{$sid}->{node}'");
+	my $cd = $sc->{$sid};
+	$haenv->log('info', "adding new service '$sid' on node '$cd->{node}'");
 	# assume we are running to avoid relocate running service at add
-	$ss->{$sid} = { state => 'started', node => $sc->{$sid}->{node},
+	my $state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+	$ss->{$sid} = { state => $state, node => $cd->{node},
 			uid => compute_new_uuid('started') };
     }
 
@@ -432,7 +435,8 @@ sub manage {
 
 		my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
 		# unfreeze
-		&$change_service_state($self, $sid, 'started') 
+		my $state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+		&$change_service_state($self, $sid, $state)
 		    if $lrm_mode && $lrm_mode eq 'active';
 
 	    } elsif ($last_state eq 'error') {
@@ -579,6 +583,16 @@ sub next_state_stopped {
 	return;
     }
 
+    if ($ns->node_is_offline_delayed($sd->{node})) {
+	&$change_service_state($self, $sid, 'fence');
+	return;
+    }
+
+    if ($cd->{state} eq 'stopped') {
+	# almost the same as 'disabled' state but the service will also get recovered
+	return;
+    }
+
     if ($cd->{state} eq 'enabled') {
 	# simply mark it started, if it's on the wrong node
 	# next_state_started will fix that for us
@@ -613,7 +627,7 @@ sub next_state_started {
 	return;
     }
 	
-    if ($cd->{state} eq 'disabled') {
+    if ($cd->{state} eq 'disabled' || $cd->{state} eq 'stopped') {
 	&$change_service_state($self, $sid, 'request_stop');
 	return;
     }
diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index 0c7d6cd..c6ad238 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -87,7 +87,7 @@ sub log {
 # reboot <node>
 # shutdown <node>
 # restart-lrm <node>
-# service <sid> <enabled|disabled>
+# service <sid> <enabled|disabled|stopped>
 # service <sid> <migrate|relocate> <target>
 # service <sid> lock/unlock [lockname]
 
@@ -175,7 +175,7 @@ sub sim_hardware_cmd {
 	    }
 
 	} elsif ($cmd eq 'service') {
-	    if ($action eq 'enabled' || $action eq 'disabled') {
+	    if ($action eq 'enabled' || $action eq 'disabled' || $action eq 'stopped') {
 
 		$self->set_service_state($sid, $action);
 
diff --git a/src/test/test-basic1/log.expect b/src/test/test-basic1/log.expect
index c24b41e..0d92240 100644
--- a/src/test/test-basic1/log.expect
+++ b/src/test/test-basic1/log.expect
@@ -16,7 +16,6 @@ info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'vm:101' on node 'node1'
 info     20    node1/crm: adding new service 'vm:102' on node 'node2'
 info     20    node1/crm: adding new service 'vm:103' on node 'node3'
-info     20    node1/crm: service 'vm:102': state changed from 'started' to 'request_stop'
 info     21    node1/lrm: got lock 'ha_agent_node1_lock'
 info     21    node1/lrm: status change wait_for_agent_lock => active
 info     21    node1/lrm: starting service vm:101
diff --git a/src/test/test-relocate-policy-default-group/log.expect b/src/test/test-relocate-policy-default-group/log.expect
index a7dd644..694bef6 100644
--- a/src/test/test-relocate-policy-default-group/log.expect
+++ b/src/test/test-relocate-policy-default-group/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'fa:130' on node 'node2'
-info     20    node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     23    node2/lrm: got lock 'ha_agent_node2_lock'
 info     23    node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-relocate-policy1/log.expect b/src/test/test-relocate-policy1/log.expect
index 0383604..834284b 100644
--- a/src/test/test-relocate-policy1/log.expect
+++ b/src/test/test-relocate-policy1/log.expect
@@ -11,7 +11,6 @@ info     20    node3/lrm: status change startup => wait_for_agent_lock
 info     20    node1/crm: got lock 'ha_manager_lock'
 info     20    node1/crm: status change wait_for_quorum => master
 info     20    node1/crm: adding new service 'fa:130' on node 'node3'
-info     20    node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
 info     21    node1/lrm: got lock 'ha_agent_node1_lock'
 info     21    node1/lrm: status change wait_for_agent_lock => active
 info     21    node1/lrm: starting service vm:100
diff --git a/src/test/test-relocate-to-inactive-node/log.expect b/src/test/test-relocate-to-inactive-node/log.expect
index c5cfffb..62bc555 100644
--- a/src/test/test-relocate-to-inactive-node/log.expect
+++ b/src/test/test-relocate-to-inactive-node/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'vm:103' on node 'node3'
-info     20    node1/crm: service 'vm:103': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     24    node3/crm: status change wait_for_quorum => slave
 info     25    node3/lrm: got lock 'ha_agent_node3_lock'
diff --git a/src/test/test-resource-failure1/log.expect b/src/test/test-resource-failure1/log.expect
index c3170fc..8439778 100644
--- a/src/test/test-resource-failure1/log.expect
+++ b/src/test/test-resource-failure1/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'fa:110' on node 'node2'
-info     20    node1/crm: service 'fa:110': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     23    node2/lrm: got lock 'ha_agent_node2_lock'
 info     23    node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure2/log.expect b/src/test/test-resource-failure2/log.expect
index 278e7aa..66ddc04 100644
--- a/src/test/test-resource-failure2/log.expect
+++ b/src/test/test-resource-failure2/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'fa:130' on node 'node2'
-info     20    node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     23    node2/lrm: got lock 'ha_agent_node2_lock'
 info     23    node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure5/log.expect b/src/test/test-resource-failure5/log.expect
index 807a237..4396691 100644
--- a/src/test/test-resource-failure5/log.expect
+++ b/src/test/test-resource-failure5/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'fa:130' on node 'node2'
-info     20    node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     23    node2/lrm: got lock 'ha_agent_node2_lock'
 info     23    node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure6/log.expect b/src/test/test-resource-failure6/log.expect
index 05a8bbd..5738b82 100644
--- a/src/test/test-resource-failure6/log.expect
+++ b/src/test/test-resource-failure6/log.expect
@@ -14,7 +14,6 @@ info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
 info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
 info     20    node1/crm: adding new service 'fa:130' on node 'node2'
-info     20    node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
 info     22    node2/crm: status change wait_for_quorum => slave
 info     23    node2/lrm: got lock 'ha_agent_node2_lock'
 info     23    node2/lrm: status change wait_for_agent_lock => active
-- 
2.1.4





More information about the pve-devel mailing list