[pve-devel] [PATCH ha-manager] release LRM agent lock on graceful shutdown

Thomas Lamprecht t.lamprecht at proxmox.com
Fri Jan 15 08:27:35 CET 2016


Release the agent lock when we shut down the LRM and have stopped all
services.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Env.pm                  |  9 +++++++++
 src/PVE/HA/Env/PVE2.pm             | 11 +++++++++++
 src/PVE/HA/LRM.pm                  |  3 +++
 src/PVE/HA/Sim/Env.pm              | 13 +++++++++++++
 src/test/test-reboot1/log.expect   |  1 +
 src/test/test-shutdown1/log.expect | 18 +++++++++---------
 src/test/test-shutdown2/log.expect | 18 +++++++++---------
 src/test/test-shutdown3/log.expect | 18 +++++++++---------
 src/test/test-shutdown4/log.expect | 18 +++++++++---------
 9 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index 74ce545..82ba497 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -136,6 +136,15 @@ sub get_ha_agent_lock {
     return $self->{plug}->get_ha_agent_lock($node);
 }
 
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+    my ($self) = @_;
+
+    return $self->{plug}->release_ha_agent_lock();
+}
+
 # return true when cluster is quorate
 sub quorate {
     my ($self) = @_;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 1920a09..8a36a0b 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -277,6 +277,17 @@ sub get_ha_agent_lock {
     return $self->get_pve_lock("ha_agent_${node}_lock");
 }
 
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+    my ($self) = @_;
+
+    my $node = $self->nodename();
+
+    return rmdir("$lockdir/ha_agent_${node}_lock");
+}
+
 sub quorate {
     my ($self) = @_;
 
diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 8092818..60ee448 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -295,6 +295,9 @@ sub do_one_iteration {
 			}
 
 			$shutdown = 1;
+
+			# shutdown with all services stopped thus release the lock
+			$haenv->release_ha_agent_lock();
 		    }
 		}
 	    } else {
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index 56cc7f8..20d0de5 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -239,6 +239,19 @@ sub get_ha_agent_lock {
     return $self->sim_get_lock($lck);
 }
 
+
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+    my ($self) = @_;
+
+    my $node = $self->nodename();
+
+    my $lock = $self->get_ha_agent_lock_name($node);
+    return $self->sim_get_lock($lock, 1);
+}
+
 # return true when cluster is quorate
 sub quorate {
     my ($self) = @_;
diff --git a/src/test/test-reboot1/log.expect b/src/test/test-reboot1/log.expect
index 802b57f..12c3fe5 100644
--- a/src/test/test-reboot1/log.expect
+++ b/src/test/test-reboot1/log.expect
@@ -31,6 +31,7 @@ info    126       reboot: execute power node3 on
 info    125    node3/crm: status change startup => wait_for_quorum
 info    126    node3/lrm: status change startup => wait_for_agent_lock
 info    144    node3/crm: status change wait_for_quorum => slave
+info    145    node3/lrm: got lock 'ha_agent_node3_lock'
 info    145    node3/lrm: status change wait_for_agent_lock => active
 info    145    node3/lrm: starting service vm:103
 info    145    node3/lrm: service status vm:103 started
diff --git a/src/test/test-shutdown1/log.expect b/src/test/test-shutdown1/log.expect
index 52a684f..5c063ab 100644
--- a/src/test/test-shutdown1/log.expect
+++ b/src/test/test-shutdown1/log.expect
@@ -30,13 +30,13 @@ info    125    node3/crm: killed by poweroff
 info    140    node1/crm: node 'node3': state changed from 'online' => 'unknown'
 info    180    node1/crm: service 'vm:103': state changed from 'started' to 'fence' 
 info    180    node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info    260    node1/crm: got lock 'ha_agent_node3_lock'
-info    260    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info    260    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info    260    node1/crm: service 'vm:103': state changed from 'fence' to 'stopped' 
-info    280    node1/crm: service 'vm:103': state changed from 'stopped' to 'started'  (node = node1)
-info    281    node1/lrm: got lock 'ha_agent_node1_lock'
-info    281    node1/lrm: status change wait_for_agent_lock => active
-info    281    node1/lrm: starting service vm:103
-info    281    node1/lrm: service status vm:103 started
+info    180    node1/crm: got lock 'ha_agent_node3_lock'
+info    180    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info    180    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info    180    node1/crm: service 'vm:103': state changed from 'fence' to 'stopped' 
+info    180    node1/crm: service 'vm:103': state changed from 'stopped' to 'started'  (node = node1)
+info    181    node1/lrm: got lock 'ha_agent_node1_lock'
+info    181    node1/lrm: status change wait_for_agent_lock => active
+info    181    node1/lrm: starting service vm:103
+info    181    node1/lrm: service status vm:103 started
 info    720     hardware: exit simulation - done
diff --git a/src/test/test-shutdown2/log.expect b/src/test/test-shutdown2/log.expect
index c9834a1..b367b64 100644
--- a/src/test/test-shutdown2/log.expect
+++ b/src/test/test-shutdown2/log.expect
@@ -30,15 +30,15 @@ info    125    node3/crm: killed by poweroff
 info    140    node1/crm: node 'node3': state changed from 'online' => 'unknown'
 info    180    node1/crm: service 'vm:103': state changed from 'started' to 'fence' 
 info    180    node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info    260    node1/crm: got lock 'ha_agent_node3_lock'
-info    260    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info    260    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info    260    node1/crm: service 'vm:103': state changed from 'fence' to 'stopped' 
-info    280    node1/crm: service 'vm:103': state changed from 'stopped' to 'started'  (node = node1)
-info    281    node1/lrm: got lock 'ha_agent_node1_lock'
-info    281    node1/lrm: status change wait_for_agent_lock => active
-info    281    node1/lrm: starting service vm:103
-info    281    node1/lrm: service status vm:103 started
+info    180    node1/crm: got lock 'ha_agent_node3_lock'
+info    180    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info    180    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info    180    node1/crm: service 'vm:103': state changed from 'fence' to 'stopped' 
+info    180    node1/crm: service 'vm:103': state changed from 'stopped' to 'started'  (node = node1)
+info    181    node1/lrm: got lock 'ha_agent_node1_lock'
+info    181    node1/lrm: status change wait_for_agent_lock => active
+info    181    node1/lrm: starting service vm:103
+info    181    node1/lrm: service status vm:103 started
 info    500      cmdlist: execute power node3 on
 info    500    node3/crm: status change startup => wait_for_quorum
 info    500    node3/lrm: status change startup => wait_for_agent_lock
diff --git a/src/test/test-shutdown3/log.expect b/src/test/test-shutdown3/log.expect
index 7cbd1bc..559cb4f 100644
--- a/src/test/test-shutdown3/log.expect
+++ b/src/test/test-shutdown3/log.expect
@@ -30,15 +30,15 @@ info    125    node3/crm: killed by poweroff
 info    140    node1/crm: node 'node3': state changed from 'online' => 'unknown'
 info    180    node1/crm: service 'ct:103': state changed from 'started' to 'fence' 
 info    180    node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info    260    node1/crm: got lock 'ha_agent_node3_lock'
-info    260    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info    260    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info    260    node1/crm: service 'ct:103': state changed from 'fence' to 'stopped' 
-info    280    node1/crm: service 'ct:103': state changed from 'stopped' to 'started'  (node = node1)
-info    281    node1/lrm: got lock 'ha_agent_node1_lock'
-info    281    node1/lrm: status change wait_for_agent_lock => active
-info    281    node1/lrm: starting service ct:103
-info    281    node1/lrm: service status ct:103 started
+info    180    node1/crm: got lock 'ha_agent_node3_lock'
+info    180    node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info    180    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info    180    node1/crm: service 'ct:103': state changed from 'fence' to 'stopped' 
+info    180    node1/crm: service 'ct:103': state changed from 'stopped' to 'started'  (node = node1)
+info    181    node1/lrm: got lock 'ha_agent_node1_lock'
+info    181    node1/lrm: status change wait_for_agent_lock => active
+info    181    node1/lrm: starting service ct:103
+info    181    node1/lrm: service status ct:103 started
 info    500      cmdlist: execute power node3 on
 info    500    node3/crm: status change startup => wait_for_quorum
 info    500    node3/lrm: status change startup => wait_for_agent_lock
diff --git a/src/test/test-shutdown4/log.expect b/src/test/test-shutdown4/log.expect
index 15711ab..6838632 100644
--- a/src/test/test-shutdown4/log.expect
+++ b/src/test/test-shutdown4/log.expect
@@ -33,13 +33,13 @@ info    122    node2/crm: status change slave => master
 info    122    node2/crm: node 'node1': state changed from 'online' => 'unknown'
 info    200    node2/crm: service 'vm:100': state changed from 'started' to 'fence' 
 info    200    node2/crm: node 'node1': state changed from 'unknown' => 'fence'
-info    260    node2/crm: got lock 'ha_agent_node1_lock'
-info    260    node2/crm: fencing: acknowleged - got agent lock for node 'node1'
-info    260    node2/crm: node 'node1': state changed from 'fence' => 'unknown'
-info    260    node2/crm: service 'vm:100': state changed from 'fence' to 'stopped' 
-info    280    node2/crm: service 'vm:100': state changed from 'stopped' to 'started'  (node = node2)
-info    281    node2/lrm: got lock 'ha_agent_node2_lock'
-info    281    node2/lrm: status change wait_for_agent_lock => active
-info    281    node2/lrm: starting service vm:100
-info    281    node2/lrm: service status vm:100 started
+info    200    node2/crm: got lock 'ha_agent_node1_lock'
+info    200    node2/crm: fencing: acknowleged - got agent lock for node 'node1'
+info    200    node2/crm: node 'node1': state changed from 'fence' => 'unknown'
+info    200    node2/crm: service 'vm:100': state changed from 'fence' to 'stopped' 
+info    200    node2/crm: service 'vm:100': state changed from 'stopped' to 'started'  (node = node2)
+info    201    node2/lrm: got lock 'ha_agent_node2_lock'
+info    201    node2/lrm: status change wait_for_agent_lock => active
+info    201    node2/lrm: starting service vm:100
+info    201    node2/lrm: service status vm:100 started
 info    720     hardware: exit simulation - done
-- 
2.1.4





More information about the pve-devel mailing list