From d47b26db3276eca10144f53612c0fb6a755b72d0 Mon Sep 17 00:00:00 2001 From: Eric Ren Date: May 20 2016 15:20:35 +0000 Subject: dlm_controld: output explicit info about stateful merging When three or more partitions merge, none of them may see enough clean nodes. In that case, DLM stays stuck forever until an administrator manually resets or restarts enough nodes to produce sufficient clean nodes. Therefore, output explicit information about the stateful merging state for higher-level code. Higher-level code can now use `dlm status -v` to get "stateful_merge_wait". If it equals "1", we know DLM is waiting for manual intervention, and higher-level code can then choose nodes to fence. DLM will continue to work once "clean nodes >= stateful merged nodes" becomes true. Signed-off-by: Eric Ren --- diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c index 356e80d..0d55027 100644 --- a/dlm_controld/daemon_cpg.c +++ b/dlm_controld/daemon_cpg.c @@ -118,6 +118,7 @@ static int zombie_count; static int fence_result_pid; static unsigned int fence_result_try; +static int stateful_merge_wait; /* cluster is stuck in waiting for manual intervention */ static void send_fence_result(int nodeid, int result, uint32_t flags, uint64_t walltime); static void send_fence_clear(int nodeid, int result, uint32_t flags, uint64_t walltime); @@ -847,10 +848,14 @@ static void daemon_fence_work(void) if ((clean_count >= merge_count) && !part_count && (low == our_nodeid)) kick_stateful_merge_members(); + if ((clean_count < merge_count) && !part_count) + stateful_merge_wait = 1; retry = 1; goto out; } + if (stateful_merge_wait) + stateful_merge_wait = 0; /* * startup fencing @@ -2382,7 +2387,8 @@ static int print_state_daemon(char *str) "fence_pid=%d " "fence_in_progress_unknown=%d " "zombie_count=%d " - "monotime=%llu ", + "monotime=%llu " + "stateful_merge_wait=%d ", daemon_member_count, daemon_joined_count, daemon_remove_count, @@ -2392,7 +2398,8 @@ static int print_state_daemon(char *str) 
daemon_fence_pid, fence_in_progress_unknown, zombie_count, - (unsigned long long)monotime()); + (unsigned long long)monotime(), + stateful_merge_wait); return strlen(str) + 1; }