From b7794b4580bad7ab12e8b280bdc02885eeb04c9f Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Apr 19 2006 08:01:11 +0000 Subject: Don't try to delete AUTODELETE barriers in timer context as we can't get the semaphore that protects the structures. bz#177577 --- diff --git a/cman-kernel/src/cnxman-private.h b/cman-kernel/src/cnxman-private.h index ef75852..1e28f21 100644 --- a/cman-kernel/src/cnxman-private.h +++ b/cman-kernel/src/cnxman-private.h @@ -340,7 +340,7 @@ struct cl_barrier { char name[MAX_BARRIER_NAME_LEN]; unsigned int flags; enum { BARRIER_STATE_WAITING, BARRIER_STATE_INACTIVE, - BARRIER_STATE_COMPLETE } state; + BARRIER_STATE_COMPLETE, BARRIER_STATE_DELETED } state; unsigned int expected_nodes; unsigned int registered_nodes; atomic_t got_nodes; diff --git a/cman-kernel/src/cnxman.c b/cman-kernel/src/cnxman.c index 2cbff0e..31770f9 100644 --- a/cman-kernel/src/cnxman.c +++ b/cman-kernel/src/cnxman.c @@ -50,6 +50,7 @@ static void post_close_oob(unsigned char port, int nodeid); static void process_barrier_msg(struct cl_barriermsg *msg, struct cluster_node *node); static struct cl_barrier *find_barrier(char *name); +static void tidy_barriers(void); static void node_shutdown(void); static void node_cleanup(void); static int send_or_queue_message(struct socket *sock, void *buf, int len, struct sockaddr_cl *caddr, @@ -160,6 +161,7 @@ static unsigned long mainloop_flags; #define ACK_TIMEOUT 1 #define RESEND_NEEDED 2 +#define TIDY_BARRIERS 3 /* A queue of messages waiting to be sent. 
If kcl_sendmsg is called outside of * process context then the messages get put in here */ @@ -370,6 +372,10 @@ static int cluster_kthread(void *unused) check_for_unacked_nodes(); } + if (test_and_clear_bit(TIDY_BARRIERS, &mainloop_flags)) { + tidy_barriers(); + } + /* Resend any unacked messages */ if (test_and_clear_bit(RESEND_NEEDED, &mainloop_flags) && acks_expected) { @@ -3160,6 +3166,24 @@ static struct cl_barrier *find_barrier(char *name) return NULL; } +static void tidy_barriers(void) +{ + struct list_head *blist, *tmp; + struct cl_barrier *bar; + + down(&barrier_list_lock); + list_for_each_safe(blist, tmp, &barrier_list) { + bar = list_entry(blist, struct cl_barrier, list); + + if (bar->state == BARRIER_STATE_DELETED) { + P_BARRIER("Deleting barrier %s\n", bar->name); + list_del(&bar->list); + kfree(bar); + } + } + up(&barrier_list_lock); +} + /* Do the stuff we need to do when the barrier has completed phase 1 */ static void check_barrier_complete_phase1(struct cl_barrier *barrier) { @@ -3212,11 +3236,14 @@ static int check_barrier_complete_phase2(struct cl_barrier *barrier, int status) barrier->callback(barrier->name, 0); barrier->callback = NULL; } - /* Remove it if it's AUTO-DELETE */ + /* Flag it to be removed if it's AUTO-DELETE. + We can't actually remove it because we can't get the barrier semaphore + in timer context */ if (barrier->flags & BARRIER_ATTR_AUTODELETE) { - list_del(&barrier->list); + barrier->state = BARRIER_STATE_DELETED; + set_bit(TIDY_BARRIERS, &mainloop_flags); + wake_up_interruptible(&cnxman_waitq); spin_unlock_irq(&barrier->phase2_spinlock); - kfree(barrier); return 1; } }