From cf6d12caabc4df37a9021755b440365209bd9d98 Mon Sep 17 00:00:00 2001 From: William Brown Date: Jan 12 2018 16:25:44 +0000 Subject: Ticket 48184 - close connections at shutdown cleanly. Bug Description: During shutdown we would not close connections. In the past this may have just been an annoyance, but now with the way nunc-stans works, io events can still trigger on open xeisting connectinos during shutdown. Fix Description: Close connections during shutdown rather than leaving them alive. https://pagure.io/389-ds-base/issue/48184 Author: wibrown Review by: lkrispen, vashirov (Thank you!) --- diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c index 7c57b47..f2f763d 100644 --- a/ldap/servers/slapd/conntable.c +++ b/ldap/servers/slapd/conntable.c @@ -91,6 +91,19 @@ connection_table_abandon_all_operations(Connection_Table *ct) } } +void +connection_table_disconnect_all(Connection_Table *ct) +{ + for (size_t i = 0; i < ct->size; i++) { + if (ct->c[i].c_mutex) { + Connection *c = &(ct->c[i]); + PR_EnterMonitor(c->c_mutex); + disconnect_server_nomutex(c, c->c_connid, -1, SLAPD_DISCONNECT_ABORT, ECANCELED); + PR_ExitMonitor(c->c_mutex); + } + } +} + /* Given a file descriptor for a socket, this function will return * a slot in the connection table to use. * diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c index 4e0466a..c245a4d 100644 --- a/ldap/servers/slapd/daemon.c +++ b/ldap/servers/slapd/daemon.c @@ -1176,6 +1176,30 @@ slapd_daemon(daemon_ports_t *ports, ns_thrpool_t *tp) housekeeping_stop(); /* Run this after op_thread_cleanup() logged sth */ disk_monitoring_stop(); + /* + * Now that they are abandonded, we need to mark them as done. + * In NS while it's safe to allow excess jobs to be cleaned by + * by the walk and ns_job_done of remaining queued events, the + * issue is that if we allow something to live past this point + * the CT is freed from underneath, and bad things happen (tm). + * + * NOTE: We do this after we stop psearch, because there could + * be a race between flagging the psearch done, and users still + * try to send on the connection. Similar with op_threads. + */ + connection_table_disconnect_all(the_connection_table); + + /* + * WARNING: Normally we should close the tp in main + * but because of issues in the current connection design + * we need to close it here to guarantee events won't fire! + * + * All the connection close jobs "should" complete before + * shutdown at least. + */ + ns_thrpool_shutdown(tp); + ns_thrpool_wait(tp); + threads = g_get_active_threadcnt(); if (threads > 0) { slapi_log_err(SLAPI_LOG_INFO, "slapd_daemon", @@ -1628,23 +1652,18 @@ ns_handle_closure(struct ns_job_t *job) Connection *c = (Connection *)ns_job_get_data(job); int do_yield = 0; -/* this function must be called from the event loop thread */ -#ifdef DEBUG - PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job))); -#else - /* This doesn't actually confirm it's in the event loop thread, but it's a start */ - if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) { - slapi_log_err(SLAPI_LOG_ERR, "ns_handle_closure", "Attempt to close outside of event loop thread %" PRIu64 " for fd=%d\n", - c->c_connid, c->c_sd); - return; - } -#endif PR_EnterMonitor(c->c_mutex); + /* Assert we really have the right job state. */ + PR_ASSERT(job == c->c_job); + connection_release_nolock_ext(c, 1); /* release ref acquired for event framework */ PR_ASSERT(c->c_ns_close_jobs == 1); /* should be exactly 1 active close job - this one */ c->c_ns_close_jobs--; /* this job is processing closure */ + /* Because handle closure will add a new job, we need to detach our current one. */ + c->c_job = NULL; do_yield = ns_handle_closure_nomutex(c); PR_ExitMonitor(c->c_mutex); + /* Remove this task now. */ ns_job_done(job); if (do_yield) { /* closure not done - another reference still outstanding */ @@ -1667,6 +1686,14 @@ ns_connection_post_io_or_closing(Connection *conn) return; } + /* + * Cancel any existing ns jobs we have registered. + */ + if (conn->c_job != NULL) { + ns_job_done(conn->c_job); + conn->c_job = NULL; + } + if (CONN_NEEDS_CLOSING(conn)) { /* there should only ever be 0 or 1 active closure jobs */ PR_ASSERT((conn->c_ns_close_jobs == 0) || (conn->c_ns_close_jobs == 1)); @@ -1676,13 +1703,10 @@ ns_connection_post_io_or_closing(Connection *conn) conn->c_connid, conn->c_sd); return; } else { - /* just make sure we schedule the event to be closed in a timely manner */ - tv.tv_sec = 0; - tv.tv_usec = slapd_wakeup_timer * 1000; conn->c_ns_close_jobs++; /* now 1 active closure job */ connection_acquire_nolock_ext(conn, 1 /* allow acquire even when closing */); /* event framework now has a reference */ - ns_result_t job_result = ns_add_timeout_job(conn->c_tp, &tv, NS_JOB_TIMER, - ns_handle_closure, conn, NULL); + /* Close the job asynchronously. Why? */ + ns_result_t job_result = ns_add_job(conn->c_tp, NS_JOB_TIMER, ns_handle_closure, conn, &(conn->c_job)); if (job_result != NS_SUCCESS) { if (job_result == NS_SHUTDOWN) { slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post closure job " @@ -1726,7 +1750,7 @@ ns_connection_post_io_or_closing(Connection *conn) #endif ns_result_t job_result = ns_add_io_timeout_job(conn->c_tp, conn->c_prfd, &tv, NS_JOB_READ | NS_JOB_PRESERVE_FD, - ns_handle_pr_read_ready, conn, NULL); + ns_handle_pr_read_ready, conn, &(conn->c_job)); if (job_result != NS_SUCCESS) { if (job_result == NS_SHUTDOWN) { slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post I/O job for " @@ -1755,19 +1779,13 @@ ns_handle_pr_read_ready(struct ns_job_t *job) int maxthreads = config_get_maxthreadsperconn(); Connection *c = (Connection *)ns_job_get_data(job); -/* this function must be called from the event loop thread */ -#ifdef DEBUG - PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job))); -#else - /* This doesn't actually confirm it's in the event loop thread, but it's a start */ - if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) { - slapi_log_err(SLAPI_LOG_ERR, "ns_handle_pr_read_ready", "Attempt to handle read ready outside of event loop thread %" PRIu64 " for fd=%d\n", - c->c_connid, c->c_sd); - return; - } -#endif - PR_EnterMonitor(c->c_mutex); + /* Assert we really have the right job state. */ + PR_ASSERT(job == c->c_job); + + /* On all code paths we remove the job, so set it null now */ + c->c_job = NULL; + slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "activity on conn %" PRIu64 " for fd=%d\n", c->c_connid, c->c_sd); /* if we were called due to some i/o event, see what the state of the socket is */ @@ -1826,6 +1844,7 @@ ns_handle_pr_read_ready(struct ns_job_t *job) slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "queued conn %" PRIu64 " for fd=%d\n", c->c_connid, c->c_sd); } + /* Since we call done on the job, we need to remove it here. */ PR_ExitMonitor(c->c_mutex); ns_job_done(job); return; diff --git a/ldap/servers/slapd/fe.h b/ldap/servers/slapd/fe.h index 4d25a9f..f47bb61 100644 --- a/ldap/servers/slapd/fe.h +++ b/ldap/servers/slapd/fe.h @@ -100,6 +100,7 @@ extern Connection_Table *the_connection_table; /* JCM - Exported from globals.c Connection_Table *connection_table_new(int table_size); void connection_table_free(Connection_Table *ct); void connection_table_abandon_all_operations(Connection_Table *ct); +void connection_table_disconnect_all(Connection_Table *ct); Connection *connection_table_get_connection(Connection_Table *ct, int sd); int connection_table_move_connection_out_of_active_list(Connection_Table *ct, Connection *c); void connection_table_move_connection_on_to_active_list(Connection_Table *ct, Connection *c); diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h index 830944f..08754d8 100644 --- a/ldap/servers/slapd/slap.h +++ b/ldap/servers/slapd/slap.h @@ -1644,6 +1644,7 @@ typedef struct conn void *c_io_layer_cb_data; /* callback data */ struct connection_table *c_ct; /* connection table that this connection belongs to */ ns_thrpool_t *c_tp; /* thread pool for this connection */ + struct ns_job_t *c_job; /* If it exists, the current ns_job_t */ int c_ns_close_jobs; /* number of current close jobs */ char *c_ipaddr; /* ip address str - used by monitor */ } Connection;