From bbd33e46aa6194c1086939f7cf8538c067186455 Mon Sep 17 00:00:00 2001 From: Jan Zeleny Date: Aug 01 2012 14:19:41 +0000 Subject: Primary server support: basic support in failover code Now there are two lists of servers for each service. If the currently selected server is only a backup, an event is scheduled that tries to connect to one of the primary servers; if it succeeds, it starts using that server instead of the one it is currently connected to. --- diff --git a/src/providers/data_provider_fo.c b/src/providers/data_provider_fo.c index 51d6ae2..1c03e31 100644 --- a/src/providers/data_provider_fo.c +++ b/src/providers/data_provider_fo.c @@ -54,6 +54,7 @@ struct be_failover_ctx { struct resolv_ctx *resolv; struct be_svc_data *svcs; + struct tevent_timer *primary_server_handler; }; static const char *proto_table[] = { FO_PROTO_TCP, FO_PROTO_UDP, NULL }; @@ -315,7 +316,8 @@ int be_fo_get_server_count(struct be_ctx *ctx, const char *service_name) } int be_fo_add_server(struct be_ctx *ctx, const char *service_name, - const char *server, int port, void *user_data) + const char *server, int port, void *user_data, + bool primary) { struct be_svc_data *svc; int ret; @@ -325,7 +327,8 @@ int be_fo_add_server(struct be_ctx *ctx, const char *service_name, return ENOENT; } - ret = fo_add_server(svc->fo_service, server, port, user_data); + ret = fo_add_server(svc->fo_service, server, port, + user_data, primary); if (ret && ret != EEXIST) { DEBUG(1, ("Failed to add server to failover service\n")); return ret; @@ -345,6 +348,138 @@ struct be_resolve_server_state { bool first_try; }; +struct be_primary_server_ctx { + struct be_ctx *bctx; + struct tevent_context *ev; + + struct be_svc_data *svc; + unsigned long timeout; + + int attempts; +}; + +errno_t be_resolve_server_process(struct tevent_req *subreq, + struct be_resolve_server_state *state, + struct tevent_req **new_subreq); +static void be_primary_server_done(struct tevent_req *subreq); +static errno_t 
+be_primary_server_timeout_activate(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct be_ctx *bctx, + struct be_svc_data *svc, + const unsigned long timeout_seconds); + +static void +be_primary_server_timeout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval tv, void *pvt) +{ + struct be_primary_server_ctx *ctx = talloc_get_type(pvt, struct be_primary_server_ctx); + struct tevent_req *subreq; + + ctx->bctx->be_fo->primary_server_handler = NULL; + + DEBUG(SSSDBG_TRACE_FUNC, ("Looking for primary server!\n")); + subreq = fo_resolve_service_send(ctx->bctx, ctx->ev, + ctx->bctx->be_fo->resolv, + ctx->bctx->be_fo->fo_ctx, + ctx->svc->fo_service); + if (subreq == NULL) { + return; + } + tevent_req_set_callback(subreq, be_primary_server_done, ctx); +} + +static void be_primary_server_done(struct tevent_req *subreq) +{ + errno_t ret; + struct be_primary_server_ctx *ctx; + struct be_resolve_server_state *resolve_state; + struct tevent_req *new_subreq; + + ctx = tevent_req_callback_data(subreq, struct be_primary_server_ctx); + + resolve_state = talloc_zero(ctx->bctx, struct be_resolve_server_state); + if (resolve_state == NULL) { + DEBUG(SSSDBG_CRIT_FAILURE, ("talloc_zero() failed\n")); + return; + } + + resolve_state->attempts = ctx->attempts; + resolve_state->ctx = ctx->bctx; + resolve_state->ev = ctx->ev; + resolve_state->first_try = true; + resolve_state->srv = NULL; + resolve_state->svc = ctx->svc; + + ret = be_resolve_server_process(subreq, resolve_state, &new_subreq); + talloc_free(subreq); + if (ret == EAGAIN) { + ctx->attempts++; + tevent_req_set_callback(new_subreq, be_primary_server_done, ctx); + return; + } else if (ret == EIO || (ret == EOK && + !fo_is_server_primary(resolve_state->srv))) { + + /* Schedule another lookup + * (either no server could be found or it was not primary) + */ + ret = be_primary_server_timeout_activate(ctx->bctx, ctx->ev, ctx->bctx, + ctx->svc, ctx->timeout); + if (ret != EOK) { + DEBUG(SSSDBG_MINOR_FAILURE, 
("Could not schedule primary server lookup\n")); + } + } else if (ret == EOK) { + be_run_reconnect_cb(ctx->bctx); + } + talloc_zfree(ctx); + + /* If an error occurred just end the routine */ +} + +static errno_t +be_primary_server_timeout_activate(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct be_ctx *bctx, + struct be_svc_data *svc, + const unsigned long timeout_seconds) +{ + struct timeval tv; + struct be_primary_server_ctx *ctx; + struct be_failover_ctx *fo_ctx = bctx->be_fo; + + if (fo_ctx->primary_server_handler != NULL) { + DEBUG(SSSDBG_TRACE_FUNC, ("The primary server reconnection " + "is already scheduled\n")); + return EOK; + } + + ctx = talloc_zero(mem_ctx, struct be_primary_server_ctx); + if (ctx == NULL) { + return ENOMEM; + } + + ctx->bctx = bctx; + ctx->ev = ev; + ctx->svc = svc; + ctx->timeout = timeout_seconds; + + tv = tevent_timeval_current(); + tv = tevent_timeval_add(&tv, timeout_seconds, 0); + fo_ctx->primary_server_handler = tevent_add_timer(ev, bctx, tv, + be_primary_server_timeout, ctx); + if (fo_ctx->primary_server_handler == NULL) { + DEBUG(SSSDBG_CRIT_FAILURE, ("tevent_add_timer failed.\n")); + talloc_free(ctx); + return ENOMEM; + } + + DEBUG(SSSDBG_TRACE_INTERNAL, ("Primary server reactivation timeout set " + "to %lu seconds\n", timeout_seconds)); + return EOK; +} + + static void be_resolve_server_done(struct tevent_req *subreq); struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, @@ -389,35 +524,66 @@ struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, static void be_resolve_server_done(struct tevent_req *subreq) { + struct tevent_req *new_subreq; struct tevent_req *req = tevent_req_callback_data(subreq, struct tevent_req); struct be_resolve_server_state *state = tevent_req_data(req, struct be_resolve_server_state); - struct be_svc_callback *callback; int ret; + + ret = be_resolve_server_process(subreq, state, &new_subreq); + talloc_zfree(subreq); + if (ret == EAGAIN) { + 
tevent_req_set_callback(new_subreq, be_resolve_server_done, req); + return; + } else if (ret != EOK) { + goto fail; + } + + if (!fo_is_server_primary(state->srv)) { + /* FIXME: make the timeout configurable */ + ret = be_primary_server_timeout_activate(state->ctx, state->ev, + state->ctx, state->svc, + 30); + if (ret != EOK) { + goto fail; + } + } + + tevent_req_done(req); + return; + +fail: + DEBUG(SSSDBG_TRACE_LIBS, ("Server resolution failed: %d\n", ret)); + state->svc->first_resolved = NULL; + tevent_req_error(req, ret); +} + +errno_t be_resolve_server_process(struct tevent_req *subreq, + struct be_resolve_server_state *state, + struct tevent_req **new_subreq) +{ + errno_t ret; time_t srv_status_change; + struct be_svc_callback *callback; ret = fo_resolve_service_recv(subreq, &state->srv); - talloc_zfree(subreq); switch (ret) { case EOK: if (!state->srv) { - ret = EFAULT; - goto fail; + return EFAULT; } break; case ENOENT: /* all servers have been tried and none * was found good, go offline */ - ret = EIO; - goto fail; + return EIO; default: /* mark server as bad and retry */ if (!state->srv) { - ret = EFAULT; - goto fail; + return EFAULT; } DEBUG(SSSDBG_MINOR_FAILURE, ("Couldn't resolve server (%s), resolver returned (%d)\n", @@ -425,9 +591,8 @@ static void be_resolve_server_done(struct tevent_req *subreq) state->attempts++; if (state->attempts >= 10) { - DEBUG(2, ("Failed to find a server after 10 attempts\n")); - ret = EIO; - goto fail; + DEBUG(SSSDBG_OP_FAILURE, ("Failed to find a server after 10 attempts\n")); + return EIO; } /* now try next one */ @@ -437,12 +602,14 @@ static void be_resolve_server_done(struct tevent_req *subreq) state->ctx->be_fo->fo_ctx, state->svc->fo_service); if (!subreq) { - ret = ENOMEM; - goto fail; + return ENOMEM; } - tevent_req_set_callback(subreq, be_resolve_server_done, req); - return; + if (new_subreq) { + *new_subreq = subreq; + } + + return EAGAIN; } /* all fine we got the server */ @@ -452,8 +619,7 @@ static void 
be_resolve_server_done(struct tevent_req *subreq) } else if (state->svc->first_resolved == state->srv) { DEBUG(SSSDBG_OP_FAILURE, ("The fail over cycled through all available servers\n")); - ret = ENOENT; - goto fail; + return ENOENT; } if (DEBUG_IS_SET(SSSDBG_FUNC_DATA) && fo_get_server_name(state->srv)) { @@ -464,8 +630,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) DEBUG(SSSDBG_CRIT_FAILURE, ("FATAL: No hostent available for server (%s)\n", fo_get_server_str_name(state->srv))); - ret = EFAULT; - goto fail; + return EFAULT; } inet_ntop(srvaddr->family, srvaddr->addr_list[0]->ipaddr, @@ -492,13 +657,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) } } - tevent_req_done(req); - return; - -fail: - DEBUG(SSSDBG_TRACE_LIBS, ("Server resolution failed: %d\n", ret)); - state->svc->first_resolved = NULL; - tevent_req_error(req, ret); + return EOK; } int be_resolve_server_recv(struct tevent_req *req, struct fo_server **srv) diff --git a/src/providers/dp_backend.h b/src/providers/dp_backend.h index 41dd3f6..8e897a1 100644 --- a/src/providers/dp_backend.h +++ b/src/providers/dp_backend.h @@ -231,7 +231,8 @@ int be_fo_add_srv_server(struct be_ctx *ctx, enum be_fo_protocol proto, bool proto_fallback, void *user_data); int be_fo_add_server(struct be_ctx *ctx, const char *service_name, - const char *server, int port, void *user_data); + const char *server, int port, void *user_data, + bool primary); struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, struct tevent_context *ev, diff --git a/src/providers/fail_over.c b/src/providers/fail_over.c index 5ef1436..a16ab33 100644 --- a/src/providers/fail_over.c +++ b/src/providers/fail_over.c @@ -72,6 +72,7 @@ struct fo_server { struct fo_server *prev; struct fo_server *next; + bool primary; void *user_data; int port; int port_status; @@ -577,7 +578,7 @@ fo_add_srv_server(struct fo_service *service, const char *srv, static struct fo_server * create_fo_server(struct fo_service *service, 
const char *name, - int port, void *user_data) + int port, void *user_data, bool primary) { struct fo_server *server; int ret; @@ -590,6 +591,7 @@ create_fo_server(struct fo_service *service, const char *name, server->user_data = user_data; server->service = service; server->port_status = DEFAULT_PORT_STATUS; + server->primary = primary; if (name != NULL) { ret = get_server_common(server, service->ctx, name, &server->common); @@ -621,26 +623,42 @@ fo_get_server_count(struct fo_service *service) return count; } +static bool fo_server_match(struct fo_server *server, + const char *name, + int port, + void *user_data) +{ + if (server->port != port || server->user_data != user_data) { + return false; + } + + if (name == NULL && server->common == NULL) { + return true; + } + + if (name != NULL && server->common != NULL) { + if (!strcasecmp(name, server->common->name)) + return true; + } + + return false; +} + int fo_add_server(struct fo_service *service, const char *name, int port, - void *user_data) + void *user_data, bool primary) { struct fo_server *server; DEBUG(3, ("Adding new server '%s', to service '%s'\n", name ? name : "(no name)", service->name)); DLIST_FOR_EACH(server, service->server_list) { - if (server->port != port || server->user_data != user_data) - continue; - if (name == NULL && server->common == NULL) { + if (fo_server_match(server, name, port, user_data)) { return EEXIST; - } else if (name != NULL && server->common != NULL) { - if (!strcasecmp(name, server->common->name)) - return EEXIST; } } - server = create_fo_server(service, name, port, user_data); + server = create_fo_server(service, name, port, user_data, primary); if (!server) { return ENOMEM; } @@ -658,7 +676,7 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) /* If we already have a working server, use that one. 
*/ server = service->active_server; if (server != NULL) { - if (service_works(server)) { + if (service_works(server) && fo_is_server_primary(server)) { goto done; } service->active_server = NULL; @@ -668,17 +686,27 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) * Otherwise iterate through the server list. */ - /* First, try servers after the last one we tried. */ - if (service->last_tried_server != NULL) { + + /* First, try primary servers after the last one we tried. + * (only if the last one was primary as well) + */ + if (service->last_tried_server != NULL && + service->last_tried_server->primary) { DLIST_FOR_EACH(server, service->last_tried_server->next) { + /* Go only through primary servers */ + if (!server->primary) continue; + if (service_works(server)) { goto done; } } } - /* If none were found, try at the start. */ + /* If none were found, try at the start, primary first */ DLIST_FOR_EACH(server, service->server_list) { + /* First iterate only over primary servers */ + if (!server->primary) continue; + if (service_works(server)) { goto done; } @@ -687,6 +715,15 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) } } + DLIST_FOR_EACH(server, service->server_list) { + /* Now iterate only over backup servers */ + if (server->primary) continue; + + if (service_works(server)) { + goto done; + } + } + service->last_tried_server = NULL; return ENOENT; @@ -727,6 +764,8 @@ set_lookup_hook(struct fo_server *server, struct tevent_req *req) return EOK; } + + /******************************************************************* * Get server to connect to. 
* *******************************************************************/ @@ -740,7 +779,6 @@ struct resolve_service_state { struct fo_ctx *fo_ctx; }; - static errno_t fo_resolve_service_activate_timeout(struct tevent_req *req, struct tevent_context *ev, const unsigned long timeout_seconds); static void fo_resolve_service_cont(struct tevent_req *subreq); @@ -1171,7 +1209,8 @@ resolve_srv_done(struct tevent_req *subreq) for (reply = reply_list; reply; reply = reply->next) { server = create_fo_server(state->service, reply->host, - reply->port, state->meta->user_data); + reply->port, state->meta->user_data, + true); if (!server) { ret = ENOMEM; goto fail; @@ -1451,6 +1490,12 @@ fo_get_server_hostent(struct fo_server *server) return server->common->rhostent; } +bool +fo_is_server_primary(struct fo_server *server) +{ + return server->primary; +} + time_t fo_get_server_hostname_last_change(struct fo_server *server) { diff --git a/src/providers/fail_over.h b/src/providers/fail_over.h index 8fbbe25..b69e8a5 100644 --- a/src/providers/fail_over.h +++ b/src/providers/fail_over.h @@ -116,9 +116,8 @@ int fo_get_server_count(struct fo_service *service); * connection. If 'name' is NULL, no server resolution will be done. 
*/ int fo_add_server(struct fo_service *service, - const char *name, - int port, - void *user_data); + const char *name, int port, + void *user_data, bool primary); int fo_add_srv_server(struct fo_service *service, @@ -180,6 +179,8 @@ const char *fo_get_server_str_name(struct fo_server *server); struct resolv_hostent *fo_get_server_hostent(struct fo_server *server); +bool fo_is_server_primary(struct fo_server *server); + time_t fo_get_server_hostname_last_change(struct fo_server *server); int fo_is_srv_lookup(struct fo_server *s); diff --git a/src/providers/ipa/ipa_common.c b/src/providers/ipa/ipa_common.c index 148a8b7..98a7c58 100644 --- a/src/providers/ipa/ipa_common.c +++ b/src/providers/ipa/ipa_common.c @@ -892,7 +892,7 @@ int ipa_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, continue; } - ret = be_fo_add_server(ctx, "IPA", list[i], 0, NULL); + ret = be_fo_add_server(ctx, "IPA", list[i], 0, NULL, true); if (ret && ret != EEXIST) { DEBUG(0, ("Failed to add server\n")); goto done; diff --git a/src/providers/krb5/krb5_common.c b/src/providers/krb5/krb5_common.c index e068270..19fbd76 100644 --- a/src/providers/krb5/krb5_common.c +++ b/src/providers/krb5/krb5_common.c @@ -586,7 +586,7 @@ int krb5_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, } ret = be_fo_add_server(ctx, service_name, server_spec, (int) port, - list[i]); + list[i], true); if (ret && ret != EEXIST) { DEBUG(0, ("Failed to add server\n")); goto done; diff --git a/src/providers/ldap/ldap_common.c b/src/providers/ldap/ldap_common.c index 29aa029..24c6e12 100644 --- a/src/providers/ldap/ldap_common.c +++ b/src/providers/ldap/ldap_common.c @@ -1197,8 +1197,8 @@ int sdap_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, talloc_steal(service, list[i]); - ret = be_fo_add_server(ctx, service->name, - lud->lud_host, lud->lud_port, list[i]); + ret = be_fo_add_server(ctx, service->name, lud->lud_host, + lud->lud_port, list[i], true); ldap_free_urldesc(lud); if (ret) { goto done; diff --git 
a/src/tests/fail_over-tests.c b/src/tests/fail_over-tests.c index 8d6bdd3..6f4843c 100644 --- a/src/tests/fail_over-tests.c +++ b/src/tests/fail_over-tests.c @@ -230,14 +230,15 @@ START_TEST(test_fo_resolve_service) fail_if(fo_new_service(ctx->fo_ctx, "ntp", &service[2]) != EOK); /* Add servers. */ - fail_if(fo_add_server(service[0], "localhost", 20, NULL) != EOK); - fail_if(fo_add_server(service[0], "127.0.0.1", 80, NULL) != EOK); + fail_if(fo_add_server(service[0], "localhost", 20, NULL, true) != EOK); + fail_if(fo_add_server(service[0], "127.0.0.1", 80, NULL, false) != EOK); - fail_if(fo_add_server(service[1], "localhost", 30, NULL) != EOK); - fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL) != EOK); - fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL) != EEXIST); + fail_if(fo_add_server(service[1], "localhost", 30, NULL, false) != EOK); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, true) != EOK); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, true) != EEXIST); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, false) != EEXIST); - fail_if(fo_add_server(service[2], NULL, 123, NULL) != EOK); + fail_if(fo_add_server(service[2], NULL, 123, NULL, true) != EOK); /* Make requests. */ get_request(ctx, service[0], EOK, 20, PORT_WORKING, -1);