From 1bf51f8b9bd95e62ebb6f3b0b3bf41baffde0915 Mon Sep 17 00:00:00 2001 From: Thierry bordaz (tbordaz) Date: Feb 23 2015 10:58:17 +0000 Subject: Ticket 47901: After total init, nsds5replicaLastInitStatus can report an erroneous error status (like 'Referral') Bug Description: Total update may fail for several reasons: - consumer LDAP server returning an error to the extended op - replication problem (replica not acquired, replication disabled) - connection timeout When the replica is acquired and the total update starts, each entry is sent by send_entry. send_entry returns a ConnResult, not an LDAP error. Fix Description: Change agmt_set_last_init_status so that all three kinds of error (LDAP, replication, connection) are reported. http://fedorahosted.org/389/ticket/47901 Reviewed by: Ludwig Krispenz, Rich Megginson (Thanks Ludwig, thanks Rich) Platforms tested: F17 Flag Day: no Doc impact: no --- diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index e2b6209..39d25bb 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -384,7 +384,7 @@ void agmt_set_update_in_progress (Repl_Agmt *ra, PRBool in_progress); PRBool agmt_get_update_in_progress (const Repl_Agmt *ra); void agmt_set_last_init_start (Repl_Agmt *ra, time_t start_time); void agmt_set_last_init_end (Repl_Agmt *ra, time_t end_time); -void agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, const char *msg); +void agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, int connrc, const char *msg); void agmt_inc_last_update_changecount (Repl_Agmt *ra, ReplicaId rid, int skipped); void agmt_get_changecount_string (Repl_Agmt *ra, char *buf, int bufsize); int agmt_set_replicated_attributes_from_entry(Repl_Agmt *ra, const Slapi_Entry *e); @@ -431,7 +431,9 @@ time_t backoff_step(Backoff_Timer *bt); int backoff_expired(Backoff_Timer *bt, int margin); void backoff_delete(Backoff_Timer **btp); -/* In
repl5_connection.c */ +/* In repl5_connection.c + * keep in sync with conn_result2string + */ typedef struct repl_connection Repl_Connection; typedef enum { @@ -456,8 +458,9 @@ typedef enum CONN_NOT_WIN2K3, CONN_SUPPORTS_DS90_REPL, CONN_DOES_NOT_SUPPORT_DS90_REPL -} ConnResult; +} ConnResult; +char *conn_result2string (int result); Repl_Connection *conn_new(Repl_Agmt *agmt); ConnResult conn_connect(Repl_Connection *conn); void conn_disconnect(Repl_Connection *conn); diff --git a/ldap/servers/plugins/replication/repl5_agmt.c b/ldap/servers/plugins/replication/repl5_agmt.c index 7e66a44..d27648e 100644 --- a/ldap/servers/plugins/replication/repl5_agmt.c +++ b/ldap/servers/plugins/replication/repl5_agmt.c @@ -2424,15 +2424,25 @@ agmt_set_last_update_status (Repl_Agmt *ra, int ldaprc, int replrc, const char * } void -agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, const char *message) +agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, int connrc, const char *message) { + char *connmsg = NULL; + char unknown_connrc[100] = {0}; + + connmsg = conn_result2string(connrc); + if (connrc && (connmsg == NULL)) { + /* That was an unknown connection error */ + PR_snprintf(unknown_connrc, sizeof(unknown_connrc), "Unknown connection error (%d)", connrc); + connmsg = unknown_connrc; + } + PR_ASSERT(NULL != ra); if (NULL != ra) { if (ldaprc != LDAP_SUCCESS) { char *replmsg = NULL; - + if ( replrc ) { replmsg = protocol_response2string(replrc); /* Do not mix the unknown replication error with the known ldap one */ @@ -2440,9 +2450,10 @@ agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, const char *me replmsg = NULL; } } - PR_snprintf(ra->last_init_status, STATUS_LEN, "%d %s%sLDAP error: %s%s%s", + PR_snprintf(ra->last_init_status, STATUS_LEN, "%d %s%sLDAP error: %s%s%s%s%s", ldaprc, message?message:"",message?"":" - ", - slapi_err2string(ldaprc), replmsg ? " - " : "", replmsg ? replmsg : ""); + slapi_err2string(ldaprc), replmsg ? 
" - " : "", replmsg ? replmsg : "", + connrc ? " - " : "", connrc ? connmsg : ""); } /* ldaprc == LDAP_SUCCESS */ else if (replrc != 0) @@ -2476,11 +2487,18 @@ agmt_set_last_init_status (Repl_Agmt *ra, int ldaprc, int replrc, const char *me else { PR_snprintf(ra->last_init_status, STATUS_LEN, - "%d Replication error acquiring replica: %s%s%s", + "%d Replication error acquiring replica: %s%s%s%s%s", replrc, protocol_response2string(replrc), - message?" - ":"",message?message:""); + message?" - ":"",message?message:"", + connrc?" - ":"",connrc?connmsg:""); } } + else if (connrc != CONN_OPERATION_SUCCESS) { + PR_snprintf(ra->last_init_status, STATUS_LEN, + "%d connection error: %s%s%s", + connrc, connmsg, + message?" - ":"",message?message:""); + } else if (message != NULL) /* replrc == NSDS50_REPL_REPLICA_READY == 0 */ { PR_snprintf(ra->last_init_status, STATUS_LEN, diff --git a/ldap/servers/plugins/replication/repl5_agmtlist.c b/ldap/servers/plugins/replication/repl5_agmtlist.c index 4a1ff5d..e414e0b 100644 --- a/ldap/servers/plugins/replication/repl5_agmtlist.c +++ b/ldap/servers/plugins/replication/repl5_agmtlist.c @@ -592,7 +592,7 @@ agmtlist_modify_callback(Slapi_PBlock *pb, Slapi_Entry *entryBefore, Slapi_Entry { if (agmt_initialize_replica(agmt) != 0) { /* The suffix/repl agmt is disabled */ - agmt_set_last_init_status(agmt, 0, NSDS50_REPL_DISABLED, NULL); + agmt_set_last_init_status(agmt, 0, NSDS50_REPL_DISABLED, 0, NULL); if(agmt_is_enabled(agmt)){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Suffix is disabled"); } else { diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index eddcae8..398ff98 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -147,6 +147,37 @@ static void repl5_debug_timeout_callback(time_t when, void *arg); static void close_connection_internal(Repl_Connection *conn); static void 
conn_delete_internal(Repl_Connection *conn); + +/* Maps a ConnResult code to a constant description string; returns NULL when the value matches none of the cases below. */ +char * +conn_result2string (int result) +{ + switch (result) + { + case CONN_OPERATION_SUCCESS: return "operation success"; + case CONN_OPERATION_FAILED: return "operation failure"; + case CONN_NOT_CONNECTED: return "not connected"; + case CONN_SUPPORTS_DS5_REPL: return "consumer supports all DS5 extop"; + case CONN_DOES_NOT_SUPPORT_DS5_REPL: return "consumer does not support all DS5 extop"; + case CONN_SCHEMA_UPDATED: return "consumer schema updated"; + case CONN_SCHEMA_NO_UPDATE_NEEDED: return "consumer schema up to date"; + case CONN_LOCAL_ERROR: return "local error"; + case CONN_BUSY: return "consumer is busy"; + case CONN_SSL_NOT_ENABLED: return "NSS not initialized"; + case CONN_TIMEOUT: return "time out"; + case CONN_SUPPORTS_DS71_REPL: return "consumer supports all DS71 extop"; + case CONN_DOES_NOT_SUPPORT_DS71_REPL: return "consumer does not support all DS7.1 extop"; + case CONN_IS_READONLY: return "consumer is read only"; + case CONN_IS_NOT_READONLY: return "consumer is not read only"; + case CONN_SUPPORTS_DIRSYNC: return "consumer supports DIRSYNC control"; + case CONN_DOES_NOT_SUPPORT_DIRSYNC: return "consumer does not support DIRSYNC control"; + case CONN_IS_WIN2K3: return "consumer is W2K3 or after"; + case CONN_NOT_WIN2K3: return "consumer is before W2K3"; + case CONN_SUPPORTS_DS90_REPL: return "consumer supports all DS90 extop"; + case CONN_DOES_NOT_SUPPORT_DS90_REPL: return "consumer does not support all DS90 extop"; + default: return NULL; + } +} /* * Create a new connection object. Returns a pointer to the object, or * NULL if an error occurs. 
diff --git a/ldap/servers/plugins/replication/repl5_protocol.c b/ldap/servers/plugins/replication/repl5_protocol.c index 0e9668d..e8ec16a 100644 --- a/ldap/servers/plugins/replication/repl5_protocol.c +++ b/ldap/servers/plugins/replication/repl5_protocol.c @@ -244,7 +244,7 @@ prot_initialize_replica(Repl_Protocol *rp) rp->next_state = STATE_PERFORMING_TOTAL_UPDATE; /* Stop the incremental protocol, if running */ rp->prp_incremental->stop(rp->prp_incremental); - if (rp->prp_total) agmt_set_last_init_status(rp->prp_total->agmt, 0, 0, NULL); + if (rp->prp_total) agmt_set_last_init_status(rp->prp_total->agmt, 0, 0, 0, NULL); PR_Unlock(rp->lock); } diff --git a/ldap/servers/plugins/replication/repl5_tot_protocol.c b/ldap/servers/plugins/replication/repl5_tot_protocol.c index adadd44..43b0de5 100644 --- a/ldap/servers/plugins/replication/repl5_tot_protocol.c +++ b/ldap/servers/plugins/replication/repl5_tot_protocol.c @@ -369,7 +369,7 @@ repl5_tot_run(Private_Repl_Protocol *prp) int optype, ldaprc; conn_get_error(prp->conn, &optype, &ldaprc); agmt_set_last_init_status(prp->agmt, ldaprc, - prp->last_acquire_response_code, NULL); + prp->last_acquire_response_code, 0, NULL); goto done; } else if (prp->terminate) @@ -381,7 +381,7 @@ repl5_tot_run(Private_Repl_Protocol *prp) hostname = agmt_get_hostname(prp->agmt); portnum = agmt_get_port(prp->agmt); - agmt_set_last_init_status(prp->agmt, 0, 0, "Total schema update in progress"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total schema update in progress"); remote_schema_csn = agmt_get_consumer_schema_csn ( prp->agmt ); rc = conn_push_schema(prp->conn, &remote_schema_csn); @@ -396,11 +396,11 @@ repl5_tot_run(Private_Repl_Protocol *prp) "total update session.\n", hostname, portnum); /* But keep going */ - agmt_set_last_init_status(prp->agmt, 0, rc, "Total schema update failed"); + agmt_set_last_init_status(prp->agmt, 0, rc, 0, "Total schema update failed"); } else { - agmt_set_last_init_status(prp->agmt, 0, 0, "Total 
schema update succeeded"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total schema update succeeded"); } /* ONREPL - big assumption here is that entries a returned in the id order @@ -409,7 +409,7 @@ repl5_tot_run(Private_Repl_Protocol *prp) properly updated because bulk import at the moment skips orphand entries. */ /* XXXggood above assumption may not be valid if orphaned entry moved???? */ - agmt_set_last_init_status(prp->agmt, 0, 0, "Total update in progress"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total update in progress"); slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "Beginning total update of replica " "\"%s\".\n", agmt_get_long_name(prp->agmt)); @@ -459,7 +459,9 @@ repl5_tot_run(Private_Repl_Protocol *prp) } /* this search get all the entries from the replicated area including tombstones - and referrals */ + and referrals + Note that cb_data.rc contains values from ConnResult + */ slapi_search_internal_callback_pb (pb, &cb_data /* callback data */, get_result /* result callback */, send_entry /* entry callback */, @@ -474,7 +476,7 @@ repl5_tot_run(Private_Repl_Protocol *prp) if (!prp->repl50consumer) { - if(cb_data.rc == LDAP_SUCCESS){ /* no need to wait if we already failed */ + if(cb_data.rc == CONN_OPERATION_SUCCESS){ /* no need to wait if we already failed */ repl5_tot_waitfor_async_results(&cb_data); } rc = repl5_tot_destroy_async_result_thread(&cb_data); @@ -498,16 +500,16 @@ repl5_tot_run(Private_Repl_Protocol *prp) agmt_update_done(prp->agmt, 1); release_replica(prp); - if (rc != LDAP_SUCCESS) + if (rc != CONN_OPERATION_SUCCESS) { slapi_log_error (SLAPI_LOG_FATAL, repl_plugin_name, "Total update failed for replica \"%s\", " "error (%d)\n", agmt_get_long_name(prp->agmt), rc); - agmt_set_last_init_status(prp->agmt, rc, 0, "Total update aborted"); + agmt_set_last_init_status(prp->agmt, 0, 0, rc, "Total update aborted"); } else { slapi_log_error (SLAPI_LOG_FATAL, repl_plugin_name, "Finished total update of replica " "\"%s\". 
Sent %lu entries.\n", agmt_get_long_name(prp->agmt), cb_data.num_entries); - agmt_set_last_init_status(prp->agmt, 0, 0, "Total update succeeded"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total update succeeded"); } done: @@ -825,12 +827,14 @@ int send_entry (Slapi_Entry *e, void *cb_data) if (CONN_NOT_CONNECTED == rc) { ((callback_data*)cb_data)->rc = -2; retval = -1; - } else if (CONN_OPERATION_SUCCESS == rc) { - retval = 0; } else { - ((callback_data*)cb_data)->rc = rc; - retval = -1; - } + ((callback_data*) cb_data)->rc = rc; + if (CONN_OPERATION_SUCCESS == rc) { + retval = 0; + } else { + retval = -1; + } + } error: return retval; } diff --git a/ldap/servers/plugins/replication/windows_tot_protocol.c b/ldap/servers/plugins/replication/windows_tot_protocol.c index c81974b..f739944 100644 --- a/ldap/servers/plugins/replication/windows_tot_protocol.c +++ b/ldap/servers/plugins/replication/windows_tot_protocol.c @@ -182,7 +182,7 @@ windows_tot_run(Private_Repl_Protocol *prp) int optype, ldaprc; windows_conn_get_error(prp->conn, &optype, &ldaprc); agmt_set_last_init_status(prp->agmt, ldaprc, - prp->last_acquire_response_code, NULL); + prp->last_acquire_response_code, 0, NULL); goto done; } else if (prp->terminate) @@ -192,9 +192,9 @@ windows_tot_run(Private_Repl_Protocol *prp) goto done; } - agmt_set_last_init_status(prp->agmt, 0, 0, "Total schema update in progress"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total schema update in progress"); - agmt_set_last_init_status(prp->agmt, 0, 0, "Total update in progress"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total update in progress"); agmt_set_update_in_progress(prp->agmt, PR_TRUE); @@ -253,15 +253,15 @@ windows_tot_run(Private_Repl_Protocol *prp) rc = cb_data.rc; windows_release_replica(prp); - if (rc != LDAP_SUCCESS) { + if (rc != CONN_OPERATION_SUCCESS) { slapi_log_error(SLAPI_LOG_REPL, windows_repl_plugin_name, "%s: windows_tot_run: " "failed to obtain data to send to the consumer; LDAP 
error - %d\n", agmt_get_long_name(prp->agmt), rc); - agmt_set_last_init_status(prp->agmt, rc, 0, "Total update aborted"); + agmt_set_last_init_status(prp->agmt, 0, 0, rc, "Total update aborted"); } else { slapi_log_error(SLAPI_LOG_FATAL, windows_repl_plugin_name, "Finished total update of replica " "\"%s\". Sent %lu entries.\n", agmt_get_long_name(prp->agmt), cb_data.num_entries); - agmt_set_last_init_status(prp->agmt, 0, 0, "Total update succeeded"); + agmt_set_last_init_status(prp->agmt, 0, 0, 0, "Total update succeeded"); /* Now update our consumer RUV for this agreement. * This ensures that future incrememental updates work. */