From c28af902d5af09f1e171d26783edfe2f211dd4c2 Mon Sep 17 00:00:00 2001 From: Mark Reynolds Date: Apr 21 2015 21:11:49 +0000 Subject: Ticket 48151 - Improve CleanAllRUV logging Bug Description: It is very difficult to troubleshoot cleanAllRUV issues when there are multiple clean tasks running. Fix Description: Add the replica ID to each logging statement. https://fedorahosted.org/389/ticket/48151 Reviewed by: rmeggins(Thanks!) (cherry picked from commit 33330c355ace8f3d351ea1db94cf2e8faf12f19f) --- diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index a7da266..7f5d693 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -697,7 +697,7 @@ int is_task_aborted(ReplicaId rid); void delete_aborted_rid(Replica *replica, ReplicaId rid, char *repl_root, int skip); int is_pre_cleaned_rid(ReplicaId rid); void set_cleaned_rid(ReplicaId rid); -void cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...); +void cleanruv_log(Slapi_Task *task, int rid, char *task_type, char *fmt, ...); char * replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn); #define CLEANRIDSIZ 4 /* maximum number for concurrent CLEANALLRUV tasks */ diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index 1570ba7..39783b2 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1518,27 +1518,27 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, /* * Get our task settings */ - if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){ - PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-base-dn attribute"); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){ + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-id attribute"); + cleanruv_log(task, -1, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } - if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){ - PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-id attribute"); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + rid = atoi(rid_str); + if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){ + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-base-dn attribute"); + cleanruv_log(task, (int)rid, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } - if ((force_cleaning = fetch_attr(e, "replica-force-cleaning", 0)) != NULL){ if(strcasecmp(force_cleaning,"yes") != 0 && strcasecmp(force_cleaning,"no") != 0){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid value for replica-force-cleaning " "(%s). Value must be \"yes\" or \"no\" for task - (%s)", force_cleaning, slapi_sdn_get_dn(task_dn)); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, (int)rid, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -1549,11 +1549,10 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, /* * Check the rid */ - rid = atoi(rid_str); if (rid <= 0 || rid >= READ_ONLY_REPLICA_ID){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid replica id (%d) for task - (%s)", rid, slapi_sdn_get_dn(task_dn)); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -1561,7 +1560,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, if(is_cleaned_rid(rid)){ /* we are already cleaning this rid */ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is already being cleaned", rid); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_UNWILLING_TO_PERFORM; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -1572,7 +1571,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, dn = slapi_sdn_new_dn_byval(base_dn); if((r = replica_get_replica_from_dn(dn)) == NULL){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Could not find replica from dn(%s)",slapi_sdn_get_dn(dn)); - cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -1583,7 +1582,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, out: if(rc){ - cleanruv_log(task, CLEANALLRUV_ID, "Task failed...(%d)", rc); + cleanruv_log(task, rid, CLEANALLRUV_ID, "Task failed...(%d)", rc); slapi_task_finish(task, *returncode); } else { rc = SLAPI_DSE_CALLBACK_OK; @@ -1615,11 +1614,11 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c char *basedn = NULL; int rc = 0; - cleanruv_log(pre_task, CLEANALLRUV_ID,"Initiating CleanAllRUV Task..."); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID,"Initiating CleanAllRUV Task..."); if(get_cleanruv_task_count() >= CLEANRIDSIZ){ /* we are already running the maximum number of tasks */ - cleanruv_log(pre_task, CLEANALLRUV_ID, + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Exceeded maximum number of active CLEANALLRUV tasks(%d)",CLEANRIDSIZ); return LDAP_UNWILLING_TO_PERFORM; } @@ -1629,7 +1628,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c if(r){ replica = (Replica*)object_get_data (r); } else { - cleanruv_log(pre_task, CLEANALLRUV_ID, "Replica object is NULL, aborting task"); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Replica object is NULL, aborting task"); return -1; } /* @@ -1637,7 +1636,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c */ if(replica_get_type(replica) == REPLICA_TYPE_READONLY){ /* this is a consumer, send error */ - cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to clean rid (%d), task can not be run on a consumer",rid); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to clean rid (%d), task can not be run on a consumer",rid); if(task){ rc = -1; slapi_task_finish(task, rc); @@ -1647,7 +1646,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c /* * Grab the max csn of the deleted replica */ - cleanruv_log(pre_task, CLEANALLRUV_ID, "Retrieving maxcsn..."); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Retrieving maxcsn..."); basedn = (char *)slapi_sdn_get_dn(replica_get_root(replica)); maxcsn = replica_cleanallruv_find_maxcsn(replica, rid, basedn); if(maxcsn == NULL || csn_get_replicaid(maxcsn) == 0){ @@ -1660,7 +1659,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c csn_init_by_string(maxcsn, ""); } csn_as_string(maxcsn, PR_FALSE, csnstr); - cleanruv_log(pre_task, CLEANALLRUV_ID, "Found maxcsn (%s)",csnstr); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Found maxcsn (%s)",csnstr); /* * Create payload */ @@ -1669,7 +1668,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c slapi_ch_free_string(&ridstr); if(payload == NULL){ - cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to create extended op payload, aborting task"); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to create extended op payload, aborting task"); rc = -1; goto fail; } @@ -1679,7 +1678,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c */ data = (cleanruv_data*)slapi_ch_calloc(1, sizeof(cleanruv_data)); if (data == NULL) { - cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to allocate cleanruv_data. Aborting task."); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to allocate cleanruv_data. Aborting task."); rc = -1; goto fail; } @@ -1706,7 +1705,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c } fail: - cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to clean rid (%d)",rid); + cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to clean rid (%d)", rid); if(task){ slapi_task_finish(task, rc); } @@ -1779,9 +1778,9 @@ replica_cleanallruv_thread(void *arg) PR_Unlock( notify_lock ); data->repl_obj = replica_get_replica_from_dn(data->sdn); if(data->repl_obj == NULL){ - cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to retrieve repl object from dn(%s).", data->sdn); - aborted = 1; - goto done; + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Unable to retrieve repl object from dn(%s).", data->sdn); + aborted = 1; + goto done; } data->replica = (Replica*)object_get_data(data->repl_obj); free_obj = 1; @@ -1793,9 +1792,9 @@ replica_cleanallruv_thread(void *arg) } /* verify we have set our repl objects */ if(data->repl_obj == NULL || data->replica == NULL){ - cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to set the replica objects."); - aborted = 1; - goto done; + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Unable to set the replica objects."); + aborted = 1; + goto done; } if(data->repl_root == NULL){ /* we must have resumed from start up, fill in the repl root */ @@ -1815,11 +1814,11 @@ replica_cleanallruv_thread(void *arg) * Add the cleanallruv task to the repl config - so we can handle restarts */ add_cleaned_rid(data->rid, data->replica, csnstr, data->force); /* marks config that we started cleaning a rid */ - cleanruv_log(data->task, CLEANALLRUV_ID, "Cleaning rid (%d)...", data->rid); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Cleaning rid (%d)...", data->rid); /* * First, wait for the maxcsn to be covered */ - cleanruv_log(data->task, CLEANALLRUV_ID, "Waiting to process all the updates from the deleted replica..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Waiting to process all the updates from the deleted replica..."); ruv_obj = replica_get_ruv(data->replica); ruv = object_get_data (ruv_obj); while(data->maxcsn && !is_task_aborted(data->rid) && !is_cleaned_rid(data->rid) && !slapi_is_shutting_down()){ @@ -1837,7 +1836,7 @@ replica_cleanallruv_thread(void *arg) * * Even if we are forcing the cleaning, the replicas still need to be up */ - cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to be online..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Waiting for all the replicas to be online..."); if(check_agmts_are_alive(data->replica, data->rid, data->task)){ /* error, aborted or shutdown */ aborted = 1; @@ -1846,7 +1845,7 @@ replica_cleanallruv_thread(void *arg) /* * Make sure all the replicas have seen the max csn */ - cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to receive all the deleted replica updates..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Waiting for all the replicas to receive all the deleted replica updates..."); if(strcasecmp(data->force,"no") == 0 && check_agmts_are_caught_up(data, csnstr)){ /* error, aborted or shutdown */ aborted = 1; @@ -1860,7 +1859,7 @@ replica_cleanallruv_thread(void *arg) /* * Now send the cleanruv extended op to all the agreements */ - cleanruv_log(data->task, CLEANALLRUV_ID, "Sending cleanAllRUV task to all the replicas..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Sending cleanAllRUV task to all the replicas..."); while(agmt_not_notified && !is_task_aborted(data->rid) && !slapi_is_shutting_down()){ agmt_obj = agmtlist_get_first_agreement_for_replica (data->replica); if(agmt_obj == NULL){ @@ -1878,7 +1877,7 @@ replica_cleanallruv_thread(void *arg) agmt_not_notified = 0; } else { agmt_not_notified = 1; - cleanruv_log(data->task, CLEANALLRUV_ID, "Failed to send task to replica (%s)",agmt_get_long_name(agmt)); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Failed to send task to replica (%s)",agmt_get_long_name(agmt)); break; } agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -1894,7 +1893,7 @@ replica_cleanallruv_thread(void *arg) /* * need to sleep between passes */ - cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas have received the " + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas have received the " "cleanallruv extended op, retrying in %d seconds",interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); @@ -1909,12 +1908,13 @@ replica_cleanallruv_thread(void *arg) /* * Run the CLEANRUV task */ - cleanruv_log(data->task, CLEANALLRUV_ID,"Cleaning local ruv's..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Cleaning local ruv's..."); replica_execute_cleanruv_task (data->repl_obj, data->rid, returntext); /* * Wait for all the replicas to be cleaned */ - cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to be cleaned..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, + "Waiting for all the replicas to be cleaned..."); interval = 10; while(found_dirty_rid && !is_task_aborted(data->rid) && !slapi_is_shutting_down()){ @@ -1933,7 +1933,8 @@ replica_cleanallruv_thread(void *arg) found_dirty_rid = 0; } else { found_dirty_rid = 1; - cleanruv_log(data->task, CLEANALLRUV_ID,"Replica is not cleaned yet (%s)",agmt_get_long_name(agmt)); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Replica is not cleaned yet (%s)", + agmt_get_long_name(agmt)); break; } agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -1949,7 +1950,7 @@ replica_cleanallruv_thread(void *arg) /* * need to sleep between passes */ - cleanruv_log(data->task, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " "retrying in %d seconds", interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); @@ -1971,16 +1972,16 @@ done: delete_cleaned_rid_config(data); /* make sure all the replicas have been "pre_cleaned" before finishing */ check_replicas_are_done_cleaning(data); - cleanruv_log(data->task, CLEANALLRUV_ID, "Successfully cleaned rid(%d).", data->rid); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Successfully cleaned rid(%d).", data->rid); remove_cleaned_rid(data->rid); } else { /* * Shutdown or abort */ if(!is_task_aborted(data->rid)){ - cleanruv_log(data->task, CLEANALLRUV_ID,"Server shutting down. Process will resume at server startup"); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Server shutting down. Process will resume at server startup"); } else { - cleanruv_log(data->task, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid); delete_cleaned_rid_config(data); remove_cleaned_rid(data->rid); } @@ -2048,7 +2049,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data ) int not_all_cleaned = 1; int interval = 10; - cleanruv_log(data->task, CLEANALLRUV_ID, "Waiting for all the replicas to finish cleaning..."); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Waiting for all the replicas to finish cleaning..."); csn_as_string(data->maxcsn, PR_FALSE, csnstr); filter = PR_smprintf("(%s=%d:%s:%s)", type_replicaCleanRUV,(int)data->rid, csnstr, data->force); @@ -2076,7 +2077,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data ) if(not_all_cleaned == 0 || is_task_aborted(data->rid) ){ break; } - cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); PR_Unlock( notify_lock ); @@ -2151,7 +2152,7 @@ check_replicas_are_done_aborting(cleanruv_data *data ) int not_all_aborted = 1; int interval = 10; - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Waiting for all the replicas to finish aborting..."); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Waiting for all the replicas to finish aborting..."); filter = PR_smprintf("(%s=%d:%s)", type_replicaAbortCleanRUV, data->rid, data->repl_root); @@ -2179,7 +2180,7 @@ check_replicas_are_done_aborting(cleanruv_data *data ) if(not_all_aborted == 0){ break; } - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); PR_Unlock( notify_lock ); @@ -2223,7 +2224,7 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) not_all_caughtup = 0; } else { not_all_caughtup = 1; - cleanruv_log(data->task, CLEANALLRUV_ID, "Replica not caught up (%s)",agmt_get_long_name(agmt)); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replica not caught up (%s)",agmt_get_long_name(agmt)); break; } agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -2232,7 +2233,8 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) if(not_all_caughtup == 0 || is_task_aborted(data->rid) ){ break; } - cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas caught up, retrying in %d seconds",interval); + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, + "Not all replicas caught up, retrying in %d seconds",interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); PR_Unlock( notify_lock ); @@ -2280,7 +2282,8 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) not_all_alive = 0; } else { not_all_alive = 1; - cleanruv_log(task, CLEANALLRUV_ID, "Replica not online (%s)",agmt_get_long_name(agmt)); + cleanruv_log(task, rid, CLEANALLRUV_ID, "Replica not online (%s)", + agmt_get_long_name(agmt)); break; } agmt_obj = agmtlist_get_next_agreement_for_replica (replica, agmt_obj); @@ -2289,7 +2292,8 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) if(not_all_alive == 0 || is_task_aborted(rid)){ break; } - cleanruv_log(task, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...",interval); + cleanruv_log(task, rid, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...", + interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); PR_Unlock( notify_lock ); @@ -2393,8 +2397,9 @@ replica_send_cleanruv_task(Repl_Agmt *agmt, cleanruv_data *clean_data) if(rc != LDAP_SUCCESS){ char *hostname = agmt_get_hostname(agmt); - cleanruv_log(clean_data->task, CLEANALLRUV_ID, "Failed to add CLEANRUV task (%s) to replica " - "(%s). You will need to manually run the CLEANRUV task on this replica (%s) error (%d)", + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID, + "Failed to add CLEANRUV task (%s) to replica (%s). You will need " + "to manually run the CLEANRUV task on this replica (%s) error (%d)", repl_dn, agmt_get_long_name(agmt), hostname, rc); slapi_ch_free_string(&hostname); } @@ -2666,7 +2671,7 @@ delete_cleaned_rid_config(cleanruv_data *clean_data) int rc = -1, ret, rid; if(clean_data == NULL){ - cleanruv_log(NULL, CLEANALLRUV_ID, "delete_cleaned_rid_config: cleanruv data is NULL, " + cleanruv_log(NULL, clean_data->rid, CLEANALLRUV_ID, "delete_cleaned_rid_config: cleanruv data is NULL, " "failed to clean the config."); return; } @@ -2685,7 +2690,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data) slapi_search_internal_pb(pb); slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &ret); if (ret != LDAP_SUCCESS){ - cleanruv_log(clean_data->task, CLEANALLRUV_ID,"delete_cleaned_rid_config: internal search failed(%d).",ret); + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID, + "delete_cleaned_rid_config: internal search failed(%d).",ret); goto bail; } else { slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_SEARCH_ENTRIES, &entries); @@ -2693,7 +2699,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data) /* * No matching entries! */ - cleanruv_log(clean_data->task, CLEANALLRUV_ID,"delete_cleaned_rid_config: failed to find any " + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID, + "delete_cleaned_rid_config: failed to find any " "entries with nsds5ReplicaCleanRUV under (%s)", dn); goto bail; } else { @@ -2744,7 +2751,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data) } if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_OBJECT){ - cleanruv_log(clean_data->task, CLEANALLRUV_ID, "delete_cleaned_rid_config: failed to remove task data " + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID, + "delete_cleaned_rid_config: failed to remove task data " "from (%s) error (%d), rid (%d)", edn, rc, clean_data->rid); goto bail; } @@ -2754,7 +2762,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data) bail: if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_OBJECT){ - cleanruv_log(clean_data->task, CLEANALLRUV_ID, "delete_cleaned_rid_config: failed to remove replica config " + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID, + "delete_cleaned_rid_config: failed to remove replica config " "(%d), rid (%d)", rc, clean_data->rid); } slapi_free_search_results_internal(pb); @@ -2812,8 +2821,10 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter if(get_abort_cleanruv_task_count() >= CLEANRIDSIZ){ /* we are already running the maximum number of tasks */ - PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",CLEANRIDSIZ); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, + "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)", + CLEANRIDSIZ); + cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; return SLAPI_DSE_CALLBACK_ERROR; } @@ -2829,7 +2840,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter */ if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-id\""); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2842,14 +2853,14 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter if (rid <= 0 || rid >= READ_ONLY_REPLICA_ID){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid replica id (%d) for task - (%s)", rid, slapi_sdn_get_dn(slapi_entry_get_sdn(e))); - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-base-dn\""); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2857,7 +2868,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter if(!is_cleaned_rid(rid) && !is_pre_cleaned_rid(rid)){ /* we are not cleaning this rid */ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is not being cleaned, nothing to abort.", rid); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_UNWILLING_TO_PERFORM; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2865,7 +2876,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter if(is_task_aborted(rid)){ /* we are already aborting this rid */ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is already being aborted", rid); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_UNWILLING_TO_PERFORM; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2876,7 +2887,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter sdn = slapi_sdn_new_dn_byval(base_dn); if((r = replica_get_replica_from_dn(sdn)) == NULL){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Failed to find replica from dn(%s)", base_dn); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2888,7 +2899,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter if(strcasecmp(certify_all,"yes") && strcasecmp(certify_all,"no")){ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid value for \"replica-certify-all\", the value " "must be \"yes\" or \"no\"."); - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2903,7 +2914,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter payload = create_cleanruv_payload(ridstr); if(payload == NULL){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to create extended op payload, aborting task"); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "Failed to create extended op payload, aborting task"); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2919,7 +2930,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter */ data = (cleanruv_data*)slapi_ch_calloc(1, sizeof(cleanruv_data)); if (data == NULL) { - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Failed to allocate abort_cleanruv_data. Aborting task."); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"Failed to allocate abort_cleanruv_data. Aborting task."); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; @@ -2938,7 +2949,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); if (thread == NULL) { object_release(r); - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Unable to create abort thread. Aborting task."); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"Unable to create abort thread. Aborting task."); *returncode = LDAP_OPERATIONS_ERROR; slapi_ch_free_string(&data->certify); rc = SLAPI_DSE_CALLBACK_ERROR; @@ -2949,7 +2960,7 @@ out: slapi_sdn_free(&sdn); if(rc != SLAPI_DSE_CALLBACK_OK){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Abort Task failed (%d)", rc); + cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "Abort Task failed (%d)", rc); slapi_task_finish(task, rc); } @@ -2977,7 +2988,7 @@ replica_abort_task_thread(void *arg) slapi_task_inc_refcount(data->task); slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_abort_task_thread --> refcount incremented.\n"); } - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Aborting task for rid(%d)...",data->rid); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Aborting task for rid(%d)...",data->rid); /* * Need to build the replica from the dn @@ -2988,7 +2999,7 @@ replica_abort_task_thread(void *arg) * to timing issues, we need to wait to grab the replica obj until we get here. */ if((data->repl_obj = replica_get_replica_from_dn(data->sdn)) == NULL){ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to get replica object from dn (%s).", slapi_sdn_get_dn(data->sdn)); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Failed to get replica object from dn (%s).", slapi_sdn_get_dn(data->sdn)); goto done; } if(data->replica == NULL && data->repl_obj){ @@ -3036,7 +3047,7 @@ replica_abort_task_thread(void *arg) /* * need to sleep between passes */ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); PR_Lock( notify_lock ); PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); PR_Unlock( notify_lock ); @@ -3051,17 +3062,17 @@ replica_abort_task_thread(void *arg) done: if(agmt_not_notified){ /* failure */ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup."); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup."); } else { /* * Wait for this server to stop its cleanallruv task(which removes the rid from the cleaned list) */ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort..."); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort..."); while(is_cleaned_rid(data->rid)){ DS_Sleep(PR_SecondsToInterval(1)); count++; if(count == 60){ /* it should not take this long */ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "CleanAllRUV task failed to abort. You might need to " + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "CleanAllRUV task failed to abort. You might need to " "rerun the task."); rc = -1; break; @@ -3076,9 +3087,9 @@ done: } delete_aborted_rid(data->replica, data->rid, data->repl_root, 0); /* remove the in-memory aborted rid */ if(rc == 0){ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Successfully aborted task for rid(%d)", data->rid); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Successfully aborted task for rid(%d)", data->rid); } else { - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to abort task for rid(%d)",data->rid); + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Failed to abort task for rid(%d)",data->rid); } } @@ -3116,10 +3127,12 @@ replica_cleanallruv_send_abort_extop(Repl_Agmt *ra, Slapi_Task *task, struct ber */ rc = crc; if(rc){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to send extop to replica(%s).", agmt_get_long_name(ra)); + cleanruv_log(task, agmt_get_consumer_rid(ra, conn), ABORT_CLEANALLRUV_ID, + "Failed to send extop to replica(%s).", agmt_get_long_name(ra)); } } else { - cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to connect to replica(%s).", agmt_get_long_name(ra)); + cleanruv_log(task, agmt_get_consumer_rid(ra, conn), ABORT_CLEANALLRUV_ID, + "Failed to connect to replica(%s).", agmt_get_long_name(ra)); rc = -1; } conn_delete_internal_ext(conn); @@ -3154,7 +3167,7 @@ replica_cleanallruv_send_extop(Repl_Agmt *ra, cleanruv_data *clean_data, int che /* extop was accepted */ rc = 0; } else { - cleanruv_log(clean_data->task, CLEANALLRUV_ID,"Replica %s does not support the CLEANALLRUV task. " + cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,"Replica %s does not support the CLEANALLRUV task. " "Sending replica CLEANRUV task...", slapi_sdn_get_dn(agmt_get_dn_byref(ra))); /* * Ok, this replica doesn't know about CLEANALLRUV, so just manually @@ -3320,7 +3333,7 @@ replica_cleanallruv_check_maxcsn(Repl_Agmt *agmt, char *basedn, char *rid_text, csn_init_by_string(repl_max, remote_maxcsn); if(csn_compare (repl_max, max) < 0){ /* we are not caught up yet, free, and return */ - cleanruv_log(task, CLEANALLRUV_ID,"Replica maxcsn (%s) is not caught up with deleted replica's maxcsn(%s)", + cleanruv_log(task, atoi(rid_text), CLEANALLRUV_ID,"Replica maxcsn (%s) is not caught up with deleted replica's maxcsn(%s)", remote_maxcsn, maxcsn); rc = -1; } else { @@ -3482,7 +3495,7 @@ stop_ruv_cleaning() * Write our logging to the task and error log */ void -cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...) +cleanruv_log(Slapi_Task *task, int rid, char *task_type, char *fmt, ...) { va_list ap1; va_list ap2; @@ -3500,7 +3513,7 @@ cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...) slapi_task_log_status_ext(task, fmt, ap2); slapi_task_inc_progress(task); } - errlog_fmt = PR_smprintf("%s: %s\n",task_type, fmt); + errlog_fmt = PR_smprintf("%s (rid %d): %s \n",task_type, rid, fmt); slapi_log_error_ext(SLAPI_LOG_FATAL, repl_plugin_name, errlog_fmt, ap3, ap4); slapi_ch_free_string(&errlog_fmt); @@ -3562,7 +3575,7 @@ replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn) } } else { /* internal search failed */ - cleanruv_log(NULL, CLEANALLRUV_ID, "replica_cleanallruv_get_local_maxcsn: internal search failed (%d)\n", res); + cleanruv_log(NULL, (int)rid, CLEANALLRUV_ID, "replica_cleanallruv_get_local_maxcsn: internal search failed (%d)\n", res); } slapi_free_search_results_internal(search_pb);