From 0c194eb79aa381bf4e4cd05082956218512115a4 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: May 17 2013 22:01:04 +0000 Subject: Ticket #47362 - ipa upgrade selinuxusermap data not replicating https://fedorahosted.org/389/ticket/47362 Reviewed by: nhosoi (Thanks!) Branch: 389-ds-base-1.2.11 Fix Description: When nsslapd-port is set to 0, this causes the replica purl to be "ldap://hostname:0". At startup, the MMR code looks to see if this replica purl is in the RUV, by doing a string comparison of this purl with the ruv replica purl. If it is not there, the MMR code wipes out this ruv element. Later the code in replica_check_for_data_reload() uses this RUV to see if it needs to reinit the changelog. Since the RUV doesn't match the changelog RUV any more, the changelog is erased, which erases any changes that were made in the meantime. The missing RUV element causes the supplier to attempt to send over changes which may already exist on the consumer. If one of these is an ADD, the urp code will correctly flag this as an attempt to add an entry that already exists, and will turn this into a replConflict entry. A subsequent attempt to replicate the same ADD will cause an error in the urp code which will cause it to return err=53. Replication will then become stuck on this update - it will keep trying to send it over and over again, and will not be able to proceed. The only workaround is to reinitialize the replica, to get the database RUV and changelog in a consistent state. I've also added some additional RUV debugging when using the REPL log level. 
Platforms tested: RHEL6 x86_64 Flag Day: no Doc impact: no --- diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 743be57..82b121c 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -1931,6 +1931,44 @@ repl5_inc_stop(Private_Repl_Protocol *prp) agmt_get_long_name(prp->agmt), PR_IntervalToSeconds(now-start)); } + if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { + if (NULL == prp->replica_object) { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "%s: repl5_inc_stop: protocol replica_object is NULL\n", + agmt_get_long_name(prp->agmt)); + } else { + Replica *replica; + object_acquire(prp->replica_object); + replica = object_get_data(prp->replica_object); + if (NULL == replica) { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "%s: repl5_inc_stop: replica is NULL\n", + agmt_get_long_name(prp->agmt)); + } else { + Object *ruv_obj = replica_get_ruv(replica); + if (NULL == ruv_obj) { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "%s: repl5_inc_stop: ruv_obj is NULL\n", + agmt_get_long_name(prp->agmt)); + } else { + RUV *ruv; + object_acquire(ruv_obj); + ruv = (RUV*)object_get_data (ruv_obj); + if (NULL == ruv) { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "%s: repl5_inc_stop: ruv is NULL\n", + agmt_get_long_name(prp->agmt)); + + } else { + ruv_dump(ruv, "Database RUV", NULL); + } + object_release(ruv_obj); + } + } + object_release(prp->replica_object); + } + + } return return_value; } diff --git a/ldap/servers/plugins/replication/repl5_ruv.c b/ldap/servers/plugins/replication/repl5_ruv.c index b52dd49..8fbd89c 100644 --- a/ldap/servers/plugins/replication/repl5_ruv.c +++ b/ldap/servers/plugins/replication/repl5_ruv.c @@ -208,6 +208,9 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv, ReplicaId * Slapi_Value *value; const struct berval *bval; const char *purl = NULL; + char 
*localhost = get_localhost_DNS(); + size_t localhostlen = localhost ? strlen(localhost) : 0; + int port = config_get_port(); return_value = RUV_SUCCESS; @@ -236,16 +239,30 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv, ReplicaId * RUVElement *ruve = get_ruvelement_from_berval(bval); if (NULL != ruve) { + char *ptr; /* Is the local purl already in the ruv ? */ if ( (*contain_purl==0) && ruve->replica_purl && purl && (strncmp(ruve->replica_purl, purl, strlen(purl))==0) ) { *contain_purl = ruve->rid; } + /* ticket 47362 - nsslapd-port: 0 causes replication to break */ + else if ((*contain_purl==0) && ruve->replica_purl && (port == 0) && localhost && + (ptr = strstr(ruve->replica_purl, localhost)) && (ptr != ruve->replica_purl) && + (*(ptr - 1) == '/') && (*(ptr+localhostlen) == ':')) + { + /* same hostname, but port number may have been temporarily set to 0 + * just allow it with whatever port number is already in the replica_purl + * do not reset the port number, do not tell the configure_ruv code that there + * is anything wrong + */ + *contain_purl = ruve->rid; + } dl_add ((*ruv)->elements, ruve); } } } } + slapi_ch_free_string(&localhost); } } return return_value; @@ -1279,6 +1296,11 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const ch const char *ruvbnames[] = {ruv2name, ruv1name}; const int nitems = 2; + if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { + ruv_dump(ruv1, (char *)ruv1name, NULL); + ruv_dump(ruv2, (char *)ruv2name, NULL); + } + /* compare replica generations first */ if (ruv1->replGen == NULL || ruv2->replGen == NULL) { slapi_log_error(loglevel, repl_plugin_name, @@ -1335,7 +1357,17 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const ch "than the max CSN [%s] from RUV [%s] for element [%s]\n", csnstrb, ruvbname, csnstra, ruvaname, ruvelem); rc = RUV_COMP_CSN_DIFFERS; + } else { + csn_as_string(replicaa->csn, PR_FALSE, csnstra); + slapi_log_error(SLAPI_LOG_REPL, 
repl_plugin_name, + "ruv_compare_ruv: the max CSN [%s] from RUV [%s] is less than " + "or equal to the max CSN [%s] from RUV [%s] for element [%s]\n", + csnstrb, ruvbname, csnstra, ruvaname, ruvelem); } + } else { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "ruv_compare_ruv: RUV [%s] has an empty CSN\n", + ruvbname); } } }