From 4206d278118c25ef0816dc1df1b735ad32c3ed66 Mon Sep 17 00:00:00 2001
From: Thierry Bordaz
Date: Jul 10 2018 13:30:48 +0000
Subject: Ticket 49818 - For a replica, the bindDNGroup should be fetched the first time it is used, not when the replica is started


Bug Description:
    The fetching of the bindDNGroup works as designed, but this ticket
    makes it more flexible.

    At startup, if the group does not yet contain the replication
    manager, no replication session will succeed until the
    bindDnGroupCheckInterval delay expires.

    updatedn_group_last_check is the timestamp of the last fetch. At
    startup, updatedn_group_last_check is set to the current time, so
    the next fetch will happen no earlier than
    updatedn_group_last_check + bindDnGroupCheckInterval. If the groupDn
    is changed after startup, no incoming replication can happen for the
    first bindDnGroupCheckInterval seconds.

Fix Description:
    The fix consists of not setting updatedn_group_last_check at startup
    (leaving it at 0), so that the group is fetched when the first
    incoming replication session happens.

https://pagure.io/389-ds-base/issue/49818

Reviewed by: Mark Reynolds, Simon Spichugi (thanks !!!)

Platforms tested: F27

Flag Day: no

Doc impact: no
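
For reviewers, the timing described above can be modelled in a few lines of
Python. This is only an illustration of the arithmetic, not the server code:
the real check lives in the C code of repl5_replica.c, and the names below
are made up for the example.

    BIND_DN_GROUP_CHECK_INTERVAL = 60  # nsDS5ReplicaBindDnGroupCheckInterval

    def group_needs_refetch(last_check, now, interval=BIND_DN_GROUP_CHECK_INTERVAL):
        # The group is refetched only when the last fetch is older than the interval
        return now - last_check >= interval

    startup = 1000000  # arbitrary example clock value, in seconds

    # Before the fix: last_check is initialized to the startup time, so a
    # session arriving 10 seconds after startup may NOT refetch the group
    assert not group_needs_refetch(last_check=startup, now=startup + 10)

    # After the fix: last_check starts at 0, so the very first incoming
    # session refetches the group immediately
    assert group_needs_refetch(last_check=0, now=startup + 10)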
---

diff --git a/dirsrvtests/tests/suites/replication/regression_test.py b/dirsrvtests/tests/suites/replication/regression_test.py
index 2b81916..f44bbaa 100644
--- a/dirsrvtests/tests/suites/replication/regression_test.py
+++ b/dirsrvtests/tests/suites/replication/regression_test.py
@@ -13,7 +13,10 @@ from lib389.topologies import topology_m2 as topo_m2, TopologyMain, topology_m3
 from lib389._constants import *
 from . import get_repl_entries
 from lib389.idm.organizationalunit import OrganizationalUnits
+from lib389.agreement import Agreements
 from lib389.idm.user import UserAccount
+from lib389 import Entry
+from lib389.idm.group import Groups, Group
 from lib389.replica import Replicas, ReplicationManager
 from lib389.changelog import Changelog5
 
@@ -32,6 +35,43 @@ else:
     logging.getLogger(__name__).setLevel(logging.INFO)
 log = logging.getLogger(__name__)
 
+def find_start_location(file, no):
+    # Return the file offset just after the no-th "slapd started" message,
+    # or -1 if the server started fewer than 'no' times
+    log_pattern = re.compile("slapd_daemon - slapd started.")
+    count = 0
+    while True:
+        line = file.readline()
+        log.debug("find_start_location: [%d] %s" % (file.tell(), line))
+        found = log_pattern.search(line)
+        if (found):
+            count = count + 1
+            if (count == no):
+                return file.tell()
+        if (line == ''):
+            break
+    return -1
+
+
+def pattern_errorlog(file, log_pattern, start_location=0):
+
+    count = 0
+    log.debug("_pattern_errorlog: start from location %d" % start_location)
+    file.seek(start_location)
+
+    # Use a 'while True' iteration because 'for line in file' hits a
+    # python bug that breaks file.tell()
+    while True:
+        line = file.readline()
+        log.debug("_pattern_errorlog: [%d] %s" % (file.tell(), line))
+        found = log_pattern.search(line)
+        if (found):
+            count = count + 1
+        if (line == ''):
+            break
+
+    log.debug("_pattern_errorlog: complete (count=%d)" % count)
+    return count
 
 @pytest.fixture()
 def test_entry(topo_m2, request):
@@ -254,6 +294,161 @@ def test_invalid_agmt(topo_m2):
     repl.test_replication(m1, m2)
     repl.test_replication(m2, m1)
 
+def test_fetch_bindDnGroup(topo_m2):
+    """Check that the bindDNGroup is fetched on the first replication session
+
+    :id: 5f1b1f59-6744-4260-b091-c82d22130025
+    :setup: 2 Master Instances
+    :steps:
+        1. Create a replication-bound user and group, with the user *not* a member of the group
+        2. Check that replication is working
+        3. Do some preparation, needed because lib389 magic already defines replication via a group:
+           - define the group as the groupDN for replication, with 60sec as the fetch interval
+           - pause the RAs in both directions
+           - define the user as the bindDn of the RAs
+        4. Restart the servers.
+            This sets the fetch time to 0, so the next session will refetch the group
+        5. Before resuming the RAs, add the user to the groupDN (on both sides, as replication is not working at that time)
+        6. Trigger an update and check that replication is working and that no failure
+           'does not have permission to supply replication updates to the replica' is logged on the supplier side
+    :expectedresults:
+        1. Success
+        2. Success
+        3. Success
+        4. Success
+        5. Success
+        6. Success
+    """
+
+    # If you need any test suite initialization,
+    # please write an additional fixture for that (including a finalizer).
+    # Topologies for suites are predefined in lib389/topologies.py.
+
+    # If you need the host, port or any other data about an instance,
+    # please use the instance object attributes (for example, topo.ms["master1"].serverid)
+    M1 = topo_m2.ms['master1']
+    M2 = topo_m2.ms['master2']
+
+    # Enable the replication log level. Not really necessary
+    M1.modify_s('cn=config', [(ldap.MOD_REPLACE, 'nsslapd-errorlog-level', b'8192')])
+    M2.modify_s('cn=config', [(ldap.MOD_REPLACE, 'nsslapd-errorlog-level', b'8192')])
+
+    # Create a group and a user
+    PEOPLE = "ou=People,%s" % SUFFIX
+    PASSWD = 'password'
+    REPL_MGR_BOUND_DN = 'repl_mgr_bound_dn'
+
+    uid = REPL_MGR_BOUND_DN.encode()
+    users = UserAccounts(M1, PEOPLE, rdn=None)
+    user_props = TEST_USER_PROPERTIES.copy()
+    user_props.update({'uid': uid, 'cn': uid, 'sn': '_%s' % uid, 'userpassword': PASSWD.encode(), 'description': b'value creation'})
+    test_user = users.create(properties=user_props)
+
+    groups_M1 = Groups(M1, DEFAULT_SUFFIX)
+    group_properties = {
+        'cn': 'group1',
+        'description': 'testgroup'}
+    group_M1 = groups_M1.create(properties=group_properties)
+    group_M2 = Group(M2, group_M1.dn)
+    assert(not group_M1.is_member(test_user.dn))
+
+
+
+    # Check that M1 and M2 are in sync
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    repl.wait_for_replication(M1, M2, timeout=20)
+
+    # Define the group as the replication manager and the fetch interval as 60sec
+    replicas = Replicas(M1)
+    replica = replicas.list()[0]
+    replica.apply_mods([(ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroupCheckInterval', '60'),
+                        (ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroup', group_M1.dn)])
+
+
+    replicas = Replicas(M2)
+    replica = replicas.list()[0]
+    replica.apply_mods([(ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroupCheckInterval', '60'),
+                        (ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroup', group_M1.dn)])
+
+
+    # Then pause the replication agreements to prevent them from trying to
+    # acquire the replica while the user is not yet a member of the group
+    topo_m2.pause_all_replicas()
+
+    # Define the user as the bindDN of the RAs
+    for inst in (M1, M2):
+        agmts = Agreements(inst)
+        agmt = agmts.list()[0]
+        agmt.replace('nsDS5ReplicaBindDN', test_user.dn.encode())
+        agmt.replace('nsds5ReplicaCredentials', PASSWD.encode())
+
+
+    # Key step
+    # The restart will fetch the group/members defined in the replica
+    #
+    # The user is NOT a member of the group, so replication will not work until bindDnGroupCheckInterval expires
+    #
+    # With the fix, the first fetch is not taken into account (fetch time=0)
+    # so on the first session, the group will be fetched
+    M1.restart()
+    M2.restart()
+
+    # Replication being broken here, we need to apply the same update directly
+    # on both sides; no other solution was found short of a total update
+    group_M1.add_member(test_user.dn)
+    group_M2.add_member(test_user.dn)
+
+    topo_m2.resume_all_replicas()
+
+    # Trigger updates to be sure to have a replication session, giving it some time
+    M1.modify_s(test_user.dn, [(ldap.MOD_ADD, 'description', b'value_1_1')])
+    M2.modify_s(test_user.dn, [(ldap.MOD_ADD, 'description', b'value_2_2')])
+    time.sleep(10)
+
+    # Check replication is working
+    ents = M1.search_s(test_user.dn, ldap.SCOPE_BASE, '(objectclass=*)')
+    for ent in ents:
+        assert (ent.hasAttr('description'))
+        found = 0
+        for val in ent.getValues('description'):
+            if (val == b'value_1_1'):
+                found = found + 1
+            elif (val == b'value_2_2'):
+                found = found + 1
+        assert (found == 2)
+
+    ents = M2.search_s(test_user.dn, ldap.SCOPE_BASE, '(objectclass=*)')
+    for ent in ents:
+        assert (ent.hasAttr('description'))
+        found = 0
+        for val in ent.getValues('description'):
+            if (val == b'value_1_1'):
+                found = found + 1
+            elif (val == b'value_2_2'):
+                found = found + 1
+        assert (found == 2)
+
+    # Check in the logs that the member was detected in the group although
+    # at startup it was not a member of the group
+    regex = re.compile("does not have permission to supply replication updates to the replica.")
+    errorlog_M1 = open(M1.errlog, "r")
+    errorlog_M2 = open(M2.errlog, "r")
+
+    # Find the last restart position
+    restart_location_M1 = find_start_location(errorlog_M1, 2)
+    assert (restart_location_M1 != -1)
+    restart_location_M2 = find_start_location(errorlog_M2, 2)
+    assert (restart_location_M2 != -1)
+
+    # Then check there is no failure to authenticate (tolerate at most one attempt)
+    count = pattern_errorlog(errorlog_M1, regex, start_location=restart_location_M1)
+    assert (count <= 1)
+    count = pattern_errorlog(errorlog_M2, regex, start_location=restart_location_M2)
+    assert (count <= 1)
+
+    if DEBUGGING:
+        # Add debugging steps (if any)...
+        pass
 
 def test_cleanallruv_repl(topo_m3):
     """Test that cleanallruv could not break replication if anchor csn in ruv originated in deleted replica
@@ -360,6 +555,7 @@ def test_cleanallruv_repl(topo_m3):
 
     assert set(expected_m2_users).issubset(current_m2_users)
 
+
 if __name__ == '__main__':
     # Run isolated
     # -s for DEBUG mode
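
Reviewer note: the sketch below shows how the two helpers added above are
meant to be combined. It is a standalone illustration, assuming the helpers
are importable from the test module; the error-log path is a placeholder
(in the test itself the path comes from the instance's errlog attribute).

    import re
    from regression_test import find_start_location, pattern_errorlog

    denied = re.compile("does not have permission to supply replication updates to the replica.")
    with open('/var/log/dirsrv/slapd-master1/errors', 'r') as errorlog:
        # Offset just after the second 'slapd started' message, i.e. after the restart
        start = find_start_location(errorlog, 2)
        if start != -1:
            # With the fix, at most the very first acquire attempt may be denied
            assert pattern_errorlog(errorlog, denied, start_location=start) <= 1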
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
index 2738b7a..79c79c2 100644
--- a/ldap/servers/plugins/replication/repl5_replica.c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
@@ -2028,7 +2028,7 @@ _replica_init_from_config(Replica *r, Slapi_Entry *e, char *errortext)
     /* get replication bind dn groups */
     r->updatedn_groups = replica_updatedn_group_new(e);
     r->groupdn_list = replica_groupdn_list_new(r->updatedn_groups);
-    r->updatedn_group_last_check = time(NULL);
+    r->updatedn_group_last_check = 0;
     /* get groupdn check interval */
    if ((val = slapi_entry_attr_get_charptr(e, attr_replicaBindDnGroupCheckInterval))) {
         if (repl_config_valid_num(attr_replicaBindDnGroupCheckInterval, val, -1, INT_MAX, &rc, errormsg, &interval) != 0) {
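
After applying the patch, the group-related replica settings can be read
back on a live instance to confirm the configuration the fix relies on.
A minimal python-ldap sketch; the URI, credentials and suffix are
placeholder values to adapt to the deployment under test.

    import ldap

    conn = ldap.initialize('ldap://localhost:389')
    conn.simple_bind_s('cn=Directory Manager', 'password')

    # Standard DN of the replica entry for the dc=example,dc=com suffix
    replica_dn = 'cn=replica,cn="dc=example,dc=com",cn=mapping tree,cn=config'
    attrs = ['nsDS5ReplicaBindDnGroup', 'nsDS5ReplicaBindDnGroupCheckInterval']
    for dn, entry in conn.search_s(replica_dn, ldap.SCOPE_BASE, '(objectclass=*)', attrs):
        for name in attrs:
            print(name, entry.get(name))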