Commit 4206d27 Ticket 49818 - A replica's bindDNGroup should be fetched the first time it is used, not when the replica is started

2 files changed. Authored and committed by tbordaz 6 days ago
Ticket 49818 - A replica's bindDNGroup should be fetched the first time it is used, not when the replica is started

Bug Description:
	The fetching of the bindDNGroup works as designed, but this ticket makes it more flexible.

	At startup, if the group does not contain the replica_mgr, no replication session
	will succeed until the bindDnGroupCheckInterval delay has elapsed.
	updatedn_group_last_check is the timestamp of the last fetch. At startup it is set
	to the current time, so the next fetch will not happen before
	updatedn_group_last_check + bindDnGroupCheckInterval.

	If the groupDn is changed after startup, no incoming replication can succeed during
	the first bindDnGroupCheckInterval seconds.
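
	For illustration, a minimal self-contained sketch of the gating described above.
	Only updatedn_group_last_check and the nsDS5ReplicaBindDnGroupCheckInterval
	attribute come from this ticket; the struct, field and helper names below are
	assumptions made for the example, not the actual 389-ds-base code.

	    #include <stdio.h>
	    #include <time.h>

	    /* Toy model of the pre-fix behaviour (assumed names). */
	    struct replica {
	        time_t updatedn_group_last_check;     /* timestamp of the last group fetch */
	        long   bind_dn_group_check_interval;  /* nsDS5ReplicaBindDnGroupCheckInterval */
	    };

	    /* The bindDNGroup is only re-read once the interval has elapsed. */
	    static int group_needs_refetch(const struct replica *r, time_t now)
	    {
	        return (now - r->updatedn_group_last_check) >= r->bind_dn_group_check_interval;
	    }

	    int main(void)
	    {
	        struct replica r = { .bind_dn_group_check_interval = 60 };

	        /* Pre-fix behaviour: the timestamp is primed to "now" at startup, so a
	         * session arriving a few seconds later does not re-read the group and a
	         * freshly added member is still rejected. */
	        r.updatedn_group_last_check = time(NULL);
	        printf("refetch on first session: %d\n",
	               group_needs_refetch(&r, time(NULL) + 5));   /* prints 0 */
	        return 0;
	    }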

Fix Description:
	The fix consists of unsetting updatedn_group_last_check (i.e. setting it to 0) so that
	the group is fetched when the first incoming replication session happens.
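
	Under the same toy model (again just a sketch with assumed names, not the server
	code), the fix amounts to the initial value of the timestamp:

	    r.updatedn_group_last_check = 0;   /* was: time(NULL) */
	    /* (now - 0) exceeds any realistic interval, so group_needs_refetch()
	     * returns 1 on the very first incoming session and the bindDNGroup
	     * members are re-read before the bind DN is evaluated. */
	    printf("refetch on first session: %d\n",
	           group_needs_refetch(&r, time(NULL) + 5));   /* prints 1 */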

https://pagure.io/389-ds-base/issue/49818

Reviewed by: Mark Reynolds, Simon Spichugi (thanks !!!)

Platforms tested: F27

Flag Day: no

Doc impact: no

    
  1 @@ -13,7 +13,10 @@
  2   from lib389._constants import *
  3   from . import get_repl_entries
  4   from lib389.idm.organizationalunit import OrganizationalUnits
  5 + from lib389.agreement import Agreements
  6   from lib389.idm.user import UserAccount
  7 + from lib389 import Entry
  8 + from lib389.idm.group import Groups, Group
  9   from lib389.replica import Replicas, ReplicationManager
 10   from lib389.changelog import Changelog5
 11   
 12 @@ -32,6 +35,41 @@
 13       logging.getLogger(__name__).setLevel(logging.INFO)
 14   log = logging.getLogger(__name__)
 15   
 16 + def find_start_location(file, no):
 17 +     log_pattern = re.compile("slapd_daemon - slapd started.")
 18 +     count = 0
 19 +     while True:
 20 +         line = file.readline()
 21 +         log.debug("_pattern_errorlog: [%d] %s" % (file.tell(), line))
 22 +         found = log_pattern.search(line)
 23 +         if (found):
 24 +             count = count + 1
 25 +             if (count == no):
 26 +                 return file.tell()
 27 +         if (line == ''):
 28 +             break
 29 +     return -1
 30 + 
 31 + 
 32 + def pattern_errorlog(file, log_pattern, start_location=0):
 33 + 
 34 +     count = 0
 35 +     log.debug("_pattern_errorlog: start from location %d" % start_location)
 36 +     file.seek(start_location)
 37 + 
 38 +     # Use a 'while True' loop because 'for line in file:' hits a
 39 +     # Python bug that breaks file.tell()
 40 +     while True:
 41 +         line = file.readline()
 42 +         log.debug("_pattern_errorlog: [%d] %s" % (file.tell(), line))
 43 +         found = log_pattern.search(line)
 44 +         if (found):
 45 +             count = count + 1
 46 +         if (line == ''):
 47 +             break
 48 + 
 49 +     log.debug("_pattern_errorlog: complete (count=%d)" % count)
 50 +     return count
 51   
 52   @pytest.fixture()
 53   def test_entry(topo_m2, request):
 54 @@ -254,6 +292,161 @@
 55       repl.test_replication(m1, m2)
 56       repl.test_replication(m2, m1)
 57   
 58 + def test_fetch_bindDnGroup(topo_m2):
 59 +     """Check the bindDNGroup is fetched on first replication session
 60 + 
 61 +     :id: 5f1b1f59-6744-4260-b091-c82d22130025
 62 +     :setup: 2 Master Instances
 63 +     :steps:
 64 +         1. Create a replication-bound user and a group, but do not make the user a member of the group
 65 +         2. Check that replication is working
 66 +         3. Some preparation is required because lib389 magic already defines replication via a group:
 67 +            - define the group as the groupDN for replication and 60 sec as the fetch interval
 68 +            - pause the RAs in both directions
 69 +            - define the user as the bindDN of the RAs
 70 +         4. Restart the servers.
 71 +            This sets the fetch time to 0, so the next session will refetch the group
 72 +         5. Before resuming the RAs, add the user to the groupDN (on both sides, as replication is not working at that time)
 73 +         6. Trigger an update and check that replication is working and that
 74 +            no 'does not have permission to supply replication updates to the replica' failure is logged on the supplier side
 75 +     :expectedresults:
 76 +         1. Success
 77 +         2. Success
 78 +         3. Success
 79 +         4. Success
 80 +         5. Success
 81 +         6. Success
 82 +     """
 83 + 
 84 +     # If you need any test suite initialization,
 85 +     # please, write additional fixture for that (including finalizer).
 86 +     # Topology for suites are predefined in lib389/topologies.py.
 87 + 
 88 +     # If you need host, port or any other data about instance,
 89 +     # Please, use the instance object attributes for that (for example, topo.ms["master1"].serverid)
 90 +     M1 = topo_m2.ms['master1']
 91 +     M2 = topo_m2.ms['master2']
 92 + 
 93 +     # Enable replication log level. Not really necessary
 94 +     M1.modify_s('cn=config',[(ldap.MOD_REPLACE, 'nsslapd-errorlog-level', b'8192')])
 95 +     M2.modify_s('cn=config',[(ldap.MOD_REPLACE, 'nsslapd-errorlog-level', b'8192')])
 96 + 
 97 +     # Create a group and a user
 98 +     PEOPLE = "ou=People,%s" % SUFFIX
 99 +     PASSWD = 'password'
100 +     REPL_MGR_BOUND_DN='repl_mgr_bound_dn'
101 + 
102 +     uid = REPL_MGR_BOUND_DN.encode()
103 +     users = UserAccounts(M1, PEOPLE, rdn=None)
104 +     user_props = TEST_USER_PROPERTIES.copy()
105 +     user_props.update({'uid': uid, 'cn': uid, 'sn': '_%s' % uid, 'userpassword': PASSWD.encode(), 'description': b'value creation'})
106 +     test_user = users.create(properties=user_props)
107 + 
108 +     groups_M1 = Groups(M1, DEFAULT_SUFFIX)
109 +     group_properties = {
110 +         'cn' : 'group1',
111 +         'description' : 'testgroup'}
112 +     group_M1 = groups_M1.create(properties=group_properties)
113 +     group_M2 = Group(M2, group_M1.dn)
114 +     assert(not group_M1.is_member(test_user.dn))
115 + 
116 + 
117 + 
118 +     # Check that M1 and M2 are in sync
119 +     repl = ReplicationManager(DEFAULT_SUFFIX)
120 +     repl.wait_for_replication(M1, M2, timeout=20)
121 + 
122 +     # Define the group as the replication manager and fetch interval as 60sec
123 +     replicas = Replicas(M1)
124 +     replica = replicas.list()[0]
125 +     replica.apply_mods([(ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroupCheckInterval', '60'),
126 +                         (ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroup', group_M1.dn)])
127 + 
128 + 
129 +     replicas = Replicas(M2)
130 +     replica = replicas.list()[0]
131 +     replica.apply_mods([(ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroupCheckInterval', '60'),
132 +                         (ldap.MOD_REPLACE, 'nsDS5ReplicaBindDnGroup', group_M1.dn)])
133 + 
134 + 
135 +     # Then pause the replication agreements to prevent them from trying to acquire
136 +     # the replica while the user is not a member of the group
137 +     topo_m2.pause_all_replicas()
138 + 
139 +     # Define the user as the bindDN of the RAs
140 +     for inst in (M1, M2):
141 +         agmts = Agreements(inst)
142 +         agmt = agmts.list()[0]
143 +         agmt.replace('nsDS5ReplicaBindDN', test_user.dn.encode())
144 +         agmt.replace('nsds5ReplicaCredentials', PASSWD.encode())
145 + 
146 + 
147 +     # Key step
148 +     # The restart will fetch the group/members defined in the replica.
149 +     #
150 +     # The user is NOT a member of the group, so replication will not work until
151 +     # bindDnGroupCheckInterval expires.
152 +     # With the fix, the first fetch is not delayed (fetch time = 0), so the group
153 +     # will be fetched on the first incoming session.
154 +     M1.restart()
155 +     M2.restart()
156 + 
157 +     # Replication is broken at this point, so we have to apply the same update
158 +     # directly on both sides; no better solution was found short of a total update.
159 +     group_M1.add_member(test_user.dn)
160 +     group_M2.add_member(test_user.dn)
161 + 
162 +     topo_m2.resume_all_replicas()
163 + 
164 +     # Trigger updates to make sure a replication session happens, and give it some time
165 +     M1.modify_s(test_user.dn,[(ldap.MOD_ADD, 'description', b'value_1_1')])
166 +     M2.modify_s(test_user.dn,[(ldap.MOD_ADD, 'description', b'value_2_2')])
167 +     time.sleep(10)
168 + 
169 +     # Check replication is working
170 +     ents = M1.search_s(test_user.dn, ldap.SCOPE_BASE, '(objectclass=*)')
171 +     for ent in ents:
172 +         assert (ent.hasAttr('description'))
173 +         found = 0
174 +         for val in ent.getValues('description'):
175 +             if (val == b'value_1_1'):
176 +                 found = found + 1
177 +             elif (val == b'value_2_2'):
178 +                 found = found + 1
179 +         assert (found == 2)
180 + 
181 +     ents = M2.search_s(test_user.dn, ldap.SCOPE_BASE, '(objectclass=*)')
182 +     for ent in ents:
183 +         assert (ent.hasAttr('description'))
184 +         found = 0
185 +         for val in ent.getValues('description'):
186 +             if (val == b'value_1_1'):
187 +                 found = found + 1
188 +             elif (val == b'value_2_2'):
189 +                 found = found + 1
190 +         assert (found == 2)
191 + 
192 +     # Check in the logs that the member was detected in the group although
193 +     # at startup it was not a member of the group
194 +     regex = re.compile("does not have permission to supply replication updates to the replica.")
195 +     errorlog_M1 = open(M1.errlog, "r")
196 +     errorlog_M2 = open(M2.errlog, "r")
197 + 
198 +     # Find the last restart position
199 +     restart_location_M1 = find_start_location(errorlog_M1, 2)
200 +     assert (restart_location_M1 != -1)
201 +     restart_location_M2 = find_start_location(errorlog_M2, 2)
202 +     assert (restart_location_M2 != -1)
203 + 
204 +     # Then check there is no 'does not have permission' failure
205 +     count = pattern_errorlog(errorlog_M1, regex, start_location=restart_location_M1)
206 +     assert(count <= 1)
207 +     count = pattern_errorlog(errorlog_M2, regex, start_location=restart_location_M2)
208 +     assert(count <=1)
209 + 
210 +     if DEBUGGING:
211 +         # Add debugging steps(if any)...
212 +         pass
213   
214   def test_cleanallruv_repl(topo_m3):
215       """Test that cleanallruv could not break replication if anchor csn in ruv originated in deleted replica
216 @@ -360,6 +553,7 @@
217       assert set(expected_m2_users).issubset(current_m2_users)
218   
219   
220 + 
221   if __name__ == '__main__':
222       # Run isolated
223       # -s for DEBUG mode
1 @@ -2028,7 +2028,7 @@
2       /* get replication bind dn groups */
3       r->updatedn_groups = replica_updatedn_group_new(e);
4       r->groupdn_list = replica_groupdn_list_new(r->updatedn_groups);
5 -     r->updatedn_group_last_check = time(NULL);
6 +     r->updatedn_group_last_check = 0;
7       /* get groupdn check interval */
8       if ((val = slapi_entry_attr_get_charptr(e, attr_replicaBindDnGroupCheckInterval))) {
9           if (repl_config_valid_num(attr_replicaBindDnGroupCheckInterval, val, -1, INT_MAX, &rc, errormsg, &interval) != 0) {