From e92d77baf477a7f1cab3370454934c38d443c24b Mon Sep 17 00:00:00 2001 From: William Brown Date: Sep 07 2020 03:11:03 +0000 Subject: [PATCH 1/2] Ticket 51260 - fix potential syncrepl data corruption Bug Description: The cookie encodes which changelog entries we have seen up to and including. However, the sync process would then re-send the cl item from the cookie number. This could cause corruption in some cases as some combinations of actions between two points are no-oped in the server. Fix Description: Fix the changelog search to always process that entries of the CL must be greater than, but not equal to the already seen CL items from the cookie. fixes #51260 https://pagure.io/389-ds-base/issue/51260 Author: William Brown Review by: ??? --- diff --git a/ldap/servers/plugins/sync/sync_refresh.c b/ldap/servers/plugins/sync/sync_refresh.c index 34ac958..25dd8d6 100644 --- a/ldap/servers/plugins/sync/sync_refresh.c +++ b/ldap/servers/plugins/sync/sync_refresh.c @@ -323,8 +323,25 @@ sync_refresh_update_content(Slapi_PBlock *pb, Sync_Cookie *client_cookie, Sync_C cb_data.orig_pb = pb; cb_data.change_start = client_cookie->cookie_change_info; + /* + * The client has already seen up to AND including change_info, so this should + * should reflect that. originally was: + * + * filter = slapi_ch_smprintf("(&(changenumber>=%lu)(changenumber<=%lu))", + * client_cookie->cookie_change_info, + * server_cookie->cookie_change_info); + * + * which would create a situation where if the previous cn was say 5, and the next + * is 6, we'd get both 5 and 6, even though the client has already seen 5. But worse + * if 5 was an "add" of the entry, and 6 was a "delete" of the same entry then sync + * would over-optimise and remove the sync value because it things the add/delete was + * in the same operation so we'd never send it. But the client HAD seen the add, and + * now we'd never send the delete so this would be a bug. This created some confusion + * for me in the tests, but the sync repl tests now correctly work and reflect the behaviour + * expected. + */ filter = slapi_ch_smprintf("(&(changenumber>=%lu)(changenumber<=%lu))", - client_cookie->cookie_change_info, + client_cookie->cookie_change_info + 1, server_cookie->cookie_change_info); slapi_search_internal_set_pb( seq_pb, From 902e128c7e35d14fa1058c819dc042fd76bddc20 Mon Sep 17 00:00:00 2001 From: William Brown Date: Sep 07 2020 03:23:03 +0000 Subject: [PATCH 2/2] Ticket 51260 - improve tests and improve readme re refdel Bug Description: This is a supplement to 51260. Fix Description: This expands the test cases to be able to detect the subsequent data corruption of 51260. This also improves documentation around the rfc, and some todo comments for future work with entryuuid + openldap. fixes #51260 Author: William Brown Review by: ??? --- diff --git a/dirsrvtests/tests/suites/syncrepl_plugin/__init__.py b/dirsrvtests/tests/suites/syncrepl_plugin/__init__.py index d51f52f..3b2db27 100644 --- a/dirsrvtests/tests/suites/syncrepl_plugin/__init__.py +++ b/dirsrvtests/tests/suites/syncrepl_plugin/__init__.py @@ -21,6 +21,8 @@ from lib389._constants import * log = logging.getLogger(__name__) +OU_PEOPLE = "ou=people,%s" % DEFAULT_SUFFIX + class ISyncRepl(DirSrv, SyncreplConsumer): """ This implements a test harness for checking syncrepl, and allowing us to check various actions or @@ -55,6 +57,7 @@ class ISyncRepl(DirSrv, SyncreplConsumer): self.delete = [] self.present = [] self.entries = {} + self.refdel = False self.next_cookie = None # Start the sync # If cookie is none, will call "get_cookie" we have. @@ -89,6 +92,9 @@ class ISyncRepl(DirSrv, SyncreplConsumer): def syncrepl_present(self, uuids, refreshDeletes=False): log.debug(f'=====> refdel -> {refreshDeletes} uuids -> {uuids}') + if refreshDeletes: + # Indicate we recieved a refdel in the process. + self.refdel = True if uuids is not None: self.present = self.present + uuids @@ -105,8 +111,9 @@ class ISyncRepl(DirSrv, SyncreplConsumer): def syncstate_assert(st, sync): # How many entries do we have? + # We setup sync under ou=people so we can modrdn out of the scope. r = st.search_ext_s( - base=DEFAULT_SUFFIX, + base=OU_PEOPLE, scope=ldap.SCOPE_SUBTREE, filterstr='(objectClass=*)', attrsonly=1, @@ -115,49 +122,154 @@ def syncstate_assert(st, sync): # Initial sync log.debug("*test* initial") - sync.syncrepl_search() + sync.syncrepl_search(base=OU_PEOPLE) sync.syncrepl_complete() # check we caught them all assert len(r) == len(sync.entries.keys()) assert len(r) == len(sync.present) assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert False == sync.refdel # Add a new entry - account = nsUserAccounts(st, DEFAULT_SUFFIX).create_test_user() + + # Find the primary uuid we expect to see in syncrepl. + # This will be None if not present. + acc_uuid = account.get_attr_val_utf8('entryuuid') + if acc_uuid is None: + nsid = account.get_attr_val_utf8('nsuniqueid') + # nsunique has a diff format, so we change it up. + # 431cf081-b44311ea-83fdb082-f24d490e + # Add a hyphen V + # 431cf081-b443-11ea-83fdb082-f24d490e + nsid_a = nsid[:13] + '-' + nsid[13:] + # Add a hyphen V + # 431cf081-b443-11ea-83fd-b082-f24d490e + nsid_b = nsid_a[:23] + '-' + nsid_a[23:] + # Remove a hyphen V + # 431cf081-b443-11ea-83fd-b082-f24d490e + acc_uuid = nsid_b[:28] + nsid_b[29:] + # Tada! + # 431cf081-b443-11ea-83fd-b082f24d490e + log.debug(f"--> expected sync uuid (from nsuniqueid): {acc_uuid}") + else: + log.debug(f"--> expected sync uuid (from entryuuid): {acc_uuid}") + # Check log.debug("*test* add") - sync.syncrepl_search() + sync.syncrepl_search(base=OU_PEOPLE) sync.syncrepl_complete() sync.check_cookie() + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") + assert 1 == len(sync.entries.keys()) assert 1 == len(sync.present) + #################################### + ## assert sync.present == [acc_uuid] assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert False == sync.refdel # Mod account.replace('description', 'change') # Check log.debug("*test* mod") - sync.syncrepl_search() + sync.syncrepl_search(base=OU_PEOPLE) + sync.syncrepl_complete() + sync.check_cookie() + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") + assert 1 == len(sync.entries.keys()) + assert 1 == len(sync.present) + #################################### + ## assert sync.present == [acc_uuid] + assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert False == sync.refdel + + ## ModRdn (remain in scope) + account.rename('uid=test1_modrdn') + # newsuperior=None + # Check + log.debug("*test* modrdn (in scope)") + sync.syncrepl_search(base=OU_PEOPLE) sync.syncrepl_complete() sync.check_cookie() + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") assert 1 == len(sync.entries.keys()) assert 1 == len(sync.present) + #################################### + ## assert sync.present == [acc_uuid] assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert False == sync.refdel + + ## Modrdn (out of scope, then back into scope) + account.rename('uid=test1_modrdn', newsuperior=DEFAULT_SUFFIX) + + # Check it's gone. + log.debug("*test* modrdn (move out of scope)") + sync.syncrepl_search(base=OU_PEOPLE) + sync.syncrepl_complete() + sync.check_cookie() + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") + assert 0 == len(sync.entries.keys()) + assert 0 == len(sync.present) + assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert True == sync.refdel + + # Put it back + account.rename('uid=test1_modrdn', newsuperior=OU_PEOPLE) + log.debug("*test* modrdn (move in to scope)") + sync.syncrepl_search(base=OU_PEOPLE) + sync.syncrepl_complete() + sync.check_cookie() + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") + assert 1 == len(sync.entries.keys()) + assert 1 == len(sync.present) + #################################### + ## assert sync.present == [acc_uuid] + assert 0 == len(sync.delete) + if sync.openldap: + assert True == sync.refdel + else: + assert False == sync.refdel ## Delete account.delete() + # import time + # print("attach now ....") + # time.sleep(45) + # Check log.debug("*test* del") - sync.syncrepl_search() + sync.syncrepl_search(base=OU_PEOPLE) sync.syncrepl_complete() # In a delete, the cookie isn't updated (?) sync.check_cookie() log.debug(f'{sync.entries.keys()}') log.debug(f'{sync.present}') log.debug(f'{sync.delete}') + log.debug(f"sd: {sync.delete}, sp: {sync.present} sek: {sync.entries.keys()}") assert 0 == len(sync.entries.keys()) assert 0 == len(sync.present) - assert 1 == len(sync.delete) + assert 0 == len(sync.delete) + #################################### + ## assert sync.delete == [acc_uuid] + if sync.openldap: + assert True == sync.refdel + else: + assert True == sync.refdel diff --git a/ldap/servers/plugins/sync/README.md b/ldap/servers/plugins/sync/README.md new file mode 100644 index 0000000..af6d302 --- /dev/null +++ b/ldap/servers/plugins/sync/README.md @@ -0,0 +1,42 @@ +With support from a techwriter at SUSE, we managed to translate RCF4533 +into something we can understand. + +* Server issues Sync Operation to search. +* Search returns entries +* Each entry has a state add which includes the UUID of the entry +* Distinguished names can change, but UUIDs are stable. +* At the end of the search results, a sync done control cookie is sent to indicate that the session is done. + +* Clients issue a Sync Operation to search, and include previously returned cookie. +* Server determines what would be returned in a normal search operation. +* Server uses cookie to work out what was previously returned. +* Server issues results of only the things that have changed since last search. +* Each result sent also includes info about the status (changed or unchanged) + +* Server has two phases to sync deleted entries: present or delete. +* Each phase ends with a sync done control cookie. +* The present phase ends with sync done control value of searchresultdone=false +* the delete phase ends with sync done control value of searchresultdone=true +* The present phase can be followed by the delete phase. +* Each phase is complete only after a sync info message of refreshdone=false + +* During the present phase, the server sends an empty entry with state=present for each unchanged entry. +* During the present phase, the client is changed to match the server state, in preparation for the delete phase. +* During the delete phase, the server determines which entries in the client copy are no longer present. It also checks that the the number of changed entries is less than the number of unchanged entries. + For each entry that is no longer present, the server sends a state=delete. It does not* return a state=present for each present entry. + +* The server can send sync info messages that contain the list of UUIDs of either unchanged present entries, or deleted entries. This is instead of sending individual messages for each entry. +* If refreshDeletes=false the UUIDs of unchanged present entries are included in the syncUUIDs set. +* If refreshDeletes=true the UUIDs of unchanged deleted entries are included in the syncUUIDs set. +* Optionally, you can include a syncIdSet cookie to indicate the state of the content in the syncUUIDs set. + +* If the syncDoneValue is refreshDeletes=false the new copy includes: +- All changed entries returned by the current sync +- All unchanged entries from a previous sync that have been confirmed to still be present +- Unchanged entries confirmed as deleted are deleted from the client. In this case, they are assumed to have been deleted or moved. + +* If the syncDoneValue is refreshDeletes=true the new copy includes: +- All changed entries returned by the current sync +- All entries from a previous sync that have not been marked as deleted. + +* Clients can request that the synchronized copy is refreshed at any time. diff --git a/ldap/servers/plugins/sync/sync_refresh.c b/ldap/servers/plugins/sync/sync_refresh.c index 25dd8d6..28963cc 100644 --- a/ldap/servers/plugins/sync/sync_refresh.c +++ b/ldap/servers/plugins/sync/sync_refresh.c @@ -234,6 +234,8 @@ sync_srch_refresh_post_search(Slapi_PBlock *pb) * Point is, if we set refresh to true for openldap mode, it works, and if * it's false, the moment we send a single intermediate delete message, we * delete literally everything 🔥. + * + * See README.md for more about how this works. */ if (info->cookie->openldap_compat) { sync_create_sync_done_control(&ctrl[0], 1, cookiestr);