The nightly test test_backup_and_restore.py::TestBackupAndRestoreWithReplica::()::test_full_backup_and_restore_with_replica failed during [testing_ipa-4.7] Nightly PR #3706. The failure needs to be investigated.
test_backup_and_restore.py::TestBackupAndRestoreWithReplica::()::test_full_backup_and_restore_with_replica
Logs available at the following location.
Metadata Update from @pcech: - Issue tagged with: Falcon
Also happened on the testing_ipa-4-8 branch, PR #3875.
And on testing_master_latest PR #4125
And on testing_master_previous PR #4477
Also on testing_master_previous PR #4579: logs extract:
self = <ipatests.test_integration.test_backup_and_restore.TestBackupAndRestoreWithReplica object at 0x7fbc85b4fad0> cert_sign_request = {'master.ipa.test': '/tmp/tmp.hm76IxMjHb', 'replica0.ipa.test': '/tmp/tmp.HaIOsi4lSl', 'replica1.ipa.test': '/tmp/tmp.jlMVqZjJpj'} def test_full_backup_and_restore_with_replica(self, cert_sign_request): # check prerequisites self.check_replication_success(self.master) self.check_replication_success(self.replica1) self.master.run_command( ['ipa', 'service-add', 'TEST/' + self.master.hostname]) tasks.user_add(self.master, 'test1_master') tasks.user_add(self.replica1, 'test1_replica') with restore_checker(self.master): backup_path = tasks.get_backup_dir(self.master) # change data after backup self.master.run_command(['ipa', 'user-del', 'test1_master']) self.replica1.run_command(['ipa', 'user-del', 'test1_replica']) tasks.user_add(self.master, 'test2_master') tasks.user_add(self.replica1, 'test2_replica') # simulate master crash self.master.run_command(['ipactl', 'stop']) tasks.uninstall_master(self.master, clean=False) logger.info("Stopping and disabling oddjobd service") self.master.run_command([ "systemctl", "stop", "oddjobd" ]) self.master.run_command([ "systemctl", "disable", "oddjobd" ]) self.master.run_command(['ipa-restore', '-U', backup_path]) status = self.master.run_command([ "systemctl", "status", "oddjobd" ]) assert "active (running)" in status.stdout_text # replication should not work after restoration # create users to force master and replica to try to replicate tasks.user_add(self.master, 'test3_master') tasks.user_add(self.replica1, 'test3_replica') self.check_replication_error(self.master) self.check_replication_error(self.replica1) assert {'admin', 'test1_master', 'test1_replica', 'test3_master'} == \ self.get_users(self.master) assert {'admin', 'test2_master', 'test2_replica', 'test3_replica'} == \ self.get_users(self.replica1) # reestablish and check replication self.replica1.run_command(['ipa-replica-manage', 
're-initialize', '--from', self.master.hostname]) # create users to force master and replica to try to replicate tasks.user_add(self.master, 'test4_master') tasks.user_add(self.replica1, 'test4_replica') self.check_replication_success(self.master) > self.check_replication_success(self.replica1) test_integration/test_backup_and_restore.py:616: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ test_integration/test_backup_and_restore.py:547: in check_replication_success raise_on_timeout=True) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ldap = <ipatests.pytest_ipa.integration.host.LDAPClientWithoutCertCheck object at 0x7fbc844f72d0> timeout = 30 target_status_re = 'Error \\(0\\) Replica acquired successfully: Incremental update succeeded' raise_on_timeout = True def wait_for_replication(ldap, timeout=30, target_status_re=r'^0 |^Error \(0\) ', raise_on_timeout=False): """Wait for all replication agreements to reach desired state With defaults waits until updates on all replication agreements are done (or failed) and exits without exception :param ldap: LDAP client autenticated with necessary rights to read the mapping tree :param timeout: Maximum time to wait, in seconds :param target_status_re: Regexp of status to wait for :param raise_on_timeout: if True, raises AssertionError if status not reached in specified time Note that this waits for updates originating on this host, not those coming from other hosts. 
""" logger.debug('Waiting for replication to finish') start = time.time() while True: status_attr = 'nsds5replicaLastUpdateStatus' progress_attr = 'nsds5replicaUpdateInProgress' entries = ldap.get_entries( DN(('cn', 'mapping tree'), ('cn', 'config')), filter='(objectclass=nsds5replicationagreement)', attrs_list=[status_attr, progress_attr]) logger.debug('Replication agreements: \n%s', _entries_to_ldif(entries)) statuses = [entry.single_value[status_attr] for entry in entries] wrong_statuses = [s for s in statuses if not re.match(target_status_re, s)] if any(e.single_value[progress_attr] == 'TRUE' for e in entries): msg = 'Replication not finished' logger.debug(msg) elif wrong_statuses: msg = 'Unexpected replication status: %s' % wrong_statuses[0] logger.debug(msg) else: logger.debug('Replication finished') return if time.time() - start > timeout: logger.error('Giving up wait for replication to finish') if raise_on_timeout: > raise AssertionError(msg) E AssertionError: Unexpected replication status: Error (11) Replication error acquiring replica: Unable to acquire replica: the replica has the same Replica ID as this one. Replication is aborting. (duplicate replica ID detected)
Also happened on testing_master_latest Nightly PR #305; logs are available.
The failure has not occurred in the last 10+ runs on the master branch, so this issue is being closed. Feel free to re-open if a new failure is seen.
Metadata Update from @frenaud: - Issue close_status updated to: worksforme - Issue status updated to: Closed (was: Open)
Login to comment on this ticket.