#50821 Issue 49624 - DB Deadlock on modrdn appears to corrupt database and entry cache
Closed 3 years ago by spichugi. Opened 4 years ago by vashirov.
vashirov/389-ds-base ds49624  into  master

@@ -0,0 +1,90 @@ 

+ # --- BEGIN COPYRIGHT BLOCK ---

+ # Copyright (C) 2020 Red Hat, Inc.

+ # All rights reserved.

+ #

+ # License: GPL (version 3 or any later version).

+ # See LICENSE for details.

+ # --- END COPYRIGHT BLOCK ---

+ #

+ import os

+ import ldap

+ import pytest

+ import subprocess

+ from lib389.tasks import *

+ from lib389.utils import *

+ from lib389.topologies import topology_st

+ from lib389.idm.user import UserAccounts

+ from lib389._constants import DEFAULT_SUFFIX

+ 

+ # Apply the tier1 marker to every test in this module.
+ pytestmark = pytest.mark.tier1

+ 

+ # Module logger with DEBUG output enabled.
+ # NOTE(review): `logging` is not imported by name in this file; presumably it
+ # arrives through one of the lib389 star imports - confirm.
+ logging.getLogger(__name__).setLevel(logging.DEBUG)

+ log = logging.getLogger(__name__)

+ 

+ # Content of the systemd drop-in (override.conf) that the set_systemd_unit_env
+ # fixture writes for the instance's dirsrv@<serverid> service unit. It sets
+ # the TXN_TESTING* environment variables for the server process.
+ # NOTE(review): presumably these switch on the server's transaction
+ # (deadlock) test mode, with the hold time given in milliseconds - confirm
+ # against the server sources.
+ txn_testing_config = """[Service]

+ Environment=TXN_TESTING=1

+ Environment=TXN_TEST_VERBOSE=1

+ Environment=TXN_TEST_HOLD_MSEC=200

+ """

+ 

+ @pytest.fixture(scope='function')

I think this needs a test skip if systemd == false/0, because this may not work in containers.

+ def set_systemd_unit_env(request, topology_st):

+     systemd_dir_path=f'/etc/systemd/system/dirsrv@{topology_st.standalone.serverid}.service.d'

+     if not os.path.exists(systemd_dir_path):

+         os.mkdir(systemd_dir_path)

+     with open(f'{systemd_dir_path}/override.conf', 'w') as f:

+         f.write(txn_testing_config)

+     subprocess.run(["systemctl", "daemon-reload"])

+     topology_st.standalone.restart()

+ 

+     def fin():

+         if os.path.exists(systemd_dir_path):

+             os.remove(f'{systemd_dir_path}/override.conf')

+             os.rmdir(systemd_dir_path)

+ 

+     request.addfinalizer(fin)

+ 

+ 

+ @pytest.mark.ds49624

+ @pytest.mark.bz1744623

+ def test_db_deadlock_on_modrdn(topology_st, set_systemd_unit_env):

+     """Test that DB deadlock on MODRDN doesn't cause entry cache corruption

+ 

+     :id: 631b2be9-5c03-44c7-9853-a87c923d5b30

+ 

+     :setup: Standalone instance

+ 

+     :steps: 1. Set TXN_TESTING env variable for the current DS instance

+             2. Restart the instance

+             3. Do MODRDN on the same entry back and forth multiple times

+ 

+     :expectedresults:

+             1. Pass

+             2. Instance should be up and running

+             3. Operations should be successful

+     """

+ 

+     topo = topology_st.standalone

+     topo.config.set('nsslapd-auditlog-logging-enabled','on')

+     topo.config.set('nsslapd-auditfaillog-logging-enabled','on')

+     TEST_ENTRY_NAME = 'tuser'

+     users = UserAccounts(topo, DEFAULT_SUFFIX)

+     user_properties = {

+         'uid': TEST_ENTRY_NAME,

+         'cn': TEST_ENTRY_NAME,

+         'sn': TEST_ENTRY_NAME,

+         'uidNumber': '1001',

+         'gidNumber': '2001',

+         'homeDirectory': '/home/{}'.format(TEST_ENTRY_NAME)

+     }

+ 

+     tuser = users.create(properties=user_properties)

+     for i in range(100):

+         try:

+             tuser.rename('uid=tuser', newsuperior=DEFAULT_SUFFIX, deloldrdn=False)

+         except:

+             pass

Maybe I miss something but it looks a bit odd to me.

You say in the steps: Operations should be successful, but you skip all exceptions here... So, in my opinion, either the expectedresults step should be more accurate or we should have better defined try-except as here...

+         try:

+             tuser.rename('uid=tuser', newsuperior=f'ou=People,{DEFAULT_SUFFIX}', deloldrdn=False)

+         except:

+             pass

rebased onto 66301c0

4 years ago

Maybe I'm missing something, but this looks a bit odd to me.

You say in the steps: Operations should be successful, but you skip all exceptions here... So, in my opinion, either the expectedresults step should be more accurate, or we should have a more narrowly defined try-except here...

I think this needs a test skip if systemd == false/0, because this may not work in containers.

Maybe I'm missing something, but this looks a bit odd to me.
You say in the steps: Operations should be successful, but you skip all exceptions here... So, in my opinion, either the expectedresults step should be more accurate, or we should have a more narrowly defined try-except here...

This was a reproducer, not a proper test: there is no recovery step after the deadlock (i.e. the server will hang and instance removal might fail).
I'll update this test case to handle err=68 and add a proper cleanup.

I think this needs a test skip if systemd == false/0, because this may not work in containers.

Yes, I will add it, thanks.

389-ds-base is moving from Pagure to GitHub. This means that new issues and pull requests
will be accepted only in 389-ds-base's GitHub repository.

This pull request has been cloned to Github as issue and is available here:
- https://github.com/389ds/389-ds-base/issues/3875

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for any inconvenience.

Pull-Request has been closed by spichugi

3 years ago
Metadata