From cf14d5e63263a57e0145b2d0516b79f210e279ef Mon Sep 17 00:00:00 2001 From: Thierry bordaz (tbordaz) Date: Oct 11 2017 15:24:04 +0000 Subject: Ticket 47595 - fail to detect/reinit already existing instance/backup Bug Description: We need to re-initialize an instance from a previously taken backup. lib389 lacked the capability to test whether an instance or a backup already existed. Fix Description: Fix for backup/restore: - before restore clean up DB directory from previous '.db' files and backend dirs, and errors/access log files Testing existence of backup/instance - support offline dirsrv instance for offline access - add checkInstance/_offlineDirsrv/existsBackup/existsInstance - fix clearInstanceBackupFS Misc: - fix backup/restore functions (replica.changelog, instanceBackupFS, instanceRestoreFS) https://fedorahosted.org/389/ticket/47595 Reviewed by: Rich Megginson Platforms tested: Fedora 19 (jenkins) Flag Day: no Doc impact: no --- diff --git a/src/lib389/lib389/__init__.py b/src/lib389/lib389/__init__.py index 217136c..42672b4 100644 --- a/src/lib389/lib389/__init__.py +++ b/src/lib389/lib389/__init__.py @@ -176,10 +176,12 @@ class DirSrv(SimpleLDAPObject): ent = self.getEntry(DN_CONFIG, attrlist=[ 'nsslapd-instancedir', 'nsslapd-errorlog', + 'nsslapd-accesslog', 'nsslapd-certdir', 'nsslapd-schemadir']) - self.errlog = ent.getValue('nsslapd-errorlog') - self.confdir = ent.getValue('nsslapd-certdir') + self.errlog = ent.getValue('nsslapd-errorlog') + self.accesslog = ent.getValue('nsslapd-accesslog') + self.confdir = ent.getValue('nsslapd-certdir') if self.isLocal: if not self.confdir or not os.access(self.confdir + '/dse.ldif', os.R_OK): @@ -273,7 +275,7 @@ class DirSrv(SimpleLDAPObject): self.config = Config(self) self.index = Index(self) - def __init__(self, host='localhost', port=389, binddn='', bindpw='', serverId=None, nobind=False, sslport=0, verbose=False): # default to anon bind + def __init__(self, host='localhost', port=389, binddn='', bindpw='', 
serverId=None, nobind=False, sslport=0, verbose=False, offline=False): # default to anon bind """We just set our instance variables and wrap the methods. The real work is done in the following methods, reused during instance creation & co. @@ -295,13 +297,15 @@ class DirSrv(SimpleLDAPObject): self.isLocal = isLocalHost(host) self.serverId = serverId + # # dict caching DS structure # self.suffixes = {} self.agmt = {} - # the real init - self.__localinit__() + if not offline: + # the real init + self.__localinit__() self.log = log # add brookers self.__add_brookers__() diff --git a/src/lib389/lib389/brooker.py b/src/lib389/lib389/brooker.py index 935f5dc..932d9fc 100644 --- a/src/lib389/lib389/brooker.py +++ b/src/lib389/lib389/brooker.py @@ -216,7 +216,6 @@ class Agreement(object): @raise InvalidArgumentError - If the suffix is missing @raise NosuchEntryError - if a replica doesn't exist for that suffix - @raise ALREADY_EXISTS - If the replica agreement already exists @raise UNWILLING_TO_PERFORM if the database was previously in read-only state. 
To create new agreements you need to *restart* the directory server @@ -277,8 +276,8 @@ class Agreement(object): try: entry = self.conn.getEntry(dn_agreement, ldap.SCOPE_BASE) - self.log.warn("Agreement exists: %r" % dn_agreement) - raise ldap.ALREADY_EXISTS + self.log.warn("Agreement already exists: %r" % dn_agreement) + return dn_agreement except ldap.NO_SUCH_OBJECT: entry = None @@ -527,6 +526,7 @@ class Replica(object): 'nsslapd-changelogdir': dirpath }) self.log.debug("adding changelog entry: %r" % entry) + self.changelogdir = dirpath try: self.conn.add_s(entry) except ldap.ALREADY_EXISTS: diff --git a/src/lib389/lib389/tools.py b/src/lib389/lib389/tools.py index 3fef1e4..b4b741e 100644 --- a/src/lib389/lib389/tools.py +++ b/src/lib389/lib389/tools.py @@ -172,7 +172,11 @@ class DirSrvTools(object): timeout += int(time.time()) if cmd == 'stop': log.warn("unbinding before stop") - self.unbind() + try: + self.unbind() + except: + log.warn("Unbinding fails: Instance already down (stopped or killed) ?") + pass log.info("Setup error log") logfp = open(errLog, 'r') @@ -273,7 +277,7 @@ class DirSrvTools(object): return backup_dir, backup_pattern @staticmethod - def clearInstanceBackupFS(dirsrv, backup_file=None): + def clearInstanceBackupFS(dirsrv=None, backup_file=None): """ Remove a backup_file or all backup up of a given instance """ @@ -284,7 +288,7 @@ class DirSrvTools(object): except: log.info("clearInstanceBackupFS: fail to remove %s" % backup_file) pass - else: + elif dirsrv: backup_dir, backup_pattern = DirSrvTools._infoInstanceBackupFS(dirsrv) list_backup_files = glob.glob(backup_pattern) for f in list_backup_files: @@ -344,12 +348,16 @@ class DirSrvTools(object): # build the list of directories to scan instroot = "%s/slapd-%s" % (dirsrv.sroot, dirsrv.inst) ldir = [ instroot ] - if hasattr(dirsrv, 'confir'): + if hasattr(dirsrv, 'confdir'): ldir.append(dirsrv.confdir) if hasattr(dirsrv, 'dbdir'): ldir.append(dirsrv.dbdir) - if hasattr(dirsrv, 'changelogdb'): 
- ldir.append(dirsrv.changelogdb) + if hasattr(dirsrv, 'changelogdir'): + ldir.append(dirsrv.changelogdir) + if hasattr(dirsrv, 'errlog'): + ldir.append(os.path.dirname(dirsrv.errlog)) + if hasattr(dirsrv, 'accesslog') and os.path.dirname(dirsrv.accesslog) not in ldir: + ldir.append(os.path.dirname(dirsrv.accesslog)) # now scan the directory list to find the files to backup for dirToBackup in ldir: @@ -393,6 +401,29 @@ class DirSrvTools(object): log.warning("Unable to restore the instance (%s is not a file)" % backup_file) return 1 + # + # Second do some clean up + # + + # previous db (it may exists new db files not in the backup) + log.debug("instanceRestoreFS: remove subtree %s/*" % dirsrv.dbdir) + for root, dirs, files in os.walk(dirsrv.dbdir): + for d in dirs: + if d not in ("bak", "ldif"): + log.debug("instanceRestoreFS: before restore remove directory %s/%s" % (root, d)) + shutil.rmtree("%s/%s" % (root, d)) + + # previous error/access logs + log.debug("instanceRestoreFS: remove error logs %s" % dirsrv.errlog) + for f in glob.glob("%s*" % dirsrv.errlog): + log.debug("instanceRestoreFS: before restore remove file %s" % (f)) + os.remove(f) + log.debug("instanceRestoreFS: remove access logs %s" % dirsrv.accesslog) + for f in glob.glob("%s*" % dirsrv.accesslog): + log.debug("instanceRestoreFS: before restore remove file %s" % (f)) + os.remove(f) + + # Then restore from the directory where DS was deployed here = os.getcwd() os.chdir(dirsrv.prefix) @@ -415,9 +446,21 @@ class DirSrvTools(object): tar.close() + # + # Now be safe, triggers a recovery at restart + # + guardian_file = os.path.join(dirsrv.dbdir, "db/guardian") + if os.path.isfile(guardian_file): + try: + log.debug("instanceRestoreFS: remove %s" % guardian_file) + os.remove(guardian_file) + except: + log.warning("instanceRestoreFS: fail to remove %s" % guardian_file) + pass + + os.chdir(here) - @staticmethod def setupSSL(dirsrv, secport=636, sourcedir=None, secargs=None): """configure and setup SSL with a 
given certificate and restart the server. @@ -517,8 +560,60 @@ class DirSrvTools(object): os.system(cmd) except: log.exception("error executing %r" % cmd) + + @staticmethod + def _offlineDirsrv(args): + ''' + Function to allocate an offline DirSrv instance. + This instance is not initialized with the Directory instance + (__localinit__() and __add_brookers__() are not called) + The properties set are: + instance.host + instance.port + instance.serverId + instance.inst + instance.prefix + instance.backup + ''' + instance = lib389.DirSrv(host=args['newhost'], port=args['newport'], + serverId=args['newinstance'], offline=True) + instance.prefix = args.get('prefix', '/') + instance.backupdir = args.get('backupdir', '/tmp') + instance.inst = instance.serverId + return instance + + @staticmethod + def existsBackup(args): + ''' + If the backup of the instance exists, it returns it. + Else None + ''' + instance = DirSrvTools._offlineDirsrv(args) + return DirSrvTools.checkInstanceBackupFS(instance) + - + @staticmethod + def existsInstance(args): + ''' + Check if an instance exists. + It checks if the following directories/files exist: + /slapd- + + If it exists it returns a DirSrv instance NOT initialized, else None + ''' + instance = DirSrvTools._offlineDirsrv(args) + dirname = os.path.join(instance.prefix, "etc/dirsrv/slapd-%s" % instance.serverId) + errorlog = os.path.join(instance.prefix, "var/log/dirsrv/slapd-%s/errors" % instance.serverId) + sroot = os.path.join(instance.prefix, "lib/dirsrv") + if os.path.isdir(dirname) and \ + os.path.isfile(errorlog) and \ + os.path.isdir(sroot): + instance.sroot = sroot + instance.errlog = errorlog + return instance + + return None + @staticmethod def createInstance(args, verbose=0): """Create a new instance of directory server and return a connection to it. 
@@ -642,7 +737,8 @@ class DirSrvTools(object): return newconn except ldap.SERVER_DOWN: pass # not running - create new one - + + if not isLocal or 'cfgdshost' in args: for param in ('cfgdshost', 'cfgdsport', 'cfgdsuser', 'cfgdspwd', 'admin_domain'): if param not in args: