#50585 Ticket 50584, 49212 - docker healthcheck and configuration
Closed 3 years ago by spichugi. Opened 4 years ago by firstyear.
firstyear/389-ds-base 50584-49212-healthcheck-and-config  into  master

@@ -75,4 +75,8 @@ 

  # Set the userup correctly.

  # USER dirsrv

  

+ HEALTHCHECK --start-period=5m --timeout=5s --interval=5s --retries=2 \

+     CMD /usr/sbin/dscontainer -H

+ 

  CMD [ "/usr/sbin/dscontainer", "-r" ]

+ 

file modified
+82 -11
@@ -27,6 +27,7 @@ 

  import pwd

  import atexit

  import os

+ import time

  import signal

  import sys

  import subprocess
@@ -42,6 +43,8 @@ 

  from lib389.paths import Paths

  from lib389._constants import DSRC_CONTAINER

  

+ from lib389.idm.directorymanager import DirectoryManager

+ 

  # We setup the logger in verbose mode to make sure debug info

  # is always available!

  log = setup_script_logger("container-init", True)
@@ -52,9 +55,34 @@ 

  # We take *args and **kwargs here to handle the fact that this signal gets args, but

  # we don't need or care about them.

  def _sigchild_handler(*args, **kwargs):

-     log.debug("Received SIGCHLD ...")

+     # log.debug("Received SIGCHLD ...")

      os.waitpid(-1, os.WNOHANG)

  

+ def _gen_instance():
+     """Build a DirSrv object for the local "localhost" instance, set up for LDAPI.
+ 
+     The object is allocated and configured only; no connection is opened
+     here, so callers that need one call open() on the result themselves.
+ 
+     :returns: the configured DirSrv object.
+     """
+     ds = DirSrv(verbose=True)
+     ds.local_simple_allocate("localhost")
+     ds.setup_ldapi()
+     return ds

+ 

+ def _begin_environment_config():
+     """Apply configuration supplied through the environment to the instance.
+ 
+     Opens a connection to the local instance and, when DS_DM_PASSWORD is set
+     in the environment, changes the Directory Manager password to its value.
+     When the variable is absent nothing is changed.
+ 
+     The connection is always closed again, including when the password
+     change raises; the exception itself is left to propagate to the caller.
+     """
+     inst = _gen_instance()
+     inst.open()
+     try:
+         dm_pass = os.getenv("DS_DM_PASSWORD", None)
+         if dm_pass is not None:
+             dm = DirectoryManager(inst)
+             dm.change_password(dm_pass)
+         # TODO: Should we set replica id from env?
+         # TODO: Should we set replication agreements from env?
+     finally:
+         # Do not leave the connection open if the change above failed.
+         inst.close()

+ 

+ def _begin_check_reindex():
+     """Run db2index on the local instance when DS_REINDEX is in the environment.
+ 
+     Only the presence of the variable is tested; its value is not inspected.
+     Without it this function does nothing.
+     """
+     if os.getenv('DS_REINDEX', None) is None:
+         return
+     log.info("Reindexing database. This may take a while ...")
+     _gen_instance().db2index()

+ 

  def begin_magic():

      log.info("The 389 Directory Server Container Bootstrap")

      # Leave this comment here: UofA let me take this code with me provided
@@ -182,29 +210,26 @@ 

              """)

          os.chmod(DSRC_CONTAINER, 0o755)

  

-     # TODO: All of this is contingent on the server starting *and*

-     # ldapi working ... Perhaps these are better inside ns-slapd core

-     # and we just proxy/filter the env through?

-     # TODO: Should we reset cn=Directory Manager from env?

-     # TODO: Should we set replica id from env?

-     # TODO: Should we set replication agreements from env?

-     # TODO: Should we allow re-indexing at startup from env?

+     # If we have been requested to re-index, do so now ...

+     _begin_check_reindex()

  

      # Yep! Run it ...

      # Now unlike a normal lib389 start, we use subprocess and don't fork!

      # TODO: Should we pass in a loglevel from env?

      log.info("Starting 389-ds-container ...")

  

+     # We can't use the instance "start" because we need the pid handler so we can do

+     # a wait/block on it. That's why we do the Popen here direct.

      global ds_proc

      ds_proc = subprocess.Popen([

          "%s/ns-slapd" % paths.sbin_dir,

          "-D", paths.config_dir,

+         "-i", "/data/run/slapd-localhost.pid",

          # See /ldap/servers/slapd/slap.h SLAPD_DEFAULT_ERRORLOG_LEVEL

          "-d", "266354688",

          ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  

-     # To make sure we really do shutdown, we actually re-block on the proc

-     # again here to be sure it's done.

+     # Setup the process and shutdown handler in an init-esque fashion.

      def kill_ds():

          if ds_proc is None:

              pass
@@ -215,17 +240,53 @@ 

                  # It's already gone ...

                  pass

          log.info("STOPPING: Shutting down 389-ds-container ...")

+         # To make sure we really do shutdown, we actually re-block on the proc

+         # again here to be sure it's done.

          ds_proc.wait()

  

      atexit.register(kill_ds)

  

-     # Now wait ...

+     # Wait on the health check to show we are ready for ldapi.

+     failure_count = 0

+     max_failure_count = 5

+     for i in range(0, max_failure_count):

+         status = begin_healthcheck()

+         if status is True:

+             break

+         failure_count += 1

+         time.sleep(3)

+     if failure_count == max_failure_count:

+         log.error("389-ds-container failed to start")

+         sys.exit(1)

+ 

+     # Now via ldapi, set some values.

+     log.info("Applying environment configuration (if present) ...")

+     _begin_environment_config()

+ 

+     log.info("389-ds-container started.")

+ 

+     # Now block until we get shutdown! If we are signaled to exit, this

+     # will trigger the atexit handler from above.

      try:

          ds_proc.wait()

      except KeyboardInterrupt:

          pass

      # THE LETTER OF THE DAY IS C AND THE NUMBER IS 10

  

+ 

+ def begin_healthcheck():
+     """Report whether the local instance is running and answering on LDAPI.
+ 
+     Two checks are made in order: inst.status() must be True, and a
+     connection opened on the instance must answer whoami as
+     cn=Directory Manager.
+ 
+     A failure to connect or to query is reported as "not healthy" instead of
+     being raised. begin_magic() polls this function right after launching
+     ns-slapd, so a socket that is not accepting connections yet has to count
+     as a failed attempt that can be retried, not as a fatal error.
+ 
+     :returns: True when both checks pass, otherwise False.
+     """
+     inst = _gen_instance()
+     # Is there an ns-slapd pid?
+     if inst.status() is not True:
+         return False
+     # Can we get an ldapi response? Make sure we are dm.
+     try:
+         inst.open()
+         try:
+             return "dn: cn=Directory Manager" == inst.whoami_s()
+         finally:
+             # Every probe opens its own connection; do not leak it.
+             inst.close()
+     except Exception as e:
+         # Broad on purpose: this is a health probe, so any error while
+         # connecting or querying simply means "not ready".
+         log.debug("LDAPI health check failed: %s", e)
+         return False

+ 

+ 

  if __name__ == '__main__':

      # Before all else, we are INIT so setup sigchild

      signal.signal(signal.SIGCHLD, _sigchild_handler)
@@ -259,10 +320,20 @@ 

      parser.add_argument('-r', '--runit',

                          help="Actually run the instance! You understand what that means ...",

                          action='store_true', default=False, dest='runit')

+     parser.add_argument('-H', '--healthcheck',

+                         help="Start a healthcheck inside of the container for an instance. You should understand what this means ...",

+                         action='store_true', default=False, dest='healthcheck')

+ 

      argcomplete.autocomplete(parser)

  

      args = parser.parse_args()

  

      if args.runit:

          begin_magic()

+     elif args.healthcheck:

+         if begin_healthcheck() is True:

+             sys.exit(0)

+         else:

+             sys.exit(1)

+ 

  

@@ -465,6 +465,11 @@ 

          self.state = DIRSRV_STATE_ALLOCATED

          self.log.debug("Allocate local instance %s with %s", self.__class__, self.ldapuri)

  

+     def setup_ldapi(self):
+         """Configure this object to use LDAPI with autobind.
+ 
+         Sets ldapi_enabled and ldapi_autobind to "on" and takes the socket
+         path from self.ds_paths.ldapi. Only attributes on this object are
+         changed; no connection is opened here.
+         """

+         self.ldapi_enabled = "on"

+         self.ldapi_socket = self.ds_paths.ldapi

+         self.ldapi_autobind = "on"

+ 

      def remote_simple_allocate(self, ldapuri, binddn='cn=Directory Manager', password=None):

          """Allocate an instance, and perform a simple bind. This instance is remote, so

          local tasks will not operate.
@@ -1219,15 +1224,15 @@ 

                                    "dirsrv@%s" % self.serverid])

              if rc == 0:

                  return True

-                 # This .... probably will mess something up

+                 # We don't reset the state here because we don't know what state

+                 # we are in re shutdown. The state is for us internally anyway.

                  # self.state = DIRSRV_STATE_RUNNING

              self.state = DIRSRV_STATE_OFFLINE

              return False

          else:

              self.log.debug("systemd status -> False")

-             # TODO: Make the pid path in the files things

-             # TODO: use the status call instead!!!!

              pid = pid_from_file(self.ds_paths.pid_file)

+             self.log.debug("pid file -> %s" % pid)

              if pid is None:

                  self.log.debug("No pidfile found for %s", self.serverid)

                  # No pidfile yet ...
@@ -1541,7 +1546,7 @@ 

          if self.ldapuri:

              return self.ldapuri

          elif self.ldapi_enabled == 'on' and self.ldapi_socket is not None:

-             return "ldapi://%s" % (ldapurl.ldapUrlEscape(ensure_str(ldapi_socket)))

+             return "ldapi://%s" % (ldapurl.ldapUrlEscape(ensure_str(self.ldapi_socket)))

          elif self.sslport and not self.realm:

              # Gssapi can't use SSL so we have to nuke it here.

              return "ldaps://%s:%d/" % (ensure_str(self.host), self.sslport)

file modified
+9 -1
@@ -9,7 +9,7 @@ 

  import sys

  import os

  

- from lib389._constants import DIRSRV_STATE_ONLINE

+ from lib389._constants import DIRSRV_STATE_ONLINE, DSRC_CONTAINER

  

  MAJOR, MINOR, _, _, _ = sys.version_info

  
@@ -108,6 +108,7 @@ 

          to know about paths, shouldn't need to have a copy of 389-ds-base

          installed to remotely admin a server.

          """

+         self._is_container = os.path.exists(DSRC_CONTAINER)

          self._defaults_cached = False

          self._config = None

          self._serverid = serverid
@@ -131,6 +132,10 @@ 

          spath = self._get_defaults_loc(DEFAULTS_PATH)

          self._config = configparser.ConfigParser()

          self._config.read([spath])

+         if self._is_container:

+             # Load some values over the top that are container specific

+             self._config.set(SECTION, "pid_file", "/data/run/slapd-localhost.pid")

+             self._config.set(SECTION, "ldapi", "/data/run/slapd-localhost.socket")

          self._defaults_cached = True

  

      def _validate_defaults(self):
@@ -175,6 +180,9 @@ 

          if self._defaults_cached is False:

              self._read_defaults()

              self._validate_defaults()

+         if self._is_container:

+             # We never have systemd in a container, so check the marker.

+             return False

          if self._config.has_option(SECTION, 'with_systemd'):

              if self._config.get(SECTION, 'with_systemd') == '1':

                  return True

Bug Description: Docker containers are managed by providing values
from the environment. Before we can apply those values to our
local instance, we need to be able to check that the instance is
healthy. In addition, docker has a health check process which
can allow monitoring and management of instances as they start.

Fix Description: This provides a healthcheck tool, allows the
directory manager password to be configured from the environment,
and allows requesting via the environment that a db2index be
performed on startup.

https://pagure.io/389-ds-base/issue/49212
https://pagure.io/389-ds-base/issue/50584

Author: William Brown william@blackhats.net.au

Review by: ???

rebased onto 828ebf6

4 years ago

Pull-Request has been merged by firstyear

4 years ago

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This pull request has been cloned to Github as an issue and is available here:
- https://github.com/389ds/389-ds-base/issues/3641

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for any inconvenience.

Pull-Request has been closed by spichugi

3 years ago