#51001 Issue 50545 - Port dbmon.sh to dsconf
Closed 3 years ago by spichugi. Opened 4 years ago by mreynolds.
mreynolds/389-ds-base issue50545  into  master

@@ -1,5 +1,5 @@ 

  # --- BEGIN COPYRIGHT BLOCK ---

- # Copyright (C) 2015 Red Hat, Inc.

+ # Copyright (C) 2020 Red Hat, Inc.

  # All rights reserved.

  #

  # License: GPL (version 3 or any later version).
@@ -85,6 +85,7 @@ 

  DN_MONITOR = "cn=monitor"

  DN_MONITOR_SNMP = "cn=snmp,cn=monitor"

  DN_MONITOR_LDBM = "cn=monitor,cn=ldbm database,cn=plugins,cn=config"

+ DN_MONITOR_DATABASE = "cn=database,cn=monitor,cn=ldbm database,cn=plugins,cn=config"

  DN_PWDSTORAGE_SCHEMES = "cn=Password Storage Schemes,cn=plugins,cn=config"

  

  CMD_PATH_SETUP_DS = "setup-ds.pl"

@@ -978,6 +978,7 @@ 

                      'nsslapd-db-transaction-wait',

                      'nsslapd-db-checkpoint-interval',

                      'nsslapd-db-compactdb-interval',

+                     'nsslapd-db-page-size',

                      'nsslapd-db-transaction-batch-val',

                      'nsslapd-db-transaction-batch-min-wait',

                      'nsslapd-db-transaction-batch-max-wait',

@@ -1,12 +1,13 @@ 

  # --- BEGIN COPYRIGHT BLOCK ---

  # Copyright (C) 2019 William Brown <william@blackhats.net.au>

- # Copyright (C) 2019 Red Hat, Inc.

+ # Copyright (C) 2020 Red Hat, Inc.

  # All rights reserved.

  #

  # License: GPL (version 3 or any later version).

  # See LICENSE for details.

  # --- END COPYRIGHT BLOCK ---

  

+ import datetime

  import json

  from lib389.monitor import (Monitor, MonitorLDBM, MonitorSNMP, MonitorDiskSpace)

  from lib389.chaining import (ChainingLinks)
@@ -30,8 +31,6 @@ 

          for be in bes.list():

              be_monitor = be.get_monitor()

              _format_status(log, be_monitor, args.json)

-             # Inejct a new line for now ... see https://pagure.io/389-ds-base/issue/50189

-             log.info("")

  

  

  def ldbm_monitor(inst, basedn, log, args):
@@ -57,6 +56,7 @@ 

              # Inject a new line for now ... see https://pagure.io/389-ds-base/issue/50189

              log.info("")

  

+ 

  def disk_monitor(inst, basedn, log, args):

      disk_space_mon = MonitorDiskSpace(inst)

      disks = disk_space_mon.get_disks()
@@ -88,6 +88,204 @@ 

          log.info(json.dumps({"type": "list", "items": disk_list}, indent=4))

  

  

+ def db_monitor(inst, basedn, log, args):

+     """Report on all the database statistics

+     """

+     ldbm_monitor = MonitorLDBM(inst)

+     backends_obj = Backends(inst)

+     backend_objs = []

+     args_backends = None

+ 

+     # Gather all the backends

+     if args.backends is not None:

+         # This is a space separated list, it could be backend names or suffixes

+         args_backends = args.backends.lower().split()

+ 

+     for be in backends_obj.list():

+         if args_backends is not None:

+             for arg_be in args_backends:

+                 if '=' in arg_be:

+                     # We have a suffix

+                     if arg_be == be.get_suffix():

+                         backend_objs.append(be)

+                         break

+                 else:

+                     # We have a backend name

+                     if arg_be == be.rdn.lower():

+                         backend_objs.append(be)

+                         break

+         else:

+             # Get all the backends

+             backend_objs.append(be)

+ 

+     if args_backends is not None and len(backend_objs) == 0:

+         raise ValueError("Could not find any backends from the provided list: {}".format(args.backends))

+ 

+     # Gather the global DB stats

+     report_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

+     ldbm_mon = ldbm_monitor.get_status()

+     dbcachesize = int(ldbm_mon['nsslapd-db-cache-size-bytes'][0])

+     if 'nsslapd-db-page-size' in ldbm_mon:

+         pagesize = int(ldbm_mon['nsslapd-db-page-size'][0])

+     else:

+         pagesize = 8 * 1024  # Taken from DBLAYER_PAGESIZE

+     dbhitratio = ldbm_mon['dbcachehitratio'][0]

+     dbcachepagein = ldbm_mon['dbcachepagein'][0]

+     dbcachepageout = ldbm_mon['dbcachepageout'][0]

+     dbroevict = ldbm_mon['nsslapd-db-page-ro-evict-rate'][0]

+     dbpages = int(ldbm_mon['nsslapd-db-pages-in-use'][0])

+     dbcachefree = int(dbcachesize - (pagesize * dbpages))

+     dbcachefreeratio = dbcachefree/dbcachesize

+     ndnratio = ldbm_mon['normalizeddncachehitratio'][0]

+     ndncursize = int(ldbm_mon['currentnormalizeddncachesize'][0])

+     ndnmaxsize = int(ldbm_mon['maxnormalizeddncachesize'][0])

+     ndncount = ldbm_mon['currentnormalizeddncachecount'][0]

+     ndnevictions = ldbm_mon['normalizeddncacheevictions'][0]

+     if ndncursize > ndnmaxsize:

+         ndnfree = 0

+         ndnfreeratio = 0

+     else:

+         ndnfree = ndnmaxsize - ndncursize

+         ndnfreeratio = "{:.1f}".format(ndnfree / ndnmaxsize * 100)

+ 

+     # Build global cache stats

+     result = {

+         'date': report_time,

+         'dbcache': {

+             'hit_ratio': dbhitratio,

+             'free': convert_bytes(str(dbcachefree)),

+             'free_percentage': "{:.1f}".format(dbcachefreeratio * 100),

+             'roevicts': dbroevict,

+             'pagein': dbcachepagein,

+             'pageout': dbcachepageout

+         },

+         'ndncache': {

+             'hit_ratio': ndnratio,

+             'free': convert_bytes(str(ndnfree)),

+             'free_percentage': ndnfreeratio,

+             'count': ndncount,

+             'evictions': ndnevictions

+         },

+         'backends': {},

+     }

+ 

+     # Build the backend results

+     for be in backend_objs:

+         be_name = be.rdn

+         be_suffix = be.get_suffix()

+         monitor = be.get_monitor()

+         all_attrs = monitor.get_status()

+ 

+         # Process entry cache stats

+         entcur = int(all_attrs['currententrycachesize'][0])

+         entmax = int(all_attrs['maxentrycachesize'][0])

+         entcnt = int(all_attrs['currententrycachecount'][0])

+         entratio = all_attrs['entrycachehitratio'][0]

+         entfree = entmax - entcur

+         entfreep = "{:.1f}".format(entfree / entmax * 100)

+         if entcnt == 0:

+             entsize = 0

+         else:

+             entsize = int(entcur / entcnt)

+ 

+         # Process DN cache stats

+         dncur = int(all_attrs['currentdncachesize'][0])

+         dnmax = int(all_attrs['maxdncachesize'][0])

+         dncnt = int(all_attrs['currentdncachecount'][0])

+         dnratio = all_attrs['dncachehitratio'][0]

+         dnfree = dnmax - dncur

+         dnfreep = "{:.1f}".format(dnfree / dnmax * 100)

+         if dncnt == 0:

+             dnsize = 0

+         else:

+             dnsize = int(dncur / dncnt)

+ 

+         # Build the backend result

+         result['backends'][be_name] = {

+             'suffix': be_suffix,

+             'entry_cache_count': all_attrs['currententrycachecount'][0],

+             'entry_cache_free': convert_bytes(str(entfree)),

+             'entry_cache_free_percentage': entfreep,

+             'entry_cache_size': convert_bytes(str(entsize)),

+             'entry_cache_hit_ratio': entratio,

+             'dn_cache_count': all_attrs['currentdncachecount'][0],

+             'dn_cache_free': convert_bytes(str(dnfree)),

+             'dn_cache_free_percentage': dnfreep,

+             'dn_cache_size': convert_bytes(str(dnsize)),

+             'dn_cache_hit_ratio': dnratio,

+             'indexes': []

+         }

+ 

+         # Process indexes if requested

+         if args.indexes:

+             index = {}

+             index_name = ''

+             for attr, val in all_attrs.items():

+                 if attr.startswith('dbfile'):

+                     if attr.startswith("dbfilename-"):

+                         if index_name != '':

+                             # Update backend index list

+                             result['backends'][be_name]['indexes'].append(index)

+                         index_name = val[0].split('/')[1]

+                         index = {'name': index_name}

+                     elif attr.startswith('dbfilecachehit-'):

+                         index['cachehit'] = val[0]

+                     elif attr.startswith('dbfilecachemiss-'):

+                         index['cachemiss'] = val[0]

+                     elif attr.startswith('dbfilepagein-'):

+                         index['pagein'] = val[0]

+                     elif attr.startswith('dbfilepageout-'):

+                         index['pageout'] = val[0]

+             if index_name != '':

+                 # Update backend index list

+                 result['backends'][be_name]['indexes'].append(index)

+ 

+     # Return the report

+     if args.json:

+         log.info(json.dumps(result, indent=4))

+     else:

+         log.info("DB Monitor Report: " + result['date'])

+         log.info("--------------------------------------------------------")

+         log.info("Database Cache:")

+         log.info(" - Cache Hit Ratio:     {}%".format(result['dbcache']['hit_ratio']))

+         log.info(" - Free Space:          {}".format(result['dbcache']['free']))

+         log.info(" - Free Percentage:     {}%".format(result['dbcache']['free_percentage']))

+         log.info(" - RO Page Drops:       {}".format(result['dbcache']['roevicts']))

+         log.info(" - Pages In:            {}".format(result['dbcache']['pagein']))

+         log.info(" - Pages Out:           {}".format(result['dbcache']['pageout']))

+         log.info("")

+         log.info("Normalized DN Cache:")

+         log.info(" - Cache Hit Ratio:     {}%".format(result['ndncache']['hit_ratio']))

+         log.info(" - Free Space:          {}".format(result['ndncache']['free']))

+         log.info(" - Free Percentage:     {}%".format(result['ndncache']['free_percentage']))

+         log.info(" - DN Count:            {}".format(result['ndncache']['count']))

+         log.info(" - Evictions:           {}".format(result['ndncache']['evictions']))

+         log.info("")

+         log.info("Backends:")

+         for be_name, attr_dict in result['backends'].items():

+             log.info(f"  - {attr_dict['suffix']} ({be_name}):")

+             log.info("    - Entry Cache Hit Ratio:        {}%".format(attr_dict['entry_cache_hit_ratio']))

+             log.info("    - Entry Cache Count:            {}".format(attr_dict['entry_cache_count']))

+             log.info("    - Entry Cache Free Space:       {}".format(attr_dict['entry_cache_free']))

+             log.info("    - Entry Cache Free Percentage:  {}%".format(attr_dict['entry_cache_free_percentage']))

+             log.info("    - Entry Cache Average Size:     {}".format(attr_dict['entry_cache_size']))

+             log.info("    - DN Cache Hit Ratio:           {}%".format(attr_dict['dn_cache_hit_ratio']))

+             log.info("    - DN Cache Count:               {}".format(attr_dict['dn_cache_count']))

+             log.info("    - DN Cache Free Space:          {}".format(attr_dict['dn_cache_free']))

+             log.info("    - DN Cache Free Percentage:     {}%".format(attr_dict['dn_cache_free_percentage']))

+             log.info("    - DN Cache Average Size:        {}".format(attr_dict['dn_cache_size']))

+             if len(result['backends'][be_name]['indexes']) > 0:

+                 log.info("    - Indexes:")

+                 for index in result['backends'][be_name]['indexes']:

+                     log.info("      - Index:      {}".format(index['name']))

+                     log.info("      - Cache Hit:  {}".format(index['cachehit']))

+                     log.info("      - Cache Miss: {}".format(index['cachemiss']))

+                     log.info("      - Page In:    {}".format(index['pagein']))

+                     log.info("      - Page Out:   {}".format(index['pageout']))

+                     log.info("")

+             log.info("")

+ 

+ 

  def create_parser(subparsers):

      monitor_parser = subparsers.add_parser('monitor', help="Monitor the state of the instance")

      subcommands = monitor_parser.add_subparsers(help='action')
@@ -95,10 +293,16 @@ 

      server_parser = subcommands.add_parser('server', help="Monitor the server statistics, connections and operations")

      server_parser.set_defaults(func=monitor)

  

+     dbmon_parser = subcommands.add_parser('dbmon', help="Monitor the all the database statistics in a single report")

+     dbmon_parser.set_defaults(func=db_monitor)

+     dbmon_parser.add_argument('-i', '--incr', type=int, help="Keep refreshing the report every N seconds")

+     dbmon_parser.add_argument('-b', '--backends', help="List of space separated backends to monitor.  Default is all backends.")

+     dbmon_parser.add_argument('-x', '--indexes', action='store_true', default=False, help="Show index stats for each backend")

+ 

      ldbm_parser = subcommands.add_parser('ldbm', help="Monitor the ldbm statistics, such as dbcache")

      ldbm_parser.set_defaults(func=ldbm_monitor)

  

-     backend_parser = subcommands.add_parser('backend', help="Monitor the behaviour of a backend database")

+     backend_parser = subcommands.add_parser('backend', help="Monitor the behavior of a backend database")

      backend_parser.add_argument('backend', nargs='?', help="Optional name of the backend to monitor")

      backend_parser.set_defaults(func=backend_monitor)

  

file modified
+2 -2
@@ -1,5 +1,5 @@ 

  # --- BEGIN COPYRIGHT BLOCK ---

- # Copyright (C) 2019 Red Hat, Inc.

+ # Copyright (C) 2020 Red Hat, Inc.

  # All rights reserved.

  #

  # License: GPL (version 3 or any later version).
@@ -504,7 +504,7 @@ 

      def __init__(self, conn):

          super(BDB_LDBMConfig, self).__init__(instance=conn)

          self._dn = DN_CONFIG_LDBM_BDB

-         config_compare_exclude = []

+         self._config_compare_exclude = []

          self._rdn_attribute = 'cn'

          self._lint_functions = []

          self._protected = True

file modified
+106 -4
@@ -1,5 +1,5 @@ 

  # --- BEGIN COPYRIGHT BLOCK ---

- # Copyright (C) 2019 Red Hat, Inc.

+ # Copyright (C) 2020 Red Hat, Inc.

  # All rights reserved.

  #

  # License: GPL (version 3 or any later version).
@@ -107,9 +107,15 @@ 

  

  

  class MonitorLDBM(DSLdapObject):

+     """An object that helps reading the global database statistics.

+         :param instance: An instance

+         :type instance: lib389.DirSrv

+         :param dn: not used

+     """

      def __init__(self, instance, dn=None):

          super(MonitorLDBM, self).__init__(instance=instance)

          self._dn = DN_MONITOR_LDBM

+         self._db_mon = MonitorDatabase(instance)

          self._backend_keys = [

              'dbcachehits',

              'dbcachetries',
@@ -119,6 +125,42 @@ 

              'dbcacheroevict',

              'dbcacherwevict',

          ]

+         self._db_mon_keys = [

+             'nsslapd-db-abort-rate',

+             'nsslapd-db-active-txns',

+             'nsslapd-db-cache-hit',

+             'nsslapd-db-cache-try',

+             'nsslapd-db-cache-region-wait-rate',

+             'nsslapd-db-cache-size-bytes',

+             'nsslapd-db-clean-pages',

+             'nsslapd-db-commit-rate',

+             'nsslapd-db-deadlock-rate',

+             'nsslapd-db-dirty-pages',

+             'nsslapd-db-hash-buckets',

+             'nsslapd-db-hash-elements-examine-rate',

+             'nsslapd-db-hash-search-rate',

+             'nsslapd-db-lock-conflicts',

+             'nsslapd-db-lock-region-wait-rate',

+             'nsslapd-db-lock-request-rate',

+             'nsslapd-db-lockers',

+             'nsslapd-db-configured-locks',

+             'nsslapd-db-current-locks',

+             'nsslapd-db-max-locks',

+             'nsslapd-db-current-lock-objects',

+             'nsslapd-db-max-lock-objects',

+             'nsslapd-db-log-bytes-since-checkpoint',

+             'nsslapd-db-log-region-wait-rate',

+             'nsslapd-db-log-write-rate',

+             'nsslapd-db-longest-chain-length',

+             'nsslapd-db-page-create-rate',

+             'nsslapd-db-page-read-rate',

+             'nsslapd-db-page-ro-evict-rate',

+             'nsslapd-db-page-rw-evict-rate',

+             'nsslapd-db-page-trickle-rate',

+             'nsslapd-db-page-write-rate',

+             'nsslapd-db-pages-in-use',

+             'nsslapd-db-txn-region-wait-rate',

+         ]

          if not ds_is_older("1.4.0", instance=instance):

              self._backend_keys.extend([

                  'normalizeddncachetries',
@@ -134,6 +176,58 @@ 

              ])

  

      def get_status(self, use_json=False):

+         ldbm_dict = self.get_attrs_vals_utf8(self._backend_keys)

+         db_dict = self._db_mon.get_attrs_vals_utf8(self._db_mon_keys)

+         return {**ldbm_dict, **db_dict}

+ 

+ 

+ class MonitorDatabase(DSLdapObject):

+     """An object that helps reading the global libdb(bdb) statistics.

+         :param instance: An instance

+         :type instance: lib389.DirSrv

+         :param dn: not used

+     """

+     def __init__(self, instance, dn=None):

+         super(MonitorDatabase, self).__init__(instance=instance)

+         self._dn = DN_MONITOR_DATABASE

+         self._backend_keys = [

+             'nsslapd-db-abort-rate',

+             'nsslapd-db-active-txns',

+             'nsslapd-db-cache-hit',

+             'nsslapd-db-cache-try',

+             'nsslapd-db-cache-region-wait-rate',

+             'nsslapd-db-cache-size-bytes',

+             'nsslapd-db-clean-pages',

+             'nsslapd-db-commit-rate',

+             'nsslapd-db-deadlock-rate',

+             'nsslapd-db-dirty-pages',

+             'nsslapd-db-hash-buckets',

+             'nsslapd-db-hash-elements-examine-rate',

+             'nsslapd-db-hash-search-rate',

+             'nsslapd-db-lock-conflicts',

+             'nsslapd-db-lock-region-wait-rate',

+             'nsslapd-db-lock-request-rate',

+             'nsslapd-db-lockers',

+             'nsslapd-db-configured-locks',

+             'nsslapd-db-current-locks',

+             'nsslapd-db-max-locks',

+             'nsslapd-db-current-lock-objects',

+             'nsslapd-db-max-lock-objects',

+             'nsslapd-db-log-bytes-since-checkpoint',

+             'nsslapd-db-log-region-wait-rate',

+             'nsslapd-db-log-write-rate',

+             'nsslapd-db-longest-chain-length',

+             'nsslapd-db-page-create-rate',

+             'nsslapd-db-page-read-rate',

+             'nsslapd-db-page-ro-evict-rate',

+             'nsslapd-db-page-rw-evict-rate',

+             'nsslapd-db-page-trickle-rate',

+             'nsslapd-db-page-write-rate',

+             'nsslapd-db-pages-in-use',

+             'nsslapd-db-txn-region-wait-rate',

+        ]

+ 

+     def get_status(self, use_json=False):

          return self.get_attrs_vals_utf8(self._backend_keys)

  

  
@@ -172,10 +266,18 @@ 

                  'currentnormalizeddncachecount'

              ])

  

-     # Issue: get status should return a dict and the called should be

-     # formatting it. See: https://pagure.io/389-ds-base/issue/50189

      def get_status(self, use_json=False):

-         return self.get_attrs_vals_utf8(self._backend_keys)

+         result = {}

+         all_attrs = self.get_all_attrs_utf8()

+         for attr in self._backend_keys:

+             result[attr] = all_attrs[attr]

+ 

+         # Now gather all the dbfile* attributes

+         for attr, val in all_attrs.items():

+             if attr.startswith('dbfile'):

+                 result[attr] = val

+ 

+         return result

  

  

  class MonitorChaining(DSLdapObject):

Description:

dbmon.sh has been ported to dsconf with basically the same feature set. You can continuously refresh the report at a specified interval, you can choose to get stats one multiple/specific backends, and the option to display individual index stats. There is now a human friendly report and a JSON version.

There were also other improvements made to lib389 to take into account the new bdb split configuration under cn=config.

Design Doc: https://www.port389.org/docs/389ds/design/dbmon-design.html

Relates: https://pagure.io/389-ds-base/issue/50545

I'm happy with most of this, but I'm not sure about the while True loop - can't we just use "watch command" instead if someone wants this feature? What if someone just wants the monitor output once for a command like in nagios etc?

If I understand correctly, you have fixed the issue - https://pagure.io/389-ds-base/issue/50189
Now 'get_monitor' returns a dict. Could you please mention that in the issue and clean the comments?

The rest looks good to me

I'm happy with most of this, but I'm not sure about the while True loop - can't we just use "watch command" instead if someone wants this feature? What if someone just wants the monitor output once for a command like in nagios etc?

I don't know what you mean by "watch command". If you don't specify the "incr" then it prints the report once and exits.

If I understand correctly, you have fixed the issue - https://pagure.io/389-ds-base/issue/50189

Oh I guess I did, I didn't know this ticket was out there. So I fixed it by accident...

Now 'get_monitor' returns a dict. Could you please mention that in the issue and clean the comments?

Yes I will update comments and tickets...

rebased onto a2d9f88a48ab687d457b5a4b639b017b6a1069e0

4 years ago

I'm happy with most of this, but I'm not sure about the while True loop - can't we just use "watch command" instead if someone wants this feature? What if someone just wants the monitor output once for a command like in nagios etc?

I don't know what you mean by "watch command". If you don't specify the "incr" then it prints the report once and exits.

For more information, I added the whole "--incr" option as that was available in dbmon.sh. I was trying to maintain/port the same functionality. Are you suggesting we just remove it entirely?

Yeah, I think we shouldn't have the --incr?

Okay, question is - do we need 100% feature parity? Or can we trim something that we don't really need to maintain ourselves? I guess that's the question here. I'll leave it up to you but I think "looping" is not our problem, that's up to tools like watch. :) I'll leave it to you to decide though....

Okay, question is - do we need 100% feature parity? Or can we trim something that we don't really need to maintain ourselves? I guess that's the question here. I'll leave it up to you but I think "looping" is not our problem, that's up to tools like watch. :) I'll leave it to you to decide though....

To be honest, I know dbmon.sh was used by a lot of people, but I have no idea if the loop was a commonly used feature. I agree though there is no need to build in the loop, as there are other ways to accomplish the same thing. I'll remove it, and if people start to complain then we can discuss maybe adding it back.

rebased onto 2523f48

4 years ago

Pull-Request has been merged by mreynolds

4 years ago

Sounds like a plan, thanks!

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This pull request has been cloned to Github as issue and is available here:
- https://github.com/389ds/389-ds-base/issues/4054

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for all inconvenience.

Pull-Request has been closed by spichugi

3 years ago