#310 Basic monitor API
Merged 6 months ago by jkaluza. Opened 7 months ago by fivaldi.
fivaldi/freshmaker fivaldi_monitoring  into  master

file modified
+8

@@ -29,6 +29,9 @@ 

  import moksha.hub

  

  from freshmaker import log, conf, messaging, events, app

+ from freshmaker.monitor import (

+     messaging_received_counter, messaging_received_ignored_counter,

+     messaging_received_passed_counter, messaging_received_failed_counter)

  from freshmaker.utils import load_classes

  

  

@@ -82,6 +85,8 @@ 

              super(FreshmakerConsumer, self).validate(message)

  

      def consume(self, message):

+         messaging_received_counter.inc()

+ 

          # Sometimes, the messages put into our queue are artificially put there

          # by other parts of our own codebase.  If they are already abstracted

          # messages, then just use them as-is.  If they are not already

@@ -95,6 +100,7 @@ 

          if not msg:

              # We do not log here anything, because it would create lot of

              # useless messages in the logs.

+             messaging_received_ignored_counter.inc()

              return

  

          # Primary work is done here.

@@ -109,7 +115,9 @@ 

              # to have global app_context.

              with app.app_context():

                  self.process_event(msg)

+             messaging_received_passed_counter.inc()

          except Exception:

+             messaging_received_failed_counter.inc()

              log.exception('Failed while handling {0!r}'.format(msg))

  

          if self.stop_condition and self.stop_condition(message):

file modified
+2

@@ -299,6 +299,7 @@ 

              return

  

          self.state = state

+         EventState(state).counter.inc()

          if state_reason is not None:

              self.state_reason = state_reason

  

@@ -494,6 +495,7 @@ 

              return

  

          self.state = state

+         ArtifactBuildState(state).counter.inc()

          self.state_reason = state_reason

          if self.state in [ArtifactBuildState.DONE.value,

                            ArtifactBuildState.FAILED.value,

file added
+159

@@ -0,0 +1,159 @@ 

+ # -*- coding: utf-8 -*-

+ # Copyright (c) 2018  Red Hat, Inc.

+ #

+ # Permission is hereby granted, free of charge, to any person obtaining a copy

+ # of this software and associated documentation files (the "Software"), to deal

+ # in the Software without restriction, including without limitation the rights

+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

+ # copies of the Software, and to permit persons to whom the Software is

+ # furnished to do so, subject to the following conditions:

+ #

+ # The above copyright notice and this permission notice shall be included in all

+ # copies or substantial portions of the Software.

+ #

+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+ # SOFTWARE.

+ 

+ import os

+ import tempfile

+ 

+ from freshmaker import db

+ from flask import Response

+ from flask.views import MethodView

+ from prometheus_client import (

+     ProcessCollector, CollectorRegistry, Counter, multiprocess,

+     Histogram, generate_latest)

+ from sqlalchemy import event

+ 

+ 

+ if not os.environ.get('prometheus_multiproc_dir'):

+     os.environ.setdefault('prometheus_multiproc_dir', tempfile.mkdtemp())

+ registry = CollectorRegistry()

+ ProcessCollector(registry=registry)

+ multiprocess.MultiProcessCollector(registry)

+ 

+ # Message consumer counters: one for every received message and one for

+ # each outcome (ignored, processed successfully, failed while processing).

+ messaging_received_counter = Counter(

+     'messaging_received',

+     'Total number of messages received',

+     registry=registry)

+ messaging_received_ignored_counter = Counter(

+     'messaging_received_ignored',

+     'Number of received messages, which were ignored',

+     registry=registry)

+ messaging_received_passed_counter = Counter(

+     'messaging_received_passed',

+     'Number of received messages, which were processed successfully',

+     registry=registry)

+ messaging_received_failed_counter = Counter(

+     'messaging_received_failed',

+     'Number of received messages, which failed during processing',

+     registry=registry)

+ 

+ # Database counters; each one is incremented by the SQLAlchemy engine event

+ # listener of the matching name registered further down in this module.

+ db_dbapi_error_counter = Counter(

+     'db_dbapi_error',

+     'Number of DBAPI errors',

+     registry=registry)

+ db_engine_connect_counter = Counter(

+     'db_engine_connect',

+     'Number of \'engine_connect\' events',

+     registry=registry)

+ db_handle_error_counter = Counter(

+     'db_handle_error',

+     'Number of exceptions during connection',

+     registry=registry)

+ db_transaction_begin_counter = Counter(

+     'db_transaction_begin',

+     'Number of started transactions',

+     registry=registry)

+ db_transaction_commit_counter = Counter(

+     'db_transaction_commit',

+     'Number of transactions, which were committed',

+     registry=registry)

+ db_transaction_rollback_counter = Counter(

+     'db_transaction_rollback',

+     'Number of transactions, which were rolled back',

+     registry=registry)

+ 

+ # Artifact build counters; they are attached to the DONE, FAILED and

+ # CANCELED members of ArtifactBuildState in the types module.

+ freshmaker_artifact_build_done_counter = Counter(

+     'freshmaker_artifact_build_done',

+     'Number of successful artifact builds',

+     registry=registry)

+ freshmaker_artifact_build_failed_counter = Counter(

+     'freshmaker_artifact_build_failed',

+     'Number of artifact builds, which failed due to error(s)',

+     registry=registry)

+ freshmaker_artifact_build_canceled_counter = Counter(

+     'freshmaker_artifact_build_canceled',

+     'Number of artifact builds, which were canceled',

+     registry=registry)

+ 

+ 

+ # Event counters; they are attached to the COMPLETE, FAILED and SKIPPED

+ # members of EventState in the types module.

+ freshmaker_event_complete_counter = Counter(

+     'freshmaker_event_complete',

+     'Number of successfully handled events',

+     registry=registry)

+ freshmaker_event_failed_counter = Counter(

+     'freshmaker_event_failed',

+     'Number of events, which failed due to error(s)',

+     registry=registry)

+ freshmaker_event_skipped_counter = Counter(

+     'freshmaker_event_skipped',

+     'Number of events, for which no action was taken',

+     registry=registry)

+ 

+ 

+ # Latency histograms; the views module applies them as ``.time()``

+ # decorators on the ``get`` handlers of BuildAPI and EventAPI.

+ build_api_latency = Histogram(

+     'build_api_latency',

+     'BuildAPI latency', registry=registry)

+ event_api_latency = Histogram(

+     'event_api_latency',

+     'EventAPI latency', registry=registry)

+ 

+ 

+ # NOTE(review): SQLAlchemy documents 'dbapi_error' as superseded by

+ # 'handle_error' -- confirm it still exists in the supported versions.

+ @event.listens_for(db.engine, 'dbapi_error', named=True)

+ def receive_dbapi_error(**kw):

+     """Count one DBAPI error; the event's keyword arguments are ignored."""

+     db_dbapi_error_counter.inc()

+ 

+ 

+ @event.listens_for(db.engine, 'engine_connect')

+ def receive_engine_connect(conn, branch):

+     """Count one 'engine_connect' event; both arguments are ignored."""

+     db_engine_connect_counter.inc()

+ 

+ 

+ @event.listens_for(db.engine, 'handle_error')

+ def receive_handle_error(exception_context):

+     """Count one 'handle_error' event; the exception context is ignored."""

+     db_handle_error_counter.inc()

+ 

+ 

+ @event.listens_for(db.engine, 'begin')

+ def receive_begin(conn):

+     """Count one started transaction ('begin' event on the engine)."""

+     db_transaction_begin_counter.inc()

+ 

+ 

+ @event.listens_for(db.engine, 'commit')

+ def receive_commit(conn):

+     """Count one committed transaction ('commit' event on the engine)."""

+     db_transaction_commit_counter.inc()

+ 

+ 

+ @event.listens_for(db.engine, 'rollback')

+ def receive_rollback(conn):

+     """Count one rolled-back transaction ('rollback' event on the engine)."""

+     db_transaction_rollback_counter.inc()

+ 

+ 

+ class MonitorAPI(MethodView):

+     rest_api_v1 = {

+         'basic': {

+             'url': '/api/1/monitor/metrics/',

+             'options': {

+                 'methods': ['GET'],

+             }

+         }

+     }

+ 

+     def get(self):

+         return Response(generate_latest(registry))

file modified
+26 -10

@@ -20,6 +20,12 @@ 

  # SOFTWARE.

  

  from enum import Enum

+ from freshmaker.monitor import (

+     freshmaker_artifact_build_done_counter,

+     freshmaker_artifact_build_failed_counter,

+     freshmaker_artifact_build_canceled_counter,

+     freshmaker_event_complete_counter, freshmaker_event_failed_counter,

+     freshmaker_event_skipped_counter)

  

  

  class ArtifactType(Enum):

@@ -29,20 +35,30 @@ 

  

  

  class ArtifactBuildState(Enum):

-     BUILD = 0

-     DONE = 1

-     FAILED = 2

-     CANCELED = 3

-     PLANNED = 4

+ 

+     def __init__(self, value, counter):

+         self._value_ = value

+         self.counter = counter

+ 

+     BUILD = (0, None)

+     DONE = (1, freshmaker_artifact_build_done_counter)

+     FAILED = (2, freshmaker_artifact_build_failed_counter)

+     CANCELED = (3, freshmaker_artifact_build_canceled_counter)

+     PLANNED = (4, None)

  

  

  class EventState(Enum):

-     INITIALIZED = 0

+ 

+     def __init__(self, value, counter):

+         self._value_ = value

+         self.counter = counter

+ 

+     INITIALIZED = (0, None)

      # some artifacts has been found and under building

-     BUILDING = 1

+     BUILDING = (1, None)

      # event is handled successfully

-     COMPLETE = 2

+     COMPLETE = (2, freshmaker_event_complete_counter)

      # error happens while handling the event

-     FAILED = 3

+     FAILED = (3, freshmaker_event_failed_counter)

      # no action to take upon the event

-     SKIPPED = 4

+     SKIPPED = (4, freshmaker_event_skipped_counter)

file modified
+5

@@ -36,6 +36,7 @@ 

  from freshmaker.api_utils import pagination_metadata

  from freshmaker.auth import login_required, requires_role, require_scopes

  from freshmaker.parsers.internal.manual_rebuild import FreshmakerManualRebuildParser

+ from freshmaker.monitor import MonitorAPI, build_api_latency, event_api_latency

  

  api_v1 = {

      'event_types': {

@@ -119,6 +120,7 @@ 

              }

          },

      },

+     'monitor': MonitorAPI.rest_api_v1,

  }

  

  

@@ -186,6 +188,7 @@ 

  

  

  class EventAPI(MethodView):

+     @event_api_latency.time()

      def get(self, id):

          if id is None:

              p_query = filter_events(request)

@@ -206,6 +209,7 @@ 

  

  

  class BuildAPI(MethodView):

+     @build_api_latency.time()

      def get(self, id):

          if id is None:

              p_query = filter_artifact_builds(request)

@@ -276,6 +280,7 @@ 

      'event_types': EventTypeAPI,

      'build_types': BuildTypeAPI,

      'build_states': BuildStateAPI,

+     'monitor': MonitorAPI,

  }

  

  

file modified
+1

@@ -26,3 +26,4 @@ 

  koji

  tabulate

  lxml

+ prometheus_client

file added
+105

@@ -0,0 +1,105 @@ 

+ # -*- coding: utf-8 -*-

+ # Copyright (c) 2017  Red Hat, Inc.

+ #

+ # Permission is hereby granted, free of charge, to any person obtaining a copy

+ # of this software and associated documentation files (the "Software"), to deal

+ # in the Software without restriction, including without limitation the rights

+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

+ # copies of the Software, and to permit persons to whom the Software is

+ # furnished to do so, subject to the following conditions:

+ #

+ # The above copyright notice and this permission notice shall be included in all

+ # copies or substantial portions of the Software.

+ #

+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+ # SOFTWARE.

+ 

+ import fedmsg.config

+ import mock

+ import freshmaker

+ 

+ from freshmaker import app, db, events, models, login_manager

+ from tests import helpers

+ 

+ 

+ @login_manager.user_loader

+ def user_loader(username):

+     """Return the ``models.User`` found by name for *username*.

+ 

+     Registered as the user loader callback of ``login_manager``.

+     """

+     return models.User.find_user_by_name(username=username)

+ 

+ 

+ class TestViews(helpers.ModelsTestCase):

+     def setUp(self):

+         super(TestViews, self).setUp()

+         self._init_data()

+         self.client = app.test_client()

+ 

+     def _init_data(self):

+         event = models.Event.create(db.session, "2017-00000000-0000-0000-0000-000000000001", "RHSA-2018-101", events.TestingEvent)

+         build = models.ArtifactBuild.create(db.session, event, "ed", "module", 1234)

+         build.build_args = '{"key": "value"}'

+         models.ArtifactBuild.create(db.session, event, "mksh", "module", 1235)

+         models.ArtifactBuild.create(db.session, event, "bash", "module", 1236)

+         models.Event.create(db.session, "2017-00000000-0000-0000-0000-000000000002", "RHSA-2018-102", events.TestingEvent)

+         db.session.commit()

+         db.session.expire_all()

+ 

+     def test_monitor_api_structure(self):

+         resp = self.client.get('/api/1/monitor/metrics/')

+         assert len([l.startswith('# TYPE')

+                     for l in resp.get_data(as_text=True).splitlines()]) == 104

+         assert len([l.startswith('# HELP')

+                     for l in resp.get_data(as_text=True).splitlines()]) == 104

+ 

+ 

+ class ConsumerTest(helpers.ModelsTestCase):

+     """Checks that consuming a message updates the exported counters."""

+ 

+     # NOTE(review): setUp/tearDown override the ModelsTestCase fixtures

+     # without calling super() -- presumably deliberate; confirm.

+     def setUp(self):

+         self.client = app.test_client()

+ 

+     def tearDown(self):

+         pass

+ 

+     def _create_consumer(self):

+         """Build a FreshmakerConsumer on a mocked hub with the fedmsg config."""

+         hub = mock.MagicMock()

+         hub.config = fedmsg.config.load_config()

+         hub.config['freshmakerconsumer'] = True

+         return freshmaker.consumer.FreshmakerConsumer(hub)

+ 

+     def _module_state_change_msg(self, state=None):

+         """Return a module state change message; *state* defaults to "ready"."""

+         msg = {'body': {

+             "msg_id": "2017-7afcb214-cf82-4130-92d2-22f45cf59cf7",

+             "topic": "org.fedoraproject.prod.mbs.module.state.change",

+             "signature": "qRZ6oXBpKD/q8BTjBNa4MREkAPxT+KzI8Oret+TSKazGq/6gk0uuprdFpkfBXLR5dd4XDoh3NQWp\nyC74VYTDVqJR7IsEaqHtrv01x1qoguU/IRWnzrkGwqXm+Es4W0QZjHisBIRRZ4ywYBG+DtWuskvy\n6/5Mc3dXaUBcm5TnT0c=\n",

+             "msg": {

+                 "state": 5,

+                 "id": 70,

+                 "state_name": state or "ready"

+             }

+         }}

+ 

+         return msg

+ 

+     @mock.patch("freshmaker.handlers.mbs.module_state_change.MBSModuleStateChangeHandler.handle")

+     @mock.patch("freshmaker.consumer.get_global_consumer")

+     def test_consumer_processing_message(self, global_consumer, handle):

+         """

+         Tests that consumer parses the message, forwards the event

+         to proper handler and is able to get the further work from

+         the handler.

+         """

+         consumer = self._create_consumer()

+         global_consumer.return_value = consumer

+         handle.return_value = [freshmaker.events.TestingEvent("ModuleBuilt handled")]

+ 

+         msg = self._module_state_change_msg()

+         consumer.consume(msg)

+ 

+         event = consumer.incoming.get()

+         self.assertEqual(event.msg_id, "ModuleBuilt handled")

+ 

+         # The successfully processed message must show up in the metrics.

+         resp = self.client.get('/api/1/monitor/metrics/')

+         assert 'messaging_received_passed 1.0' in resp.get_data(as_text=True).splitlines()

no initial comment

rebased onto 5f394e0cbe60c41f8506dba3e825524d451d3ed6

7 months ago

@fivaldi Can you please write down some background or context of this monitor API? I think that could be helpful to understand the implementation.

I think I like how this is integrated, but I'm not sure what to do with the prometheus_client dependency. So far we install only RPM dependencies in Freshmaker's container image, and prometheus_client is not among them. I'm not against adding "pip install prometheus_client" there for Freshmaker, but for ODCS or MBS (which do not run in a container image), it would be great to have an RPM package of prometheus_client.

Did you check whether this works properly? I tried to do the same, but it created a new temp directory every time and made a mess, so I just chose "/tmp". I'm not proposing to change this; I would just like to know whether it works properly for you, so that we can perhaps change it in Greenwave as well.

I like it. +1
But there's no generic "metrics" endpoint right? You created specific endpoints for each type.

rebased onto 600fd75

6 months ago

Hi.

I changed the endpoint to /monitor/metrics. We can use /monitor/<...> so that we have the metrics and we can re-use the same API (soon) for other stuff, like logs, tracebacks, self-tests etc. as proposed in the Monitoring doc. As a benefit/side effect, we'll have it all under one hood.

@gnaponie The /tmp/... dir (if set via the env var) should be the same for all workers.

Commit 5a9932c fixes this pull-request

Pull-Request has been merged by jkaluza

6 months ago

Pull-Request has been merged by jkaluza

6 months ago