#1340 Added an REST endpoint to display log messages
Merged 4 years ago by jkaluza. Opened 4 years ago by mcurlej.
mcurlej/fm-orchestrator reused_reason  into  master

@@ -0,0 +1,30 @@ 

+ """add log_messages table

+ 

+ Revision ID: 0b00036c540f

+ Revises: 40b2c7d988d7

+ Create Date: 2019-07-14 07:24:21.059144

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = "0b00036c540f"

+ down_revision = "40b2c7d988d7"

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

+ 

+ def upgrade():

+     op.create_table("log_messages",

+         sa.Column("id", sa.Integer(), nullable=False),

+         sa.Column("component_build_id", sa.Integer(), nullable=True),

+         sa.Column("module_build_id", sa.Integer(), nullable=False),

+         sa.Column("message", sa.String(), nullable=False),

+         sa.ForeignKeyConstraint(["component_build_id"], ["component_builds.id"], ),

+         sa.ForeignKeyConstraint(["module_build_id"], ["module_builds.id"], ),

+         sa.PrimaryKeyConstraint("id"),

+     )

+ 

+ 

+ def downgrade():

+     op.drop_table("log_messages")

@@ -276,6 +276,7 @@ 

          "VirtualStream", secondary=module_builds_to_virtual_streams, back_populates="module_builds")

      reused_module_id = db.Column(db.Integer, db.ForeignKey("module_builds.id"))

      reused_module = db.relationship("ModuleBuild", remote_side="ModuleBuild.id")

+     log_messages = db.relationship("LogMessage", backref="module_build", lazy="dynamic")

  

      # List of arches against which the module is built.

      # NOTE: It is not filled for imported modules, because imported module builds have not been
@@ -965,6 +966,17 @@ 

  

          return rv

  

+     def log_message(self, session, message):

+         log.info(message)

+         log_msg = LogMessage(

+             component_build_id=None,

+             module_build_id=self.id,

+             message=message,

+             time_created=datetime.utcnow(),

+         )

+         session.add(log_msg)

+         session.commit()

+ 

      def tasks(self, db_session):

          """

          :return: dictionary containing the tasks associated with the build
@@ -1176,6 +1188,7 @@ 

      module_id = db.Column(db.Integer, db.ForeignKey("module_builds.id"), nullable=False)

      module_build = db.relationship("ModuleBuild", backref="component_builds", lazy=False)

      reused_component_id = db.Column(db.Integer, db.ForeignKey("component_builds.id"))

+     log_messages = db.relationship("LogMessage", backref="component_build", lazy="dynamic")

  

      # Weight defines the complexity of the component build as calculated by the builder's

      # get_build_weights function
@@ -1261,6 +1274,18 @@ 

  

          return json

  

+     def log_message(self, session, message):

+         log.info(message)

+         log_msg = LogMessage(

+             component_build_id=self.id,

+             module_build_id=self.module_id,

+             message=message,

+             time_created=datetime.utcnow(),

+         )

+ 

+         session.add(log_msg)

+         session.commit()

+ 

      def __repr__(self):

          return "<ComponentBuild %s, %r, state: %r, task_id: %r, batch: %r, state_reason: %s>" % (

              self.package,
@@ -1311,6 +1336,36 @@ 

          )

  

  

+ class LogMessage(MBSBase):

+     __tablename__ = "log_messages"

+     id = db.Column(db.Integer, primary_key=True)

+     component_build_id = db.Column(db.Integer, db.ForeignKey("component_builds.id"), nullable=True)

+     module_build_id = db.Column(db.Integer, db.ForeignKey("module_builds.id"), nullable=False)

+     message = db.Column(db.String, nullable=False)

+     time_created = db.Column(db.DateTime, nullable=False)

+ 

+     def json(self):

+         retval = {

+             "id": self.id,

+             "time_created": _utc_datetime_to_iso(self.time_created),

+             "message": self.message,

+         }

+ 

+         return retval

+ 

+     def __repr__(self):

+         return (

+             "<LogMessage %s, component_build_id: %s, module_build_id: %r, message: %s,"

+             " time_created: %s>"

+         ) % (

+             self.id,

+             self.component_build_id,

+             self.module_build_id,

+             self.message,

+             self.time_created,

+         )

+ 

+ 

  def session_before_commit_handlers(session):

      # new and updated items

      for item in set(session.new) | set(session.dirty):

@@ -252,12 +252,19 @@ 

  

      # If the rebuild strategy is "all", that means that nothing can be reused

      if module.rebuild_strategy == "all":

-         log.info('Cannot re-use the component because the rebuild strategy is "all".')

+         message = ("Cannot reuse the component {component_name} because the module "

+                    "rebuild strategy is \"all\".").format(

+                        component_name=component_name)

+         module.log_message(db_session, message)

          return None

  

      if not previous_module_build:

          previous_module_build = get_reusable_module(db_session, module)

          if not previous_module_build:

+             message = ("Cannot reuse because no previous build of "

+                        "module {module_name} found!").format(

+                 module_name=module.name)

+             module.log_message(db_session, message)

              return None

  

      if not mmd:
@@ -275,7 +282,9 @@ 

          or not new_module_build_component.batch

          or not new_module_build_component.ref

      ):

-         log.info("Cannot re-use.  New component not found in the db.")

+         message = ("Cannot reuse the component {} because it can't be found in the "

+                    "database").format(component_name)

+         module.log_message(db_session, message)

          return None

  

      prev_module_build_component = models.ComponentBuild.from_component_name(
@@ -289,13 +298,17 @@ 

          or not prev_module_build_component.batch

          or not prev_module_build_component.ref

      ):

-         log.info("Cannot re-use.  Previous component not found in the db.")

+         message = ("Cannot reuse the component {} because a previous build of "

+                    "it can't be found in the database").format(component_name)

+         new_module_build_component.log_message(db_session, message)

          return None

  

      # Make sure the ref for the component that is trying to be reused

      # hasn't changed since the last build

      if prev_module_build_component.ref != new_module_build_component.ref:

-         log.info("Cannot re-use.  Component commit hashes do not match.")

+         message = ("Cannot reuse the component because the commit hash changed"

+                    " since the last build")

+         new_module_build_component.log_message(db_session, message)

          return None

  

      # At this point we've determined that both module builds contain the component
@@ -304,7 +317,9 @@ 

          # Make sure the batch number for the component that is trying to be reused

          # hasn't changed since the last build

          if prev_module_build_component.batch != new_module_build_component.batch:

-             log.info("Cannot re-use.  Batch numbers do not match.")

+             message = ("Cannot reuse the component because it is being built in "

+                        "a different batch than in the compatible module build")

+             new_module_build_component.log_message(db_session, message)

              return None

  

          # If the mmd.buildopts.macros.rpms changed, we cannot reuse
@@ -321,7 +336,9 @@ 

              old_modulemd_macros = None

  

          if modulemd_macros != old_modulemd_macros:

-             log.info("Cannot re-use.  Old modulemd macros do not match the new.")

+             message = ("Cannot reuse the component because the modulemd's macros are"

+                        " different than those of the compatible module build")

+             new_module_build_component.log_message(db_session, message)

              return None

  

          # At this point we've determined that both module builds contain the component
@@ -363,16 +380,22 @@ 

          # If the previous batches don't have the same ordering and hashes, then the

          # component can't be reused

          if previous_module_build_components != new_module_build_components:

-             log.info("Cannot re-use.  Ordering or commit hashes of previous batches differ.")

+             message = ("Cannot reuse the component because a component in a previous"

+                        " batch has been rebuilt")

+             new_module_build_component.log_message(db_session, message)

              return None

  

      for pkg_name in mmd.get_rpm_component_names():

          pkg = mmd.get_rpm_component(pkg_name)

          if pkg_name not in old_mmd.get_rpm_component_names():

-             log.info("Cannot re-use. Package lists are different.")

+             message = ("Cannot reuse the component because a component was added or "

+                        "removed since the compatible module")

+             new_module_build_component.log_message(db_session, message)

              return None

          if set(pkg.get_arches()) != set(old_mmd.get_rpm_component(pkg_name).get_arches()):

-             log.info("Cannot re-use. Architectures are different for package: %s." % pkg_name)

+             message = ("Cannot reuse the component because the architectures for the package {}"

+                        " have changed since the compatible module build").format(pkg_name)

+             new_module_build_component.log_message(db_session, message)

              return None

  

      reusable_component = db_session.query(models.ComponentBuild).filter_by(

@@ -85,6 +85,14 @@ 

          "url": "/module-build-service/<int:api_version>/import-module/",

          "options": {"methods": ["POST"]},

      },

+     "log_messages_module_build": {

+         "url": "/module-build-service/<int:api_version>/module-builds/<int:id>/messages",

+         "options": {"methods": ["GET"], "defaults": {"model": models.ModuleBuild}},

+     },

+     "log_messages_component_build": {

+         "url": "/module-build-service/<int:api_version>/component-builds/<int:id>/messages",

+         "options": {"methods": ["GET"], "defaults": {"model": models.ComponentBuild}},

+     },

  }

  

  
@@ -307,6 +315,30 @@ 

          return jsonify(json_data), 201

  

  

+ class LogMessageAPI(MethodView):

+ 

+     @validate_api_version()

+     def get(self, api_version, id, model):

+ 

+         if not model:

+             raise ValidationError("Model is not set for this log messages endpoint")

+ 

+         query = model.query.filter_by(id=id).first().log_messages.order_by(

+             models.LogMessage.time_created.desc())

+ 

+         page = request.args.get("page", 1, type=int)

+         per_page = request.args.get("per_page", 10, type=int)

+         p_query = query.paginate(page, per_page, False)

+ 

+         request_args = {"id": id}

+         json_data = {"meta": pagination_metadata(p_query, api_version, request_args)}

+         json_data["messages"] = [

+             getattr(message, "json")() for message in p_query.items

+         ]

+ 

+         return jsonify(json_data), 200

+ 

+ 

  class BaseHandler(object):

      valid_params = set([

          "branch",
@@ -501,6 +533,7 @@ 

      about_view = AboutAPI.as_view("about")

      rebuild_strategies_view = RebuildStrategies.as_view("rebuild_strategies")

      import_module = ImportModuleAPI.as_view("import_module")

+     log_message = LogMessageAPI.as_view("log_messages")

      for key, val in api_routes.items():

          if key.startswith("component_build"):

              app.add_url_rule(val["url"], endpoint=key, view_func=component_view, **val["options"])
@@ -514,6 +547,8 @@ 

              )

          elif key == "import_module":

              app.add_url_rule(val["url"], endpoint=key, view_func=import_module, **val["options"])

+         elif key.startswith("log_message"):

+             app.add_url_rule(val["url"], endpoint=key, view_func=log_message, **val["options"])

          else:

              raise NotImplementedError("Unhandled api key.")

  

@@ -867,7 +867,8 @@ 

              ],

          )

          current_module = module_build_service.models.ModuleBuild.get_by_id(db_session, 3)

-         rv = KojiModuleBuilder._get_filtered_rpms_on_self_dep(current_module, br_filtered_rpms)

+         with patch.object(module_build_service.models.ModuleBuild, 'log_message'):

+             rv = KojiModuleBuilder._get_filtered_rpms_on_self_dep(current_module, br_filtered_rpms)

          assert set(rv) == set(expected)

          session.assert_not_called()

  

@@ -2741,3 +2741,39 @@ 

              'You cannot specify the parameter "reuse_components_from" when the "rebuild_strategy" '

              'parameter is set to "all"'

          )

+ 

+ 

+ class TestLogMessageViews:

+ 

+     def setup_method(self, test_method):

+         self.client = app.test_client()

+         init_data(2)

+         self.module_id = 2

+         self.component_id = 1

+         self.module_build = db.session.query(ModuleBuild).filter_by(id=self.module_id).first()

+         self.module_build.log_message(db.session, "Build-1 msg")

+         self.module_build.log_message(db.session, "Build-2 msg")

+         self.component_build = self.module_build.component_builds[0]

+         self.component_build.log_message(db.session, "Component-1 msg")

+ 

+     def test_view_log_messages_for_module_builds(self):

+         url = "/module-build-service/1/module-builds/{module_id}/messages".format(

+             module_id=self.module_id)

+         res = self.client.get(url)

+ 

+         json_res = str(res.data)

+ 

+         assert "Build-1" in json_res

+         assert "Build-2" in json_res

+         assert "Component-1" in json_res

+ 

+     def test_view_log_messages_for_component_builds(self):

+         url = "/module-build-service/1/component-builds/{component_id}/messages".format(

+             component_id=self.component_id)

+         res = self.client.get(url)

+ 

+         json_res = str(res.data)

+ 

+         assert "Build-1" not in json_res

+         assert "Build-2" not in json_res

+         assert "Component-1" in json_res

The issue is that users don't get feedback from MBS about why a
component was not reused. There was added logic which enables to
store log messages in the database and can be viewed through the
REST api of MBS.

Ticket-ID: #1284

Signed-off-by: Martin Curlej mcurlej@redhat.com

Can we still keep this log here? When someone would like to get some information from log file, this might be useful.

Looks we have more than one names through the code for a module build which can be used to provide reusable components, they are previous module build, reusable module build, and probably the compatible module build. I'm not 100% sure if the last one could be counted in. My thought is if we use one name consistently through the code, logs and documentation, it would be much easier to understand for code reader.

This log only covers the first case in the if. it should be good to log for every case.

Is it proper to use "is" rather than "was"?

There is an additional space between Component and commit. And no need of () here.

50 + new_module_build_component.store_log_message(session, message)

Some logs are logged into module build, and some other logs like this one are logged into a ComponentBuild object. This would cause a UX problem for a user who is getting logs to for a module build. From the point of view of #1284, user must get logs from two endpoints to get full list of logs relative to module and component reuse, and it must be difficult for the user to combine those logs together according to the timeline in order to understand the logs.

In my opinion, every module build should have a full list of logs from the beginning of submit to the end of build process, and whatever the log could be provided via a REST API endpoint or log files. So, when given a module build ID, for example 1, then MBS could return a list of logs or result containing link or links to log files (something like what Koji/Brew does to provide log files in a way).

The logs for a module build could look like these, just an example,

10:20 Submit module build N:S:V:C
...
10:30 Module build N:S:V:C is expanded to builds ...
...
10:50 Found a previous module build N:S:V:C for MSE build .... to reuse
...
11:00 Reuse component A for component  MSE build
11:01 Component B was built but ref is changed, build it this time.
11:02 Component C does not appear in module build reused, build it this time.
...
[other logs through the build process]

In addition, FYI, to implement a logging handler in order to log in a consistent way like others?

[1] https://pagure.io/fm-orchestrator/issue/1284

@cqi, I think the logs are stored in single database table and if you query for ModuleBuild.log_messages, you will get all the logs including those logged using the ComponentBuild.store_log_message method. The log messages table has two IDs - ModuleBuild id and ComponentBuild id. ModuleBuild id is always set - even if you log using the ComponentBuild class.

Getting the complete logs would be useful in general, but for end users, the logs are too verbose to understand. We therefore wanted to cherry-pick some important message to end user and add them to this user-facing log. I'm 100% for a way to get complete logs, because it would save our time to debug issues, but it would not fix the issues people have with getting more information from MBS, because they would get so many log lines that they would not be able to understand it.

For me personally, the name of this method suggests that it only stores the message, but does not really logs it into general MBS logs. I would personally rename it to just log_message().

But let's wait for other opinions, because naming is hard :).

Use pagination here the same way as we do in AbstractQueryableBuildAPI.get - it means include pagination_metadata in the response and use query.paginate he same way as in utils.views.filter_component_builds for example:

    page = flask_request.args.get("page", 1, type=int)
    per_page = flask_request.args.get("per_page", 10, type=int)
    return query.paginate(page, per_page, False)

Also probably order the messages by timestamp to show the newest.

I like it, wrote few comments on things which I think could be improved.

@jkaluza I think I ignored the line of code in ComponentBuild.store_log_message that also associates the log with module build. In this way, it is doable to get all re-use relative logs from endpoint log_messages_module_build. :thumbsup:

@mcurlej What is the point of associating a log message with a Component build?

@cqi The idea was to be able to add messages to both, so users could quickly filter them, if they want to see only messages about a particular component build. When you have a module build, which has 100s of components, its a QoL improvement for the users, to filter out messages for a component.

Optional: It'd be nice to use double quotes throughout the file. I know we don't enforce coding style in the migrations, but it'd be nice to do this for newer migrations.

What does lazy=False do? I can't find it in the docs. Perhaps lazy="joined" is a better option since when accessing the log messages, we likely will always need them all (e.g. returning it in the JSON).

log_message is fine since it's on the ModuleBuild class.

It seems that we already use the convention of naming date time columns with the time_ prefix. How about time_createdfor consistency?

Why is this nullable? This doesn't match the migration script.

Could you move created_at before message since message will sometimes be quite long, and it'll make it harder to read created_at. Alternatively, you could just remove created_at since it's not that useful here.

We use the term compatible in the documentation. How about the following message?
"Cannot reuse any components because a compatible module build of {}:{} was not found".format(module.name, module.stream)

I know the previous log message says re-use, but the correct spelling reuse. Could you fix this in all the logs stored in the database?

Won't this be called for every component in the module? Perhaps there should be a check to verify the log message doesn't already exist before adding another one.

I know the original the log message had two spaces after a period. It'd be nice to make this consistent and just use one space for all log messages we store.

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component {} because it can't be found in the database".format(component_name)

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component {} because a previous build of it can't be found in the database".format(component_name)

Shouldn't this be stored on the new component build and not the module itself?

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because the commit hash changed since the last build"

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because it is being built in a different batch than in the compatible module build"

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because the modulemd's macros are different than those of the compatible module build"

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because a component in a previous batch has been rebuilt"

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because a component was added or removed since the compatible module"

Since this is user-facing, how about a better message such as:

message = "Cannot reuse the component because the architectures for the package {} have changed since the compatible module build".format(pkg_name)

Why a separate API endpoint instead of adding it to the verbose JSON output for the module build or component build? What do you think @jkaluza?

What if model.query.filter_by(id=id).first() is None?

Why a separate API endpoint instead of adding it to the verbose JSON output for the module build or component build? What do you think @jkaluza?

I think I see your point here. It might be easier for the rpkg (or any other client app) to just query single API endpoint and get both log messages and module build or component related log messages. I like this idea, but I'm not sure how to do pagination there, because you would return just single object (one ModuleBuild or one ComponentBuild), but within that object, we want to paginate the log messages.

^ Because of that, I think I still prefer original endpoint, but if you have some nice way in mind, share it please :).

@mprahl @jkaluza In my mind, its better to separate utility metadata from the actual metadata of the object you are requesting. As log messages will be necessary mostly only during debuging, not all the time. Also there could be situation, that you would display a lot of data on request, which is not really necessary for the user. So if you have a module and has 100 components, which everyone will log at least 1 message. Thats a lot of data. I know that right know we don't care about bandwith, but its a good practice to have your JSON responses, concise and small as possible so when your service becomes popular it will not kill your env.

@mprahl lazy=False is the same as lazy="joined". Used False, as it is already used in the code like that, before my change.

@cqi @mprahl @jkaluza wrt the if covering only the first part, ehm i just changed the logging part. The message and if is unchanged from previous implementation. Should i rewrite it into several ifs with different messages?

rebased onto e1bb084b5d4723ecbf63aad5e3bd6ebf02a10975

4 years ago

@cqi @mprahl @jkaluza Please review again, the code is updated.

Build #249 failed (commit: e1bb084b5d4723ecbf63aad5e3bd6ebf02a10975).
Rebase or make new commits to rebuild.

rebased onto b810fc08bc4d4958b96fc37179e403a81673c3eb

4 years ago

I was thinking about module_build_id and component_build_id in the REST API response.

With the proposed REST API, I see no point in sending those values, because to get the list of messages, you need to know the module_build_id or component_build_id to construct the REST API URL. i think it's waste of resources to repeat component_build_id and module_build_id in every message sent. On the other side, showing component_build_id could add additional context to messages sent in /module-builds/$id/messages...

@mprahl, any opinions?

@jkaluza I think it's a good point. In my opinion, it's not useful to have module_build_id, but it might be nice to expand component_build_id (rename it to component_build) to be the component's JSON or a subset of it. The reason is that as a user, I'd like to know the NVR of the component this message is about.

What do you think about that?

Optional: What do you think about adding a keyword argument of component_build_id=None to ModuleBuild.log_message, and this method could then call self.module_build.log_message(session, message, self.id)? It would reduce some of the code duplication.

Optional: You don't need to assign this to a variable. You could simply return the dictionary as it is.

Here, the issue is that the new component isn't in the database for some reason. This should never happen, but the log message does need to be adjusted.

@mprahl wrt retval, did as it is done in other models.

rebased onto 72d45bcf52a81b2eccfe733977dae10fc4cb240a

4 years ago

pretty please pagure-ci rebuild

4 years ago

pretty please pagure-ci rebuild

4 years ago

Jenkins CI timeouts, but tests are passing for me locally. I'm therefore merging this PR.

@mcurlej, actually, you need to rebase this.

rebased onto 9ba22eee8bd9a6624f409584af6519a74b4dd9aa

4 years ago

Build #302 failed (commit: 9ba22eee8bd9a6624f409584af6519a74b4dd9aa).
Rebase or make new commits to rebuild.

@mcurlej, tests are failing after the rebase now.

rebased onto c5d484f

4 years ago

Commit 8dd65a7 fixes this pull-request

Pull-Request has been merged by jkaluza

4 years ago

Pull-Request has been merged by jkaluza

4 years ago