#1269 Add state_reason on GW failure
Merged 4 years ago by mprahl. Opened 4 years ago by vmaljulin.
vmaljulin/fm-orchestrator FACTORY-4010  into  master

@@ -48,6 +48,7 @@ 

  import logging

  import os

  import time

+ from datetime import datetime

  

  logging.basicConfig(level=logging.DEBUG)

  
@@ -137,7 +138,12 @@ 

      if not build.scratch:

          if greenwave is None or greenwave.check_gating(build):

              build.transition(config, state="ready")

-             session.commit()

+         else:

+             build.state_reason = "Gating failed"

Can you add a check for this value in test_submit_build_no_components?

+             if greenwave.error_occurred:

+                 build.state_reason += " (Error occured while querying Greenwave)"

+             build.time_modified = datetime.utcnow()

+         session.commit()

  

      build_logs.stop(build)

      module_build_service.builder.GenericBuilder.clear_cache(build)

@@ -488,4 +488,11 @@ 

          for build in module_builds:

              if greenwave.check_gating(build):

                  build.transition(config, state=models.BUILD_STATES["ready"])

-                 session.commit()

+             else:

+                 build.state_reason = "Gating failed (MBS will retry in {0} seconds)".format(

+                     conf.polling_interval

+                 )

+                 if greenwave.error_occurred:

+                     build.state_reason += " (Error occured while querying Greenwave)"

+                 build.time_modified = datetime.utcnow()

+             session.commit()

@@ -37,9 +37,10 @@ 

          self.url = conf.greenwave_url

          self._decision_context = conf.greenwave_decision_context

          if not self.decision_context:

-             raise GreenwaveError("No Greenwave decision context set")

+             raise RuntimeError("No Greenwave decision context set")

          self._subj_type = conf.greenwave_subject_type

          self._gw_timeout = conf.greenwave_timeout

+         self.error_occurred = False

  

      def _greenwave_query(self, query_type, payload=None):

          """
@@ -63,7 +64,7 @@ 

          except requests.exceptions.Timeout:

              raise GreenwaveError("Greenwave request timed out")

          except Exception as exc:

-             error_message = "Unspecified greenwave request error" \

+             error_message = "Unspecified greenwave request error " \

                              '(original exception was: "{0}")'.format(str(exc))

              log.exception(error_message)

              raise GreenwaveError(error_message)
@@ -156,10 +157,12 @@ 

          :return: True if at least one GW response contains policies_satisfied set to true

          :rtype: bool

          """

+         self.error_occurred = False

          try:

              versions = self.get_product_versions()

          except GreenwaveError:

              log.warning('An error occured while getting a product versions')

+             self.error_occurred = True

              return False

  

          for ver in versions:
@@ -168,6 +171,7 @@ 

                      # at least one positive result is enough

                      return True

              except (KeyError, GreenwaveError) as exc:

+                 self.error_occurred = True

                  log.warning('Incorrect greenwave result "%s", ignoring', str(exc))

  

          return False
@@ -180,7 +184,7 @@ 

      def url(self, value):

          value = value.rstrip("/")

          if not value:

-             raise GreenwaveError("No Greenwave URL set")

+             raise RuntimeError("No Greenwave URL set")

          self._url = value

  

      @property
@@ -202,6 +206,6 @@ 

  

  try:

      greenwave = Greenwave()

- except GreenwaveError:

+ except RuntimeError:

      log.warning('Greenwave is not configured or configured improperly')

      greenwave = None

@@ -528,6 +528,7 @@ 

              assert module_build.state == models.BUILD_STATES["ready"]

          else:

              assert module_build.state == models.BUILD_STATES["done"]

+             assert module_build.state_reason == "Gating failed"

  

      @patch(

          "module_build_service.config.Config.check_for_eol",

@@ -18,6 +18,7 @@ 

  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

  # SOFTWARE.

  

+ import re

  import pytest

  from mock import patch

  from module_build_service import models, conf
@@ -666,4 +667,8 @@ 

              assert set([m.id for m in module]) == {1, 2}

          else:

              assert len(module) == 1

-             assert set([m.id for m in module]) == {1}

+             assert module[0].id == 1

+             module = models.ModuleBuild.query.filter_by(state=models.BUILD_STATES["done"]).all()

+             assert len(module) == 1

+             assert module[0].id == 2

+             assert re.match("Gating failed.*", module[0].state_reason)

You need to call session.commit() after the build.state_reason change.

This is a misleading reason. How about "The Greenwave gating decision failed"? It'd be even better if you let them know the decision context, and etc as well.

This is a misleading reason. How about "The Greenwave gating decision failed"? It'd be even better if you let them know the decision context, and etc as well.

This error may happen not only if greenwave returned False, but also when the connection failed or any other error occurred. So it's universal. User has to check logs to see what happened.

rebased onto 078c27fb4c8881af1348fc1550f41d807b1c9846

4 years ago

You need to call session.commit() after the build.state_reason change.

done

This is a misleading reason. How about "The Greenwave gating decision failed"? It'd be even better if you let them know the decision context, and etc as well.

This error may happen not only if greenwave returned False, but also when the connection failed or any other error occurred. So it's universal. User has to check logs to see what happened.

How can the user check the logs?

So, what kind of message you suggest to describe both Greenwave negative response and Greenwave connection/other errors?

@vmaljulin it'd be ideal if you had a separate message for when it is a Greenwave connection error versus the gating failing. Since we can assume that Greenwave will be available most of the time, it's probably safe to just assume the module didn't pass gating.

rebased onto 8e73bc88ce26302dfada9e1b0737a6b15a371aa3

4 years ago

rebased onto cd095eb4cca1f52480436244fe6a8738f22aa2aa

4 years ago

Greenwave should return the reason why the decision failed. Can we include that in the error message to the user?

rebased onto 9e55beffae48d4a441ab2f981dbccb056f135613

4 years ago

Build 9e55beffae48d4a441ab2f981dbccb056f135613 FAILED!
Rebase or make new commits to rebuild.

rebased onto ea4a7a58566602dec31a99556c05520907bafd63

4 years ago

Build ea4a7a58566602dec31a99556c05520907bafd63 FAILED!
Rebase or make new commits to rebuild.

Build ea4a7a58566602dec31a99556c05520907bafd63 FAILED!
Rebase or make new commits to rebuild.

Build ea4a7a58566602dec31a99556c05520907bafd63 FAILED!
Rebase or make new commits to rebuild.

rebased onto 0fdf7d2618049b167cfa38a6bd47f1df883f6dcc

4 years ago

rebased onto 4c770ec2e4f6ad4a88ef8ebb73d5c55a82dc9f54

4 years ago

This seems like an unusual pattern. If we want to store the errors, how about we stop using the greenwave global instance, and instantiate a Greenwave object for every decision request instead? Otherwise, the Greenwave errors just keep getting appended in the greenwave_errors list.

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.

Since @lucarval requested this extra information, I'll defer to him on this.

Since greenwave is a single instance of the Greenwave class, wouldn't the errors carry over in the state reason from the previous build that was checked? This seems like even more reason to just instantiate a Greenwave object when needed and just use a single one across the application.

What is a scenario that would cause there to be more than a single error?

1 new commit added

  • fixup! Add state_reason on GW failure
4 years ago

@mprahl Ok I removed errors information from the message (now it will only set a status that there were some errors).

querying a Greenwave => querying Greenwave

It'd be nice to add something like "MBS will try again in 10 minutes" to let the user know they can just wait for MBS to try again. The 10 minutes part of the text would be derived from conf.POLLING_INTERVAL.

Could you also please update build.time_modified here?

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.
Since @lucarval requested this extra information, I'll defer to him on this.

This is still an open question, so @lucarval, could you please take a look when you can?

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.
Since @lucarval requested this extra information, I'll defer to him on this.

This is still an open question, so @lucarval, could you please take a look when you can?

Using decision['summary'] addresses my comment. All good.

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.
Since @lucarval requested this extra information, I'll defer to him on this.
This is still an open question, so @lucarval, could you please take a look when you can?

Using decision['summary'] addresses my comment. All good.

The issue is that MBS brute-forces the product version by trying every product version that there is a policy for. My concern is that using decision['summary'] for every failure will be misleading to the user as there will be multiple summaries.

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.
Since @lucarval requested this extra information, I'll defer to him on this.
This is still an open question, so @lucarval, could you please take a look when you can?
Using decision['summary'] addresses my comment. All good.

The issue is that MBS brute-forces the product version by trying every product version that there is a policy for. My concern is that using decision['summary'] for every failure will be misleading to the user as there will be multiple summaries.

Hmm I see your point. The decision summary doesn't give you much context of what's missing ("All required tests passed", "2 of 15 required tests failed", etc). Concatenating those together will make even less sense. Alright, I'm happy with simply saying "Greenwave policy not satisfied" for now. We may have to provide additional info to users at some point somehow, but maybe that can be a future enhancement?

Now I see why decision_summary is a list. I'm wondering if it's valuable to the user that you are providing the summary for every product version, since many are not applicable to the user. I'm also not sure if each summary is distinguishable from the other, which would make the state reason confusing to the user.
Since @lucarval requested this extra information, I'll defer to him on this.
This is still an open question, so @lucarval, could you please take a look when you can?
Using decision['summary'] addresses my comment. All good.
The issue is that MBS brute-forces the product version by trying every product version that there is a policy for. My concern is that using decision['summary'] for every failure will be misleading to the user as there will be multiple summaries.

Hmm I see your point. The decision summary doesn't give you much context of what's missing ("All required tests passed", "2 of 15 required tests failed", etc). Concatenating those together will make even less sense. Alright, I'm happy with simply saying "Greenwave policy not satisfied" for now. We may have to provide additional info to users at some point somehow, but maybe that can be a future enhancement?

This works for me. In the future, we could also try to guess the product version. Estuary currently does this.

1 new commit added

  • fixup! Add state_reason on GW failure
4 years ago

3 new commits added

  • fixup! Add state_reason on GW failure
  • fixup! Add state_reason on GW failure
  • Add state_reason on GW failure
4 years ago

Comments were addressed

@vmaljulin this change looks good, but the "Pythonic" way would be to raise an exception instead of setting a boolean property on the class. Then the code using the Greenwave object would catch the exception and modify the state_reason based on that. I'll leave it up to you if you'd like to pursue that approach.

If you don't pursue that approach, could you please rename greenwave_errors? Because it is plural, it implies a list. Perhaps it could be named something like error_occurred.

rebased onto 2376e3fbb3e215d572a9daf7e82b56605e785bd5

4 years ago

@vmaljulin is this ready for review again?

@vmaljulin this ready for review again?

Yes

Please indent this since build.transition already updates this value for us.

This is misspelled. Please change it to error_occurred (it's missing an r).

Is there a unit test that tests this else block?

Is there a unit test that tests this else block?

@vmaljulin I'm done reviewing. I'm sorry I missed some of these in my last review.

Yes, there's a @pytest.mark.parametrize("greenwave_result", [True, False]) in a test which tests this condition.

rebased onto 79358d1ade19d3a37f8446beb88128679d537732

4 years ago

Can you add a check for this value in test_submit_build_no_components?

@vmaljulin you missed one of my comments. I clarified my request for adding to the unit tests. Once that is addressed this is good to merge.

rebased onto 92cdcd240e30b93771a9d32c1fe526dc7ae64155

4 years ago

Can you add a check for this value in test_submit_build_no_components?

Done

rebased onto f0dc0b8

4 years ago

Pull-Request has been merged by mprahl

4 years ago