#1240 Resolve the commit in the SCM.commit property method instead of the constructor
Merged 4 years ago by mprahl. Opened 4 years ago by mprahl.

file modified
+57 -111
@@ -51,12 +51,16 @@ 

      types = scm_url_schemes()

  

      def __init__(self, url, branch=None, allowed_scm=None, allow_local=False):

-         """Initialize the SCM object using the specified scmurl.

+         """

+         Initialize the SCM object using the specified SCM URL.

  

          If url is not in the list of allowed_scm, an error will be raised.

  

          :param str url: The unmodified scmurl

-         :param list allowed_scm: The list of allowed SCMs, optional

+         :param str branch: The optional source control branch. This defaults to "master" when git

+             is used.

+         :param list allowed_scm: The optional list of allowed SCM URL prefixes

+         :param bool allow_local: Allow SCM URLs that start with "file://"

          :raises: Forbidden or ValidationError

          """

  
@@ -94,8 +98,6 @@ 

                  self.name = self.name[:-4]

              self.commit = match.group("commit")

              self.branch = branch if branch else "master"

-             if not self.commit:

-                 self.commit = self.get_latest(self.branch)

              self.version = None

          else:

              raise ValidationError("Unhandled SCM scheme: %s" % self.scheme)
@@ -167,30 +169,24 @@ 

          if self.scheme == "git":

              self.sourcedir = "%s/%s" % (scmdir, self.name)

  

-             module_clone_cmd = ["git", "clone", "-q"]

-             if self.commit:

-                 module_clone_cmd.append("--no-checkout")

-                 module_checkout_cmd = ["git", "checkout", "-q", self.commit]

-             else:

-                 module_clone_cmd.extend(["--depth", "1"])

-             module_clone_cmd.extend([self.repository, self.sourcedir])

- 

+             module_clone_cmd = [

+                 "git", "clone", "-q", "--no-checkout", self.repository, self.sourcedir

+             ]

+             module_checkout_cmd = ["git", "checkout", "-q", self.commit]

              # perform checkouts

              SCM._run(module_clone_cmd, chdir=scmdir)

-             if self.commit:

-                 try:

-                     SCM._run(module_checkout_cmd, chdir=self.sourcedir)

-                 except RuntimeError as e:

-                     if (

-                         e.message.endswith(' did not match any file(s) known to git.\\n"')

-                         or "fatal: reference is not a tree: " in e.message

-                     ):

-                         raise UnprocessableEntity(

-                             "checkout: The requested commit hash was not found "

-                             "within the repository. Perhaps you forgot to push. "

-                             "The original message was: %s" % e.message

-                         )

-                     raise

+             try:

+                 SCM._run(module_checkout_cmd, chdir=self.sourcedir)

+             except RuntimeError as e:

+                 if (

+                     e.message.endswith(' did not match any file(s) known to git.\\n"')

+                     or "fatal: reference is not a tree: " in e.message

+                 ):

+                     raise UnprocessableEntity(

+                         "checkout: The requested commit hash was not found within the repository. "

+                         "Perhaps you forgot to push. The original message was: %s" % e.message

+                     )

+                 raise

  

              timestamp = SCM._run(["git", "show", "-s", "--format=%ct"], chdir=self.sourcedir)[1]

              dt = datetime.datetime.utcfromtimestamp(int(timestamp))
@@ -199,28 +195,28 @@ 

              raise RuntimeError("checkout: Unhandled SCM scheme.")

          return self.sourcedir

  

-     def get_latest(self, ref="master"):

+     def get_latest(self, ref=None):

          """ Get the latest commit hash based on the provided git ref

  

-         :param ref: a string of a git ref (either a branch or commit hash)

+         :param ref: a string of a git ref (either a branch or commit hash). This defaults to

+             self.branch.

          :returns: a string of the latest commit hash

          :raises: RuntimeError

          """

          if ref is None:

-             ref = "master"

+             ref = self.branch

+ 

          if self.scheme == "git":

              log.debug("Getting/verifying commit hash for %s" % self.repository)

              try:

-                 # This will fail if `ref` is not a branch name, but for example commit hash.

-                 # It is valid use-case to use commit hashes in modules instead of branch names

-                 # and in this case, there is no other way to get the full commit hash then

-                 # fallbac to `get_full_commit_hash`. We do not want to retry here, because

-                 # in case module contains only commit hashes, it would block for very long

-                 # time.

-                 _, output, _ = SCM._run_without_retry(

-                     ["git", "ls-remote", "--exit-code", self.repository, "refs/heads/" + ref]

-                 )

+                 # This will fail if `ref` is not a branch name, but this works for commit hashes.

+                 # If the ref is not a branch, then fallback to `get_full_commit_hash`. We do not

+                 # want to retry here, since if it's a commit, it would block for a very long time.

+                 cmd = ["git", "ls-remote", "--exit-code", self.repository, "refs/heads/" + ref]

+                 log.debug("Checking to see if the ref %s is a branch with `%s`", ref, " ".join(cmd))

+                 _, output, _ = SCM._run_without_retry(cmd)

              except UnprocessableEntity:

+                 log.debug("The ref %s is not a branch. Checking to see if it's a commit hash", ref)

                  # The call below will either return the commit hash as is (if a full one was

                  # provided) or the full commit hash (if a short hash was provided). If ref is not

                  # a commit hash, then this will raise an exception.
@@ -242,18 +238,33 @@ 

          """

          if commit_hash:

              commit_to_check = commit_hash

-         elif self.commit:

-             commit_to_check = self.commit

+         elif self._commit:

+             commit_to_check = self._commit

          else:

-             raise RuntimeError('No commit hash was specified for "{0}"'.format(self.url))

+             try:

+                 # If self._commit was None, then calling `self.commit` will resolve the ref based

+                 # on the branch

+                 return self.commit

+             except UnprocessableEntity:

+                 # If there was an exception resolving the ref based on the branch (could be the

+                 # default branch that doesn't exist), then there is not enough information to get

+                 # the commit hash

+                 raise RuntimeError('No commit hash was specified for "{0}"'.format(self.url))

  

          if self.scheme == "git":

-             log.debug('Getting the full commit hash for "{0}"'.format(self.repository))

+             log.debug(

+                 "Getting the full commit hash on %s from %s", self.repository, commit_to_check)

              td = None

              try:

                  td = tempfile.mkdtemp()

                  SCM._run(["git", "clone", "-q", self.repository, td, "--bare"])

-                 output = SCM._run(["git", "rev-parse", commit_to_check], chdir=td)[1]

+                 cmd = ["git", "rev-parse", commit_to_check]

+                 log.debug(

+                     "Running `%s` to get the full commit hash for %s",

+                     " ".join(cmd),

+                     commit_to_check

+                 )

+                 output = SCM._run(cmd, chdir=td)[1]

              finally:

                  if td and os.path.exists(td):

                      shutil.rmtree(td)
@@ -304,79 +315,14 @@ 

          else:

              raise RuntimeError("is_full_commit_hash: Unhandled SCM scheme.")

  

-     def is_available(self, strict=False):

-         """Check whether the scmurl is available for checkout.

- 

-         :param bool strict: When True, raise expection on error instead of

-                             returning False.

-         :returns: bool -- the scmurl is available for checkout

-         """

-         td = None

-         try:

-             td = tempfile.mkdtemp()

-             self.checkout(td)

-             return True

-         except Exception:

-             if strict:

-                 raise

-             return False

-         finally:

-             try:

-                 if td is not None:

-                     shutil.rmtree(td)

-             except Exception as e:

-                 log.warning("Failed to remove temporary directory {!r}: {}".format(td, str(e)))

- 

-     @property

-     def url(self):

-         """The original scmurl."""

-         return self._url

- 

-     @url.setter

-     def url(self, s):

-         self._url = str(s)

- 

-     @property

-     def scheme(self):

-         """The SCM scheme."""

-         return self._scheme

- 

-     @scheme.setter

-     def scheme(self, s):

-         self._scheme = str(s)

- 

-     @property

-     def sourcedir(self):

-         """The SCM source directory."""

-         return self._sourcedir

- 

-     @sourcedir.setter

-     def sourcedir(self, s):

-         self._sourcedir = str(s)

- 

-     @property

-     def repository(self):

-         """The repository part of the scmurl."""

-         return self._repository

- 

-     @repository.setter

-     def repository(self, s):

-         self._repository = str(s)

- 

      @property

      def commit(self):

          """The commit ID, for example the git hash, or None."""

+         if not self._commit:

+             self._commit = self.get_latest(self.branch)

+ 

          return self._commit

  

      @commit.setter

      def commit(self, s):

          self._commit = str(s) if s else None

- 

-     @property

-     def name(self):

-         """The module name."""

-         return self._name

- 

-     @name.setter

-     def name(self, s):

-         self._name = str(s)

@@ -105,7 +105,10 @@ 

          # we want to pull from, we need to resolve that f25 branch

          # to the specific commit available at the time of

          # submission (now).

-         pkgref = module_build_service.scm.SCM(pkg.get_repository()).get_latest(pkg.get_ref())

+         repo = pkg.get_repository()

+         ref = pkg.get_ref()

+         log.debug("Getting the commit hash for the ref %s on the repo %s", ref, repo)

+         pkgref = module_build_service.scm.SCM(repo).get_latest(ref)

      except Exception as e:

          log.exception(e)

          return {
@@ -142,18 +145,13 @@ 

      # If module build was submitted via yaml file, there is no scmurl

      if scmurl:

          scm = SCM(scmurl)

-         # If a commit hash is provided, add that information to the modulemd

-         if scm.commit:

-             # We want to make sure we have the full commit hash for consistency

-             if SCM.is_full_commit_hash(scm.scheme, scm.commit):

-                 full_scm_hash = scm.commit

-             else:

-                 full_scm_hash = scm.get_full_commit_hash()

- 

-             xmd["mbs"]["commit"] = full_scm_hash

-         # If a commit hash wasn't provided then just get the latest from master

+         # We want to make sure we have the full commit hash for consistency

+         if SCM.is_full_commit_hash(scm.scheme, scm.commit):

+             full_scm_hash = scm.commit

          else:

-             xmd["mbs"]["commit"] = scm.get_latest()

+             full_scm_hash = scm.get_full_commit_hash()

+ 

+         xmd["mbs"]["commit"] = full_scm_hash

  

      if mmd.get_rpm_components() or mmd.get_module_components():

          if "rpms" not in xmd["mbs"]:

@@ -28,6 +28,7 @@ 

  from shutil import copyfile

  from datetime import datetime, timedelta

  from random import randint

+ import hashlib

  from module_build_service.utils import to_text_type

  

  import module_build_service.messaging
@@ -73,6 +74,8 @@ 

          self.mocked_scm.return_value.repository_root = "https://src.stg.fedoraproject.org/modules/"

          self.mocked_scm.return_value.sourcedir = self.sourcedir

          self.mocked_scm.return_value.get_module_yaml = self.get_module_yaml

+         self.mocked_scm.return_value.is_full_commit_hash.return_value = commit and len(commit) == 40

+         self.mocked_scm.return_value.get_full_commit_hash.return_value = self.get_full_commit_hash

  

      def checkout(self, temp_dir):

          self.sourcedir = path.join(temp_dir, self.name)
@@ -89,6 +92,12 @@ 

      def get_module_yaml(self):

          return path.join(self.sourcedir, self.name + ".yaml")

  

+     def get_full_commit_hash(self, commit_hash=None):

+         if not commit_hash:

+             commit_hash = self.commit

+         sha1_hash = hashlib.sha1("random").hexdigest()

+         return commit_hash + sha1_hash[len(commit_hash):]

+ 

  

  class FakeModuleBuilder(GenericBuilder):

      """

file modified
+4 -1
@@ -93,7 +93,10 @@ 

  

      def test_verify_unknown_branch(self):

          with pytest.raises(UnprocessableEntity):

-             module_build_service.scm.SCM(repo_url, "unknown")

+             scm = module_build_service.scm.SCM(repo_url, "unknown")

+             # Accessing the commit property will cause the commit to be resolved, causing an

+             # exception

+             scm.commit

  

      def test_verify_commit_in_branch(self):

          target = "7035bd33614972ac66559ac1fdd019ff6027ad21"

@@ -20,6 +20,7 @@ 

  

  import io

  import tempfile

+ import hashlib

  from os import path, mkdir

  from shutil import copyfile, rmtree

  from datetime import datetime
@@ -67,6 +68,8 @@ 

          self.mocked_scm.return_value.repository_root = "https://src.stg.fedoraproject.org/modules/"

          self.mocked_scm.return_value.sourcedir = self.sourcedir

          self.mocked_scm.return_value.get_module_yaml = self.get_module_yaml

+         self.mocked_scm.return_value.is_full_commit_hash.return_value = commit and len(commit) == 40

+         self.mocked_scm.return_value.get_full_commit_hash.return_value = self.get_full_commit_hash

  

      def checkout(self, temp_dir):

          self.sourcedir = path.join(temp_dir, self.name)
@@ -83,6 +86,12 @@ 

      def get_module_yaml(self):

          return path.join(self.sourcedir, self.name + ".yaml")

  

+     def get_full_commit_hash(self, commit_hash=None):

+         if not commit_hash:

+             commit_hash = self.commit

+         sha1_hash = hashlib.sha1("random").hexdigest()

+         return commit_hash + sha1_hash[len(commit_hash):]

+ 

  

  class TestUtilsComponentReuse:

      def setup_method(self, test_method):

@@ -102,6 +102,8 @@ 

          self.mocked_scm.return_value.branch = branch

          self.mocked_scm.return_value.sourcedir = self.sourcedir

          self.mocked_scm.return_value.get_module_yaml = self.get_module_yaml

+         self.mocked_scm.return_value.is_full_commit_hash.return_value = commit and len(commit) == 40

+         self.mocked_scm.return_value.get_full_commit_hash.return_value = self.get_full_commit_hash

  

      def checkout(self, temp_dir):

          try:
@@ -124,6 +126,12 @@ 

      def get_module_yaml(self):

          return path.join(self.sourcedir, self.name + ".yaml")

  

+     def get_full_commit_hash(self, commit_hash=None):

+         if not commit_hash:

+             commit_hash = self.commit

+         sha1_hash = hashlib.sha1("random").hexdigest()

+         return commit_hash + sha1_hash[len(commit_hash):]

+ 

  

  class TestViews:

      def setup_method(self, test_method):

This should fix #1224

This also includes some additional cleanup in the SCM class.

2 new commits added

  • Simplify format_mmd since scm.commit is always set or returns an exception
  • Add an efficiency improvement to SCM.get_full_commit_hash
4 years ago

Thanks for the review @bstinson. I added a couple more commits that simplify some of the existing code.

@jkaluza could you review this PR? It's much easier to review commit by commit.

rebased onto d10fb44ce91f1f151b5172de71361203d224ed68

4 years ago

rebased onto 3c4faf0

4 years ago

Pull-Request has been merged by mprahl

4 years ago

Build 37e4c71 FAILED!
Rebase or make new commits to rebuild.