#31 Work on the logic to generate the changelog
Merged 4 years ago by nphilipp. Opened 4 years ago by pingou.
fedora-infra/ pingou/rpmautospec changelog_work  into  master

@@ -86,7 +86,9 @@ 

                  print(line, file=tmp_specfile, end="")

  

              if has_autochangelog:

-                 print("\n".join(produce_changelog(srcdir)), file=tmp_specfile)

+                 print("\n".join(produce_changelog(srcdir, latest_rel=new_rel)), file=tmp_specfile)

+ 

+             tmp_specfile.flush()

  

              # ...and copy it back (potentially across device boundaries)

              shutil.copy2(tmp_specfile.name, specfile_name)

file modified
+202 -69
@@ -1,14 +1,15 @@ 

  #!/usr/bin/python3

- import collections

  import datetime

  import logging

  import os

+ import re

  import shutil

  import subprocess

  import tempfile

  import textwrap

+ import typing

  

- import pygit2

+ from .py2compat.escape_tags import unescape_tag

  

  _log = logging.getLogger(__name__)

  
@@ -25,7 +26,7 @@ 

      return subcmd_name

  

  

- def run_command(command, cwd=None):

+ def run_command(command: list, cwd: typing.Optional[str] = None) -> bytes:

      """ Run the specified command in a specific working directory if one

      is specified.

      """
@@ -36,91 +37,223 @@ 

          _log.error("Command `{}` return code: `{}`".format(" ".join(command), e.returncode))

          _log.error("stdout:\n-------\n{}".format(e.stdout))

          _log.error("stderr:\n-------\n{}".format(e.stderr))

-         raise Exception("Command failed to run")

+         raise

  

      return output

  

  

- def produce_changelog(repopath):

+ def git_get_log(

+     path: str,

+     log_options: typing.Optional[typing.List[str]] = None,

+     toref: typing.Optional[str] = None,

+     target: typing.Optional[str] = None,

+ ) -> typing.List[str]:

+     """ Returns the list of the commit logs for the repo in ``path`` .

+ 

+     This method runs the system's `git log --pretty=oneline --abbrev-commit`

+     command.

+ 

+     This command returns the git log as follows:

+     <short hash> <subject of the commit message>

+     <short hash2> <subject of the commit message>

+     <short hash3> <subject of the commit message>

+     ...

+ 

+     :kwarg log_options: options to pass to git log

+     :kwarg toref: a reference/commit to use when generating the log

+     :kwarg target: the target of the git log command, can be a ref, a

+         file or nothing

+ 

+     """

+     cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit", "--no-decorate"]

+     if log_options:

+         cmd.extend(log_options)

+     if toref:

+         cmd.append(f"{toref}..")

+     if target:

+         cmd.extend(["--", target])

+ 

+     _log.debug(f"git_get_log {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")

+ 

+ 

+ def git_get_commit_info(path: str, commithash: str) -> typing.List[str]:

+     """This function calls `git show --no-patch --format="%P|%H|%ct|%aN <%aE>|%s"` on the

+     specified commit and returns the output from git

+     """

+     cmd = ["git", "show", "--no-patch", "--format=%P|%H|%ct|%aN <%aE>|%s", commithash]

+     _log.debug(f"git_get_commit_info {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")

+ 

+ 

+ def git_get_changed_files(path: str, commithash: str) -> typing.List[str]:

+     """ Returns the list of files changed in the specified commit. """

+     cmd = ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", commithash]

+     _log.debug(f"git_get_changed_files {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")

+ 

+ 

+ def git_get_tags(path: str) -> typing.Mapping[str, str]:

+     """ Returns a dict containing for each commit tagged the corresponding tag. """

+     cmd = ["git", "show-ref", "--tags"]

+     _log.debug(f"git_get_tags {' '.join(cmd)}")

+     tags_list = run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")

+ 

+     output = {}

+     for row in tags_list:

+         commit, name = row.split(" ", 1)

+         # we're only interested in the build/* tags

+         if name.startswith("refs/tags/build/"):

+             name = name.replace("refs/tags/build/", "")

+             output[commit] = unescape_tag(name)

+ 

+     return output

+ 

+ 

+ def nevrd_to_evr(nevrd: str) -> str:

+     """ Converts a name:epoch-version-release.dist_tag to epoch_version_release

+     so it can be inserted in the changelog.

+ 

+     If the nevrd provided does not have at least 2 "-" in it, it is treated

+     as a bare release. In both cases any dist_tag is removed from the release.

+     """

+     if nevrd.count("-") >= 2:

+         version, release = nevrd.rsplit("-", 2)[1:]

+         # Append a "-" to the version to make it easier to concatenate later

+         version += "-"

+     else:

+         version = ""

+         release = nevrd

+     release = re.sub(r"\.fc\d+", "", release)

+     release = re.sub(r"\.el\d+", "", release)

+     return f"{version}{release}"

+ 

+ 

+ def get_rpm_current_version(path: str, name: str) -> str:

+     """ Retrieve the current version set in the spec file named ``name``.spec

+     at the given path.

+     """

+     output = None

+     try:

+         output = (

+             run_command(["rpm", "--qf", "%{version}\n", "--specfile", f"{name}.spec"], cwd=path,)

+             .decode("UTF-8")

+             .strip()

+         )

+     except Exception:

+         pass

+     return output

+ 

+ 

+ def produce_changelog(repopath, latest_rel=None):

      name = os.path.basename(repopath)

-     with tempfile.TemporaryDirectory() as workdir:

+     with tempfile.TemporaryDirectory(prefix="rpmautospec-") as workdir:

          repocopy = f"{workdir}/{name}"

          shutil.copytree(repopath, repocopy)

+         _log.debug(f"Working directory: {repocopy}")

          lines = []

-         repo_obj = pygit2.Repository(repocopy)

  

-         branch = repo_obj.lookup_branch(repo_obj.head.shorthand)

-         commit = branch.peel(pygit2.Commit)

-         data = collections.defaultdict(list)

-         for commit in repo_obj.walk(commit.hex, pygit2.GIT_SORT_TIME):

-             if len(commit.parents) > 1:

+         # Get all the tags in the repo

+         tags = git_get_tags(repocopy)

+ 

+         # Get the latest commit in the repo

+         head = git_get_log(repocopy, log_options=["-1"])[0]

+         head_hash = head.split(" ", 1)[0]

+         head_info = git_get_commit_info(repocopy, head_hash)[0]

+         head_commit_dt = datetime.datetime.utcfromtimestamp(int(head_info.split("|", 3)[2]))

+ 

+         # Get the current version and build the version-release to be used

+         # for the latest entry in the changelog, if we can build it

+         current_evr = None

+         current_version = get_rpm_current_version(repocopy, name)

+         if current_version and latest_rel:

+             latest_rel = nevrd_to_evr(latest_rel)

+             current_evr = f"{current_version}-{latest_rel}"

+ 

+         stop_commit_hash = None

+         changelog = []

+         changelog_file = os.path.join(repocopy, "changelog")

+         if os.path.exists(changelog_file):

+             stop_commit = git_get_log(repocopy, log_options=["-1"], target="changelog")

+             if stop_commit:

+                 stop_commit_hash = stop_commit[0].split(" ", 1)[0]

+             with open(changelog_file) as stream:

+                 changelog = [r.rstrip() for r in stream.readlines()]

+ 

+         output = []

+         entry = []

+         nevr = current_evr or "LATEST"

+         last_author = None

+         for log_line in git_get_log(repocopy, toref=f"{stop_commit_hash}^"):

+             if not log_line.strip():

+                 continue

+             commit = log_line.split(" ", 1)[0]

+ 

+             info = git_get_commit_info(repocopy, commit)

+             if len(info) > 1:

                  # Ignore merge commits

+                 _log.debug(f"commit {commit} is a merge commit, skipping")

                  continue

  

-             commit_dt = datetime.datetime.utcfromtimestamp(commit.commit_time)

-             if commit_dt < (datetime.datetime.utcnow() - datetime.timedelta(days=730)):

+             _, commithash, commit_ts, author_info, commit_summary = info[0].split("|", 4)

+ 

+             if commithash in tags:

+                 output.append(entry)

+                 entry = []

+                 nevr = nevrd_to_evr(tags[commithash])

+ 

+             commit_dt = datetime.datetime.utcfromtimestamp(int(commit_ts))

+             if commit_dt < (head_commit_dt - datetime.timedelta(days=730)):

                  # Ignore all commits older than 2 years

+                 # if there is a `changelog` file in addition to these commits

+                 # they will be cut down anyway when the RPM gets built, so

+                 # the gap between the commits we are gathering here and the

+                 # ones in the `changelog` file can be ignored.

+                 # print(f"commit {commit} is too old, breaking iteration")

                  break

  

-             repo_obj.checkout_tree(

-                 commit, strategy=pygit2.GIT_CHECKOUT_FORCE | pygit2.GIT_CHECKOUT_RECREATE_MISSING,

-             )

-             if os.path.exists(os.path.join(repocopy, f"{name}.spec")):

-                 try:

-                     output = run_command(

-                         [

-                             "rpm",

-                             "--qf",

-                             "%{name}  %{version}  %{release}\n",

-                             "--specfile",

-                             f"{name}.spec",

-                         ],

-                         cwd=repocopy,

-                     )

-                 except Exception:

-                     continue

-                 output = tuple(

-                     output.decode("utf-8").strip().split("\n")[0].rsplit(".", 1)[0].split("  "),

-                 )

-                 nvr = "-".join(output)

- 

-                 if commit.parents:

-                     diff = repo_obj.diff(commit.parents[0], commit)

+             files_changed = git_get_changed_files(repocopy, commit)

+             ignore = True

+             for filename in files_changed:

+                 if filename.endswith((".spec", ".patch")):

+                     ignore = False

+ 

+             if not ignore:

+                 if last_author == author_info:

+                     entry[-1]["commits"].append(commit_summary)

                  else:

-                     # First commit in the repo

-                     diff = commit.tree.diff_to_tree(swap=True)

- 

-                 if diff.stats.files_changed:

-                     files_changed = [d.new_file.path for d in diff.deltas]

-                     ignore = True

-                     for filename in files_changed:

-                         if filename.endswith((".spec", ".patch")):

-                             ignore = False

-                     if not ignore:

-                         data[output].append(commit)

+                     entry.append(

+                         {

+                             "commit": commit,

+                             "commit_ts": commit_ts,

+                             "commit_author": author_info,

+                             "commits": [commit_summary],

+                             "nevr": nevr,

+                         }

+                     )

+                 last_author = author_info

              else:

-                 print("No more spec file, bailing")

-                 break

+                 _log.debug(f"commit {commit} is not changing a file of interest, ignoring")

  

-     for nvr, commits in data.items():

-         for idx, commit in enumerate(reversed(commits)):

-             last_commit = idx + 1 == len(commits)

-             commit_dt = datetime.datetime.utcfromtimestamp(commit.commit_time)

-             wrapper = textwrap.TextWrapper(width=75, subsequent_indent="  ")

-             message = wrapper.fill(commit.message.split("\n")[0].strip("- "))

- 

-             if last_commit:

-                 lines += [

-                     f"* {commit_dt.strftime('%a %b %d %Y')} {commit.author.name}"

-                     f" <{commit.author.email}> - {nvr[1]}-{nvr[2]}",

-                 ]

-             else:

-                 lines += [

-                     f"* {commit_dt.strftime('%a %b %d %Y')} {commit.author.name}"

-                     f" <{commit.author.email}>",

-                 ]

-             lines += ["- %s" % message]

+         # Last entries

+         output.append(entry)

+ 

+     wrapper = textwrap.TextWrapper(width=75, subsequent_indent="  ")

+     for entries in output:

+         for commit in entries:

+             commit_dt = datetime.datetime.utcfromtimestamp(int(commit["commit_ts"]))

+             author_info = commit["commit_author"]

+             nevr = commit["nevr"]

+             lines += [

+                 f"* {commit_dt.strftime('%a %b %d %Y')} {author_info} - {nevr}",

+             ]

+             for message in reversed(commit["commits"]):

+                 if message.strip():

+                     lines += ["- %s" % wrapper.fill(message.strip())]

              lines += [""]

+ 

+     # Add the existing changelog if there is one

+     lines.extend(changelog)

      return lines

  

  

@@ -1,4 +1,4 @@ 

- import filecmp

+ import io

  import os

  import shutil

  import tarfile
@@ -158,4 +158,6 @@ 

                      expected_spec_file_path = tmpspec.name

                      self.fuzz_spec_file(expected_spec_file_path, autorel_case, autochangelog_case)

  

-                 assert filecmp.cmp(unprocessed_spec_file_path, expected_spec_file_path)

+                 assert list(io.open(unprocessed_spec_file_path)) == list(

+                     io.open(expected_spec_file_path)

+                 )

In this commit, we are dropping the dependency on pygit2.
Fixes https://pagure.io/Fedora-Infra/rpmautospec/issue/10

We also take into account the presence and content of the
changelog file in the git repo as the idea is only to
automatically generate the changelog up until the latest commit
updating this changelog file.

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>

Build failed.

Metadata Update from @nphilipp:
- Request assigned

4 years ago

Let's call this something more specific than log (which clashes with the name of the logger object in many other projects), perhaps prefix all these functions running git with git_. Also, let's use typing hints rather than putting that info in the docstring. E.g.:

import typing
...
def git_log(
    path: str,
    log_options: typing.Optional[typing.List[str]] = None,
    toref: typing.Optional[str] = None,
    target: typing.Optional[str] = None,
) -> typing.List[str]:

This isn't very informative if I don't know what these CLI options mean. Can we describe the format in which the commit logs are returned?

BTW, I just ran this command in our repo and it printed ref names if commits had one (e.g. branch names). Perhaps we should add --no-decorate?

Going with the above:

def git_get_commit_info(path: str, commithash: str) -> typing.List[str]:
    ...

Maybe: "Returns the commit logs for the repo in path."?

Likewise (and use plural files):

def git_get_changed_files(path: str, commithash: str) -> typing.List[str]:

Should we read this from repopath instead, i.e. reflect any working copy changes for use in fedpkg and the like?

This trips over commit logs containing non-Unicode entries. Perhaps use .decode("UTF-8", "replace")? I didn't notice this for gimp with the pygit2 variant because it only processed the last 2 years of commits.

And we could use git log ... --since=... to limit the changelog to the last two years.

Renamed to git_get_log and type hints added as well

Description of the format added as well as --no-decorate

Let's look into this in another PR but yes I think it would be good. Do you want to open a ticket for this so we don't forget?

2 new commits added

  • Gather 2 years of logs from the last commit made to the repo
  • Adjust the changelog module for Nils comments
4 years ago

3 new commits added

  • Gather 2 years of logs from the last commit made to the repo
  • Adjust the changelog module for Nils comments
  • Work on the logic to generate the changelog
4 years ago

Build failed.

1 new commit added

  • Do not add quotes around the data retrieved by git show
4 years ago

Build failed.

Can we make this file a little less generic? Like <package>.changelog or something like that? If I have multiple spec files in the same git repo for the same sources, I'd like to be able to handle this relatively cleanly.

1 new commit added

  • wip
4 years ago

Build failed.

rebased onto be7cd6f07457687cdf19bd750539c14591b31e09

4 years ago

Build failed.

2 new commits added

  • Work on the logic to generate the changelog
  • Streamline the dist-git test data
4 years ago

Build failed.

2 new commits added

  • Work on the logic to generate the changelog
  • Streamline the dist-git test data
4 years ago

2 new commits added

  • Work on the logic to generate the changelog
  • Streamline the dist-git test data
4 years ago

Build failed.

rebased onto 7b5088c8d3343bca6b6956d872ce443b8113c1be

4 years ago

rebased onto 084759b89696e5d840bf3565f0c3c13b22f9385b

4 years ago

Build failed.

rebased onto 7f74bf621b8dfaefb06ebeeb9f1e86189b025e72

4 years ago

Build succeeded.

rebased onto b7620dfbc7181e294f1b2a741e54dd824a690d69

4 years ago

rebased onto c1b3c4d

4 years ago

Build failed.

4 new commits added

  • Check if two files are identical with list(io.open()) rather than filecmp
  • Re-raise the exception after having logged it when shelling out a command
  • Make sure we flush the temporary file/buffer to disk before we copy it
  • Work on the logic to generate the changelog
4 years ago

Build succeeded.

Pull-Request has been merged by nphilipp

4 years ago