#31 Work on the logic to generate the changelog
@@ -86,7 +86,9 @@ 

                  print(line, file=tmp_specfile, end="")


              if has_autochangelog:

-                 print("\n".join(produce_changelog(srcdir)), file=tmp_specfile)

+                 print("\n".join(produce_changelog(srcdir, latest_rel=new_rel)), file=tmp_specfile)


+             tmp_specfile.flush()


              # ...and copy it back (potentially across device boundaries)

              shutil.copy2(tmp_specfile.name, specfile_name)

@@ -1,14 +1,15 @@ 


- import collections

  import datetime

  import logging

  import os

+ import re

  import shutil

  import subprocess

  import tempfile

  import textwrap

+ import typing


- import pygit2

+ from .py2compat.escape_tags import unescape_tag


  _log = logging.getLogger(__name__)

@@ -25,7 +26,7 @@ 

      return subcmd_name



- def run_command(command, cwd=None):

+ def run_command(command: list, cwd: typing.Optional[str] = None) -> bytes:

      """ Run the specified command in a specific working directory if one

      is specified.

@@ -36,91 +37,223 @@ 

          _log.error("Command `{}` return code: `{}`".format(" ".join(command), e.returncode))



-         raise Exception("Command failed to run")

+         raise


      return output



- def produce_changelog(repopath):

+ def git_get_log(

+     path: str,

+     log_options: typing.Optional[typing.List[str]] = None,

+     toref: typing.Optional[str] = None,

+     target: typing.Optional[str] = None,

+ ) -> typing.List[str]:

+     """ Returns the list of the commit logs for the repo in ``path`` .


+     This method runs the system's `git log --pretty=oneline --abbrev-commit`

+     command.


+     This command returns the git log as follows:

+     <short hash> <subject of the commit message>

+     <short hash2> <subject of the commit message>

+     <short hash3> <subject of the commit message>

+     ...


+     :kwarg log_options: options to pass to git log

+     :kwarg toref: a reference/commit to use when generating the log

+     :kwarg target: the target of the git log command, can be a ref, a

+         file or nothing


+     """

+     cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit", "--no-decorate"]

+     if log_options:

+         cmd.extend(log_options)

+     if toref:

+         cmd.append(f"{toref}..")

+     if target:

+         cmd.extend(["--", target])


+     _log.debug(f"git_get_log {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")



+ def git_get_commit_info(path: str, commithash: str) -> typing.List[str]:

+     """Calls `git show --no-patch --format="%P|%H|%ct|%aN <%aE>|%s"` on the

+     specified commit and returns the output from git, split into lines.

+     """

+     cmd = ["git", "show", "--no-patch", "--format=%P|%H|%ct|%aN <%aE>|%s", commithash]

+     _log.debug(f"git_get_commit_info {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")



+ def git_get_changed_files(path: str, commithash: str) -> typing.List[str]:

+     """ Returns the list of files changed in the specified commit. """

+     cmd = ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", commithash]

+     _log.debug(f"git_get_changed_files {' '.join(cmd)}")

+     return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")



+ def git_get_tags(path: str) -> typing.Mapping[str, str]:

+     """ Returns a dict containing for each commit tagged the corresponding tag. """

+     cmd = ["git", "show-ref", "--tags"]

+     _log.debug(f"git_get_tags {' '.join(cmd)}")

+     tags_list = run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n")


+     output = {}

+     for row in tags_list:

+         commit, name = row.split(" ", 1)

+         # we're only interested in the build/* tags

+         if name.startswith("refs/tags/build/"):

+             name = name.replace("refs/tags/build/", "")

+             output[commit] = unescape_tag(name)


+     return output



+ def nevrd_to_evr(nevrd: str) -> str:

+     """ Converts a name:epoch-version-release.dist_tag to epoch_version_release

+     so it can be inserted in the changelog.


+     If the nevrd provided has at least 2 "-" in it, the version and release

+     are extracted; otherwise it is only cleaned of any potential dist_tag.

+     """

+     if nevrd.count("-") >= 2:

+         version, release = nevrd.rsplit("-", 2)[1:]

+         # Append a "-" to the version to make it easier to concatenate later

+         version += "-"

+     else:

+         version = ""

+         release = nevrd

+     release = re.sub(r"\.fc\d+", "", release)

+     release = re.sub(r"\.el\d+", "", release)

+     return f"{version}{release}"



+ def get_rpm_current_version(path: str, name: str) -> str:

+     """ Retrieve the current version set in the spec file named ``name``.spec

+     at the given path.

+     """

+     output = None

+     try:

+         output = (

+             run_command(["rpm", "--qf", "%{version}\n", "--specfile", f"{name}.spec"], cwd=path,)

+             .decode("UTF-8")

+             .strip()

+         )

+     except Exception:

+         pass

+     return output



+ def produce_changelog(repopath, latest_rel=None):

      name = os.path.basename(repopath)

-     with tempfile.TemporaryDirectory() as workdir:

+     with tempfile.TemporaryDirectory(prefix="rpmautospec-") as workdir:

          repocopy = f"{workdir}/{name}"

          shutil.copytree(repopath, repocopy)

+         _log.debug(f"Working directory: {repocopy}")

          lines = []

-         repo_obj = pygit2.Repository(repocopy)


-         branch = repo_obj.lookup_branch(repo_obj.head.shorthand)

-         commit = branch.peel(pygit2.Commit)

-         data = collections.defaultdict(list)

-         for commit in repo_obj.walk(commit.hex, pygit2.GIT_SORT_TIME):

-             if len(commit.parents) > 1:

+         # Get all the tags in the repo

+         tags = git_get_tags(repocopy)


+         # Get the latest commit in the repo

+         head = git_get_log(repocopy, log_options=["-1"])[0]

+         head_hash = head.split(" ", 1)[0]

+         head_info = git_get_commit_info(repocopy, head_hash)[0]

+         head_commit_dt = datetime.datetime.utcfromtimestamp(int(head_info.split("|", 3)[2]))


+         # Get the current version and build the version-release to be used

+         # for the latest entry in the changelog, if we can build it

+         current_evr = None

+         current_version = get_rpm_current_version(repocopy, name)

+         if current_version and latest_rel:

+             latest_rel = nevrd_to_evr(latest_rel)

+             current_evr = f"{current_version}-{latest_rel}"


+         stop_commit_hash = None

+         changelog = []

+         changelog_file = os.path.join(repocopy, "changelog")

+         if os.path.exists(changelog_file):

+             stop_commit = git_get_log(repocopy, log_options=["-1"], target="changelog")

+             if stop_commit:

+                 stop_commit_hash = stop_commit[0].split(" ", 1)[0]

+             with open(changelog_file) as stream:

+                 changelog = [r.rstrip() for r in stream.readlines()]


+         output = []

+         entry = []

+         nevr = current_evr or "LATEST"

+         last_author = None

+         for log_line in git_get_log(repocopy, toref=f"{stop_commit_hash}^"):

+             if not log_line.strip():

+                 continue

+             commit = log_line.split(" ", 1)[0]


+             info = git_get_commit_info(repocopy, commit)

+             if len(info) > 1:

                  # Ignore merge commits

+                 _log.debug(f"commit {commit} is a merge commit, skipping")



-             commit_dt = datetime.datetime.utcfromtimestamp(commit.commit_time)

-             if commit_dt < (datetime.datetime.utcnow() - datetime.timedelta(days=730)):

+             _, commithash, commit_ts, author_info, commit_summary = info[0].split("|", 4)


+             if commithash in tags:

+                 output.append(entry)

+                 entry = []

+                 nevr = nevrd_to_evr(tags[commithash])


+             commit_dt = datetime.datetime.utcfromtimestamp(int(commit_ts))

+             if commit_dt < (head_commit_dt - datetime.timedelta(days=730)):

                  # Ignore all commits older than 2 years

+                 # if there is a `changelog` file in addition to these commits

+                 # they will be cut down anyway when the RPM gets built, so

+                 # the gap between the commits we are gathering here and the

+                 # ones in the `changelog` file can be ignored.

+                 # print(f"commit {commit} is too old, breaking iteration")



-             repo_obj.checkout_tree(

-                 commit, strategy=pygit2.GIT_CHECKOUT_FORCE | pygit2.GIT_CHECKOUT_RECREATE_MISSING,

-             )

-             if os.path.exists(os.path.join(repocopy, f"{name}.spec")):

-                 try:

-                     output = run_command(

-                         [

-                             "rpm",

-                             "--qf",

-                             "%{name}  %{version}  %{release}\n",

-                             "--specfile",

-                             f"{name}.spec",

-                         ],

-                         cwd=repocopy,

-                     )

-                 except Exception:

-                     continue

-                 output = tuple(

-                     output.decode("utf-8").strip().split("\n")[0].rsplit(".", 1)[0].split("  "),

-                 )

-                 nvr = "-".join(output)


-                 if commit.parents:

-                     diff = repo_obj.diff(commit.parents[0], commit)

+             files_changed = git_get_changed_files(repocopy, commit)

+             ignore = True

+             for filename in files_changed:

+                 if filename.endswith((".spec", ".patch")):

+                     ignore = False


+             if not ignore:

+                 if last_author == author_info:

+                     entry[-1]["commits"].append(commit_summary)


-                     # First commit in the repo

-                     diff = commit.tree.diff_to_tree(swap=True)


-                 if diff.stats.files_changed:

-                     files_changed = [d.new_file.path for d in diff.deltas]

-                     ignore = True

-                     for filename in files_changed:

-                         if filename.endswith((".spec", ".patch")):

-                             ignore = False

-                     if not ignore:

-                         data[output].append(commit)

+                     entry.append(

+                         {

+                             "commit": commit,

+                             "commit_ts": commit_ts,

+                             "commit_author": author_info,

+                             "commits": [commit_summary],

+                             "nevr": nevr,

+                         }

+                     )

+                 last_author = author_info


-                 print("No more spec file, bailing")

-                 break

+                 _log.debug(f"commit {commit} is not changing a file of interest, ignoring")


-     for nvr, commits in data.items():

-         for idx, commit in enumerate(reversed(commits)):

-             last_commit = idx + 1 == len(commits)

-             commit_dt = datetime.datetime.utcfromtimestamp(commit.commit_time)

-             wrapper = textwrap.TextWrapper(width=75, subsequent_indent="  ")

-             message = wrapper.fill(commit.message.split("\n")[0].strip("- "))


-             if last_commit:

-                 lines += [

-                     f"* {commit_dt.strftime('%a %b %d %Y')} {commit.author.name}"

-                     f" <{commit.author.email}> - {nvr[1]}-{nvr[2]}",

-                 ]

-             else:

-                 lines += [

-                     f"* {commit_dt.strftime('%a %b %d %Y')} {commit.author.name}"

-                     f" <{commit.author.email}>",

-                 ]

-             lines += ["- %s" % message]

+         # Last entries

+         output.append(entry)


+     wrapper = textwrap.TextWrapper(width=75, subsequent_indent="  ")

+     for entries in output:

+         for commit in entries:

+             commit_dt = datetime.datetime.utcfromtimestamp(int(commit["commit_ts"]))

+             author_info = commit["commit_author"]

+             nevr = commit["nevr"]

+             lines += [

+                 f"* {commit_dt.strftime('%a %b %d %Y')} {author_info} - {nevr}",

+             ]

+             for message in reversed(commit["commits"]):

+                 if message.strip():

+                     lines += ["- %s" % wrapper.fill(message.strip())]

              lines += [""]


+     # Add the existing changelog if there is one

+     lines.extend(changelog)

      return lines



@@ -1,4 +1,4 @@ 

- import filecmp

+ import io

  import os

  import shutil

  import tarfile
@@ -158,4 +158,6 @@ 

                      expected_spec_file_path = tmpspec.name

                      self.fuzz_spec_file(expected_spec_file_path, autorel_case, autochangelog_case)


-                 assert filecmp.cmp(unprocessed_spec_file_path, expected_spec_file_path)

+                 assert list(io.open(unprocessed_spec_file_path)) == list(

+                     io.open(expected_spec_file_path)

+                 )

In this commit, we are dropping the dependency on pygit2.
Fixes https://pagure.io/Fedora-Infra/rpmautospec/issue/10

We also take into account the presence and content of the
changelog file in the git repo: the changelog is generated
automatically only back to the latest commit that updated this
changelog file, and the content of that file is appended after it.

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>

Let's call this something more specific than log (which clashes with the name of the logger object in many other projects), perhaps prefix all these functions running git with git_. Also, let's use typing hints rather than putting that info in the docstring. E.g.:

import typing
def git_log(
    path: str,
    log_options: typing.Optional[typing.List[str]] = None,
    toref: typing.Optional[str] = None,
    target: typing.Optional[str] = None,
) -> typing.List[str]:

This isn't very informative if I don't know what these CLI options mean. Can we describe the format in which the commit logs are returned?

BTW, I just ran this command in our repo and it printed ref names if commits had one (e.g. branch names). Perhaps we should add --no-decorate?

Going with the above:

def git_get_commit_info(path: str, commithash: str) -> typing.List[str]:

Maybe: "Returns the commit logs for the repo in path."?

Likewise (and use plural files):

def git_get_changed_files(path: str, commithash: str) -> typing.List[str]:

Should we read this from repopath instead, i.e. reflect any working copy changes for use in fedpkg and the like?

This trips over commit logs containing non-Unicode entries. Perhaps use .decode("UTF-8", "replace")? I didn't notice this for gimp with the pygit2 variant because it only processed the last 2 years of commits.

And we could use git log ... --since=... to limit the changelog to the last two years.

Renamed to git_get_log, and type hints added as well.

Added a description of the output format, as well as --no-decorate.

Let's look into this in another PR but yes I think it would be good. Do you want to open a ticket for this so we don't forget?

Can we make this file a little less generic? Like <package>.changelog or something like that? If I have multiple spec files in the same git repo for the same sources, I'd like to be able to handle this relatively cleanly.

