From 72b034412effdd4c9fabd3c1b0c3170837490237 Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: May 25 2021 15:15:45 +0000 Subject: Add visitor, use processor for changelog Fixes: #116, #117, #142, #150 Signed-off-by: Nils Philippsen --- diff --git a/python-rpmautospec.spec b/python-rpmautospec.spec index 6337644..67ff261 100644 --- a/python-rpmautospec.spec +++ b/python-rpmautospec.spec @@ -52,10 +52,11 @@ Summary: %{summary} %{?python_provide:%python_provide python3-%{srcname}} Requires: koji -Requires: git-core -Requires: python3-rpm Requires: python3-koji Requires: python3-pygit2 +Requires: rpm +# for "rpm --specfile" +Requires: rpm-build >= 4.9 %description -n python3-%{srcname} %_description @@ -71,10 +72,6 @@ Requires: python3-pygit2 %package -n %{srcname} Summary: CLI tool for generating RPM releases and changelogs Requires: python3-%{srcname} = %{version}-%{release} -# We add this require here and not in python3-rpmautospec because we do not want -# it on the builders, the builders plugins will work fine without it but we -# need this in the chroot or when packagers run the CLI on their machines. -Requires: rpm-build >= 4.9 %description -n %{srcname} CLI tool for generating RPM releases and changelogs @@ -150,6 +147,9 @@ install -m 644 rpm/macros.d/macros.rpmautospec %{buildroot}%{rpmmacrodir}/ %endif %changelog +* Wed May 19 2021 Nils Philippsen +- remove git-core, fix RPM related dependencies + * Wed May 12 2021 Nils Philippsen - depend on python3-pygit2 diff --git a/rpmautospec/changelog.py b/rpmautospec/changelog.py index 481ee82..5f4bac3 100644 --- a/rpmautospec/changelog.py +++ b/rpmautospec/changelog.py @@ -1,17 +1,10 @@ -#!/usr/bin/python3 -import datetime import logging -import os -import re -import shutil -import tempfile -import textwrap -import typing +from typing import Any, Dict, Optional, Union -from .misc import get_rpm_current_version, parse_evr, rpmvercmp_key, run_command +from .pkg_history import PkgHistoryProcessor -_log = logging.getLogger(__name__) +log = logging.getLogger(__name__) def register_subcommand(subparsers): @@ -27,229 +20,36 @@ def register_subcommand(subparsers): return subcmd_name -def git_get_log( - path: str, - log_options: typing.Optional[typing.List[str]] = None, - toref: typing.Optional[str] = None, - target: typing.Optional[str] = None, -) -> typing.List[str]: - """Returns the list of the commit logs for the repo in ``path`` . - - This method runs the system's `git log --pretty=oneline --abbrev-commit` - command. - - This command returns git log as follow: - - - - ... - - :kwarg log_options: options to pass to git log - :kwarg toref: a reference/commit to use when generating the log - :kwarg target: the target of the git log command, can be a ref, a - file or nothing - - """ - cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit", "--no-decorate"] - if log_options: - cmd.extend(log_options) - if toref: - cmd.append(f"{toref}..") - if target: - cmd.extend(["--", target]) - - _log.debug("git_get_log(): %s", cmd) - return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n") - - -def git_get_commit_info(path: str, commithash: str) -> typing.List[str]: - """This function calls `git show --no-patch --format="%P %ct"` on the - specified commit and returns the output from git - """ - cmd = ["git", "show", "--no-patch", "--format=%P|%H|%ct|%aN <%aE>|%s", commithash] - _log.debug("git_get_commit_info: %s", cmd) - return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n") - - -def git_get_changed_files(path: str, commithash: str) -> typing.List[str]: - """Returns the list of files changed in the specified commit.""" - cmd = ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", commithash] - _log.debug("git_get_changed_files: %s", cmd) - return run_command(cmd, cwd=path).decode("UTF-8").strip().split("\n") - - -def nevrd_to_evr(nevrd: str) -> str: - """Converts a name:epoch-version-release.dist_tag to epoch_version_release - so it can be inserted in the changelog. - - If the nevrd provided does not have at least 2 "-" in it, otherwise - it will be just be cleaned for any potential dist_tag. - """ - if nevrd.count("-") >= 2: - version, release = nevrd.rsplit("-", 2)[1:] - # Append a "-" to the version to make it easier to concatenate later - version += "-" +def _coerce_to_str(str_or_bytes: Union[str, bytes]) -> str: + if isinstance(str_or_bytes, bytes): + str_or_bytes = str_or_bytes.decode("utf-8", errors="replace") + return str_or_bytes + + +def _coerce_to_bytes(str_or_bytes: Union[str, bytes]) -> str: + if isinstance(str_or_bytes, str): + str_or_bytes = str_or_bytes.encode("utf-8") + return str_or_bytes + + +def collate_changelog( + processor_results: Dict[str, Any], result_type: Optional[type] = str +) -> Union[str, bytes]: + changelog = processor_results["changelog"] + if result_type == str: + entry_strings = (_coerce_to_str(entry["data"]) for entry in changelog) else: - version = "" - release = nevrd - release = re.sub(r"\.fc\d+", "", release) - release = re.sub(r"\.el\d+", "", release) - return f"{version}{release}" - - -def produce_changelog(repopath, latest_rel=None): - name = os.path.basename(repopath) - with tempfile.TemporaryDirectory(prefix="rpmautospec-") as workdir: - repocopy = f"{workdir}/{name}" - shutil.copytree(repopath, repocopy) - _log.debug("Working directory: %s", repocopy) - lines = [] - - # FIXME: We don't do tags anymore - tags = [] - - # Get the latest commit in the repo - head = git_get_log(repocopy, log_options=["-1"])[0] - head_hash = head.split(" ", 1)[0] - head_info = git_get_commit_info(repocopy, head_hash)[0] - head_commit_dt = datetime.datetime.utcfromtimestamp(int(head_info.split("|", 3)[2])) - - # Get the current version and build the version-release to be used - # for the latest entry in the changelog, if we can build it - current_evr = None - current_version = get_rpm_current_version(repocopy, name) - if current_version and latest_rel: - latest_rel = nevrd_to_evr(latest_rel) - current_evr = f"{current_version}-{latest_rel}" - - stop_commit_hash = None - changelog = [] - changelog_file = os.path.join(repocopy, "changelog") - if os.path.exists(changelog_file): - stop_commit = git_get_log(repocopy, log_options=["-1"], target="changelog") - if stop_commit: - stop_commit_hash = stop_commit[0].split(" ", 1)[0] - with open(changelog_file) as stream: - changelog = [r.rstrip() for r in stream.readlines()] - - output = [] - entry = [] - evr = current_evr or "LATEST" - last_author = None - toref = None - if stop_commit_hash: - toref = f"{stop_commit_hash}^" - for log_line in git_get_log(repocopy, toref=toref): - if not log_line.strip(): - continue - commit = log_line.split(" ", 1)[0] - - info = git_get_commit_info(repocopy, commit) - if len(info) > 1: - # Ignore merge commits - _log.debug("commit %s is a merge commit, skipping", commit) - continue - - _, commithash, commit_ts, author_info, commit_summary = info[0].split("|", 4) - author_info = author_info.replace("%", "%%") - commit_summary = commit_summary.replace("%", "%%") - - # FIXME: new algo - if commithash in tags: - _log.debug("Tags for the commit: %s: %s", commithash, tags[commithash]) - output.append(entry) - entry = [] - # Use the most recent build for EVR - builds = [] - for b in tags[commithash]: - _epo, _ver, _rel = parse_evr(nevrd_to_evr(b)) - builds.append({"epoch": _epo, "version": _ver, "release": _rel}) - _log.debug("Builds to sort: %s", builds) - if len(builds) > 1: - builds.sort(key=rpmvercmp_key, reverse=True) - - build = builds[0] - if build["epoch"]: - evr = f"{build['epoch']}:{build['version']}-{build['release']}" - else: - evr = f"{build['version']}-{build['release']}" - - commit_dt = datetime.datetime.utcfromtimestamp(int(commit_ts)) - if commit_dt < (head_commit_dt - datetime.timedelta(days=730)): - # Ignore all commits older than 2 years - # if there is a `changelog` file in addition to these commits - # they will be cut down anyway when the RPM gets built, so - # the gap between the commits we are gathering here and the - # ones in the `changelog` file can be ignored. - _log.debug("commit %s is too old, breaking iteration", commit) - break - - files_changed = git_get_changed_files(repocopy, commit) - ignore = True - for filename in files_changed: - if filename.endswith( - ( - ".automount", - ".device", - ".mount", - ".patch", - ".path", - ".pc", - ".preset", - ".scope", - ".service", - ".slice", - ".socket", - ".spec", - ".swap", - ".target", - ".timer", - ) - ): - ignore = False - - if not ignore: - if last_author == author_info and entry: - entry[-1]["commits"].append(commit_summary) - else: - entry.append( - { - "commit": commit, - "commit_ts": commit_ts, - "commit_author": author_info, - "commits": [commit_summary], - "evr": evr, - } - ) - last_author = author_info - else: - _log.debug("commit %s is not changing a file of interest, ignoring", commit) - - # Last entries - output.append(entry) - - wrapper = textwrap.TextWrapper(width=75, subsequent_indent=" ") - for entries in output: - for commit in entries: - commit_dt = datetime.datetime.utcfromtimestamp(int(commit["commit_ts"])) - author_info = commit["commit_author"] - evr = commit["evr"] - lines += [ - f"* {commit_dt.strftime('%a %b %d %Y')} {author_info} - {evr}", - ] - for message in reversed(commit["commits"]): - if message.strip(): - lines += ["- %s" % wrapper.fill(message.strip())] - lines += [""] - - # Add the existing changelog if there is one - lines.extend(changelog) - return lines + entry_strings = (_coerce_to_bytes(entry["data"]) for entry in changelog) + return "\n\n".join(entry_strings) + + +def produce_changelog(spec_or_repo): + processor = PkgHistoryProcessor(spec_or_repo) + result = processor.run(visitors=(processor.release_number_visitor, processor.changelog_visitor)) + return collate_changelog(result) def main(args): """Main method.""" - - repopath = args.worktree_path.rstrip(os.path.sep) - changelog = produce_changelog(repopath) - _log.info("\n".join(changelog)) + changelog = produce_changelog(args.worktree_path) + log.info(changelog) diff --git a/rpmautospec/misc.py b/rpmautospec/misc.py index 1a32d9c..485451a 100644 --- a/rpmautospec/misc.py +++ b/rpmautospec/misc.py @@ -1,14 +1,10 @@ -from functools import cmp_to_key -import logging import re import subprocess from pathlib import Path from typing import Optional -from typing import Tuple from typing import Union import koji -import rpm # The %autorelease macro including parameters. This is imported into the main package to be used @@ -16,32 +12,10 @@ import rpm AUTORELEASE_MACRO = "autorelease(e:s:hp)" AUTORELEASE_SENTINEL = "__AUTORELEASE_SENTINEL__" -evr_re = re.compile(r"^(?:(?P\d+):)?(?P[^-:]+)(?:-(?P[^-:]+))?$") autochangelog_re = re.compile(r"\s*%(?:autochangelog|\{\??autochangelog\})\s*") -rpmvercmp_key = cmp_to_key( - lambda b1, b2: rpm.labelCompare( - (str(b1["epoch"]), b1["version"], b1["release"]), - (str(b2["epoch"]), b2["version"], b2["release"]), - ), -) - _kojiclient = None -_log = logging.getLogger(__name__) - - -def parse_evr(evr_str: str) -> Tuple[int, str, Optional[str]]: - match = evr_re.match(evr_str) - - if not match: - raise ValueError(evr_str) - - epoch = match.group("epoch") or 0 - epoch = int(epoch) - - return epoch, match.group("version"), match.group("release") - def get_rpm_current_version( path: str, name: Optional[str] = None, with_epoch: bool = False @@ -102,22 +76,6 @@ def koji_init(koji_url_or_session: Union[str, koji.ClientSession]) -> koji.Clien return _kojiclient -def run_command(command: list, cwd: Optional[str] = None) -> bytes: - """Run the specified command in a specific working directory if one - is specified. - """ - output = None - try: - output = subprocess.check_output(command, cwd=cwd, stderr=subprocess.PIPE) - except subprocess.CalledProcessError as e: - _log.error("Command `%s` return code: `%s`", " ".join(command), e.returncode) - _log.error("stdout:\n-------\n%s", e.stdout) - _log.error("stderr:\n-------\n%s", e.stderr) - raise - - return output - - def specfile_uses_rpmautospec( specfile: str, check_autorelease: bool = True, check_autochangelog: bool = True ) -> bool: diff --git a/rpmautospec/pkg_history.py b/rpmautospec/pkg_history.py index 967738b..408778d 100644 --- a/rpmautospec/pkg_history.py +++ b/rpmautospec/pkg_history.py @@ -1,8 +1,11 @@ +import datetime as dt import logging from collections import defaultdict +from fnmatch import fnmatchcase from functools import lru_cache, reduce from pathlib import Path from tempfile import TemporaryDirectory +from textwrap import TextWrapper from typing import Any, Dict, Optional, Sequence, Union import pygit2 @@ -14,6 +17,15 @@ log = logging.getLogger(__name__) class PkgHistoryProcessor: + + changelog_ignore_patterns = [ + ".gitignore", + # no need to ignore "changelog" explicitly + "gating.yaml", + "sources", + "tests/*", + ] + def __init__(self, spec_or_path: Union[str, Path]): if isinstance(spec_or_path, str): spec_or_path = Path(spec_or_path) @@ -103,6 +115,138 @@ class PkgHistoryProcessor: yield commit_result + @staticmethod + def _files_changed_in_diff(diff: pygit2.Diff): + files = set() + for delta in diff.deltas: + if delta.old_file: + files.add(delta.old_file.path) + if delta.new_file: + files.add(delta.new_file.path) + return files + + def changelog_visitor(self, commit: pygit2.Commit, children_must_continue: bool): + """Visit a commit to generate changelog entries for it and its parents. + + It first determines if parent chain(s) must be followed, i.e. if the + changelog file was modified in this commit or all its children and yields that to the + caller, who later sends the partial results for this commit (to be + modified) and full results of parents back (as dictionaries), which + get processed and the results for this commit yielded again. + """ + # Check if the spec file exists, if not, there will be no changelog. + specfile_present = f"{self.name}.spec" in commit.tree + + # Find out if the changelog is different from every parent (or present, in the case of the + # root commit). + try: + changelog_blob = commit.tree["changelog"] + except KeyError: + changelog_blob = None + + # With root commits, changelog present means it was changed + changelog_changed = False if commit.parents else bool(changelog_blob) + for parent in commit.parents: + try: + par_changelog_blob = parent.tree["changelog"] + except KeyError: + par_changelog_blob = None + if changelog_blob == par_changelog_blob: + changelog_changed = False + + # Establish which parent to follow (if any, and if we can). + parent_to_follow = None + merge_unresolvable = False + if len(commit.parents) < 2: + if commit.parents: + parent_to_follow = commit.parents[0] + else: + for parent in commit.parents: + if commit.tree == parent.tree: + # Merge done with strategy "ours" or equivalent, i.e. (at least) one parent has + # the same content. Follow this parent + parent_to_follow = parent + break + else: + # Didn't break out of loop => no parent with same tree found. If the changelog + # is different from all parents, it was updated in the merge commit and we don't + # care. If it didn't change, we don't know how to continue and need to flag that. + merge_unresolvable = not changelog_changed + + commit_result, parent_results = yield ( + not (changelog_changed or merge_unresolvable) + and specfile_present + and children_must_continue + ) + + changelog_entry = { + "commit-id": commit.id, + } + + changelog_author = f"{commit.author.name} <{commit.author.email}>" + changelog_date = dt.datetime.utcfromtimestamp(commit.commit_time).strftime("%a %b %d %Y") + changelog_evr = f"{commit_result['epoch-version']}-{commit_result['release-number']}" + changelog_header = f"* {changelog_date} {changelog_author} {changelog_evr}" + + if not specfile_present: + # no spec file => start fresh + commit_result["changelog"] = () + elif merge_unresolvable: + changelog_entry["data"] = f"{changelog_header}\n- RPMAUTOSPEC: unresolvable merge" + changelog_entry["error"] = "unresolvable merge" + previous_changelog = () + commit_result["changelog"] = (changelog_entry,) + elif changelog_changed: + if changelog_blob: + # Don't decode, we'll paste as is. + changelog_entry["data"] = changelog_blob.data + else: + # Changelog removed. Oops. + changelog_entry[ + "data" + ] = f"{changelog_header}\n- RPMAUTOSPEC: changelog file removed" + changelog_entry["error"] = "changelog file removed" + commit_result["changelog"] = (changelog_entry,) + else: + # Pull previous changelog entries from parent result (if any). + if len(commit.parents) == 1: + previous_changelog = parent_results[0].get("changelog", ()) + else: + previous_changelog = () + for candidate in parent_results: + if candidate["commit-id"] == parent_to_follow: + previous_changelog = candidate.get("changelog", ()) + break + + # Check if this commit should be considered for the RPM changelog. + if parent_to_follow: + diff = parent_to_follow.tree.diff_to_tree(commit.tree) + else: + diff = commit.tree.diff_to_tree(swap=True) + changed_files = self._files_changed_in_diff(diff) + # Skip if no files changed (i.e. commit solely for changelog/build) or if any files are + # not to be ignored. + skip_for_changelog = changed_files and all( + any(fnmatchcase(f, pat) for pat in self.changelog_ignore_patterns) + for f in changed_files + ) + + if not skip_for_changelog: + commit_subject = commit.message.split("\n")[0].strip() + if commit_subject.startswith("-"): + commit_subject = commit_subject[1:].lstrip() + if not commit_subject: + commit_subject = "RPMAUTOSPEC: empty commit log subject after stripping" + changelog_entry["error"] = "empty commit log subject" + wrapper = TextWrapper(width=75, subsequent_indent=" ") + wrapped_msg = wrapper.fill(f"- {commit_subject}") + changelog_entry["data"] = f"{changelog_header}\n{wrapped_msg}" + commit_result["changelog"] = (changelog_entry,) + previous_changelog + else: + commit_result["changelog"] = previous_changelog + + yield commit_result + def run( self, head: Optional[Union[str, pygit2.Commit]] = None, diff --git a/tests/rpmautospec/test_misc.py b/tests/rpmautospec/test_misc.py index dc94f99..dccb0a9 100644 --- a/tests/rpmautospec/test_misc.py +++ b/tests/rpmautospec/test_misc.py @@ -1,7 +1,5 @@ import logging import os -import subprocess -from unittest import mock import pytest @@ -13,29 +11,6 @@ __here__ = os.path.dirname(__file__) class TestMisc: """Test the rpmautospec.misc module""" - @mock.patch("rpmautospec.misc.subprocess.check_output") - @pytest.mark.parametrize("raise_exception", (False, True)) - def test_run_command(self, check_output, raise_exception, caplog): - """Test run_command()""" - caplog.set_level(logging.DEBUG) - - if not raise_exception: - check_output.return_value = "Some output" - assert misc.run_command(["command"]) == "Some output" - check_output.assert_called_once_with(["command"], cwd=None, stderr=subprocess.PIPE) - assert not any(rec.levelno >= logging.WARNING for rec in caplog.records) - else: - check_output.side_effect = subprocess.CalledProcessError( - returncode=139, - cmd=["command"], - output="Some command", - stderr="And it failed!", - ) - with pytest.raises(subprocess.CalledProcessError) as excinfo: - misc.run_command(["command"]) - assert str(excinfo.value) == "Command '['command']' returned non-zero exit status 139." - assert any(rec.levelno == logging.ERROR for rec in caplog.records) - def test_specfile_uses_rpmautospec_no_macros(self, caplog): """Test no macros on specfile_uses_rpmautospec()""" caplog.set_level(logging.DEBUG) diff --git a/tests/rpmautospec/test_pkg_history.py b/tests/rpmautospec/test_pkg_history.py index 0f067dc..62b2b27 100644 --- a/tests/rpmautospec/test_pkg_history.py +++ b/tests/rpmautospec/test_pkg_history.py @@ -210,7 +210,7 @@ class TestPkgHistoryProcessor: res = processor.run( *args, - visitors=[processor.release_number_visitor], + visitors=[processor.release_number_visitor, processor.changelog_visitor], all_results=all_results, ) @@ -224,3 +224,15 @@ class TestPkgHistoryProcessor: assert res["commit-id"] == head_commit.id assert res["release-number"] == 2 + + changelog = res["changelog"] + top_entry = changelog[0] + + assert top_entry["commit-id"] == head_commit.id + for snippet in ( + "Jane Doe ", + "- Did nothing!", + ): + assert snippet in top_entry["data"] + + assert all("error" not in entry for entry in changelog)