#482 (new-)sources should fail with git tracked files
Merged 4 years ago by onosek. Opened 4 years ago by onosek.
onosek/rpkg stage_check  into  master

file modified
+13 -1
@@ -46,7 +46,7 @@ 

                             rpkgAuthError, rpkgError)

  from pyrpkg.lookaside import CGILookasideCache

  from pyrpkg.sources import SourcesFile

- from pyrpkg.utils import cached_property, find_me, log_result

+ from pyrpkg.utils import cached_property, find_me, is_file_tracked, log_result

  

  from .gitignore import GitIgnore

  
@@ -2065,6 +2065,11 @@ 

  

          for entry in sourcesf.entries:

              outfile = os.path.join(outdir, entry.file)

+             if is_file_tracked(outfile, outdir):

+                 raise rpkgError(

+                     "Error: Attempting a download '{0}' that would override a git tracked file. "

+                     "Either remove the corresponding line from 'sources' file to keep the git "

+                     "tracked one or 'git rm' the file to allow the download.".format(outfile))

              self.lookasidecache.download(

                  self.ns_repo_name if self.lookaside_namespaced else self.repo_name,

                  entry.file, entry.hash, outfile,
@@ -2842,6 +2847,13 @@ 

                  raise rpkgError(msg)

  

              gitignore.add('/%s' % file_basename)

+             if is_file_tracked(f, self.path):  # need full file path

+                 raise rpkgError(

+                     "Error: Attempting to upload a git tracked file '{0}'. Only upload files not "

+                     "tracked by git. You can use 'git rm --cached' to untrack a file from git.\n"

+                     "Hint: Use git for text files like the spec file, patches or helper scripts. "

+                     "Use the lookaside cache for binary blobs, usually upstream source "

+                     "tarballs.".format(f))

              self.lookasidecache.upload(

                  self.ns_repo_name if self.lookaside_namespaced else self.repo_name,

                  f, file_hash, offline=offline)

file modified
+82
@@ -17,6 +17,7 @@ 

  import os

  import sys

  

+ import git

  import six

  

  if six.PY3:
@@ -156,3 +157,84 @@ 

      if 'handler_reference' not in globals():

          handler_reference = koji_watch_tasks_handler

      return handler_reference

+ 

+ 

+ def is_file_tracked(file_path, repo_path):

+     """

+     Finds out whether input file is currently tracked in the Git repository

+ 

+     :param file_path: path to a file that should be checked (relative or absolute)

+     :type file_path: str

+     :param repo_path: path to Git repository (relative or absolute)

+     :type repo_path: str

+     :return: is file staged in git repo

+     :rtype: bool

+     """

+     if not file_path:

+         raise ValueError("empty file path")

+     if not repo_path:

+         raise ValueError("empty repo path")

+ 

+     # file can be external and file like this is not tracked

+     relative_file_path = is_file_in_directory(file_path, repo_path)

+     if not relative_file_path:

+         return False

+ 

+     # create a repo object from our path

+     try:

+         repo = git.Repo(repo_path)

+     except (git.InvalidGitRepositoryError, git.NoSuchPathError):

+         raise RuntimeError("%s is not a valid repo" % repo_path)

+ 

+     if relative_file_path in repo.untracked_files:

+         return False

+ 

+     # search entries for the file name

+     for entry in repo.index.entries.keys():

+         (entry_file_name, _) = entry

+         if entry_file_name == relative_file_path:

+             return True

+ 

+     return False

+ 

+ 

+ def is_file_in_directory(file_path, dir_path):

+     """

+     Compares two different paths - file and dictionary.

+     Method doesn't check whether files exist.

+     :param file_path: relative or absolute path to the file

+     :type file_path: str

+     :param file_path: relative or absolute path to the directory

+     :type file_path: str

+     :return: file path relative to the dictionary if the file is inside

+         of the directory otherwise None

+     :rtype: str or None

+     """

+     try:

+         real_file_path = os.path.realpath(file_path)

+         real_dir_path = os.path.realpath(dir_path)

+     except TypeError:

+         print(

+             "Wrong value of the file name(s): ({0}, {1})".format(

+                 file_path, dir_path

+             ),

+             file=sys.stderr

+         )

+         raise

+ 

+     # file is definitely outside of the repository

+     if len(real_dir_path) > len(real_file_path):

+         return

+ 

+     # this case is not defined

+     if real_file_path == real_dir_path:

+         raise ValueError("Wrong input, paths are the same.")

+ 

+     # paths have common prefix that equals to dir path -> file is inside the directory

+     # NOTE: there is more suitable method 'os.path.commonpath' since Python 3.5.

+     # It returns valid path.

+     if os.path.commonprefix((real_file_path, real_dir_path)) == real_dir_path:

+         # what is the file name relative to the directory

+         # (length of filename is safely longer than length of directory)

+         return real_file_path[len(real_dir_path):].strip("/")

+     return

file modified
+5 -3
@@ -1279,6 +1279,7 @@ 

              with patch('pyrpkg.lookaside.CGILookasideCache.upload', new=self.lookasidecache_upload):

                  cli.upload()

  

+         # git tracked files are not allowed to be uploaded to lookaside cache

          readme_rst = os.path.join(self.cloned_repo_path, 'README.rst')

          self.make_changes(filename=readme_rst, content='# dockpkg', commit=True)

  
@@ -1286,13 +1287,14 @@ 

          with patch('sys.argv', new=cli_cmd):

              cli = self.new_cli()

              with patch('pyrpkg.lookaside.CGILookasideCache.upload', new=self.lookasidecache_upload):

-                 cli.upload()

+                 six.assertRaisesRegex(

+                     self, rpkgError,

+                     r'/README.rst.+upload files not tracked by git',

+                     cli.upload)

  

          expected_sources_content = [

              '{0}  {1}'.format(self.hash_file(self.readme_patch),

                                os.path.basename(self.readme_patch)),

-             '{0}  {1}'.format(self.hash_file(readme_rst),

-                               os.path.basename(readme_rst)),

              ]

          self.assertEqual(expected_sources_content,

                           self.read_file(self.sources_file).strip().split('\n'))

file modified
+93 -1
@@ -1,9 +1,18 @@ 

+ import os

+ import tempfile

  import unittest

  import warnings

  

  import mock

  

- from pyrpkg.utils import cached_property, log_result, warn_deprecated

+ from pyrpkg.utils import (

+     cached_property,

+     is_file_in_directory,

+     is_file_tracked,

+     log_result,

+     warn_deprecated,

+ )

+ from utils import CommandTestCase

  

  

  class CachedPropertyTestCase(unittest.TestCase):
@@ -196,3 +205,86 @@ 

          ]

          log_result(self.log_func, obj)

          self.assertEqual(self.logs, expected)

+ 

+ 

+ class FileInDirectoryTestCase(unittest.TestCase):

+     def test_is_file_in_directory(self):

+         expected = (

+             # file, directory, expected result

+             ("/repo/fedpkg/sources", "/repo/fedpkg", "sources"),

+             ("/repo/fedpkg/subdir/sources", "/repo/fedpkg", "subdir/sources"),

+             ("/repo/fedpkg/sources/", "/repo/fedpkg", "sources"),

+             ("/repo/fedpkg/sources", "/repo/fedpkg/", "sources"),

+             ("/repo/file", "/", "repo/file"),

+             ("/file", "/", "file"),

+             ("sources", ".", "sources"),

+             ("data/file", ".", "data/file"),

+             ("dir/myrepo/sources", "dir/myrepo", "sources"),

+ 

+             ("/sources", "/repo/fedpkg", None),

+             ("/", "bad_dir_name", None),

+             ("./", "./a", None),

+             ("./b", "./a/", None),

+             ("sources", "./dir", None),

+             ("f", "./dir", None),

+             ("file", "dir/myrepo", None),

+             ("a/bbb", "/a/b", None),

+             ("a/b", "/a/bbb", None),

+         )

+         for num, case in enumerate(expected):

+             (file_path, dir_path, expected_result) = case

+             self.assertEqual(

+                 is_file_in_directory(file_path, dir_path),

+                 expected_result,

+                 "The case num {0} has failed".format(num),

+             )

+ 

+ 

+ class FileTrackedTestCase(CommandTestCase):

+     def test_file_outside_of_repo(self):

+         # check file outside of the test repo

+         self.assertFalse(

+             is_file_tracked("external_filename", self.repo_path)

+         )

+ 

+     def test_actual_file_in_repo(self):

+         # check actual file from the test repo

+         self.assertTrue(

+             is_file_tracked(

+                 os.path.join(self.repo_path, "sources"),

+                 self.repo_path

+             )

+         )

+ 

+     def test_newly_created_file_in_repo(self):

+         # check newly created file in the test repo

+         temp_file = tempfile.NamedTemporaryFile(dir=self.repo_path)

+         self.assertFalse(

+             is_file_tracked(temp_file.name, self.repo_path)

+         )

+         temp_file.close()

+ 

+     def test_newly_added_file_to_stage(self):

+         # check newly added file to stage in the test repo

+         temp_file = tempfile.NamedTemporaryFile(dir=self.repo_path)

+         temp_file_basename = os.path.basename(temp_file.name)

+         self.run_cmd(["git", "add", temp_file_basename], cwd=self.repo_path)

+         self.assertTrue(

+             is_file_tracked(temp_file.name, self.repo_path)

+         )

+         temp_file.close()

+ 

+     def test_not_existing_file(self):

+         # check not existing file

+         self.assertFalse(

+             is_file_tracked("/file_doesnt_exist", self.repo_path)

+         )

+ 

+     def test_not_existing_file_should_belong_to_repo(self):

+         # check not existing file that should belong to the test repo

+         self.assertFalse(

+             is_file_tracked(

+                 os.path.join(self.repo_path, "file_doesnt_exist"),

+                 self.repo_path

+             )

+         )

Implements additional protection (except .gitignore warning) that
prevents overwrite staged files when x-pkg sources is run. And forbids
uploading staged files that may not belong to lookaside cache.

Fixes: https://pagure.io/fedpkg/issue/241
JIRA: COMPOSE-2689

Signed-off-by: Ondrej Nosek onosek@redhat.com

I am gonna add some test before merge.

Is there some reason to not support files in subdirectories? It's not a big deal, since those should not conflict with anything in lookaside, I'm just curious.

This is a great feature!

Currently it will only catch situation when the file is modified and git add was called on it. I think it should also check if the file is generally known to git.

Thanks @lsedlar, I considered your comments. I will look at all (including subdirectories and external) files whether they are tracked in Git repository or not. The new method should also recognize Git tracked files better.

rebased onto cb7789ae2a9d541b231bec16fea4ca79491a19a1

4 years ago

rebased onto 8216735a0945177e1f4e3bccfb1631a45028da3b

4 years ago

Should this be an exception or at least go to stderr?

Maybe use assertTrue and assertFalse?

How about splitting this into one case per method to make sure all problems are detected? Right now if first check fails, nothing else will be done.

A couple minor comments, but the patch looks good to me. :thumbsup:

rebased onto 8f9dbd8

4 years ago

I modified the code as per your comments.

Pull-Request has been merged by onosek

4 years ago