#318 [extra-files] Write a metadata file enumerating extra files
Closed 7 years ago by lsedlar. Opened 7 years ago by jcline.
jcline/pungi extra-files-metadata  into  master

file modified
+1
@@ -13,3 +13,4 @@ 

  Joel Andres Granados <jgranado at redhat dot com>

  <proski at fedoraproject dot org>

  Mark McLoughlin <markmc at redhat dot com>

+ Jeremy Cline <jcline at redhat dot com>

file modified
+29 -1
@@ -714,6 +714,34 @@ 

      ]

  

  

+ Extra Files Metadata

+ --------------------

+ If extra files are specified, a metadata file, ``extra_files.json``, is placed

+ in the ``os/`` directory and on media. This metadata file has the following format:

+ 

+ ::

+ 

+     {

+       "header": {"version": "1.0},

+       "data": [

+         {

+           "file": "GPL",

+           "checksums": {

+             "sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643"

+           },

+           "size": 18092

+         },

+         {

+           "file": "release-notes/notes.html",

+           "checksums": {

+             "sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb"

+           },

+           "size": 1120

+         }

+       ]

+     }

+ 

+ 

  Productimg Settings

  ===================

  Product images are placed on installation media and provide additional branding
@@ -1235,7 +1263,7 @@ 

      This feature becomes useful when you need to transform the compose location

      into e.g. an HTTP repo which can be passed to ``koji image-build``.

      The ``path`` part is normalized via ``os.path.normpath()``.

-     

+ 

  

  Example config

  --------------
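
As an aside for reviewers: a minimal consumer-side sketch (not part of this change) showing how a tree could be verified against ``extra_files.json``. The function name and tree path are hypothetical::

    import hashlib
    import json
    import os

    def verify_extra_files(tree_path):
        """Return the files listed in extra_files.json whose recorded
        size or digest does not match what is on disk."""
        with open(os.path.join(tree_path, 'extra_files.json')) as fd:
            metadata = json.load(fd)

        failures = []
        for entry in metadata['data']:
            path = os.path.join(tree_path, entry['file'])
            if os.path.getsize(path) != entry['size']:
                failures.append(entry['file'])
                continue
            # Check every digest recorded for the file; this change only
            # writes sha256, but the format allows several.
            for algo, digest in entry['checksums'].items():
                h = hashlib.new(algo)
                with open(path, 'rb') as f:
                    for chunk in iter(lambda: f.read(8192), b''):
                        h.update(chunk)
                if h.hexdigest() != digest:
                    failures.append(entry['file'])
        return failures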

file modified
+45
@@ -17,6 +17,7 @@ 

  

  import os

  import time

+ import json

  

  import productmd.composeinfo

  import productmd.treeinfo
@@ -25,6 +26,7 @@ 

  

  from pungi.compose_metadata.discinfo import write_discinfo as create_discinfo

  from pungi.compose_metadata.discinfo import write_media_repo as create_media_repo

+ from pungi import util as pungi_util

  

  

  def get_description(compose, variant, arch):
@@ -317,3 +319,46 @@ 

      path = os.path.join(compose.paths.compose.os_tree(arch=arch, variant=variant), ".treeinfo")

      compose.log_info("Writing treeinfo: %s" % path)

      ti.dump(path)

+ 

+ 

+ def write_extra_files(tree_path, files, checksum_type='sha256', logger=None):

+     """

+     Write the metadata for all extra files added to the compose.

+ 

+     :param tree_path:

+         Root of the tree to write the ``extra_files.json`` metadata file for.

+ 

+     :param files:

+         A list of files that should be included in the metadata file. These

+         should be paths that are relative to ``tree_path``.

+ 

+     :param checksum_type:

+         The digest algorithm to record for each file; defaults to ``sha256``.

+ 

+     :param logger:

+         Optional logger used to report progress.

+ 

+     :return:

+         Path to the metadata file written.

+     """

+     metadata_path = os.path.join(tree_path, 'extra_files.json')

+     if logger:

+         logger.info('Calculating content of {metadata}'.format(metadata=metadata_path))

+     metadata = {'header': {'version': '1.0'}, 'data': []}

+     for f in files:

+         if logger:

+             logger.debug('Processing {file}'.format(file=f))

+         path = os.path.join(tree_path, f)

+         checksum = pungi_util._doCheckSum(path, checksum_type, logger)

+         # _doCheckSum returns in the format <type>:<digest> _or_ False for failure

+         if checksum is False:

+             err = 'Failed to calculate the checksum for {file}.'.format(file=path)

+             raise RuntimeError(err)

+         checksum = checksum.split(':')[1]

+         entry = {

+             'file': f,

+             'checksums': {checksum_type: checksum},

+             'size': os.path.getsize(path),

+         }

+         metadata['data'].append(entry)

+ 

+     if logger:

+         logger.info('Writing {metadata}'.format(metadata=metadata_path))

+ 

+     with open(metadata_path, 'w') as fd:

+         json.dump(metadata, fd, sort_keys=True, indent=4, separators=(',', ': '))

+     return metadata_path
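
For context, a usage sketch of the new helper; the tree path and file list here are made up::

    # Hypothetical call site: the file list holds paths relative to the
    # tree root, e.g. as returned by copy_all() elsewhere in this series.
    tree = '/mnt/compose/Server/x86_64/os'
    copied = ['GPL', 'release-notes/notes.html']
    path = write_extra_files(tree, copied, checksum_type='sha256')
    # path == '/mnt/compose/Server/x86_64/os/extra_files.json'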

file modified
+6 -5
@@ -21,6 +21,7 @@ 

  

  from pungi.util import get_arch_variant_data, pkg_is_rpm, copy_all

  from pungi.arch import split_name_arch

+ from pungi import metadata

  from pungi.wrappers.scm import get_file_from_scm, get_dir_from_scm

  from pungi.phases.base import ConfigGuardedPhase

  
@@ -53,7 +54,7 @@ 

                                            % (arch, variant.uid))

  

  

- def copy_extra_files(compose, cfg, arch, variant, package_sets):

+ def copy_extra_files(compose, cfg, arch, variant, package_sets, checksum_type='sha256'):

      var_dict = {

          "arch": arch,

          "variant_id": variant.id,
@@ -84,12 +85,12 @@ 

              scm_dict["repo"] = rpms

  

          getter = get_file_from_scm if 'file' in scm_dict else get_dir_from_scm

-         getter(scm_dict,

-                os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')),

-                logger=compose._logger)

+         target_path = os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/'))

+         getter(scm_dict, target_path, logger=compose._logger)

  

      if os.listdir(extra_files_dir):

-         copy_all(extra_files_dir, os_tree)

+         files_copied = copy_all(extra_files_dir, os_tree)

+         metadata.write_extra_files(os_tree, files_copied, checksum_type, compose._logger)

  

      compose.log_info("[DONE ] %s" % msg)

  

file modified
+38 -1
@@ -509,7 +509,24 @@ 

  

  

  def copy_all(src, dest):

-     """This function is equivalent to running `cp src/* dest`."""

+     """

+     Copy all files and directories within ``src`` to the ``dest`` directory.

+ 

+     This is equivalent to running ``cp -r src/* dest``.

+ 

+     :param src:

+         Source directory to copy from.

+ 

+     :param dest:

+         Destination directory to copy to.

+ 

+     :return:

+         A list of relative paths to the files copied.

+ 

+     Example:

+         >>> copy_all('/tmp/src/', '/tmp/dest/')

+         ['file1', 'dir1/file2', 'dir1/subdir/file3']

+     """

      contents = os.listdir(src)

      if not contents:

          raise RuntimeError('Source directory %s is empty.' % src)
@@ -521,3 +538,23 @@ 

              shutil.copytree(source, destination)

          else:

              shutil.copy2(source, destination)

+ 

+     return recursive_file_list(src)

+ 

+ 

+ def recursive_file_list(directory):

+     """Return a list of files contained in ``directory``.

+ 

+     The files are paths relative to ``directory``.

+ 

+     :param directory:

+         Path to the directory to list.

+ 

+     Example:

+         >>> recursive_file_list('/some/dir')

+         ['file1', 'subdir/file2']

+     """

+     file_list = []

+     for root, dirs, files in os.walk(directory):

+         file_list += [os.path.relpath(os.path.join(root, f), directory) for f in files]

+     return file_list

file modified
+64 -2
@@ -212,6 +212,36 @@ 

  

  

  def get_file_from_scm(scm_dict, target_path, logger=None):

+     """

+     Copy one or more files from source control to a target path. A list of files

+     created in ``target_path`` is returned.

+ 

+     :param scm_dict:

+         A dictionary describing the source control repository; this can

+         optionally be a path to a directory on the local filesystem or reference

+         an RPM. Supported keys for the dictionary are ``scm``, ``repo``,

+         ``file``, and ``branch``. ``scm`` is the type of version control system

+         used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository

+         (or, if 'rpm' is the ``scm``, the package name), ``file`` is either a

+         path or list of paths to copy, and ``branch`` is the branch to check

+         out, if any.

+ 

+     :param target_path:

+         The destination path for the files being copied.

+ 

+     :param logger:

+         The logger to use for any logging performed.

+ 

+     Example:

+         >>> scm_dict = {

+         ...     'scm': 'git',

+         ...     'repo': 'https://pagure.io/pungi.git',

+         ...     'file': ['share/variants.dtd'],

+         ... }

+         >>> target_path = '/tmp/path/'

+         >>> get_file_from_scm(scm_dict, target_path)

+         ['/tmp/path/share/variants.dtd']

+     """

      if isinstance(scm_dict, str):

          scm_type = "file"

          scm_repo = None
@@ -225,14 +255,45 @@ 

  

      scm = _get_wrapper(scm_type, logger=logger)

  

+     files_copied = []

      for i in force_list(scm_file):

          tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_")

          scm.export_file(scm_repo, i, scm_branch=scm_branch, target_dir=tmp_dir)

-         copy_all(tmp_dir, target_path)

+         files_copied += copy_all(tmp_dir, target_path)

          shutil.rmtree(tmp_dir)

+     return files_copied

  

  

  def get_dir_from_scm(scm_dict, target_path, logger=None):

+     """

+     Copy a directory from source control to a target path. A list of files

+     created in ``target_path`` is returned.

+ 

+     :param scm_dict:

+         A dictionary describing the source control repository; this can

+         optionally be a path to a directory on the local filesystem or reference

+         an RPM. Supported keys for the dictionary are ``scm``, ``repo``,

+         ``dir``, and ``branch``. ``scm`` is the type of version control system

+         used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository

+         (or, if 'rpm' is the ``scm``, the package name), ``dir`` is the

+         directory to copy, and ``branch`` is the branch to check out, if any.

+ 

+     :param target_path:

+         The destination path for the directory being copied.

+ 

+     :param logger:

+         The logger to use for any logging performed.

+ 

+     Example:

+         >>> scm_dict = {

+         ...     'scm': 'git',

+         ...     'repo': 'https://pagure.io/pungi.git',

+         ...     'dir': 'share',

+         ... }

+         >>> target_path = '/tmp/path/'

+         >>> get_dir_from_scm(scm_dict, target_path)

+         ['/tmp/path/share/variants.dtd', '/tmp/path/share/rawhide-fedora.ks', ...]

+     """

      if isinstance(scm_dict, str):

          scm_type = "file"

          scm_repo = None
@@ -248,5 +309,6 @@ 

  

      tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_")

      scm.export_dir(scm_repo, scm_dir, scm_branch=scm_branch, target_dir=tmp_dir)

-     copy_all(tmp_dir, target_path)

+     files_copied = copy_all(tmp_dir, target_path)

      shutil.rmtree(tmp_dir)

+     return files_copied

@@ -81,7 +81,6 @@ 

          compose = helpers.DummyCompose(self.topdir, {})

          cfg = {'scm': 'file', 'dir': os.path.join(self.topdir, 'src'),

                 'repo': None, 'target': 'subdir'}

- 

          extra_files.copy_extra_files(compose, [cfg], 'x86_64',

                                       compose.variants['Server'], mock.Mock())

  
@@ -147,6 +146,7 @@ 

      def fake_get_file(self, scm_dict, dest, logger):

          self.scm_dict = scm_dict

          helpers.touch(os.path.join(dest, scm_dict['file']))

+         return [scm_dict['file']]

  

  

  if __name__ == "__main__":

file modified
+54 -1
@@ -1,6 +1,6 @@ 

  #!/usr/bin/env python2

  # -*- coding: utf-8 -*-

- 

+ import json

  import mock

  import unittest

  import os
@@ -158,5 +158,58 @@ 

          self.assertFalse(os.path.isfile(self.path))

  

  

+ class TestWriteExtraFiles(helpers.PungiTestCase):

+ 

+     def setUp(self):

+         super(TestWriteExtraFiles, self).setUp()

+         self.compose = helpers.DummyCompose(self.topdir, {})

+ 

+     def test_write_extra_files(self):

+         """Assert metadata is written to the proper location with valid data"""

+         mock_logger = mock.Mock()

+         files = ['file1', 'file2', 'subdir/file3']

+         expected_metadata = {

+             u'header': {u'version': u'1.0'},

+             u'data': [

+                 {

+                     u'file': u'file1',

+                     u'checksums': {u'sha256': u'ecdc5536f73bdae8816f0ea40726ef5e9b810d914493075903bb90623d97b1d8'},

+                     u'size': 6,

+                 },

+                 {

+                     u'file': u'file2',

+                     u'checksums': {u'sha256': u'67ee5478eaadb034ba59944eb977797b49ca6aa8d3574587f36ebcbeeb65f70e'},

+                     u'size': 6,

+                 },

+                 {

+                     u'file': u'subdir/file3',

+                     u'checksums': {u'sha256': u'52f9f0e467e33da811330cad085fdb4eaa7abcb9ebfe6001e0f5910da678be51'},

+                     u'size': 13,

+                 },

+             ]

+         }

+         tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os')

+         for f in files:

+             helpers.touch(os.path.join(tree_dir, f), f + '\n')

+ 

+         metadata_file = metadata.write_extra_files(tree_dir, files, logger=mock_logger)

+         with open(metadata_file) as metadata_fd:

+             actual_metadata = json.load(metadata_fd)

+ 

+         self.assertEqual(expected_metadata['header'], actual_metadata['header'])

+         self.assertEqual(expected_metadata['data'], actual_metadata['data'])

+ 

+     def test_write_extra_files_missing_file(self):

+         """Assert metadata is written to the proper location with valid data"""

+         mock_logger = mock.Mock()

+         files = ['file1', 'file2', 'subdir/file3']

+         tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os')

+         for f in files:

+             helpers.touch(os.path.join(tree_dir, f), f + '\n')

+         files.append('missing_file')

+ 

+         self.assertRaises(RuntimeError, metadata.write_extra_files, tree_dir, files, 'sha256', mock_logger)

+ 

+ 

  if __name__ == "__main__":

      unittest.main()

file modified
+27
@@ -363,5 +363,32 @@ 

          ])

  

  

+ class TestRecursiveFileList(unittest.TestCase):

+ 

+     def setUp(self):

+         self.tmp_dir = tempfile.mkdtemp()

+ 

+     def tearDown(self):

+         shutil.rmtree(self.tmp_dir)

+ 

+     def test_flat_file_list(self):

+         """Build a directory containing files and assert they are listed."""

+         expected_files = sorted(['file1', 'file2', 'file3'])

+         for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]:

+             touch(expected_file)

+ 

+         actual_files = sorted(util.recursive_file_list(self.tmp_dir))

+         self.assertEqual(expected_files, actual_files)

+ 

+     def test_nested_file_list(self):

+         """Build a directory containing files and assert they are listed."""

+         expected_files = sorted(['file1', 'subdir/file2', 'sub/subdir/file3'])

+         for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]:

+             touch(expected_file)

+ 

+         actual_files = sorted(util.recursive_file_list(self.tmp_dir))

+         self.assertEqual(expected_files, actual_files)

+ 

+ 

  if __name__ == "__main__":

      unittest.main()

The latest commit message describes the metadata format. This PR currently pulls in commits from lsedlar/test-metadata as I based my work off that branch. That branch is not yet merged, but once it is I can adjust this PR as necessary.

All commits need to be signed off. This series breaks the build; please look at the Jenkins results.

@ausil I think the failing tests are not a big deal at this point. I asked Jeremy to open this PR even though it is not complete yet. Do you have any opinion on the metadata it creates? The description is in the last commit message.

It will have no effect on Fedora, as extra_files are not used there.

We need to have the documentation updated to reflect the changes and configuration options added here.

rebased

7 years ago

Cool. I have two questions. Firstly, should there be a configuration option to skip writing the metadata? Secondly, when something goes wrong (a file is missing, unreadable, etc.), what should happen? I see some other phases raising RuntimeError, but I'm not sure what's appropriate in this case.

I'd say no to the config: additional metadata should not hurt anyone. The second problem is more interesting: if a file that should be there is missing or unreadable, I think that constitutes a major problem and the compose should be aborted. Raising any kind of exception is good enough; it will be caught by the global handler and a traceback will be generated.
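
To illustrate the agreed behavior, a sketch of the failure branch as it now exists in ``write_extra_files()``, assuming ``_doCheckSum()`` keeps returning ``False`` for missing or unreadable input::

    checksum = pungi_util._doCheckSum(path, checksum_type, logger)
    if checksum is False:
        # Abort the compose: the exception propagates to the global
        # handler, which logs a traceback and stops the run.
        raise RuntimeError('Failed to calculate the checksum for {file}.'.format(file=path))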

rebased

7 years ago

Okay, I've updated it to raise an exception if the file is missing/unreadable. All the unit tests pass for me as well.

rebased

7 years ago

@ralph, it is now. I wasn't aware of the purpose behind Signed-off-by, so thanks for the link!

rebased

7 years ago

Hey, any chance of moving forward with this?

@jcline From my point of view this is pretty much ready to go. I rebased it on master so it can be merged and opened it as #384.

Pull-Request has been closed by lsedlar

7 years ago