#967 use correct fileinfo checksum field
Merged 4 years ago by mikem. Opened 4 years ago by tkopecek.
tkopecek/koji issue966  into  master

file modified
+40
@@ -21,6 +21,7 @@ 

  from __future__ import absolute_import

  import calendar

  import datetime

+ import hashlib

  from koji.xmlrpcplus import DateTime

  from fnmatch import fnmatch

  import koji
@@ -31,6 +32,7 @@ 

  import resource

  import shutil

  import stat

+ import struct

  import sys

  import time

  import six.moves.configparser
@@ -494,6 +496,44 @@ 

      digest_algo = koji.RPM_FILEDIGESTALGO_IDS.get(digest_algo_id, 'unknown')

      return digest_algo.lower()

  

+ 

+ def check_sigmd5(filename):

+     """Compare header's sigmd5 with actual md5 of hdr+payload without need of rpm"""

+     with open(filename, 'rb') as f:

+         leadsize = 96

+         # skip magic + reserved

+         o = leadsize + 8

+         f.seek(o)

+         data = f.read(8)

+         indexcount, storesize = struct.unpack('!II', data)

+         sig_o = 0

+         for idx in range(indexcount):

+             data = f.read(16)

+             tag, data_type, offset, count = struct.unpack('!IIII', data)

+             if tag == 1004: # SIGMD5

+                 assert(data_type == 7) # binary data

+                 assert(count == 16)   # 16 bytes of md5

+                 break

+         # seek to location of md5

+         f.seek(o + 8 + indexcount * 16 + offset)

+         sigmd5 = f.read(16)

+ 

+         # seek to start of header

+         sigsize = 8 + 16 * indexcount + storesize

+         o += sigsize + (8 - (sigsize % 8)) % 8

+         f.seek(o)

+ 

+         # compute md5 of rest of file

+         md5 = hashlib.md5()

+         while True:

+             d = f.read(1024**2)

+             if not d:

+                 break

+             md5.update(d)

+ 

+         return sigmd5 == md5.digest()

+ 

+ 

  def parseStatus(rv, prefix):

      if isinstance(prefix, list) or isinstance(prefix, tuple):

          prefix = ' '.join(prefix)

file modified
+27 -18
@@ -297,28 +297,37 @@ 

          """Set the buildroot object to expired on the hub."""

          self.server.expireBuildroot(self.buildroot_id)

  

-     def fetchFile(self, basedir, buildinfo, fileinfo, type):

+     def fetchFile(self, basedir, buildinfo, fileinfo, brtype):

          """Download the file from buildreq, at filepath, into the basedir"""

          destpath = os.path.join(basedir, fileinfo['localpath'])

          ensuredir(os.path.dirname(destpath))

-         destfile = open(destpath, 'w')

-         offset = 0

-         checksum = hashlib.md5()

-         while True:

-             encoded = self.server.getFile(buildinfo, fileinfo, encode_int(offset), 1048576, type)

-             if not encoded:

-                 break

-             data = base64.b64decode(encoded)

-             del encoded

-             destfile.write(data)

-             offset += len(data)

-             checksum.update(data)

-         destfile.close()

-         digest = checksum.hexdigest()

+         if 'checksum_type' in fileinfo:

+             if fileinfo['checksum_type'] == 'sha1':

+                 checksum = hashlib.sha1()

+             elif fileinfo['checksum_type'] == 'sha256':

+                 checksum = hashlib.sha256()

+             elif fileinfo['checksum_type'] == 'md5':

+                 checksum = hashlib.md5()

+             else:

+                 raise BuildError('Unknown checksum type %s for %f' % (

+                         fileinfo['checksum_type'],

+                         os.path.basename(fileinfo['localpath'])))

+         with open(destpath, 'w') as destfile:

+             offset = 0

+             while True:

+                 encoded = self.server.getFile(buildinfo, fileinfo, encode_int(offset), 1048576, brtype)

+                 if not encoded:

+                     break

+                 data = base64.b64decode(encoded)

+                 del encoded

+                 destfile.write(data)

+                 offset += len(data)

+                 if 'checksum_type' in fileinfo:

+                     checksum.update(data)

          # rpms don't have a md5sum in the fileinfo, but check it for everything else

-         if ('md5sum' in fileinfo) and (digest != fileinfo['md5sum']):

-             raise BuildError('md5 checksum validation failed for %s, %s (computed) != %s (provided)' % \

-                   (destpath, digest, fileinfo['md5sum']))

+         if 'checksum' in fileinfo and fileinfo['checksum'] != checksum.hexdigest():

+             raise BuildError('checksum validation failed for %s, %s (computed) != %s (provided)' % \

+                   (destpath, checksum.hexdigest(), fileinfo['checksum']))

          self.logger.info('Retrieved %s (%s bytes, md5: %s)', destpath, offset, digest)

  

      def fetchBuildReqs(self):

file modified
+33 -19
@@ -25,6 +25,7 @@ 

  from koji.daemon import SCM, TaskManager

  from koji.tasks import ServerExit, ServerRestart, BaseTaskHandler, MultiPlatformTask

  from koji.tasks import RestartTask, RestartVerifyTask

+ import hashlib

  import sys

  import logging

  import os
@@ -678,17 +679,29 @@ 

                  raise koji.BuildError('unsupported file type: %s' % type)

              koji.ensuredir(os.path.dirname(localpath))

              with closing(requests.get(remote_url, stream=True)) as response:

-                 f = open(localpath, 'wb')

-                 length = response.headers.get('content-length')

-                 if length is None:

-                     f.write(response.content)

-                 else:

-                     for chunk in response.iter_content(chunk_size=65536):

-                         f.write(chunk)

-                 f.close()

- 

+                 with open(localpath, 'wb') as f:

+                     length = response.headers.get('content-length')

+                     if length is None:

+                         f.write(response.content)

+                     else:

+                         for chunk in response.iter_content(chunk_size=65536):

+                             f.write(chunk)

+             if type == 'rpm':

+                 # rpm, check sigmd5. It is enough, as if content is broken,

+                 # rpm will fail later

+                 hdr = koji.get_rpm_header(localpath)

+                 payloadhash = koji.hex_string(hdr[rpm.RPMTAG_SIGMD5])

+                 if fileinfo['payloadhash'] != payloadhash:

+                     raise koji.BuildError("Downloaded rpm %s doesn't match checksum (expected: %s, got %s)" % (

+                         os.path.basename(fileinfo['localpath']),

+                         fileinfo['payloadhash'], payloadhash))

+                 if not koji.util.check_sigmd5(localpath):

+                     raise koji.BuildError("Downloaded rpm %s doesn't match sigmd5" % \

+                         os.path.basename(fileinfo['localpath']))

+             else:

+                 self.verifyChecksum(localpath, fileinfo['checksum'], koji.CHECKSUM_TYPES[fileinfo['checksum_type']], localpath)

  

-         return file(localpath, 'r')

+         return open(localpath, 'r')

  

      def getFile(self, buildinfo, archiveinfo, offset, length, type):

          """
@@ -749,19 +762,20 @@ 

              raise koji.BuildError('%s does not exist' % local_path)

  

          if algo == 'sha1':

-             sum = koji.util.sha1_constructor()

+             sum = hashlib.sha1()

          elif algo == 'md5':

-             sum = koji.util.md5_constructor()

+             sum = hashlib.md5()

+         elif algo == 'sha256':

+             sum == hashlib.sha256()

          else:

              raise koji.BuildError('unsupported checksum algorithm: %s' % algo)

  

-         fobj = file(local_path, 'r')

-         while True:

-             data = fobj.read(1048576)

-             if not data:

-                 break

-             sum.update(data)

-         fobj.close()

+         with file(local_path, 'r') as f:

+             while True:

+                 data = f.read(1048576)

+                 if not data:

+                     break

+                 sum.update(data)

          if sum.hexdigest() == checksum:

              return True

          else:

This code should more explicitly distinguish between the rpm case and the archive case, rather than just relying on the checksum field.

We should also need to check the checksum_type field. Even if we don't want to support types besides md5 here, we should be checking the value.

The comment about rpms seems to need an update. It says, "but check it for everything else", yet we don't seem to check much of anything for rpms here.

Lack of checks for rpms is a blind spot. We do have the sigmd5 value, but we can't rely on having rpmlib inside the windows vm. I'm wondering if we could do that check in kojivmd instead (in localCache()), outside the vm. Granted, maybe this could be a separate PR.

rebased onto 3d8b036

4 years ago

no true - rpm needn't be used at all

This bit has me wondering about a few things. I guess this is needed if the server doesn't return a content length? Is this really the right thing? What if the server uses chunked transfer encoding? Worth an explanatory comment at least.

this isn't actually checking the embedded md5sum. koji.get_rpm_header just pulls out the data, it sets the rpms flags to disable signature and digest checks. If we want to verify here, we need to so something different.

no true - rpm needn't be used at all

I'm not sure I see what you're getting at with this comment. Could you elaborate?

1 new commit added

  • check_sigmd5 for kojivmd
4 years ago
  1. Content-Length - It is coming from older code. I've created separate issue #982
  2. checking md5sum and my comment are about same thing. I've thought, that in all builds will be rpm binary used in some time, so I just checked that header is in line with db. In runtime rpm would fail on wrong sigmd5 vs header. But many builds just put rpm file in zip or similar things and never touch it, so corrupted file wouldn't be detected. I've added new commit which really checks the content.

2 new commits added

  • check_sigmd5 for kojivmd
  • use correct fileinfo checksum field
4 years ago

+def check_sigmd5(filename):

It's certainly not too complicated to do this, but I'm a little concerned about adding even more rpm parsing directly into Koji. The rpm team keep suggesting that future changes may break code like this.

Also, I'm not sure why this is needed. You're using it kojivmd, which has rpmlib available to it.

@mikem Do you know about some way how to do it with rpmlib? I'm not able to find any verify method for that. I need to read binary header and payload and I don't think I'm able to extract it. Reading hedaer in TransactionSet simply doesn't raise any error regardless of setVSFlags

I've also wanted to limit rpm usages due to #953.

Looking at this again.

https://github.com/mikem23/koji-playground/commits/pagure/pr/967

Added a unit test, fixed some small issues, and rebased

Commit 6cce316 fixes this pull-request

Pull-Request has been merged by mikem

4 years ago