From 67ebe16f7249a4f84bb8a53ab406d21e4bf4963a Mon Sep 17 00:00:00 2001 From: mprahl Date: Jan 11 2019 21:24:18 +0000 Subject: Read the files to be hashed as binary to ensure end of lines are not converted When encountering a Windows end of line (^M), io.open and open in Python 3 will convert those to UNIX end of lines by default. When reading logs to compute the checksum, it's important those new lines aren't converted, to ensure the checksum is correct. This caused issues in Fedora staging because when cloning down a repo, the repoSpanner output had Windows end of lines, and this would end up in build.log. The solution is to just read it as binary so that Python doesn't perform these conversions. --- diff --git a/module_build_service/builder/KojiContentGenerator.py b/module_build_service/builder/KojiContentGenerator.py index 138e11c..fe63863 100644 --- a/module_build_service/builder/KojiContentGenerator.py +++ b/module_build_service/builder/KojiContentGenerator.py @@ -404,8 +404,8 @@ class KojiContentGenerator(object): # parse it to get the Modulemd instance. mmd_path = os.path.join(output_path, mmd_filename) try: - with open(mmd_path) as mmd_f: - data = mmd_f.read() + with open(mmd_path, 'rb') as mmd_f: + data = mmd_f.read().decode('utf-8') mmd = Modulemd.Module().new_from_string(data) ret['filename'] = mmd_filename ret['filesize'] = len(data) @@ -452,8 +452,8 @@ class KojiContentGenerator(object): try: log_path = os.path.join(output_path, "build.log") - with open(log_path) as build_log: - checksum = hashlib.md5(build_log.read().encode('utf-8')).hexdigest() + with open(log_path, 'rb') as build_log: + checksum = hashlib.md5(build_log.read()).hexdigest() stat = os.stat(log_path) ret.append( {