From 2e297b09740df9ef031c91e8c2e7dbede91f63b5 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Jan 16 2017 16:18:36 +0000 Subject: Make updating git repos a locked process This way we should prevent two processes/requests updating the git repo at the same time and potentially running into each other which may lead to a broken history in the repo where object are pushed incorrectly. --- diff --git a/pagure/lib/git.py b/pagure/lib/git.py index a27e083..f460416 100644 --- a/pagure/lib/git.py +++ b/pagure/lib/git.py @@ -24,6 +24,7 @@ import subprocess import tempfile import arrow +import filelock import pygit2 import werkzeug @@ -169,80 +170,87 @@ def update_git(obj, repo, repofolder): # Get the fork repopath = os.path.join(repofolder, repo.path) + lockfile = '%s.lock' % repopath + + lock = filelock.FileLock(lockfile) + with lock: + + # Clone the repo into a temp folder + newpath = tempfile.mkdtemp(prefix='pagure-') + new_repo = pygit2.clone_repository(repopath, newpath) + + file_path = os.path.join(newpath, obj.uid) + + # Get the current index + index = new_repo.index + + # Are we adding files + added = False + if not os.path.exists(file_path): + added = True + + # Write down what changed + with open(file_path, 'w') as stream: + stream.write(json.dumps( + obj.to_json(), sort_keys=True, indent=4, + separators=(',', ': '))) + + # Retrieve the list of files that changed + diff = new_repo.diff() + files = [] + for patch in diff: + if hasattr(patch, 'new_file_path'): + files.append(patch.new_file_path) + elif hasattr(patch, 'delta'): + files.append(patch.delta.new_file.path) + + # Add the changes to the index + if added: + index.add(obj.uid) + for filename in files: + index.add(filename) + + # If not change, return + if not files and not added: + shutil.rmtree(newpath) + os.unlink(lockfile) + return - # Clone the repo into a temp folder - newpath = tempfile.mkdtemp(prefix='pagure-') - new_repo = pygit2.clone_repository(repopath, newpath) - - file_path = os.path.join(newpath, obj.uid) - - # Get the current index - index = new_repo.index - - # Are we adding files - added = False - if not os.path.exists(file_path): - added = True - - # Write down what changed - with open(file_path, 'w') as stream: - stream.write(json.dumps( - obj.to_json(), sort_keys=True, indent=4, - separators=(',', ': '))) - - # Retrieve the list of files that changed - diff = new_repo.diff() - files = [] - for patch in diff: - if hasattr(patch, 'new_file_path'): - files.append(patch.new_file_path) - elif hasattr(patch, 'delta'): - files.append(patch.delta.new_file.path) - - # Add the changes to the index - if added: - index.add(obj.uid) - for filename in files: - index.add(filename) - - # If not change, return - if not files and not added: - shutil.rmtree(newpath) - return - - # See if there is a parent to this commit - parent = None - try: - parent = new_repo.head.get_object().oid - except pygit2.GitError: - pass - - parents = [] - if parent: - parents.append(parent) - - # Author/commiter will always be this one - author = pygit2.Signature(name='pagure', email='pagure') - - # Actually commit - new_repo.create_commit( - 'refs/heads/master', - author, - author, - 'Updated %s %s: %s' % (obj.isa, obj.uid, obj.title), - new_repo.index.write_tree(), - parents) - index.write() - - # Push to origin - ori_remote = new_repo.remotes[0] - master_ref = new_repo.lookup_reference('HEAD').resolve() - refname = '%s:%s' % (master_ref.name, master_ref.name) + # See if there is a parent to this commit + parent = None + try: + parent = new_repo.head.get_object().oid + except pygit2.GitError: + pass + + parents = [] + if parent: + parents.append(parent) + + # Author/commiter will always be this one + author = pygit2.Signature(name='pagure', email='pagure') + + # Actually commit + new_repo.create_commit( + 'refs/heads/master', + author, + author, + 'Updated %s %s: %s' % (obj.isa, obj.uid, obj.title), + new_repo.index.write_tree(), + parents) + index.write() + + # Push to origin + ori_remote = new_repo.remotes[0] + master_ref = new_repo.lookup_reference('HEAD').resolve() + refname = '%s:%s' % (master_ref.name, master_ref.name) - PagureRepo.push(ori_remote, refname) + PagureRepo.push(ori_remote, refname) # Remove the clone shutil.rmtree(newpath) + # Remove the lock file + os.unlink(lockfile) def clean_git(obj, repo, repofolder): @@ -835,84 +843,93 @@ def update_file_in_git( # Get the fork repopath = pagure.get_repo_path(repo) - # Clone the repo into a temp folder - newpath = tempfile.mkdtemp(prefix='pagure-') - new_repo = pygit2.clone_repository( - repopath, newpath, checkout_branch=branch) + lockfile = '%s.lock' % repopath - file_path = os.path.join(newpath, filename) + lock = filelock.FileLock(lockfile) + with lock: - # Get the current index - index = new_repo.index + # Clone the repo into a temp folder + newpath = tempfile.mkdtemp(prefix='pagure-') + new_repo = pygit2.clone_repository( + repopath, newpath, checkout_branch=branch) - # Write down what changed - with open(file_path, 'w') as stream: - stream.write(content.replace('\r', '').encode('utf-8')) - - # Retrieve the list of files that changed - diff = new_repo.diff() - files = [] - for patch in diff: - if hasattr(patch, 'new_file_path'): - files.append(patch.new_file_path) - elif hasattr(patch, 'delta'): - files.append(patch.delta.new_file.path) - - # Add the changes to the index - added = False - for filename in files: - added = True - index.add(filename) - - # If not change, return - if not files and not added: - shutil.rmtree(newpath) - return + file_path = os.path.join(newpath, filename) - # See if there is a parent to this commit - branch_ref = get_branch_ref(new_repo, branch) - parent = branch_ref.get_object() + # Get the current index + index = new_repo.index - # See if we need to create the branch - nbranch_ref = None - if branchto not in new_repo.listall_branches(): - nbranch_ref = new_repo.create_branch(branchto, parent) + # Write down what changed + with open(file_path, 'w') as stream: + stream.write(content.replace('\r', '').encode('utf-8')) - parents = [] - if parent: - parents.append(parent.hex) + # Retrieve the list of files that changed + diff = new_repo.diff() + files = [] + for patch in diff: + if hasattr(patch, 'new_file_path'): + files.append(patch.new_file_path) + elif hasattr(patch, 'delta'): + files.append(patch.delta.new_file.path) - # Author/commiter will always be this one - author = pygit2.Signature( - name=user.username.encode('utf-8'), - email=email.encode('utf-8') - ) + # Add the changes to the index + added = False + for filename in files: + added = True + index.add(filename) - # Actually commit - new_repo.create_commit( - nbranch_ref.name if nbranch_ref else branch_ref.name, - author, - author, - message.strip(), - new_repo.index.write_tree(), - parents) - index.write() + # If not change, return + if not files and not added: + shutil.rmtree(newpath) + os.unlink(lockfile) + return + + # See if there is a parent to this commit + branch_ref = get_branch_ref(new_repo, branch) + parent = branch_ref.get_object() + + # See if we need to create the branch + nbranch_ref = None + if branchto not in new_repo.listall_branches(): + nbranch_ref = new_repo.create_branch(branchto, parent) + + parents = [] + if parent: + parents.append(parent.hex) + + # Author/commiter will always be this one + author = pygit2.Signature( + name=user.username.encode('utf-8'), + email=email.encode('utf-8') + ) - # Push to origin - ori_remote = new_repo.remotes[0] - refname = '%s:refs/heads/%s' % ( - nbranch_ref.name if nbranch_ref else branch_ref.name, - branchto) + # Actually commit + new_repo.create_commit( + nbranch_ref.name if nbranch_ref else branch_ref.name, + author, + author, + message.strip(), + new_repo.index.write_tree(), + parents) + index.write() + + # Push to origin + ori_remote = new_repo.remotes[0] + refname = '%s:refs/heads/%s' % ( + nbranch_ref.name if nbranch_ref else branch_ref.name, + branchto) - try: - PagureRepo.push(ori_remote, refname) - except pygit2.GitError as err: # pragma: no cover - shutil.rmtree(newpath) - raise pagure.exceptions.PagureException( - 'Commit could not be done: %s' % err) + try: + PagureRepo.push(ori_remote, refname) + except pygit2.GitError as err: # pragma: no cover + os.unlink(lockfile) + shutil.rmtree(newpath) + raise pagure.exceptions.PagureException( + 'Commit could not be done: %s' % err) # Remove the clone shutil.rmtree(newpath) + # Remove the lock file + os.unlink(lockfile) return os.path.join('files', filename)