From a5065128daa218be52554bac0e9db824cf87aba1 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk
Date: Oct 01 2018 11:15:27 +0000
Subject: Implement HTTP pull/push


Merges #3784
Signed-off-by: Patrick Uiterwijk

---

diff --git a/doc/configuration.rst b/doc/configuration.rst
index 42cc82a..eb35b98 100644
--- a/doc/configuration.rst
+++ b/doc/configuration.rst
@@ -1456,6 +1456,38 @@ watching commits on a project upon commits.
 Defaults to: ``True``
 
 
+ALLOW_HTTP_PULL_PUSH
+~~~~~~~~~~~~~~~~~~~~
+
+This configuration key controls whether any HTTP access to repositories is
+provided via the Git HTTP support embedded in Pagure.
+This provides HTTP pull access via ``<pagureurl>/<reponame>.git`` if nothing
+else serves this URL.
+
+Defaults to: ``True``
+
+
+ALLOW_HTTP_PUSH
+~~~~~~~~~~~~~~~
+
+This configuration key controls whether pushing is possible via the HTTP interface.
+This is disabled by default, as it requires setting up an authentication mechanism
+on the webserver that sets ``REMOTE_USER``.
+
+Defaults to: ``False``
+
+
+HTTP_REPO_ACCESS_GITOLITE
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This configuration key configures the path to the gitolite-shell binary.
+If this is set to ``None``, Git http-backend is used directly.
+Only set this to ``None`` if you intend to provide HTTP push access via Pagure, and
+are using a dynamic ACL backend.
+
+Defaults to: ``/usr/share/gitolite3/gitolite-shell``
+
+
 RepoSpanner Options
 -------------
 
diff --git a/pagure/default_config.py b/pagure/default_config.py
index b1b9b5c..05b4c86 100644
--- a/pagure/default_config.py
+++ b/pagure/default_config.py
@@ -480,6 +480,14 @@ REACTIONS = [
 # This is used for faster indexing. Do not change.
 _REACTIONS_DICT = dict(REACTIONS)
 
+# HTTP pull/push options
+# Whether to allow Git HTTP proxying
+ALLOW_HTTP_PULL_PUSH = True
+# Whether to allow pushing via HTTP
+ALLOW_HTTP_PUSH = False
+# Path to Gitolite-shell if using that, None to use Git directly
+HTTP_REPO_ACCESS_GITOLITE = "/usr/share/gitolite3/gitolite-shell"
+
 # repoSpanner integration settings
 # Whether to create new repositories on repoSpanner by default.
 # Either None or a region name.
diff --git a/pagure/ui/__init__.py b/pagure/ui/__init__.py
index 1b791f6..8d40d07 100644
--- a/pagure/ui/__init__.py
+++ b/pagure/ui/__init__.py
@@ -17,6 +17,7 @@ UI_NS = flask.Blueprint("ui_ns", __name__)
 # Import the different controllers in the UI namespace/blueprint
 import pagure.config  # noqa: E402
 import pagure.ui.app  # noqa: E402
+from pagure.ui.clone import add_clone_proxy_cmds  # noqa: E402
 import pagure.ui.fork  # noqa: E402
 import pagure.ui.groups  # noqa: E402
 
@@ -29,6 +30,9 @@ if pagure.config.config["PAGURE_AUTH"] == "local":
     import pagure.ui.login  # noqa: E402
 
 
+add_clone_proxy_cmds()
+
+
 @UI_NS.errorhandler(404)
 def not_found(error):
     """404 Not Found page"""
diff --git a/pagure/ui/clone.py b/pagure/ui/clone.py
new file mode 100644
index 0000000..b89e807
--- /dev/null
+++ b/pagure/ui/clone.py
@@ -0,0 +1,249 @@
+# -*- coding: utf-8 -*-
+
+"""
+ (c) 2014-2018 - Copyright Red Hat Inc
+
+ Authors:
+   Patrick Uiterwijk
+
+"""
+
+from __future__ import unicode_literals
+
+import logging
+import subprocess
+import tempfile
+import os
+
+import flask
+import requests
+import werkzeug
+
+import pagure.exceptions
+import pagure.lib
+import pagure.lib.git
+import pagure.lib.mimetype
+import pagure.lib.plugins
+import pagure.lib.tasks
+import pagure.forms
+import pagure.ui.plugins
+from pagure.config import config as pagure_config
+from pagure.ui import UI_NS
+
+_log = logging.getLogger(__name__)
+
+
+def proxy_raw_git():
+    """ Proxy a request to Git or gitolite3 via a subprocess.
+
+    This should get called after it is determined the requested project
+    is not on repoSpanner.
+    """
+    # We shell out to gitolite-shell or git http-backend; prepare their env.
+    gitenv = {
+        # These are the vars git-http-backend needs
+        "PATH_INFO": flask.request.path,
+        "REMOTE_USER": flask.request.remote_user,
+        "REMOTE_ADDR": flask.request.remote_addr,
+        "CONTENT_TYPE": flask.request.content_type,
+        "QUERY_STRING": flask.request.query_string,
+        "REQUEST_METHOD": flask.request.method,
+        "GIT_PROJECT_ROOT": pagure_config["GIT_FOLDER"],
+        # We perform access checks, so can bypass that of Git
+        "GIT_HTTP_EXPORT_ALL": "true",
+        # This might be needed by hooks
+        "PAGURE_CONFIG": os.environ.get("PAGURE_CONFIG"),
+        "PYTHONPATH": os.environ.get("PYTHONPATH"),
+    }
+
+    gitolite = pagure_config["HTTP_REPO_ACCESS_GITOLITE"]
+    if gitolite:
+        gitenv.update(
+            {
+                # These are the additional vars gitolite needs
+                # Fun fact: REQUEST_URI is not even mentioned in RFC3875
+                "REQUEST_URI": flask.request.full_path,
+                "GITOLITE_HTTP_HOME": pagure_config["GITOLITE_HOME"],
+                "HOME": pagure_config["GITOLITE_HOME"],
+            }
+        )
+    elif flask.request.remote_user:
+        gitenv.update({"GL_USER": flask.request.remote_user})
+
+    # These keys are optional
+    for key in (
+        "REMOTE_USER",
+        "REMOTE_ADDR",
+        "CONTENT_TYPE",
+        "QUERY_STRING",
+        "PYTHONPATH",
+    ):
+        if not gitenv[key]:
+            del gitenv[key]
+
+    for key in gitenv:
+        if not gitenv[key]:
+            raise ValueError("Value for key %s unknown" % key)
+
+    if gitolite:
+        cmd = [gitolite]
+    else:
+        cmd = ["/usr/bin/git", "http-backend"]
+
+    # Note: using a temporary file to buffer the input contents
+    # is non-ideal, but it is a way to make sure we don't need to have
+    # the full input (which can be very long) in memory.
+    # Ideally, we'd directly stream, but that's an RFE for the future,
+    # since that needs to happen in other threads so as to not block.
+    # (See the warnings in the subprocess module)
+    with tempfile.SpooledTemporaryFile() as infile:
+        while True:
+            block = flask.request.stream.read(4096)
+            if not block:
+                break
+            infile.write(block)
+        infile.seek(0)
+
+        proc = subprocess.Popen(
+            cmd, stdin=infile, stdout=subprocess.PIPE, stderr=None, env=gitenv
+        )
+
+        out = proc.stdout
+
+        # First, gather the response head
+        headers = {}
+        while True:
+            line = out.readline()
+            if not line:
+                raise Exception("End of file while reading headers?")
+            # This strips the \n, meaning end-of-headers
+            line = line.strip()
+            if not line:
+                break
+            header = line.decode("utf-8").split(": ", 1)
+            headers[header[0].lower()] = header[1]
+
+        if len(headers) == 0:
+            raise Exception("No response at all received")
+
+        if "status" not in headers:
+            # If no status provided, assume 200 OK as per RFC3875
+            headers["status"] = "200 OK"
+
+        respcode, respmsg = headers.pop("status").split(" ", 1)
+        wrapout = werkzeug.wsgi.wrap_file(flask.request.environ, out)
+        return flask.Response(
+            wrapout,
+            status=int(respcode),
+            headers=headers,
+            direct_passthrough=True,
+        )
+
+
+def proxy_repospanner(project, service):
+    """ Proxy a request to repoSpanner.
+
+    Args:
+        project (model.Project): The project being accessed
+        service (String): The service as indicated by ?Service= in /info/refs
+    """
+    oper = os.path.basename(flask.request.path)
+    if oper == "refs":
+        oper = "info/refs?service=%s" % service
+    regionurl, regioninfo = project.repospanner_repo_info("main")
+    url = "%s/%s" % (regionurl, oper)
+
+    resp = requests.request(
+        flask.request.method,
+        url,
+        verify=regioninfo["ca"],
+        cert=(regioninfo["push_cert"]["cert"], regioninfo["push_cert"]["key"]),
+        data=flask.request.stream,
+        headers={
+            "Content-Type": flask.request.content_type,
+            "X-Extra-Username": flask.request.remote_user,
+            "X-Extra-Repotype": "main",
+            "X-Extra-project_name": project.name,
+            "x-Extra-project_user": project.user if project.is_fork else "",
+            "X-Extra-project_namespace": project.namespace,
+        },
+    )
+
+    # Strip out any headers that cause problems (tolerate their absence)
+    for name in ("transfer-encoding",):
+        resp.headers.pop(name, None)
+
+    return flask.Response(
+        resp.iter_content(chunk_size=128),
+        status=resp.status_code,
+        headers=dict(resp.headers),
+    )
+
+
+def clone_proxy(project, username=None, namespace=None):
+    """ Proxy the Git smart HTTP endpoints used for HTTP pull/push.
+
+    Note that for the clone endpoints, it's very explicit that <repo> has been
+    renamed to <project>, to avoid the automatic repo searching from flask_app.
+    This means that we have a chance to trust REMOTE_USER to verify the users'
+    access to the attempted repository.
+    """
+    if not pagure_config["ALLOW_HTTP_PULL_PUSH"]:
+        flask.abort(403, "HTTP pull/push is not allowed")
+
+    if flask.request.path.endswith("/info/refs"):
+        service = flask.request.args.get("service")
+        if not service:
+            # This is a Git client older than 1.6.6, and it doesn't work with
+            # the smart protocol. We do not support the old protocol via HTTP.
+            flask.abort(400, "Please switch to newer Git client")
+        if service not in ("git-upload-pack", "git-receive-pack"):
+            flask.abort(400, "Unknown service requested")
+
+    if "git-receive-pack" in flask.request.full_path:
+        if not pagure_config["ALLOW_HTTP_PUSH"]:
+            # Pushing (git-receive-pack) over HTTP is not allowed
+            flask.abort(403, "HTTP pushing disabled")
+        if not flask.request.remote_user:
+            # Anonymous pushing... nope
+            flask.abort(403, "Unauthenticated push not allowed")
+
+    project = pagure.lib.get_authorized_project(
+        flask.g.session,
+        project,
+        user=username,
+        namespace=namespace,
+        asuser=flask.request.remote_user,
+    )
+    if not project:
+        flask.abort(404, "Project not found")
+
+    if project.is_on_repospanner:
+        return proxy_repospanner(project, flask.request.args.get("service"))
+    else:
+        return proxy_raw_git()
+
+
+def add_clone_proxy_cmds():
+    """ This function adds flask routes for all possible clone paths.
+
+    This comes down to:
+    /(forks/<username>/)(<namespace>/)<project>(.git)
+    with an operation following, where operation is one of:
+    - /info/refs (generic)
+    - /git-upload-pack (pull)
+    - /git-receive-pack (push)
+    """
+    for prefix in (
+        "<project>",
+        "<namespace>/<project>",
+        "forks/<username>/<project>",
+        "forks/<username>/<namespace>/<project>",
+    ):
+        for suffix in ("", ".git"):
+            for oper in ("info/refs", "git-receive-pack", "git-upload-pack"):
+                route = "/%s%s/%s" % (prefix, suffix, oper)
+                methods = ("GET",) if oper == "info/refs" else ("POST",)
+                UI_NS.add_url_rule(
+                    route, view_func=clone_proxy, methods=methods
+                )
diff --git a/tests/test_pagure_flask_ui_clone.py b/tests/test_pagure_flask_ui_clone.py
new file mode 100644
index 0000000..095ee45
--- /dev/null
+++ b/tests/test_pagure_flask_ui_clone.py
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+
+"""
+ (c) 2015-2018 - Copyright Red Hat Inc
+
+ Authors:
+   Patrick Uiterwijk
+
+"""
+
+from __future__ import unicode_literals
+
+__requires__ = ['SQLAlchemy >= 0.8']
+import pkg_resources
+
+import datetime
+import unittest
+import shutil
+import sys
+import tempfile
+import os
+
+import six
+import json
+import pygit2
+from mock import patch, MagicMock
+
+sys.path.insert(0, os.path.join(os.path.dirname(
+    os.path.abspath(__file__)), '..'))
+
+import pagure.lib
+import tests
+
+
+class PagureFlaskAppClonetests(tests.Modeltests):
+    """ Tests for the clone bridging. """
+
+    def setUp(self):
+        super(PagureFlaskAppClonetests, self).setUp()
+
+        tests.create_projects(self.session)
+        tests.create_tokens(self.session)
+        tests.create_tokens_acl(self.session)
+        self.create_project_full('clonetest', {"create_readme": "y"})
+
+    @patch.dict('pagure.config.config', {'ALLOW_HTTP_PULL_PUSH': False})
+    def test_http_clone_disabled(self):
+        """ Test that the HTTP clone endpoint gets correctly closed. """
+        output = self.app.get('/clonetest.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 403)
+        self.assertIn('not allowed', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {'ALLOW_HTTP_PULL_PUSH': True})
+    def test_http_clone_invalid_service(self):
+        """ Test that the HTTP endpoint refuses invalid services. """
+        output = self.app.get('/clonetest.git/info/refs?service=myservice')
+        self.assertEqual(output.status_code, 400)
+        self.assertIn('Unknown service', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {'ALLOW_HTTP_PULL_PUSH': True})
+    def test_http_clone_invalid_project(self):
+        """ Test that the HTTP endpoint refuses invalid projects. """
+        output = self.app.get('/nosuchrepo.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 404)
+        self.assertIn('Project not found', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {'ALLOW_HTTP_PULL_PUSH': True})
+    def test_http_clone_dumb(self):
+        """ Test that the HTTP endpoint refuses dumb-protocol requests. """
+        output = self.app.get('/clonetest.git/info/refs')
+        self.assertEqual(output.status_code, 400)
+        self.assertIn('Please switch', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': False,
+        'HTTP_REPO_ACCESS_GITOLITE': None,
+    })
+    def test_http_push_disabled(self):
+        """ Test that the HTTP push gets refused. """
+        output = self.app.get('/clonetest.git/info/refs?service=git-receive-pack')
+        self.assertEqual(output.status_code, 403)
+        self.assertIn('pushing disabled', output.get_data(as_text=True))
+        output = self.app.post('/clonetest.git/git-receive-pack')
+        self.assertEqual(output.status_code, 403)
+        self.assertIn('pushing disabled', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': True,
+        'HTTP_REPO_ACCESS_GITOLITE': None,
+    })
+    def test_http_push_unauthed(self):
+        """ Test that the HTTP push gets refused unauthed. """
+        output = self.app.get('/clonetest.git/info/refs?service=git-receive-pack')
+        self.assertEqual(output.status_code, 403)
+        self.assertIn('Unauthenticated push', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {'ALLOW_HTTP_PULL_PUSH': True})
+    def test_http_clone_private_project_unauthed(self):
+        """ Test that the HTTP endpoint enforces project.private. """
+        project = pagure.lib._get_project(self.session, 'clonetest')
+        project.private = True
+        self.session.add(project)
+        self.session.commit()
+
+        output = self.app.get('/clonetest.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 404)
+        self.assertIn('Project not found', output.get_data(as_text=True))
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': False,
+        'HTTP_REPO_ACCESS_GITOLITE': None,
+    })
+    def test_http_clone(self):
+        """ Test that HTTP cloning gives reasonable output. """
+        # Unfortunately, actually testing a git clone would need the app to
+        # run on a TCP port, which the test environment doesn't do.
+
+        output = self.app.get('/clonetest.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 200)
+        output_text = output.get_data(as_text=True)
+        self.assertIn("# service=git-upload-pack", output_text)
+        self.assertIn("symref=HEAD:refs/heads/master", output_text)
+        self.assertIn(" refs/heads/master\n0000", output_text)
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': False,
+        'HTTP_REPO_ACCESS_GITOLITE': None,
+    })
+    def test_http_clone_private(self):
+        """ Test that HTTP cloning gives reasonable output with project.private. """
+        # Unfortunately, actually testing a git clone would need the app to
+        # run on a TCP port, which the test environment doesn't do.
+        project = pagure.lib._get_project(self.session, 'clonetest')
+        project.private = True
+        self.session.add(project)
+        self.session.commit()
+
+        output = self.app.get('/clonetest.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 404)
+        self.assertIn('Project not found', output.get_data(as_text=True))
+
+        output = self.app.get(
+            '/clonetest.git/info/refs?service=git-upload-pack',
+            environ_overrides={'REMOTE_USER': 'pingou'},
+        )
+        self.assertEqual(output.status_code, 200)
+        output_text = output.get_data(as_text=True)
+        self.assertIn("# service=git-upload-pack", output_text)
+        self.assertIn("symref=HEAD:refs/heads/master", output_text)
+        self.assertIn(" refs/heads/master\n0000", output_text)
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': True,
+        'HTTP_REPO_ACCESS_GITOLITE': None,
+    })
+    def test_http_push(self):
+        """ Test that the HTTP push gets accepted. """
+        output = self.app.get(
+            '/clonetest.git/info/refs?service=git-receive-pack',
+            environ_overrides={'REMOTE_USER': 'pingou'},
+        )
+        self.assertEqual(output.status_code, 200)
+        output_text = output.get_data(as_text=True)
+        self.assertIn("# service=git-receive-pack", output_text)
+        self.assertIn(" refs/heads/master\x00", output_text)
diff --git a/tests/test_pagure_repospanner.py b/tests/test_pagure_repospanner.py
index c3b6b9d..10241ab 100644
--- a/tests/test_pagure_repospanner.py
+++ b/tests/test_pagure_repospanner.py
@@ -305,6 +305,59 @@ class PagureRepoSpannerTestsNewRepoDefault(PagureRepoSpannerTests):
         repodirlist = os.listdir(os.path.join(self.path, 'repos'))
         self.assertEqual(repodirlist, ['pseudo'])
 
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': True,
+        'HTTP_REPO_ACCESS_GITOLITE': False,
+    })
+    def test_http_pull(self):
+        """ Test that the HTTP pull endpoint works for repoSpanner. """
+        tests.create_projects(self.session)
+        tests.create_tokens(self.session)
+        tests.create_tokens_acl(self.session)
+        self.create_project_full('clonetest', {"create_readme": "y"})
+
+        # Verify the new project is indeed on repoSpanner
+        project = pagure.lib._get_project(self.session, 'clonetest')
+        self.assertTrue(project.is_on_repospanner)
+
+        # Unfortunately, actually testing a git clone would need the app to
+        # run on a TCP port, which the test environment doesn't do.
+        output = self.app.get('/clonetest.git/info/refs?service=git-upload-pack')
+        self.assertEqual(output.status_code, 200)
+        output_text = output.get_data(as_text=True)
+        self.assertIn("# service=git-upload-pack", output_text)
+        self.assertIn("symref=HEAD:refs/heads/master", output_text)
+        self.assertIn(" refs/heads/master\x00", output_text)
+
+    @patch.dict('pagure.config.config', {
+        'ALLOW_HTTP_PULL_PUSH': True,
+        'ALLOW_HTTP_PUSH': True,
+        'HTTP_REPO_ACCESS_GITOLITE': False,
+    })
+    def test_http_push(self):
+        """ Test that authenticated HTTP access works for repoSpanner. """
+        tests.create_projects(self.session)
+        tests.create_tokens(self.session)
+        tests.create_tokens_acl(self.session)
+        self.create_project_full('clonetest', {"create_readme": "y"})
+
+        # Verify the new project is indeed on repoSpanner
+        project = pagure.lib._get_project(self.session, 'clonetest')
+        self.assertTrue(project.is_on_repospanner)
+
+        # Unfortunately, actually testing a git clone would need the app to
+        # run on a TCP port, which the test environment doesn't do.
+        output = self.app.get(
+            '/clonetest.git/info/refs?service=git-upload-pack',
+            environ_overrides={'REMOTE_USER': 'pingou'},
+        )
+        self.assertEqual(output.status_code, 200)
+        output_text = output.get_data(as_text=True)
+        self.assertIn("# service=git-upload-pack", output_text)
+        self.assertIn("symref=HEAD:refs/heads/master", output_text)
+        self.assertIn(" refs/heads/master\x00", output_text)
+
     @patch('pagure.ui.app.admin_session_timedout')
     def test_hooks(self, ast):
         """ Test hook setting and running works. """