#198 WIP: allow to set repo priority (see #97)
Merged 6 years ago by clime. Opened 6 years ago by frostyx.
copr/ frostyx/copr repository-priority  into  master

@@ -5,7 +5,7 @@ 

  import string

  

  from six import with_metaclass

- from six.moves.urllib.parse import urljoin, urlparse

+ from six.moves.urllib.parse import urljoin, urlparse, parse_qs

  from textwrap import dedent

  import re

  
@@ -571,6 +571,27 @@ 

      return repo_url

  

  

+ def parse_repo_params(repo, supported_keys=None):

+     """

+     :param repo: str repo from Copr/CoprChroot/Build/...

+     :param supported_keys list of supported optional parameters

+     :return: dict of optional parameters parsed from the repo URL

+     """

+     supported_keys = supported_keys or ["priority"]

+     if not repo.startswith("copr://"):

+         return {}

+ 

+     params = {}

+     qs = parse_qs(urlparse(repo).query)

+     for k, v in qs.items():

+         if k in supported_keys:

+             # parse_qs returns values as lists, but we allow setting the param only once,

+             # so we can take just first value from it

+             value = int(v[0]) if v[0].isnumeric() else v[0]

+             params[k] = value

+     return params

+ 

+ 

  def generate_build_config(copr, chroot_id):

      """ Return dict with proper build config contents """

      chroot = None
@@ -595,20 +616,21 @@ 

              "name": "Copr buildroot",

          })

  

-     for repo in copr.repos_list:

-         repo_view = {

-             "id": generate_repo_name(repo),

-             "url": pre_process_repo_url(chroot_id, repo),

-             "name": "Additional repo " + generate_repo_name(repo),

-         }

-         repos.append(repo_view)

-     for repo in chroot.repos_list:

-         repo_view = {

-             "id": generate_repo_name(repo),

-             "url": pre_process_repo_url(chroot_id, repo),

-             "name": "Additional repo " + generate_repo_name(repo),

-         }

-         repos.append(repo_view)

+     def get_additional_repo_views(repos_list):

+         repos = []

+         for repo in repos_list:

+             params = parse_repo_params(repo)

+             repo_view = {

+                 "id": generate_repo_name(repo),

+                 "url": pre_process_repo_url(chroot_id, repo),

+                 "name": "Additional repo " + generate_repo_name(repo),

+             }

+             repo_view.update(params)

+             repos.append(repo_view)

+         return repos

+ 

+     repos.extend(get_additional_repo_views(copr.repos_list))

+     repos.extend(get_additional_repo_views(chroot.repos_list))

  

      return {

          'project_id': copr.repo_id,

@@ -1,4 +1,5 @@ 

  from copy import deepcopy

+ from flask import Flask, current_app

  import six

  

  if six.PY3:
@@ -8,7 +9,7 @@ 

  

  from coprs import app

  from coprs.helpers import parse_package_name, generate_repo_url, \

-     fix_protocol_for_frontend, fix_protocol_for_backend

+     fix_protocol_for_frontend, fix_protocol_for_backend, pre_process_repo_url, parse_repo_params

  

  from tests.coprs_test_case import CoprsTestCase

  
@@ -121,3 +122,33 @@ 

              app.config["ENFORCE_PROTOCOL_FOR_FRONTEND_URL"] = orig

              raise e

          app.config["ENFORCE_PROTOCOL_FOR_BACKEND_URL"] = orig

+ 

+     def test_pre_process_repo_url(self):

+         app = Flask(__name__)

+         app.config["BACKEND_BASE_URL"] = "http://backend"

+ 

+         test_cases = [

+             ("http://example1.com/foo/$chroot/", "http://example1.com/foo/fedora-rawhide-x86_64/"),

+             ("copr://someuser/someproject", "http://backend/results/someuser/someproject/fedora-rawhide-x86_64/"),

+             ("copr://someuser/someproject?foo=bar&baz=10",

+              "http://backend/results/someuser/someproject/fedora-rawhide-x86_64/"),

+         ]

+         with app.app_context():

+             for url, exp in test_cases:

+                 assert pre_process_repo_url("fedora-rawhide-x86_64", url) == exp

+ 

+     def test_parse_repo_params(self):

+         test_cases = [

+             ("copr://foo/bar", {}),

+             ("copr://foo/bar?priority=10", {"priority": 10}),

+             ("copr://foo/bar?priority=10&unexp1=baz&unexp2=qux", {"priority": 10}),

+             ("http://example1.com/foo?priority=10", {}),

+         ]

+         for repo, exp in test_cases:

+             assert parse_repo_params(repo) == exp

+ 

+     def test_parse_repo_params_pass_keys(self):

+         url = "copr://foo/bar?param1=foo&param2=bar&param3=baz&param4=qux"

+         supported = ["param1", "param2", "param4"]

+         expected = {"param1": "foo", "param2": "bar", "param4": "qux"}

+         assert parse_repo_params(url, supported_keys=supported) == expected

file modified
+3
@@ -34,6 +34,9 @@ 

  metadata_expire=0

  cost=1

  best=1

+ {%- if "priority" in repo %}

+ priority={{ repo["priority"] }}

+ {%- endif %}

  {% endfor %}

  """

  {% endif %}

The code is not finished yet, I am putting it here for discussion purpose.

As I understood from #97, it is not required for the described use case to allow a user to specify the integer value of the priority. Therefore, I would prefer to select if the priority should be lower or higher than normal. I think that it is far more user-friendly than describing the integer range and how it is evaluated. Do you agree?

Also, I want to discuss, how to put the priority value into the repofile. See the project overview, ... We provide one repofile per name_release but we want to set priority per chroot ... that means that e.g. I can set low priority for fedora-rawhide-i386 and high priority for fedora-rawhide-x86_64 ... and which one of those should be in the Fedora Rawhide repo?

The only thing that comes to my mind is having the [modified] tag in i386 [modified] (0)*, x86_64 [modified] (0)* as a link to an arch-specific repofile. I just don't like that in such case the currently linked repo will not set the priority and it would be something unexpected by a regular user.

What do you think? Do you have a better idea how to solve it?

Uh, okay. I went through the RFEs from Igor and the issue is actually about something else.

Mainly, Igor just wants for a particular build or package to prioritize COPR repos when building. That means repos that are generated into 'additional_repos' variable automatically should have different than default priority if desired.

We actually don't need to fiddle with priorities of the generated repo files in the project homepage. That's a different issue and frankly user can do it himself when he needs to. I cannot imagine a use-case when project owner would need to do it for users (and anyway we don't really have an issue filed for it).

In the end, it means we only need to toggle (set) priority in what is generated around here: https://pagure.io/copr/copr/blob/master/f/frontend/coprs_frontend/coprs/helpers.py#_571.

I would extend sytanx for our copr:// protocol. Allowing setting prio as GET param. Meaning the instruction below the 'Additional repos' field would be:

To reference another Copr with changed priority, use:
copr://user/project?priority=x

This would add some copr to additional_repos with priority x.

Of course, if user wants to lower priority of the project where he/she is building, he/she would need to reference the project itself. In additional repos it would be then mentioned twice (once added implicitly and the second time explicitly with the modified priority). We can also just generate the explicitly added repo and throw away the implicitly added in those cases.

...That's the least intrusive option to implement this.

Oh, I must have totally misunderstood the issue, sorry. I am glad that I submitted it to preliminary review before burning a lot of time on it.

I understand it now, thank you for clarification.

@clime wrote:

To reference another Copr with changed priority, use:
copr://user/project?priority=x

Looks nice, but I would much rather see a generic approach -- so users could specify priorities for any dependant repository.

Of course, if user wants to lower priority of the project where he/she is building, he/she would need to reference the project itself.

Good idea..

In additional repos it would be then mentioned twice

.. or we could simply drop the first (implicit) occurrence of that repo.

@clime wrote:
To reference another Copr with changed priority, use:
copr://user/project?priority=x

Agreed that this is the simplest option.

Looks nice, but I would much rather see a generic approach -- so users could specify priorities for any dependant repository.

I would like it too. Do you find an idea of possibility to specify repos as json dicts to be bad?

Since the {} symbols are not allowed in the URL, we would be easily able to parse it. Examples of repos fields:

  • Repos: http://example1.com/foo/ http://example2.com/bar/
  • Repos: {"url": "http://example1.com/foo", "priority": 1}
  • Repos: http://example1.com/foo/ {"url": "http://example2.com/bar", "priority": 1}

I think that for users that really want this feature, it is easy-to-use, implementation would be fairly simple and easy to extend to support more settings if we want. However, it could be a bit confusing for beginners and users that don't need such feature, so I suggest not promoting it in the UI (tooltips/placeholder) and just describe the possibility in the documentation.

What do you think? Do you prefer @clime's original idea, this, or another generic approach?

I would like it too. Do you find an idea of possibility to specify repos as json dicts to be bad?

Json is IMO bad, -1. Yaml would be fine to me ... but none of them are OK since it would break the cli commandline options, where we have --repo A, --repo B, etc.

Since the {} symbols are not allowed in the URL, we would be easily able to parse it. Examples of repos fields:

IMO it is not worth the hack, because we would have to add a new command-line option for different format (and then we could use PyYAML without parsing hacks). Of course, the --repo would have to be mapped into yaml and the existing fields in DB would have to be preprocessed by migration script...

Also | is a good separator, so one could think of the_url|prio|otherparam<space>... format.

Yaml would be fine to me

+1, so basically the repo fields could look like I showed, but keys and values would not have to be in " ". That's much better.

but none of them are OK since [...] we would have to add a new command-line option for different format [...] and the existing fields in DB would have to be preprocessed by migration script...

Maybe I didn't think this through, but I don't agree. I think that we could do it, but don't necessarily have to. Consider this usage of CLI

copr-cli ... --repo httpd://example1.com/foo --repo "{url: httpd://example2.com/bar, priority: 10}"

My idea is not to force everything to be yaml, but rather allow a user to specify some repo as yaml if he wants to (as we expect that usage of this feature will be quite rare).

I think that it should be stored in the database the same way that it is now - so every existing repos field will be still valid and newly some yaml could appear in it.

I would parse the yaml when we want to use it, so basically when preparing the queue for backend. It could be easily done by transforming both yaml repos and url-only repos to dicts (or objects), so we can uniformly work with them.

I hope that I've explained it correctly.
What do you think?

I would just try to satisfy the RFE. It's not like we couldn't add something more robust later if anyone actually asks for it. You are over-complicating this heavily.

I mean, the implementation of what I am proposing will be probably same few lines as your original idea, just the syntax is slightly more complicated in exchange for having it working with generic repos, not just copr repos.

I think that it's worth it and will eventually save us from some troubles of migrating from one syntax to another or supporting both. That's why I wanted to discuss it.

I would just try to satisfy the RFE. It's not like we couldn't add something more robust later if anyone actually asks for it. You are over-complicating this heavily.

This is just discussion and fixing other urls (not just copr://) is worth at least the talk :-).

--repo "{url: httpd://example2.com/bar, priority: 10}"

That's possible, OK. Clearly json is better than yaml here.

The wiki style [URL|description] could work too like [url|priority=10] or even
easier url|priority=10 since we use space as the separator.

If we can use the pipe character safely, I vote for the url|priority=10. It is really easy to write/read, it is inspired by wiki syntax, so some people are already familiar with it and it will work also for non-copr URLs.

From implementation perspective, there is no difference for this, for what I was suggesting or for ?priority=x, ... so I think that we don't need to consider this aspect and just find the best syntax.

From implementation perspective, there is no difference for this, for what I was suggesting or for ?priority=x, ... so I think that we don't need to consider this aspect and just find the best syntax.

There is pretty big difference. copr://user/project?priority=10is a valid url so you can use urlparse to parse the whole thing as it is done now in helpers.py:pre_process_repo_url. ?priority=x is actually adding nothing new whereas url|priority=10 is adding some totally random syntax. If you need special syntax like that, you should already split the additional repo element into two fields: repo_url and priority but that's really not something we want to cover the corner case where user needs to fiddle with priorities of the additional repos. Could you, please, stop adding some random syntax?

What's the point in being that negative? Urlparse has no problem with the symbol in question, and in any way - contributor needs to adjust the pre_process_repo_url method. The priority should be "parsed-out" before handing over the url to urlparse... It doesn't invalidate pros/cons of so far mentioned proposals.

Well, I don't really care enough to fight over this, @clime. If you think about it, no-one is trying to persuade you here, ... I had an idea that I felt it could be a viable alternative to the one that you already proposed. @praiskup actually offered another solution that in the end, I voted for. That doesn't mean that I am going to do it this way no matter what, ...

If you still prefer the original ?priority=x solution, I have no problem implementing it, but there should always be a space for discussing alternatives.

Well, I don't really care enough to fight over this, @clime. If you think about it, no-one is trying to persuade you here, ... I had an idea that I felt it could be a viable alternative to the one that you already proposed. @praiskup actually offered another solution that in the end, I voted for. That doesn't mean that I am going to do it this way no matter what, ...
If you still prefer the original ?priority=x solution, I have no problem implementing it, but there should always be a space for discussing alternatives.

The thing is that all the solutions proposed here including my solution, suck, frankly. ?priority=x is not good but it at least satisfies the RFE and it doesn't do anything else than that. But yes, I would still go for it unless we want to allow users to upload their own yum/dnf confs that would be appended to default ones. That's probably how the generic solution should be approached but I am not sure we are at a point that we need that.

There are some pros/cons mentioned in this thread (e.g. that ?priority=x won't ever work
for other urls, it would bring ambiguity; == so no space for future RFE due to laziness).
There were no cons for | except that it is some "random" syntax, dunno. Being useless
here in this PR, I'm stepping down..

rebased onto 8b2d678488074ea5742a18d8add002215f7bde35

6 years ago

The refactoring commit is probably a thing of opinion so that change is not required. I like it tough, so I am suggesting it.

I don't like that something called parse is changing the original input: the repo url in this case. The idea that only "supported_keys" are taken out and the rest is kept also doesn't seem be very intuitive.

This is showing to be problematic to implement. We call urlparse on two places (in pre_process_repo_url and in parse_repo_params) and in parse_repo_params there is also some unparsing?

Also, there would also needs to be an update in copr-rpmbuild's mock.cfg to actually generate the priority field if present into the generated mock configuration.

The refactoring part for sure is good but that's only the refactoring.

My idea was to make a function generate_repo_record(repo_url) that would take a repo url, parse it and in case of copr:// protocol did the transformation to https:// and also set additional_params dict to the url query params (but only for the copr:// protocol). Then it would return the dict with 'name', 'id', 'url' + additional_params.

But again some change needs to be done also on copr-rpmbuild's side to render those additional params (currently, only the 'name', 'id', 'url' are renderred). So it's not that easy change to do now. Especially given the fact that the repo to be added is partially generated on frontend (id/name) and partially in copr-rpmbuild. The logic should be moved to one place solely and either, frontend should pass only the URL to copr-rpmbuild and copr-rpmbuild should do all the rest or the frontend should generate the whole finished repo configuration.

I would suggest putting this PR to sleep, implement things where we are more certain of and come back to https://pagure.io/copr/copr/issue/97 later. I thought we would be able to implement #97 with just a small touch into the code but I don't see that option now. Sorry for making you spending time on this.

I don't like that something called parse is changing the original input: the repo url in this case.

Strictly speaking, the method is not changing the original input. Try the following test

def test_parse_repo_params_non_destructive(self):
    repo = "copr://foo/bar?priority=10&unexp1=baz&unexp2=qux"
    parse_repo_params(repo)
    assert repo == "copr://foo/bar?priority=10&unexp1=baz&unexp2=qux"

The idea that only "supported_keys" are taken out and the rest is kept also doesn't seem be very intuitive.
and in parse_repo_params there is also some unparsing?

It could be done without unparsing and returning back the URL stripped of the supported params (priority) if we really just focus on copr:// repos. In that case the parse_repo_params would return just the dict of supported params and let pre_process_repo_url to transform the URL to https:// without that parameters. We would still be calling urlparse on two places, but I like it much better than having one big function. This way, we can have nice unit tests.

Also, there would also needs to be an update in copr-rpmbuild's mock.cfg to actually generate the priority field if present into the generated mock configuration.

Sure, I totally forgot on that part, ty for reminding it.

I would suggest putting this PR to sleep, implement things where we are more certain of and come back to #97 later.

I am okay with this suggestion. Let's figure out the mock config generation first.

rebased onto c9a6b64

6 years ago

There probably isn't any real blocker for finishing this RFE. I gave it another shot and simplified the code based on some of your suggestions (the main thing is just parsing the supported params in the parse_repo_params and leaving the concern about URL to pre_process_repo_url) and added the priority line generation to the mock config. Please see the last two commits. What do you think about it?

I think that it wouldn't interfere the future mock config generation unification, so maybe we can have this RFE solved and rework the mock config generation later?

You should no longer need urlunparse.

1 new commit added

  • [frontend] urlunparse is no longer needed
6 years ago

There probably isn't any real blocker for finishing this RFE. I gave it another shot and simplified the code based on some of your suggestions (the main thing is just parsing the supported params in the parse_repo_params and leaving the concern about URL to pre_process_repo_url) and added the priority line generation to the mock config. Please see the last two commits. What do you think about it?

Both commits are ok.

I think that it wouldn't interfere the future mock config generation unification, so maybe we can have this RFE solved and rework the mock config generation later?

The problem was not in some "mock config generation unification". The thing to consider here was whether or not we want the copr-rpmbuild tool to have support for -a switch (to add additional repositories) and if that switch should accept 'copr://' protocol url addresses (meaning that we would need to add copr_backend_url configuration option into copr-rpmbuild config). In that case the whole repo generation logic would be moved to copr-rpmbuild and frontend would only provide the additional urls for copr-rpmbuild. But okay, even that doesn't really matter here.

The important thing is to realize how this PR changes meaning of copr:// protocol. Before it was just a simple "substitution" - it was https:// written in a compact form. After this PR, it will be a repo config generator - more precisely, communication protocol dedicated to communicate with a single resource only, which is a COPR repo config generator. Not sure how practical this sounds but it basically means it should be possible to write things like:

copr://dev/null/?baseurl=http://download.fedoraproject.org/pub/fedora/linux/releases/$releasever/Everything/$basearch/os/

and even:

copr://?baseurl=http://download.fedoraproject.org/pub/fedora/linux/releases/$releasever/Everything/$basearch/os/

so copr:// protocol will now be a way to generate repos. And actually "https://example.org/somerepo.repo" will become a short form to: copr://?baseurl=https://example.org/somerepo.repo.

So if you are ok with this shift of understanding to the copr protocol (I am quite ok with), then I am ok with this PR.

it should be possible to write things like:

In the meaning of - we can have it in the future if we want to. But I believe that this PR doesn't allow such things, I can write tests so we can be sure. Function pre_process_repo_url works as it used to, there were no changes to it. We just add optional params to the repo JSON, but only the supported ones (currently just priority).

it should be possible to write things like:

In the meaning of - we can have it in the future if we want to. But I believe that this PR doesn't allow such things, I can write tests so we can be sure.

Actually this is something we would like to allow eventually (or even quite soon). Even though copr://? looks weird, it makes sense. The current implementation is half-way to it. So ok, I guess we can merge this.

Pull-Request has been merged by clime

6 years ago

Actually this is something we would like to allow eventually (or even quite soon). Even though copr://? looks weird, it makes sense. The current implementation is half-way to it. So ok, I guess we can merge this.

Thanks for merging. In this moment I have no objections to this, but I think that the described feature should be a topic of another PR, so I am glad that we were able to finish this one :-)