#527 Override default User-Agent header for downloading sources
Opened 10 months ago by a-zhn. Modified 10 months ago
a-zhn/FedoraReview master  into  master

@@ -52,8 +52,9 @@ 

          self.log.debug(header + ": " + str(cmd))

          if isinstance(cmd, str) and not shell:

              cmd = cmd.split(" ")

-         proc = Popen(cmd, stdout=PIPE, stderr=PIPE,

-                      universal_newlines=True, shell=shell)

+         proc = Popen(

+             cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=shell

+         )

  

          output, error = "", "undefined"

          try:
@@ -85,8 +86,10 @@ 

              import socket

  

              socket.setdefaulttimeout(30)

- 

-             istream = urllib.request.urlopen(url)

+             req = urllib.request.Request(

+                 url=url, headers={"User-Agent": "fedora-review"}

+             )

+             istream = urllib.request.urlopen(req)

              if istream.getcode() and istream.getcode() != 200:

                  raise DownloadError(istream.getcode(), url)

              with open(path, "wb") as ostream:

file modified
+1
@@ -1,2 +1,3 @@ 

  """ Required to make python import from top directory """

+ 

  pass

While working on a package at https://bugzilla.redhat.com/show_bug.cgi?id=2334903 I encountered download failures when using fedora-review. These produced warnings in the fedora-review output and made it impossible to verify that the checksums of the upstream sources matched those of the sources bundled in the SRPM.
The cause turned out to be that Cloudflare blocks requests sent with User-Agent: Python-urllib/3.13, and the upstream archives were hosted on Cloudflare R2.

A small example that reproduces this:

import urllib.error
import urllib.request

# Minimal reproduction: fetch an upstream archive with urllib and report
# whether the server accepted the request.
url = "https://release.files.ghostty.org/1.1.2/ghostty-1.1.2.tar.gz"

# With no explicit headers, urllib sends its default "Python-urllib/<version>"
# User-Agent. Swap in the commented-out line below to send a custom
# User-Agent instead and compare the outcome.
headers = {}
# headers = {"User-Agent": "fedora-review"}
req = urllib.request.Request(url=url, headers=headers)

try:
    # The context manager closes the HTTP response once the check is done.
    with urllib.request.urlopen(req):
        print("Success")
except urllib.error.HTTPError as e:
    print("Failed to fetch")
    # Show the response headers that came back with the rejected request.
    print(e.headers)

This can also be tested against the https://www.cloudflare.com URL,
and reproduced with curl by sending the urllib User-Agent:

curl -vvv -A "Python-urllib/3.13" http://www.cloudflare.com

A solution is to override this header with a different User-Agent header when making the request.

I ran the black, make pycodestyle, and make flake8 commands as described in CONTRIBUTE.