#2329 backend: work with multiple CDN hostnames per instance
Merged 2 years ago by praiskup. Opened 2 years ago by frostyx.
copr/ frostyx/copr hitcounter-pt-8  into  main

@@ -31,8 +31,15 @@ 

  PRODUCTION_HOSTNAME = "copr-be.aws.fedoraproject.org"

  DEVEL_HOSTNAME = "copr-be-dev.aws.fedoraproject.org"

  

- PRODUCTION_CDN_HOSTNAME = "download.copr.fedorainfracloud.org"

- DEVEL_CDN_HOSTNAME = "download.copr-dev.fedorainfracloud.org"

+ PRODUCTION_CDN_HOSTNAMES = [

+     "download.copr.fedorainfracloud.org",

+     "d1nld9ovj32u75.cloudfront.net",

+ ]

+ 

+ DEVEL_CDN_HOSTNAMES = [

+     "download.copr-dev.fedorainfracloud.org",

+     "d1p7mxc66bhrst.cloudfront.net",

+ ]

  

  

  log = logging.getLogger(__name__)
@@ -126,33 +133,33 @@ 

      return accesses

  

  

- def get_cdn_hostname(args):

+ def get_cdn_hostnames(args):

      """

      The devel and production accesses are mixed together. Which ones do we want

      to count?

      """

      # If a CDN hostname was explicitly specified when calling the script

-     if args.cdn_hostname:

-         return args.cdn_hostname

+     if args.cdn_hostnames:

+         return args.cdn_hostnames

  

      # Count hits from devel CDN hostname on devel instance

      hostname = gethostname()

      if hostname == DEVEL_HOSTNAME:

-         return DEVEL_CDN_HOSTNAME

+         return DEVEL_CDN_HOSTNAMES

  

      # Default to production hits. Don't worry, we don't accidentally

      # remove them from any other instance

-     return PRODUCTION_CDN_HOSTNAME

+     return PRODUCTION_CDN_HOSTNAMES

  

  

- def check_different_cdn_hostname(accesses, cdn_hostname):

+ def check_different_cdn_hostname(accesses, cdn_hostnames):

      """

      If a list of HTTP accesses contain any access for a different CDN hostname

      (e.g. for devel instance when the script is running on production), return

      its value. Otherwise `None`.

      """

      for access in accesses:

-         if access["x-host-header"] != cdn_hostname:

+         if access["x-host-header"] not in cdn_hostnames:

              return access["x-host-header"]

      return None

  
@@ -184,10 +191,12 @@ 

                "number of simultaneously running instances might go up"))

      parser.add_argument(

          "--cdn-hostname",

+         action="append",

+         dest="cdn_hostnames",

          help=("By default the devel instance counts only hits from devel, and "

                "the production instance from production. You can override this "

                "by explicitly specifying the CDN hostname of interest, e.g. {0}"

-               .format(PRODUCTION_CDN_HOSTNAME)))

+               .format(PRODUCTION_CDN_HOSTNAMES[0])))

      return parser

  

  
@@ -198,7 +207,7 @@ 

      parser = get_arg_parser()

      args = parser.parse_args()

      tmp = tempfile.mkdtemp(prefix="copr-aws-s3-hitcounter-")

-     cdn_hostname = get_cdn_hostname(args)

+     cdn_hostnames = get_cdn_hostnames(args)

  

      if args.verbose:

          log.setLevel(logging.DEBUG)
@@ -215,7 +224,7 @@ 

          for path in [gz, raw]:

              os.remove(path)

  

-         different_cdn = check_different_cdn_hostname(accesses, cdn_hostname)

+         different_cdn = check_different_cdn_hostname(accesses, cdn_hostnames)

          if different_cdn:

              log.debug("Skipping: %s (different hostname: %s)",

                        s3file, different_cdn)

It seems that each instance has an automatically generated CDN hostname,
which looks like this:

d1nld9ovj32u75.cloudfront.net

But at the same time it can have an assigned hostname, e.g.

download.copr.fedorainfracloud.org

And in the S3 access logs, there are accesses for both
hostnames (even mixed within one access log).

Build succeeded.

Commit 0d6cc18 fixes this pull-request

Pull-Request has been merged by praiskup

2 years ago
Metadata