From db39f090a648bfd21fdebd3caca19092a57719f0 Mon Sep 17 00:00:00 2001
From: Robert Mayr
Date: Dec 23 2016 23:04:02 +0000
Subject: Set up a test for getting two-week Cloud Base AMIs - just for one set of AMI IDs

---

diff --git a/alt.fedoraproject.org/build/atomic_vars.py b/alt.fedoraproject.org/build/atomic_vars.py
new file mode 100755
index 0000000..7c9f677
--- /dev/null
+++ b/alt.fedoraproject.org/build/atomic_vars.py
@@ -0,0 +1,237 @@
+#!/usr/bin/python
+""" Return the results of the atomic release engine from datagrepper.
+
+For the Two-Week Atomic Change (F23)
+
+Deps: $ sudo dnf install python-requests
+
+Author: Ralph Bean
+License: LGPLv2+
+"""
+
+from __future__ import print_function
+
+import collections
+import functools
+import json
+import logging
+import os
+import sys
+import socket
+
+from datetime import datetime, timedelta
+
+import dateutil.relativedelta
+import dateutil.tz
+import dogpile.cache
+import requests
+
+log = logging.getLogger("atomic_vars")
+
+try:
+    sys.path.append('../build.d')
+
+    import globalvar
+except ImportError:
+    log.error("Unable to import globalvar")
+    sys.exit(1)
+
+base_url = 'https://apps.fedoraproject.org/datagrepper/raw'
+topic = "org.fedoraproject.prod.releng.atomic.twoweek.complete"
+
+UTC = dateutil.tz.tzutc()
+
+session = requests.session()
+
+cache = dogpile.cache.make_region().configure(
+    "dogpile.cache.dbm",
+    # 'make clean' does not remove this cache, but we let the values expire
+    # once every this many seconds (once a day)
+    expiration_time=86400,
+    arguments={
+        "filename": os.path.join(os.getcwd(), 'build/atomic.cache')
+    },
+)
+
+# Are we running in fedora-infra or on someone's laptop?
+hostname = socket.gethostname()
+if '.phx2.fedoraproject.org' in hostname:
+    DL_URL_PREFIX = 'http://dl.phx2.fedoraproject.org'
+else:
+    DL_URL_PREFIX = 'https://dl.fedoraproject.org'
+
+download_fpo = 'https://download.fedoraproject.org'
+
+
+def get_page(page, pages):
+    """ Retrieve the JSON for a particular page of datagrepper results """
+    log.debug("Getting page %i of %s", page, pages)
+    params = dict(
+        start=1441402109,  # the timestamp of when we first started doing this
+        topic=topic,
+        page=page,
+        rows_per_page=1,
+    )
+    response = session.get(base_url, params=params)
+    if not bool(response):
+        raise IOError("Failed to talk to %r %r" % (response.url, response))
+    return response.json()
+
+
+# A list of fedmsg message ids that were produced erroneously.
+# We don't want to use them, so ban them from our results.
+blacklist = [
+    '2016-dd05c4b7-958b-439f-90d6-e5ca0af2197c',
+    '2016-b2a2eb00-acef-4a1f-bc6a-ad5aa9d81eee',
+    '2016-0307f681-1eae-4aeb-9126-8a43b7a378e2',
+]
+
+
+def get_messages(target):
+    """ Generator that yields messages from datagrepper """
+
+    # Get the first page
+    data = get_page(1, 'unknown')
+    for message in data['raw_messages']:
+        if message['msg_id'] in blacklist:
+            continue
+        if target in json.dumps(message):
+            yield message
+
+    more = functools.partial(get_page, pages=data['pages'])
+
+    # Get all subsequent pages (if there are any...)
+    for page in range(1, data['pages']):
+        data = more(page + 1)
+
+        for message in data['raw_messages']:
+            if message['msg_id'] in blacklist:
+                continue
+            if target in json.dumps(message):
+                yield message
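The dbm-backed dogpile.cache region above is what keeps repeated builds from hammering datagrepper. A minimal sketch of the same pattern, with a throwaway filename and values chosen purely for illustration:

    import dogpile.cache

    region = dogpile.cache.make_region().configure(
        "dogpile.cache.dbm",
        expiration_time=86400,  # seconds; cached values go stale after a day
        arguments={"filename": "/tmp/example.cache"},  # hypothetical path
    )

    @region.cache_on_arguments()
    def expensive(n):
        print("computing %r" % n)  # only runs on a cache miss
        return n * 2

    expensive(21)             # miss: computes, stores in the dbm file
    expensive(21)             # hit: served from the cache, no print
    expensive.invalidate(21)  # force the next call to recompute

Because cache_on_arguments() derives the cache key from the function's arguments, passing the release ids into collect() below means a new release id naturally misses the cache and triggers a fresh datagrepper query.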
+
+
+def make_templates(curr_atomic_id, next_atomic_id):
+    return [
+        # As things stand now, we only do two-week-atomic stuff for the
+        # current stable release.
+        (curr_atomic_id, '', ''),
+
+        # If we ever move to doing pre-release versions as well, just uncomment
+        # the following line and it should all work.  We leave it commented out
+        # for now because querying datagrepper for pre-release results that are
+        # not there is much slower than querying for something that exists.
+        #(next_atomic_id, 'pre_atomic_', 'pre_'),
+    ]
+
+
+# We cache this guy on disk so we don't hit datagrepper over and over.
+@cache.cache_on_arguments()
+def collect(curr_atomic_id, next_atomic_id):
+    results = collections.defaultdict(dict)
+
+    # This is the information needed to provide "latest" download targets that
+    # redirect to the actual mirrormanager url via an htaccess file
+    results['release']['redir_map'] = collections.defaultdict(dict)
+
+    for idx, composedate_prefix, iso_size_prefix in make_templates(curr_atomic_id, next_atomic_id):
+
+        log.info("Looking for latest atomic release for %s" % idx)
+        # Get the *latest* atomic release information.
+        messages = get_messages('-%s-' % idx)
+        try:
+            message = next(messages)
+        except StopIteration:
+            log.warn("Couldn't find any two-week-atomic content for %r" % idx)
+            continue
+
+        # Parse the composedate out of the image_name
+        image_name = message['msg']['atomic_qcow2']['image_name']
+        composedate = '.'.join(image_name.split('-')[-1].split('.')[:-2])
+        log.info("  Found composedate: %s" % composedate)
+        results['release'][composedate_prefix + 'atomic_composedate'] = composedate
+
+        # Save the timestamp so we can compute the age later, off-cache.
+        results['release'][composedate_prefix + 'atomic_ts'] = message['timestamp']
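The composedate parsing above leans on the image name layout: everything after the last '-' is "composedate.arch.extension", and composedates can themselves contain dots, hence the split/[:-2]/join dance. A worked example with a hypothetical image name:

    >>> image_name = 'Fedora-Atomic-25-20161223.0.x86_64.qcow2'  # hypothetical
    >>> image_name.split('-')[-1]
    '20161223.0.x86_64.qcow2'
    >>> image_name.split('-')[-1].split('.')[:-2]
    ['20161223', '0']
    >>> '.'.join(image_name.split('-')[-1].split('.')[:-2])
    '20161223.0'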
%r" % (url, response)) + continue + + length = int(response.headers['content-length']) / 1000000 + + # Provide the download URL + url_key = mapping[key] + "_url" + results['release'][url_key] = download_url + + # Provide the redirect rule mapping + img_filename = download_url.split('/')[-1] + results['release']['redir_map'][key] = {} + results['release']['redir_map'][key]['redirect'] = download_url + results['release']['redir_map'][key]['filename'] = img_filename + + # Figure out which of our vars we're going to set, and set it + iso_size_key = iso_size_prefix + mapping[key] + results['iso_size'][iso_size_key] = str(length) + + # Special case for Atomic ISO latest redirect rule mapping because it's + # not included in fedmsg data + atomic_iso_filename = "Fedora-Atomic-ostree-x86_64-{}-{}.iso".format( + globalvar.release['curr_id'], + results['release'][composedate_prefix + 'atomic_composedate'] + ) + results['release']['redir_map']['atomic_iso'] = {} + results['release']['redir_map']['atomic_iso']['redirect'] = \ + globalvar.path['download_atomic'] + "/stable/Fedora-Atomic-" + \ + globalvar.release['curr_id'] + '-' + \ + results['release'][composedate_prefix + 'atomic_composedate'] + \ + "/Atomic/x86_64/iso/{}".format(atomic_iso_filename) + results['release']['redir_map']['atomic_iso']['filename'] = atomic_iso_filename + + return results + + +# Note, this is *not* cached, since we need to update it frequently. +def update_age(release): + """ Is it old and stale? + + We aim to produce new atomic releases every two weeks at minimum. If we're + older than two weeks, we should put up a warning on the websites. Here we + just compute a flag that gets checked in the template. If this latest + release if younger than two weeks, call it "fresh". If it is older than + two weeks, it is no longer fresh. + http://taiga.cloud.fedoraproject.org/project/acarter-fedora-docker-atomic-tooling/us/31 + """ + + results = collections.defaultdict(dict) + templates = make_templates(release['curr_atomic_id'], release['next_atomic_id']) + for idx, composedate_prefix, iso_size_prefix in templates: + two_weeks_ago = datetime.now(UTC) - timedelta(days=14) + timestamp = release[composedate_prefix + 'atomic_ts'] + latest = datetime.fromtimestamp(timestamp, UTC) + freshness = bool(latest >= two_weeks_ago) + relative_delta = datetime.now(UTC) - latest + casual_delta = relative_delta.days + results['release'][composedate_prefix + 'atomic_freshness'] = freshness + results['release'][composedate_prefix + 'atomic_age'] = casual_delta + return results diff --git a/alt.fedoraproject.org/build/fedimg_vars.py b/alt.fedoraproject.org/build/fedimg_vars.py new file mode 100755 index 0000000..576a30f --- /dev/null +++ b/alt.fedoraproject.org/build/fedimg_vars.py @@ -0,0 +1,159 @@ +#!/usr/bin/python +""" Return the AMIs uploaded by fedimg for a given set of release vars. + +Search datagrepper to find the results. 
diff --git a/alt.fedoraproject.org/build/fedimg_vars.py b/alt.fedoraproject.org/build/fedimg_vars.py
new file mode 100755
index 0000000..576a30f
--- /dev/null
+++ b/alt.fedoraproject.org/build/fedimg_vars.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+""" Return the AMIs uploaded by fedimg for a given set of release vars.
+
+Search datagrepper to find the results.
+
+Deps: $ sudo dnf install python-requests
+
+Author: Ralph Bean
+License: LGPLv2+
+"""
+
+from __future__ import print_function
+
+import collections
+import functools
+import logging
+import os
+
+import requests
+import dogpile.cache
+
+log = logging.getLogger('fedimg_vars')
+
+base_url = 'https://apps.fedoraproject.org/datagrepper/raw'
+topic = "org.fedoraproject.prod.fedimg.image.upload"
+
+session = requests.session()
+
+cache = dogpile.cache.make_region().configure(
+    "dogpile.cache.dbm",
+    # 'make clean' does not remove this cache, but we let the values expire
+    # once every this many seconds (once a day)
+    expiration_time=86400,
+    arguments={
+        "filename": os.path.join(os.getcwd(), 'build/amis.cache')
+    },
+)
+
+
+def get_page(page, pages, target):
+    """ Retrieve the JSON for a particular page of datagrepper results """
+    log.debug("Getting page %i of %s", page, pages)
+    response = session.get(base_url, params=dict(
+        topic=topic,
+        page=page,
+        contains=target,
+        rows_per_page=100,
+    ))
+    return response.json()
+
+
+def get_messages(target):
+    """ Generator that yields messages from datagrepper """
+
+    # Get the first page
+    data = get_page(1, 'unknown', target)
+    for message in data['raw_messages']:
+        yield message
+
+    more = functools.partial(get_page, pages=data['pages'], target=target)
+
+    # Get all subsequent pages (if there are any...)
+    for page in range(1, data['pages']):
+        data = more(page + 1)
+
+        for message in data['raw_messages']:
+            yield message
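Unlike atomic_vars, which filters client-side with `target in json.dumps(message)`, this module pushes the filtering to the server through datagrepper's `contains` parameter. A one-off query equivalent to get_page(), handy for poking at the data by hand (the image name here is hypothetical):

    import requests

    response = requests.get(
        'https://apps.fedoraproject.org/datagrepper/raw',
        params=dict(
            topic='org.fedoraproject.prod.fedimg.image.upload',
            contains='Fedora-Cloud-Base-25-20161223.0.x86_64',  # hypothetical
            page=1,
            rows_per_page=100,
        ),
    )
    data = response.json()
    print(data['pages'], len(data['raw_messages']))  # page count, hits on page 1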
+ ("Fedora-Cloud-Base-{curr_cloud_AMI_id}-{atomic_composedate}.x86_64", { + 'HVM_base_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'standard', + 'GP2_HVM_base_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'gp2', + 'PV_base_AMI': lambda e: e.get('virt_type') == 'paravirtual' and e.get('vol_type') == 'standard', + 'GP2_PV_base_AMI': lambda e: e.get('virt_type') == 'paravirtual' and e.get('vol_type') == 'gp2', + }), + #("Fedora-Atomic-{curr_cloud_AMI_id}-{atomic_composedate}.x86_64", { + # 'HVM_atomic_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'standard', + # 'GP2_HVM_atomic_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'gp2', + #}), + #("Fedora-Cloud-Base-{next_cloud_AMI_id}_{curr_cloud_AMI_state}-{RC_pre_gold}.x86_64", { + # 'pre_HVM_base_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'standard', + # 'pre_GP2_HVM_base_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'gp2', + # 'pre_PV_base_AMI': lambda e: e.get('virt_type') == 'paravirtual' and e.get('vol_type') == 'standard', + # 'pre_GP2_PV_base_AMI': lambda e: e.get('virt_type') == 'paravirtual' and e.get('vol_type') == 'gp2', + #}), + #("Fedora-Cloud-Atomic-{next_cloud_AMI_id}_{curr_cloud_AMI_state}-{manual_pre_cloud_AMI_atomic_composedate}.x86_64", { + # 'pre_HVM_atomic_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'standard', + # 'pre_GP2_HVM_atomic_AMI': lambda e: e.get('virt_type') == 'hvm' and e.get('vol_type') == 'gp2', + #}), + ] + + for template, buckets in templates: + # 2 - Build an intermediary dict + intermediary = collections.OrderedDict() + target = template.format(**release) + log.info("Looking for AMIs for %s" % target) + + messages = get_messages(target) + for message in messages: + key = message['msg']['image_name'] + if not key in intermediary: + intermediary[key] = [] + intermediary[key].append(message['msg']) + + if not intermediary: + log.warn("No AMIs found for %s" % target) + continue + + # What would this even mean? + assert len(intermediary) < 2, "Impossible. Got more than one target." + + uploads = intermediary[target] + + # 3- transform intermediary representation into results + for name, matches in buckets.items(): + for upload in uploads: + if matches(upload['extra']): + ami = upload['extra']['id'] + # The region looks like "EC2 (REGION)", so we strip stuff. + region = upload['destination'][5:-1] + results[name][region] = ami + + return results + + +def sanity_check(globalvar, collected_fedimg_vars): + """ This is a sanity check just to make sure the datagrepper code is not + way off from what we had hand-typed before. + + Eventually, remove this. + """ + + names = [ + 'pre_HVM_base_AMI', + 'pre_GP2_HVM_base_AMI', + 'pre_PV_base_AMI', + 'pre_GP2_PV_base_AMI', + 'pre_HVM_atomic_AMI', + 'pre_GP2_HVM_atomic_AMI', + ] + for name in names: + handtyped = getattr(globalvar, name) + collected = collected_fedimg_vars[name] + + for key in handtyped: + if not key in collected: + log.warn("collected %r is missing %r" % (name, key)) + + for key in collected: + if not key in handtyped: + log.warn("handtyped %r is missing %r" % (name, key)) diff --git a/alt.fedoraproject.org/data/content/cloud/index.html b/alt.fedoraproject.org/data/content/cloud/index.html index 05bf028..c6acbb7 100644 --- a/alt.fedoraproject.org/data/content/cloud/index.html +++ b/alt.fedoraproject.org/data/content/cloud/index.html @@ -292,7 +292,33 @@

diff --git a/alt.fedoraproject.org/data/content/cloud/index.html b/alt.fedoraproject.org/data/content/cloud/index.html
index 05bf028..c6acbb7 100644
--- a/alt.fedoraproject.org/data/content/cloud/index.html
+++ b/alt.fedoraproject.org/data/content/cloud/index.html
@@ -292,7 +292,33 @@
[The HTML of this hunk did not survive extraction.  What remains shows that in
the ${_('GP2 HVM AMIs')} section of the cloud download page, the old
${_('Click to launch')} markup is removed and replaced with an expanded
${_('Click to launch')} block, growing the hunk from 7 to 33 lines.]
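For context on the template side: each redir_map entry built in atomic_vars.py pairs a stable filename with the mirrormanager URL it should redirect to. This patch does not show the consumer, but one plausible rendering into Apache redirect rules, purely illustrative (the /cloud/latest/ path is invented), would be:

    # Hypothetical: render redir_map entries into Apache Redirect rules.
    def render_redirects(redir_map):
        lines = []
        for key, entry in redir_map.items():
            # entry = {'redirect': <mirrormanager URL>, 'filename': <image file>}
            lines.append("Redirect temp /cloud/latest/%s %s"
                         % (entry['filename'], entry['redirect']))
        return "\n".join(lines)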