#25 WIP: Initial AWS support
Closed a year ago by jcline. Opened a year ago by jcline.

file modified
+2 -1
@@ -19,7 +19,8 @@ 

  

  # These are not yet in an upstream release, when azure.azcollection > 2.3.0 check applicability

  # https://github.com/ansible-collections/azure/pull/1466

- RUN venv/bin/ansible-galaxy collection install --force azure.azcollection && \

+ RUN venv/bin/ansible-galaxy collection install --force amazon.aws && \

+     venv/bin/ansible-galaxy collection install --force azure.azcollection && \

      pushd ~/.ansible/collections/ansible_collections/azure/azcollection/ && \

      patch -p1 < /srv/image-uploader/src/patches/azure-pr-1466.patch && \

      popd

@@ -1,3 +1,7 @@ 

  __version__ = "1.0.0"

  

- from .publish import AzurePublishedV1, ContainerPublishedV1  # noqa: F401

+ from .publish import (  # noqa: F401

+     AwsPublishedV1,

+     AzurePublishedV1,

+     ContainerPublishedV1,

+ )

@@ -21,6 +21,65 @@ 

          return "fedora-image-uploader"

  

  

+ class AwsPublishedV1(_PublishedV1):

+     """Published when an AWS AMI is created from an image."""

+ 

+     topic = ".".join([_PublishedV1.topic, "aws"])

+     body_schema = {

+         "id": f"{SCHEMA_URL}/v1/{'.'.join([_PublishedV1.topic, 'aws'])}",

+         "$schema": "https://json-schema.org/draft/2019-09/schema",

+         "description": (

+             "Schema for messages sent by fedora-image-uploader when a "

+             "new Amazon Web Services image is published."

+         ),

+         "type": "object",

+         "properties": {

+             "architecture": {

+                 "type": "string",

+                 "description": "The machine architecture of the image (x86_64, aarch64, etc).",

+             },

+             "compose_id": {

+                 "type": "string",

+                 "description": "The compose ID this image was created from.",

+             },

+             "image_name": {

+                 "type": "string",

+                 "description": "The name of the AMI.",

+             },

+             "regions": {

+                 "type": "object",

+                 "description": (

+                     "A map of regions to AMI IDs. The object keys are the AWS region and "

+                     "the value is the AMI ID."

+                 ),

+             },

+         },

+         "required": [

+             "architecture",

+             "compose_id",

+             "image_name",

+             "regions",

+         ],

+     }

+ 

+     @property

+     def summary(self):

+         return (

+             f"{self.app_name} published an AWS image from compose {self.body['compose_id']} as "

+             f"{self.body['image_name']}"

+         )

+ 

+     def __str__(self):

+         regions_and_ids = [f"{region} as {id}" for region, id in self.body["regions"].items()]

+         return (

+             "A new image has been published to Amazon Web Services:\n\n"

+             f"\tArchitecture: {self.body['architecture']}\n"

+             f"\tCompose ID: {self.body['compose_id']}\n"

+             f"\tImage Name: {self.body['image_name']}\n"

+             f"\tRegions: {', '.join(regions_and_ids)}\n"

+         )

+ 

+ 

  class AzurePublishedV1(_PublishedV1):

      """

      Published when an image is uploaded to the Azure image gallery.

@@ -15,7 +15,11 @@ 

  from fedfind import exceptions as ff_exceptions

  from fedfind import helpers as ff_helpers

  from fedfind import release as ff_release

- from fedora_image_uploader_messages import AzurePublishedV1, ContainerPublishedV1

+ from fedora_image_uploader_messages import (

+     AwsPublishedV1,

+     AzurePublishedV1,

+     ContainerPublishedV1,

+ )

  from fedora_messaging import api, config

  from fedora_messaging import exceptions as fm_exceptions

  from fedora_messaging import message as fm_message
@@ -55,7 +59,14 @@ 

          self.requests = Session()

          retry_config = Retry(total=5, backoff_factor=1)

          self.requests.mount("https://", adapters.HTTPAdapter(max_retries=retry_config))

-         self.handlers = (self.handle_azure, self.handle_container)

+         handlers = {

+             "aws": self.handle_aws,

+             "azure": self.handle_azure,

+             "container": self.handle_container,

+         }

+         self.handlers = [

+             handler for conf_key, handler in handlers.items() if conf_key in self.conf.keys()

+         ]

          # tracks the container repos we got images for, for manifest

          # creation purposes

          self.container_repos = dict()
@@ -170,19 +181,7 @@ 

                  },

              )

              if self.conf["container"].get("publish_amqp_messages", False):

-                 try:

-                     api.publish(message)

-                 except (

-                     fm_exceptions.PublishTimeout,

-                     fm_exceptions.PublishReturned,

-                 ) as e:

-                     _log.warning("Unable to publish ContainerPublishV1 message: %s", str(e))

-                 except fm_exceptions.PublishForbidden as e:

-                     _log.error(

-                         "Unable to publish message to topic %s, permission denied: %s",

-                         message.topic,

-                         str(e),

-                     )

+                 fallible_publish(message)

  

      def _missing_manifest_arches(self, source: str, builtarches: Iterable[str]) -> set:

          """
@@ -255,7 +254,9 @@ 

  

          return image_dest

  

-     def run_playbook(self, playbook: str, variables: dict, workdir: str):

+     def run_playbook(

+         self, playbook: str, variables: dict, workdir: str

+     ) -> ansible_runner.runner.Runner:

          """

          Execute Ansible playbook in workdir using variables.

  
@@ -278,6 +279,54 @@ 

          if result.rc != 0:

              _log.error(f"Playbook failed with return code {result.rc}")

              raise fm_exceptions.Nack()

+         return result

+ 

+     def handle_aws(self, image: dict, ffrel: ff_release.Release):

+         """Handle AWS images."""

+         if image.get("subvariant") != "Cloud_Base" or "AmazonEC2" not in image.get("path", ""):

+             return

+ 

+         with tempfile.TemporaryDirectory() as workdir:

+             image_path = self.download_image(image, workdir, decompress=True)

+             date = ffrel.metadata["composeinfo"]["payload"]["compose"]["date"]

+             respin = ffrel.metadata["composeinfo"]["payload"]["compose"]["respin"]

+             ami_name = (

+                 f"Fedora-Cloud-Base-AmazonEC2.{image['arch']}-{ffrel.relnum}-{date}.{respin}"

+             )

+             variables = {

+                 "base_region": self.conf["aws"]["base_region"],

+                 "s3_bucket_name": self.conf["aws"]["s3_bucket_name"],

+                 "ami_description": self.conf["aws"]["ami_description"],

+                 "ami_volume_dev_name": self.conf["aws"]["ami_volume_dev_name"],

+                 "ami_volume_type": self.conf["aws"]["ami_volume_type"],

+                 "ami_volume_size": self.conf["aws"]["ami_volume_size"],

+                 "ami_regions": self.conf["aws"]["ami_regions"],

+                 "ami_name": ami_name,

+                 "architecture": image["arch"],

+                 "image_source": image_path,

+                 "exclude_from_latest": True,

+                 "ansible_remote_tmp": workdir,

+             }

+ 

+             playbook = os.path.join(PLAYBOOKS, "aws.yml")

+             run = self.run_playbook(playbook, variables, workdir)

+             # extract the AMI ids from the Ansible run

+             regions = dict()

+             for event in run.events:

+                 # The ec2_ami task runs with "loop:", so per-region results arrive as

+                 # "runner_item_on_ok" events; module args live under event_data.res.

+                 # Guard on "image_id" so results from the S3/import tasks are skipped.

+                 if event["event"] in ("runner_on_ok", "runner_item_on_ok"):

+                     result = event["event_data"].get("res", {})

+                     if "image_id" in result:

+                         region = result["invocation"]["module_args"]["region"]

+                         regions[region] = result["image_id"]

+             message = AwsPublishedV1(

+                 body={

+                     "architecture": image["arch"],

+                     "compose_id": ffrel.cid,

+                     "image_name": ami_name,

+                     "regions": regions,

+                 },

+             )

+             if self.conf["aws"].get("publish_amqp_messages", False):

+                 fallible_publish(message)

  

      def handle_azure(self, image: dict, ffrel: ff_release.Release):

          """
@@ -356,18 +405,7 @@ 

              # Gate publishing behind a feature flag so we can roll out updates while getting

              # proper permissions for publishing.

              if self.conf["azure"].get("publish_amqp_messages", False):

-                 try:

-                     api.publish(message=message)

-                 except (fm_exceptions.PublishTimeout, fm_exceptions.PublishReturned) as e:

-                     # There's always tomorrow for a new image, rather than restarting the whole

-                     # process, we'll skip publishing the message and try again next time.

-                     _log.warning("Unable to publish AzurePublishV1 message: %s", str(e))

-                 except fm_exceptions.PublishForbidden as e:

-                     _log.error(

-                         "Unable to publish message to topic %s, permission denied: %s",

-                         message.topic,

-                         str(e),

-                     )

+                 fallible_publish(message)

              try:

                  self.azure_cleanup_images()

              except Exception:
@@ -455,7 +493,7 @@ 

  

      def handle_container(self, image: dict, ffrel: ff_release.Release):

          """Handle container images."""

-         registries = self.conf.get("container", {}).get("registries")

+         registries = self.conf["container"].get("registries")

          if not registries:

              # we can't do anything if no registries are configured

              return
@@ -500,3 +538,23 @@ 

              self.container_repos[repo].append(arch)

          else:

              self.container_repos[repo] = [arch]

+ 

+ 

+ def fallible_publish(message):

+     """

+     Helper to publish AMQP messages fallibly.

+ 

+     Rather than try really hard to publish every message, if the broker is unavailable it's

+     reasonable to just wait until the next image (which happens daily) to get built and try

+     again then.

+     """

+     try:

+         api.publish(message=message)

+     except (fm_exceptions.PublishTimeout, fm_exceptions.PublishReturned) as e:

+         _log.warning("Unable to publish %s message: %s", message.__class__.__name__, str(e))

+     except fm_exceptions.PublishForbidden as e:

+         _log.error(

+             "Unable to publish message to topic %s, permission denied: %s",

+             message.topic,

+             str(e),

+         )

@@ -0,0 +1,108 @@ 

+ # This playbook expects the following environment variables to be set for authentication:

+ #   - AWS_ACCESS_KEY_ID

+ #   - AWS_SECRET_ACCESS_KEY

+ 

+ ---

+ - name: Create Fedora AWS marketplace image

+   hosts: localhost

+   # defaults and values largely pulled from fedimg without much thought on my part

+   vars:

+     base_region: us-east-1

+     # current names are fedora-s3-bucket-fedimg{-testing}

+     s3_bucket_name: fedora-image-uploads

+     # current format seems to be Fedora-Cloud-Base-AmazonEC2.x86_64-40-20240619.0

+     ami_name: "Fedora Cloud"

+     ami_description: "Fedora Cloud"

+     ami_virt_type: "hvm"

+     ami_volume_dev_name: "/dev/sda1"

+     ami_volume_type: "gp3"

+     ami_volume_size: 7

+     ami_regions:

+       - 'af-south-1'

+       - 'eu-north-1'

+       - 'ap-south-1'

+       - 'eu-west-3'

+       - 'eu-west-2'

+       - 'eu-south-1'

+       - 'eu-west-1'

+       - 'ap-northeast-3'

+       - 'ap-northeast-2'

+       - 'me-south-1'

+       - 'ap-northeast-1'

+       - 'sa-east-1'

+       - 'ca-central-1'

+       - 'ap-east-1'

+       - 'ap-southeast-1'

+       - 'ap-southeast-2'

+       - 'ap-southeast-3'

+       - 'eu-central-1'

+       - 'us-east-1'

+       - 'us-east-2'

+       - 'us-west-1'

+       - 'us-west-2'

+     architecture: "x86_64"

+   tasks:

+ 

+     - name: Ensure S3 bucket exists

+       amazon.aws.s3_bucket:

+         state: present

+         name: "{{ s3_bucket_name }}"

+         region: "{{ base_region }}"

+ 

+     - name: Checksum local file

+       register: local_image_file

+       ansible.builtin.stat:

+         path: "{{ image_source }}"

+         checksum_algorithm: sha256

+         get_checksum: true

+ 

+     - name: Set s3_object_name to <sha256sum>.raw

+       ansible.builtin.set_fact:

+         s3_object_name: "{{ local_image_file.stat.checksum }}.raw"

+ 

+     - name: Upload image to S3 bucket

+       register: s3_upload

+       amazon.aws.s3_object:

+         region: "{{ base_region }}"

+         src: "{{ image_source }}"

+         bucket: "{{ s3_bucket_name }}"

+         object: "{{ s3_object_name }}"

+         mode: put

+         overwrite: false

+ 

+     # TODO unclear if this task or the next one is what I need.

+     - name: Import image to EC2

+       amazon.aws.ec2_import_image:

+         region: "{{ base_region }}"

+         state: present

+         boot_mode: "uefi-preferred"

+         description: "{{ ami_description }}"

+         disk_containers:

+           description: "Fedora Cloud"

+           device_name: "{{ ami_volume_dev_name }}"

+           format: "raw"

+           user_bucket:

+             s3_bucket: "{{ s3_bucket_name }}"

+             s3_key: "{{ s3_object_name }}"

+         platform: "Linux"

+ 

+     - name: Create AMI from S3 object

+       loop: "{{ ami_regions }}"

+       register: created_ami

+       amazon.aws.ec2_ami:

+         state: present

+         region: "{{ item }}"

+         name: "{{ ami_name }}"

+         boot_mode: "uefi-preferred"

+         description: "{{ ami_description }}"

+         image_location: "{{ s3_upload.url }}"

+         architecture: "{{ architecture }}"

+         virtualization_type: "{{ ami_virt_type }}"

+         enhanced_networking: true

+         device_mapping:

+           - device_name: "{{ ami_volume_dev_name }}"

+             volume_size: "{{ ami_volume_size }}"

+             volume_type: "{{ ami_volume_type }}"

+             delete_on_termination: true

+         launch_permissions:

+           group_names: ['all']

@@ -61,6 +61,8 @@ 

      "azure-mgmt-recoveryservicesbackup",

      "azure-mgmt-notificationhubs",

      "azure-mgmt-eventhub",

+     "boto3",

+     "botocore",

      "click",

      "fedora-messaging",

      "fedora-image-uploader-messages",

@@ -56,6 +56,39 @@ 

  [consumer_config.container]

  registries = ["registry.fedoraproject.org", "quay.io/fedora"]

  

+ [consumer_config.aws]

+ base_region = "us-east-1"

+ s3_bucket_name = "fedora-image-uploads"

+ ami_description = "Fedora Cloud base image."

+ ami_volume_dev_name = "/dev/sda1"

+ ami_volume_type = "gp3"

+ ami_volume_size = 7

+ ami_regions = [

+     "af-south-1",

+     "eu-north-1",

+     "ap-south-1",

+     "eu-west-3",

+     "eu-west-2",

+     "eu-south-1",

+     "eu-west-1",

+     "ap-northeast-3",

+     "ap-northeast-2",

+     "me-south-1",

+     "ap-northeast-1",

+     "sa-east-1",

+     "ca-central-1",

+     "ap-east-1",

+     "ap-southeast-1",

+     "ap-southeast-2",

+     "ap-southeast-3",

+     "eu-central-1",

+     "us-east-1",

+     "us-east-2",

+     "us-west-1",

+     "us-west-2",

+ ]

+ 

+ 

  [qos]

  prefetch_size = 0

  prefetch_count = 25

Untested, still needs message publishing at least.

rebased onto a4064f7

a year ago

@dustymabe can you add @jcline to the AWS Credentials?

I filed https://pagure.io/fedora-infrastructure/issue/11999 to get access - sounds like Kevin can get me set up.

3 new commits added

  • Add initial AWS support
  • Only include handlers if there's a configuration section for them
  • Add helper to fallibly publish messages
a year ago

Ugh. I merged this with the CLI. It needs more work I'm sure, but the only credentials I have access to are in the staging environment.

Pull-Request has been closed by jcline

a year ago