#258 Updated playbooks for MBS v3 deployment
Merged 3 years ago by breilly. Opened 3 years ago by breilly.
fedora-infra/ breilly/ansible refactormbs-17  into  main

@@ -40,3 +40,7 @@ 

      mbs-frontend nodes.

  

      NOTE - this system has a KRB service principal with elevated koji privileges.

+ 

+ mbs_broker_url: "amqp://mbs{{ env_suffix }}:{{ mbs_backend_amqp_password }}@rabbitmq{{ env_suffix }}.fedoraproject.org/%2Fmbs"

+ mbs_num_workers: 3

+ mbs_systemd_wait_for_rabbitmq: true

@@ -47,3 +47,7 @@ 

      mbs-frontend nodes.

  

      NOTE - this system has a KRB service principal with elevated koji privileges.

+ 

+ mbs_broker_url: "amqp://mbs{{ env_suffix }}:{{ mbs_backend_amqp_password }}@rabbitmq{{ env_suffix }}.fedoraproject.org/%2Fmbs"

+ mbs_num_workers: 3

+ mbs_systemd_wait_for_rabbitmq: true

@@ -0,0 +1,4 @@ 

+ ---

+ mbs_broker_url: ""

+ mbs_systemd_wait_for_rabbitmq: false

+ mbs_celery_max_worker_tasks: 50

@@ -13,6 +13,8 @@ 

    notify:

    - restart apache

    - restart fedmsg-hub

+ #  - restart mbs poller

+ #  - restart mbs workers

    tags:

    - mbs

    - mbs/common
@@ -36,6 +38,8 @@ 

    notify:

    - restart apache

    - restart fedmsg-hub

+ #  - restart mbs poller

+ #  - restart mbs workers

    tags:

    - mbs

    - mbs/common
@@ -102,6 +106,108 @@ 

    - mbs

    - mbs/common

  

+ - name: Configure MBS virtual host in RabbitMQ

+   block:

+   - name: Configure the MBS virtual host

+     run_once: true

+     delegate_to: "rabbitmq01{{ env_suffix }}.iad2.fedoraproject.org"

+     rabbitmq_vhost:

+       name: /mbs

+       state: present

+     tags:

+       - rabbitmq_cluster

+       - config

+       - mbs

+       - mbs/common

+ 

+   - name: Configure the HA policy for the MBS queues

+     run_once: true

+     delegate_to: "rabbitmq01{{ env_suffix }}.iad2.fedoraproject.org"

+     rabbitmq_policy:

+       name: HA

+       apply_to: queues

+       pattern: .*

+       tags:

+         ha-mode: all

+         ha-sync-mode: automatic  # Auto sync queues to new cluster members

+         ha-sync-batch-size: 10000  # Larger is faster, but must finish in 1 net_ticktime

+       vhost: /mbs

+     tags:

+       - rabbitmq_cluster

+       - config

+       - mbs

+       - mbs/common

+ 

+   - name: Grant the mbs user access to the MBS vhost

+     run_once: true

+     delegate_to: "rabbitmq01{{ env_suffix }}.iad2.fedoraproject.org"

+     rabbitmq_user:

+       user: "mbs{{ env_suffix }}"

+       password: "{{ mbs_backend_amqp_password }}"

+       vhost: /mbs

+       configure_priv: .*

+       read_priv: .*

+       write_priv: .*

+       update_password: always

+     tags:

+       - rabbitmq_cluster

+       - config

+       - mbs

+       - mbs/common

+   when: not mbs_frontend and env_suffix == ".stg"

+ 

+ - name: Configure the MBS workers and poller

+   block:

+     - name: Add the systemd service files

+       template:

+         src: "{{ item.src }}"

+         dest: "{{ item.dest }}"

+         owner: root

+         group: root

+         mode: "0644"

+       with_items:

+       - src: mbs-worker@.service.j2

+         dest: /etc/systemd/system/mbs-worker@.service

+       - src: mbs-poller.service.j2

+         dest: /etc/systemd/system/mbs-poller.service

+       notify:

+       - restart mbs poller

+       - restart mbs workers

+       tags:

+         - mbs

+         - mbs/common

+ 

+     - name: Enable the MBS workers

+       systemd:

+         name: "mbs-worker@{{ item }}"

+         daemon_reload: yes

+         enabled: yes

+         state: started

+       with_sequence: start=0 end={{ mbs_num_workers - 1 }}

+       tags:

+         - mbs

+         - mbs/common

+ 

+     - name: Populate the service facts to detect if there are MBS workers to disable

+       service_facts: {}

+       tags:

+         - mbs

+         - mbs/common

+ 

+     - name: Disable any extra MBS workers

+       systemd:

+         name: "mbs-worker@{{ worker_num }}"

+         enabled: no

+         state: stopped

+       with_items: "{{ ansible_facts.services | select('match', 'mbs-worker@\\d+.service') | list }}"

+       vars:

+         worker_num: "{{ item | regex_search('\\d+') }}"

+       when: (worker_num | int) >= mbs_num_workers

+       tags:

+         - mbs

+         - mbs/common

+   when: not mbs_frontend and env_suffix == ".stg"

+ 

  - name: create /etc/module-build-service/default-modules directory

    file:

      path: /etc/module-build-service/default-modules

@@ -19,6 +19,8 @@ 

      HOST = '0.0.0.0'

      PORT = 5000

  

+     CELERY_BROKER_URL = '{{ mbs_broker_url }}'

+ 

      # Global network-related values, in seconds

      NET_TIMEOUT = 120

      NET_RETRY_INTERVAL = 30
@@ -63,6 +65,9 @@ 

      KRB_PRINCIPAL = None

      KRB_CCACHE = None

  

+     # Number of celery workers

+     NUM_WORKERS = {{ mbs_num_workers }}

+ 

      # AMQ prefixed variables are required only while using 'amq' as messaging backend

      # Addresses to listen to

      AMQ_RECV_ADDRESSES = ['amqps://messaging.mydomain.com/Consumer.m8y.VirtualTopic.eng.koji',

@@ -0,0 +1,14 @@ 

+ [Unit]

+ Description=MBS Poller

+ After=network.target{{ ' rabbitmq-server.service' if mbs_systemd_wait_for_rabbitmq else '' }}

+ 

+ [Service]

+ Type=simple

+ # Always restart the service, even if it exits cleanly

+ Restart=always

+ RestartSec=1

+ User=fedmsg

+ ExecStart=/usr/bin/celery beat -A module_build_service.scheduler.celery_app --loglevel=info --pidfile /var/run/fedmsg/mbs-scheduler.pid -s /var/run/fedmsg/mbs-scheduler.db

+ 

+ [Install]

+ WantedBy=multi-user.target

@@ -0,0 +1,14 @@ 

+ [Unit]

+ Description=MBS worker %I

+ After=network.target{{ ' rabbitmq-server.service' if mbs_systemd_wait_for_rabbitmq else '' }}

+ 

+ [Service]

+ Type=simple

+ # Always restart the service, even if it exits cleanly

+ Restart=always

+ RestartSec=1

+ User=fedmsg

+ ExecStart=/usr/bin/celery worker -n mbs-worker-%I -Q mbs-default,mbs-%I -A module_build_service.scheduler.celery_app --loglevel=info --max-tasks-per-child={{ mbs_celery_max_worker_tasks }}

+ 

+ [Install]

+ WantedBy=multi-user.target

MBS v3 now uses rabbitmq for its backend workers. These changes will install and configure rabbitmq and its workers.

Did you mean to make this a string?

You're not going to use a separate vhost on the RabbitMQ instance hosted by Fedora Infra? This seems like a lost opportunity.

Also, if you pursue this path, does this VM have the Yum repo for the latest RabbitMQ RPMs? The one in EPEL7 was quite old the last time I checked.

rebased onto 0b3dab985f47ec61a3a80d530de172a17da6edbc

3 years ago

rebased onto ecdee8897fcecc26fc9d323334bba5e485552448

3 years ago

Yeah, as matt mentioned, we have a rabbitmq cluster we use for fedora-messaging, but also for applications that need it. Look at roles/rabbitmq_cluster/tasks/apps.yml We could set you up a queue/user there and then you don't have to maintain another rabbitmq queue.

Also, if you pursue this path, does this VM have the Yum repo for the latest RabbitMQ RPMs? The one in EPEL7 was quite old the last time I checked.

Our cluster is using the rabbitmq from openstack... ( rabbitmq-server-3.7.23-2.el8ost ) as we needed some newer features.

This would be simplified a good deal just using our cluster. ;) But if you really don't want to... let us know.

Thanks Matt and Kevin, good point about the existing rabbitmq instance - I'll update the PR to take advantage of that.

rebased onto 84a3afeae56640dbc2660a43ae57cb634ca9d20d

3 years ago

Updated to use the existing rabbitmq instance. Let me know if anything needs to be corrected, or if more configuration is needed.

Thanks! This looks pretty good... I don't see where celery is installed on the backend though?

@kevin good catch - that's an install dep for module-build-service internally, and it's also been added in the basic config upstream for v3. I'll be updating the fedora infra module-build-service specfile to include python2-celery as well for the v3 deployment, but if it makes more sense I can add it to the ansible scripts too.

Ah, yes that makes sense to me.

That's the last thing I see... we can merge this now, or wait until I have mbs fully functional in stg before we merge it. I still need to get a few things sorted on the staging buildsystem.

Oh sure, we can wait to merge this if that makes things easier on your end, and merge when we're ready to deploy v3.

MBS uses Celery but it requires a lot of permissions [1] to automatically create some queues. My suggestion is to create a separate RabbitMQ vhost and give the MBS service full access to it.

https://docs.celeryproject.org/en/stable/getting-started/brokers/rabbitmq.html#setting-up-rabbitmq

rebased onto c80fc4b3874b600f824450abe4612f3d78e408ea

3 years ago

Thanks @mprahl, updated the PR with vhost configuration.

Is this still needed after the vhost configuration?

rebased onto be3545c83886c788334aa561ed6ebac62ddad902

3 years ago

Good catch, removed the extra queue config.

So, if we merge this now, it's going to affect both prod and stg? Could you perhaps adjust it so the changes only affect stg?

I would hate for us to push this, then need to make some change in prod and get things messed up.

If you can do that it's fine to merge anytime you like...

rebased onto 2d1636a16fcaebad0d897e7dafd936929ad9aa77

3 years ago

Looks ok to me. Merge at your leisure. :)

rebased onto a00708b

3 years ago

Pull-Request has been merged by breilly

3 years ago