#255 taskotron: install timer to kill zombie VMs from ImageFactory
Closed 6 years ago by kparal. Opened 6 years ago by kparal.

@@ -0,0 +1,77 @@ 
+ #!/bin/bash
+ # Kill all imagefactory build processes which took longer than specified
+ # (3 hours by default)
+ #
+ # Usage: imagefactory-kill-zombie [TIMEOUT]
+ #   TIMEOUT  maximum allowed process age in minutes (default: 180)
+ #
+ # Exit status: 0 if help was printed or nothing is left to kill,
+ #              1 if TIMEOUT is invalid or a process could not be handled.
+ 
+ DEFAULT_TIMEOUT=180
+ 
+ # print usage
+ if [ "$1" = '--help' ] || [ "$1" = '-h' ]; then
+     echo "Usage: $0 [TIMEOUT]"
+     echo -n 'Kill all imagefactory build processes which took longer than '
+     echo    "TIMEOUT (in minutes, $DEFAULT_TIMEOUT by default)."
+     # help was explicitly requested, so this is not an error
+     exit 0
+ fi
+ 
+ # pgrep -f uses this as a regular expression matched against the full
+ # command line of every process
+ PROCESS_ID='/usr/bin/qemu-system-x86_64 -machine accel=kvm -name guest=factory-build'
+ 
+ TIMEOUT=${1:-$DEFAULT_TIMEOUT}
+ # Accept only plain digits: bash arithmetic evaluates any other word as an
+ # expression (an unset name counts as 0), so a typo would yield a zero
+ # timeout and kill every running build.
+ if ! [[ "$TIMEOUT" =~ ^[0-9]+$ ]]; then
+     echo "Invalid TIMEOUT '$TIMEOUT': expected a whole number of minutes" >&2
+     exit 1
+ fi
+ # convert to seconds (10# forces base 10, so '090' is not parsed as octal)
+ TIMEOUT=$(( 10#$TIMEOUT * 60 ))
+ 
+ # only the exit status of pgrep matters here, its PID list is just noise
+ while pgrep -f "$PROCESS_ID" > /dev/null; do
+     # get PID of the oldest (pgrep -o) running matched process
+     PID=$(pgrep -o -f "$PROCESS_ID")
+     if [ -z "$PID" ]; then
+         echo 'No (more) processes match, exiting'
+         exit
+     fi
+     # elapsed time in seconds; ps pads the value with spaces, strip them
+     AGE=$(ps -o etimes= -p "$PID")
+     AGE=${AGE//[[:space:]]/}
+     if [ -z "$AGE" ]; then
+         if [ -e "/proc/$PID" ]; then
+             echo "Cannot determine the age of process $PID, giving up" >&2
+             exit 1
+         fi
+         # the process ended between pgrep and ps, look for the next one
+         continue
+     fi
+     if (( AGE >= TIMEOUT )); then
+         echo "Process matched, killing: $(ps -p "$PID" --no-headers -o pid,args)"
+         kill -s TERM "$PID"
+         # wait a while and see if the process is really terminated, otherwise
+         # force kill it
+         sleep 3
+         if [ "$PID" = "$(pgrep -o -f "$PROCESS_ID")" ]; then
+             echo "Process $PID wasn't terminated, force killing it"
+             kill -s KILL "$PID"
+             # Give the kernel a moment to remove it. If it is still the oldest
+             # match, stop instead of retrying the same PID forever.
+             sleep 1
+             if [ "$PID" = "$(pgrep -o -f "$PROCESS_ID")" ]; then
+                 echo "Process $PID survived SIGKILL, giving up" >&2
+                 exit 1
+             fi
+         fi
+     else
+         echo 'Some processes match, but they are not old enough, exiting'
+         exit
+     fi
+ done

@@ -0,0 +1,10 @@ 
+ # Runs /usr/local/bin/imagefactory-kill-zombie once per activation.
+ # NOTE(review): presumably activated by the timer unit of the same name - confirm.
+ [Unit]
+ Description=Kill hanging imagefactory build processes
+ 
+ [Service]
+ Type=oneshot
+ # give up if the script has not finished within 5 minutes
+ TimeoutStartSec=300
+ ExecStart=/usr/local/bin/imagefactory-kill-zombie

@@ -0,0 +1,12 @@ 
+ [Unit]
+ Description=Kill hanging imagefactory build processes regularly
+ 
+ [Timer]
+ # catch up after boot if a run was missed while the machine was off
+ Persistent=true
+ # once a day, shifted by a random delay of up to one hour
+ OnCalendar=daily
+ RandomizedDelaySec=1h
+ 
+ [Install]
+ WantedBy=timers.target

@@ -13,6 +13,28 @@ 
  - name: copy oz.cfg config file
    copy: src=oz.cfg dest=/etc/oz/oz.cfg owner=root group=root mode=0644
  
+ - name: copy imagefactory-kill-zombie script
+   copy:
+     src: imagefactory-kill-zombie
+     dest: /usr/local/bin
+     owner: root
+     group: root
+     mode: '0744'
+ 
+ # Registered variable names use underscores only: a hyphen is not allowed in
+ # an Ansible variable name and would be read as a minus sign in `when:`.
+ - name: copy imagefactory-kill-zombie service
+   copy:
+     src: imagefactory-kill-zombie.service
+     dest: /etc/systemd/system
+   register: imagefactory_kill_zombie_service
+ 
+ - name: copy imagefactory-kill-zombie timer
+   copy:
+     src: imagefactory-kill-zombie.timer
+     dest: /etc/systemd/system
+   register: imagefactory_kill_zombie_timer
+ 
  - name: hotfix imagefactory's REST api to allow file download
    copy: src=hotfix_imgfac_RESTv2.py dest=/usr/lib/python2.7/site-packages/imgfac/rest/RESTv2.py owner=root group=root mode=0644
  
@@ -25,11 +47,17 @@ 
  
  - name: reload systemd
    command: systemctl daemon-reload
-   when: imagefactory_service.changed
+   when: imagefactory_service.changed or imagefactory_kill_zombie_service.changed or imagefactory_kill_zombie_timer.changed
  
  - name: enable imagefactory
    service: name=imagefactoryd state=started enabled=yes
  
+ - name: enable imagefactory-kill-zombie.timer
+   service:
+     name: imagefactory-kill-zombie.timer
+     state: started
+     enabled: yes
+ 
  - name: create directory for git clone
    file: path=/var/lib/fedoraqa state=directory owner=root group=root mode=1755