#238 Automate running `countme` update scripts
Merged 3 years ago by smooge. Opened 3 years ago by wwoods.
fedora-infra/ wwoods/ansible countme  into  master

@@ -0,0 +1,1 @@ 

+ 0 09  * * * countme /usr/local/bin/countme-update.sh > /dev/null

@@ -0,0 +1,101 @@ 

+ #!/bin/bash

+ 

+ # Where do we keep our local/internal data?

+ LOCAL_DATA_DIR=/var/lib/countme

+ RAW_DB=$LOCAL_DATA_DIR/raw.db

+ TOTALS_DB=$LOCAL_DATA_DIR/totals.db

+ TOTALS_CSV=$LOCAL_DATA_DIR/totals.csv

+ 

+ # Where do we put the public-facing data?

+ PUBLIC_DATA_DIR=/var/www/html/csv-reports/countme

+ PUBLIC_TOTALS_DB=$PUBLIC_DATA_DIR/totals.db

+ PUBLIC_TOTALS_CSV=$PUBLIC_DATA_DIR/totals.csv

+ 

+ # Names of the update commands (if not in $PATH..)

+ UPDATE_RAWDB=countme-update-rawdb.sh

+ UPDATE_TOTALS=countme-update-totals.sh

+ 

# atomic_copy SRC DST: copy SRC over DST without exposing a half-written file.
# The data is first copied to "DST.part" and then moved onto DST with mv.
# Returns non-zero if either the copy or the move fails.
atomic_copy() {
    local src="$1" dst="$2"
    # Quote the paths so names containing whitespace or glob characters work,
    # and pass "--" so a path starting with "-" is not taken as an option.
    # Stop if the copy fails: otherwise a stale "DST.part" left behind by an
    # earlier failed run would be moved into place as if it were fresh data.
    cp -f -- "${src}" "${dst}.part" || return
    mv -f -- "${dst}.part" "${dst}"
}

+ 

+ # die [MESSAGE]: prints "$PROG: error: $MESSAGE" on stderr and exits

+ die() { echo "${0##*/}: error: $*" >&2; exit 2; }

+ 

# _run [COMMAND...]: Run a command, honoring $VERBOSE and $DRYRUN.
# If $VERBOSE or $DRYRUN is non-empty, the command line is echoed to stdout
# first. If $DRYRUN is non-empty the command is not executed and 0 is
# returned; otherwise the command runs and its exit status is returned.
_run() {
    # [[ ... || ... ]] replaces the obsolescent "-o" operator of [ ], and the
    # ${VAR:-} form keeps the checks safe when the variables are unset.
    if [[ -n "${VERBOSE:-}" || -n "${DRYRUN:-}" ]]; then
        echo "$@"
    fi
    if [[ -n "${DRYRUN:-}" ]]; then
        return 0
    fi
    "$@"
}

+ 

+ # CLI help text

+ HELP_USAGE="usage: countme-updates.sh [OPTION]..."

+ HELP_OPTIONS="

+ Options:

+   -h, --help           Show this message and exit

+   -v, --verbose        Show more info about what's happening

+   -n, --dryrun         Don't run anything, just show commands

+   -p, --progress       Show progress meters while running

+ "

+ 

+ # Turn on progress by default if stderr is a tty

+ if [ -z "$PROGRESS" -a -t 2 ]; then PROGRESS=1; fi

+ 

# Parse CLI options with getopt(1)
# getopt reports unknown options or missing arguments on stderr and exits
# non-zero; stop in that case instead of going on to run the updates with
# options that were never understood. Exit status 2 matches die().
_GETOPT_TMP=$(getopt \
    --name countme-update \
    --options hvnp \
    --longoptions help,verbose,dryrun,progress,checkoutdir: \
    -- "$@") || exit 2
# Replace the positional parameters with getopt's normalized argument list.
eval set -- "$_GETOPT_TMP"
unset _GETOPT_TMP
while [ $# -gt 0 ]; do
    arg=$1; shift
    case $arg in
        '-h'|'--help') echo "$HELP_USAGE"; echo "$HELP_OPTIONS"; exit 0 ;;
        '-v'|'--verbose') VERBOSE=1 ;;
        '-n'|'--dryrun') DRYRUN=1 ;;
        '-p'|'--progress') PROGRESS=1 ;;
        # Hidden option for testing / manual use; consumes the next word as
        # its argument.
        '--checkoutdir') COUNTME_CHECKOUT=$1; shift ;;
        # End of options.
        '--') break ;;
    esac
done

+ 

# Tweak path if needed
# When $COUNTME_CHECKOUT (set by --checkoutdir or inherited from the
# environment) names an existing directory, run from it and search it and its
# scripts/ subdirectory for commands ahead of the rest of $PATH.
if [ -d "$COUNTME_CHECKOUT" ]; then
    # Quote the path so directories with spaces work, and stop if the
    # directory cannot be entered rather than continuing from the wrong place.
    cd -- "$COUNTME_CHECKOUT" || die "can't cd to '$COUNTME_CHECKOUT'"
    PATH="$COUNTME_CHECKOUT:$COUNTME_CHECKOUT/scripts:$PATH"
fi

+ 

+ # Check for required commands

+ command -v $UPDATE_RAWDB  >/dev/null || die "can't find '$UPDATE_RAWDB'"

+ command -v $UPDATE_TOTALS >/dev/null || die "can't find '$UPDATE_TOTALS'"

+ command -v git            >/dev/null || die "can't find 'git'"

+ 

+ # Apply other CLI options

+ if [ "$PROGRESS" ]; then

+     UPDATE_RAWDB="$UPDATE_RAWDB --progress"

+     UPDATE_TOTALS="$UPDATE_TOTALS --progress"

+ fi

+ 

+ # Exit immediately on errors

+ set -e

+ 

+ # Run the updates

+ _run $UPDATE_RAWDB --rawdb $RAW_DB

+ _run $UPDATE_TOTALS --rawdb $RAW_DB --totals-db $TOTALS_DB --totals-csv $TOTALS_CSV

+ 

+ # Update local git repo

+ if [ ! -d $LOCAL_DATA_DIR/.git ]; then

+     _run git init $LOCAL_DATA_DIR

+     _run git -C $LOCAL_DATA_DIR add -N $(realpath $TOTALS_CSV --relative-to $LOCAL_DATA_DIR)

+ fi

+ _run git -C $LOCAL_DATA_DIR commit -a -m "$(date -u +%F) update"

+ 

+ # Copy new data into place

+ _run atomic_copy $TOTALS_DB $PUBLIC_TOTALS_DB

+ _run atomic_copy $TOTALS_CSV $PUBLIC_TOTALS_CSV

@@ -0,0 +1,3 @@ 

+ [user]

+     name = countme log-parser bot

+     email = countme@data-analysis.fedoraproject.org

@@ -85,3 +85,62 @@ 

      - web-data

      - cron

  

+ - name: install package deps for mirrors-countme

+   package:

+     # tqdm is optional but it gives nice progress meters for interactive use

+     name: ['python3-pip', 'python3-setuptools', 'python3-tqdm']

+     state: present

+   tags:

+     - packages

+     - web-data

+ 

+ - name: make countme user

+   user:

+     name: countme

+     group: countme

+     shell: /sbin/nologin

+     home: /srv/countme

+     comment: "DNF countme counter"

+   tags:

+     - web-data

+ 

+ - name: install countme user gitconfig

+   copy: src=countme.gitconfig dest=/srv/countme/.gitconfig mode=0664 owner=countme group=countme

+   tags:

+     - web-data

+ 

+ - name: checkout mirrors-countme from git

+   git:

+     repo: https://pagure.io/mirrors-countme

+     dest: /srv/countme/mirrors-countme

+     version: prod

Is that going to re-clone every time, or will it also fetch/update if the clone already exists?

+   register: gitcountme

+   tags:

+     - web-data

+ 

+ - name: install mirrors-countme from git checkout

+   command: "pip install --no-index --no-deps /srv/countme/mirrors-countme"

Hm, is there any way we could package this as an RPM, include it in the infra repo, and install it that way instead of from git/pip? (+ I know how much you love RPM :))

+   when: "gitcountme is changed"

+   tags:

+     - web-data

+ 

+ - name: make countme web subdir

+   file: path=/var/www/html/csv-reports/countme state=directory mode=0775 owner=countme group=countme

+   tags:

+     - web-data

+ 

+ - name: make countme local data dir

+   file: path=/var/lib/countme state=directory mode=0775 owner=countme group=countme

+   tags:

+     - web-data

+ 

+ - name: install countme script to parse new logs & update totals

+   copy: src=countme-update.sh dest=/usr/local/bin/ mode=0755

+   tags:

+     - web-data

+ 

+ - name: install cron file to run countme-update.sh daily

+   copy: src=countme-update.cron dest=/etc/cron.d/ mode=0644

+   tags:

+     - web-data

+     - cron

This should automate running the "countme" scripts every day to parse new log data and publish updated totals.
There's more detailed info in commit e6376b9, but here are a couple of notes:

  1. It's currently pulling code from the prod branch of https://pagure.io/mirrors-countme. I've seen that pattern used elsewhere in the playbooks, so I think that should be OK here?
  2. I tried to make this run as countme instead of root wherever possible. That's different from how the other scripts run, but it seems like a good idea.
  3. Keeping totals.csv in a private git repo isn't required but it also seemed like a good idea to have complete history of changes to the data we publish - especially if we later end up changing the counting algorithms or something. Good for accountability!

I ran the YAML changes through ansible-lint and it came out OK, and I tested the scripts manually and they're running fine.
But: this is my first attempt to modify any of our playbooks, so.. sanity checks and close reviews might be a good idea. OK thanks!

Is that going to re-clone every time, or will it also fetch/update if the clone already exists?

Hm, is there any way we could package this as an RPM, include it in the infra repo, and install it that way instead of from git/pip? (+ I know how much you love RPM :))

I think we could simplify this if we packaged countme as an RPM and installed it that way.

Yeah, for the git clone it would be nice to use 'version=' to tie it to a commit. Then, when we want to update it we just update that and the rest of the time it doesn't pull changes when we are trying to do something else.

For the pip install, yeah, an rpm would be nicer. Failing that, however, ansible has a pip module; for now, could you use that instead of shell?

Can you fix the git clone and the pip call and then we can merge this and get the rpm done after?

rebased onto f8a5720

3 years ago

rebased onto f8a5720

3 years ago

Pull-Request has been merged by smooge

3 years ago