#153 roles/awstats/files/combineHttpLogs.sh: accept DATE_STR
Merged 3 years ago by smooge. Opened 3 years ago by wwoods.
fedora-infra/ wwoods/ansible master  into  master

@@ -19,26 +19,58 @@ 

  # along with Fedora Project Infrastructure Ansible Repository.  If

  # not, see <http://www.gnu.org/licenses/>.

  

- # Because sync-http may not get all logs for 3 days, we only merge

- # things after 4 days. 

- 

- # 2019-10-01 Dropped this down to 3 days.. 

- 

- NUMDAYS=2

- YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)

- MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)

- DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)

- 

+ # Some constants / standard paths

  LOGDIR=/var/log/hosts

  NFSDIR=/mnt/fedora_stats/combined-http

- PROXYLOG=${LOGDIR}/proxy*/${YEAR}/${MONTH}/${DAY}/http/

- DL_LOG=${LOGDIR}/dl*/${YEAR}/${MONTH}/${DAY}/http/

- PEOPLE=${LOGDIR}/people*/${YEAR}/${MONTH}/${DAY}/http/

+ LOGMERGE=/usr/share/awstats/tools/logresolvemerge.pl

  

- TARGET=${NFSDIR}/${YEAR}/${MONTH}/${DAY}

+ # Because sync-http may not get all logs immediately, we look back

+ # a couple days to find the "latest" logs to merge.

+ LATEST_LOG_DATE="2 days ago"

+ 

+ # Function to parse a DATE_STR and print YYYY/MM/DD

+ ymd() { date -d "$*" +%Y/%m/%d; }

+ 

+ # Get YYYY/MM/DD for LATEST_LOG_DATE, for later use

+ LATEST_YMD=$(ymd $LATEST_LOG_DATE)

+ 

+ # Prints usage. Also serves as docs for anyone reading the source (hi there!)

+ usage() {

+ cat <<__USAGE__

+ usage: $0 [DATE_STR]

+ combine daily logs from $LOGDIR to $NFSDIR.

+ 

+ Default date is '$LATEST_LOG_DATE' (currently $LATEST_YMD).

+ DATE_STR may be any date older than that, in any format understood by date(1):

+   "June 9"

+   "2020-06-23 -2weeks"

+ __USAGE__

+ }

+ 

+ # Check CLI args to set LOG_DATE

+ case $# in

+     0) LOG_DATE="$LATEST_LOG_DATE"; UPDATE_LATEST=1 ;;

+     1) [ "$1" == "-h" -o "$1" == "--help" ] && usage && exit 0

+        LOG_DATE="$1" ;;

+     *) usage; exit 2 ;;

+ esac

+ 

+ # Parse LOG_DATE

+ YMD=$(ymd $LOG_DATE) || exit 2

+ 

+ # Safety check for dates that are too new for us to handle.

+ # (Also catches weird "dates" that date(1) allows, like '' or '0' or 'wet')

+ if [[ "$YMD" > "$LATEST_YMD" ]]; then

+     echo "$0: error: DATE_STR '$LOG_DATE' ($YMD) newer than LATEST_LOG_DATE ($LATEST_YMD)" >&2

+     exit 3

+ fi

  

- LOGMERGE=/usr/share/awstats/tools/logresolvemerge.pl

+ # Okay we're good. Set paths, make directories, and do some merging!

+ PROXYLOG=${LOGDIR}/proxy*/${YMD}/http/

+ DL_LOG=${LOGDIR}/dl*/${YMD}/http/

+ PEOPLE=${LOGDIR}/people*/${YMD}/http/

  

+ TARGET=${NFSDIR}/${YMD}

  mkdir -p ${TARGET}

  

  ##
@@ -76,10 +108,10 @@ 

  # 3. remove the old links

  # 4. link up all the files we merged over

  

- if [[ -d ${NFSDIR}/latest ]]; then

+ if [[ "$UPDATE_LATEST" && -d ${NFSDIR}/latest ]]; then

      pushd ${NFSDIR}/latest &> /dev/null

      /bin/rm -f *

-     for file in ../${YEAR}/${MONTH}/${DAY}/*; do

+     for file in ../${YMD}/*; do

  	ln -s ${file} .

      done

      popd &> /dev/null

This commit tweaks combineHttpLogs.sh so that you can provide an
optional DATE_STR on the CLI to have it run for a date other than the
default (which is currently 2 days ago, despite the comments about it
being somewhere between 3 and 5 days). Links in latest/ are not updated
when running for user-provided dates.

There's some other refactoring involved but AFAICT this doesn't change
its default behavior at all.

This should let us re-run this script manually to combine logs for the
days that we missed during the colo move etc.

Pull-Request has been merged by smooge

3 years ago