#31 Use checksum (not mtime) to check if filelist changed
Merged 7 years ago by tibbs. Opened 7 years ago by adamwill.
adamwill/quick-fedora-mirror checksum  into  master

file modified
+5 -17
@@ -388,21 +388,15 @@ 

  logit o Remote file list download start

  db2 Downloading file lists

  # ZSHISM (declare associative array)

- typeset -A fltimes

+ typeset -A checksums

  for module in $MODULES; do

      # ZSHISM? (associative array indexing)

      moduledir=$MODULEMAPPING[$module]

      mkdir $moduledir

      flname=${FILELIST/'$mdir'/$moduledir}

- 

-     # ZSHISM (assign assoc. array value)

-     fltimes[$module]=0

      if [[ -f $DESTD/$moduledir/$flname ]]; then

-         cp -p $DESTD/$moduledir/$flname $moduledir

-         # So that we can diff it later

-         cp -p $moduledir/$flname $flname.old

          # ZSHISM (assign assoc. array value)

-         fltimes[$module]=$(stat --format=%Y $moduledir/$flname)

+         checksums[$module]=$(sha1sum $DESTD/$moduledir/$flname | cut -d' ' -f1)

      fi

  

      echo $moduledir/$flname >> filelist-transferlist
@@ -431,9 +425,9 @@ 

      # First fetch each filefiletimelist, and make one big transfer list from

      # all of them.  Copy in the old file from our repo to speed up rsync in case

      # most of it didn't change

-     if [[ -z $alwayscheck && -f $fl.old && $(stat --format=%Y $fl.old) == $(stat --format=%Y $fl) ]]; then

-         logit N No change in file list time for $module

-         db2 No change in file list mtime.  Skipping $module.

+     if [[ -n $checksums[$module] ]] && [[ $(sha1sum $fl | cut -d' ' -f1) == $checksums[$module] ]]; then

+         logit N No change in file list for $module

+         db2 No change in file list checksum.  Skipping $module.

          continue

      fi

  
@@ -441,12 +435,6 @@ 

      logit P Processing start: $module

      db2 Processing $module

  

-     # Diffing the whole thing is not really useful

-     #if [[ $VERBOSE -ge 6 && -f $fl.old ]]; then

-     #    echo "Changes in file list since last run:"

-     #    diff -u $fl.old $fl

-     #fi

- 

      flversion=$(awk -F '\t' '/^\[Version/ {s=1; next} /^$/ {exit} {if (s) print $0}' < $fl)

      if [[ "$flversion" != '2' ]]; then

          (>&2 echo File list version from the mirror cannot be processed by this script.

My mirror files are on an NFS share, which seems to mess with
the mtime check. That seems like a slightly fragile approach
anyhow, so why not just checksum the list (with sha1sum)? This
re-purposes the fltimes array, which AFAICS was never used for
anything at all, as a checksums array and uses sha1sum for the
'file list did not change, bail out now' check. It also entirely
removes the disabled diffing code; having commented-out old stuff
all over the place is awful for readability, and the commit history
is the correct place to memorialize old mistakes ;)

rebased

7 years ago

rebased

7 years ago

rebased

7 years ago

rebased

7 years ago

Pull-Request has been merged by tibbs

7 years ago
Metadata