From 111b807923989ad003798e3d49f74d0f27af4be0 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Nov 22 2013 22:38:42 +0000 Subject: Ticket #381 Recognize compressed log files https://fedorahosted.org/389/ticket/381 Reviewed by: mreynolds, tbordaz (Thanks!) Branch: master Fix Description: Add support for gzip, bzip2, and xz compressed files, based on the file extension (.gz, .bz2, .xz). Uses the perl module IO::Uncompress and the IO::Uncompress::AnyUncompress class. This also adds support for TAR archives and compressed TAR archives - any file ending in .tar, .tar.bz2, .tar.gz, .tar.xz, .tgz, .tbz, and .txz. This uses the perl module Archive::Tar to unpack and read the files. Caveats: 1) Assumes all of the regular files in the TAR archive are access logs - archive members which are not plain files are skipped - results are undefined if non-access log plain files are processed 2) Assumes the TAR archive members are in the correct order - that is, the access logs are in date sequential order, with the file "access" last 3) No support for .zip files Platforms tested: RHEL6 x86_64 Flag Day: no Doc impact: yes --- diff --git a/ldap/admin/src/logconv.pl b/ldap/admin/src/logconv.pl index 38a84af..7da46e0 100755 --- a/ldap/admin/src/logconv.pl +++ b/ldap/admin/src/logconv.pl @@ -51,6 +51,8 @@ use IO::File; use Getopt::Long; use DB_File; use sigtrap qw(die normal-signals); +use Archive::Tar; +use IO::Uncompress::AnyUncompress qw($AnyUncompressError); Getopt::Long::Configure ("bundling"); Getopt::Long::Configure ("permute"); @@ -367,9 +369,10 @@ my %monthname = ( my $linesProcessed; my $lineBlockCount; my $cursize = 0; +my $LOGFH; sub statusreport { if ($lineBlockCount > $limit) { - my $curpos = tell(LOG); + my $curpos = tell($LOGFH); my $percent = $curpos/$cursize*100.0; print sprintf "%10d Lines Processed %12d of %12d bytes (%.3f%%)\n",--$linesProcessed,$curpos,$cursize,$percent; $lineBlockCount = 0; @@ -400,50 +403,100 @@ $logCount = $file_count; my $logline; my $totalLineCount = 0; +sub isTarArchive { + my $_ = shift; + return /\.tar$/ || /\.tar\.bz2$/ || /\.tar.gz$/ || /\.tar.xz$/ || /\.tgz$/ || /\.tbz$/ || /\.txz$/; +} + +sub isCompressed { + my $_ = shift; + return /\.gz$/ || /\.bz2$/ || /\.xz$/; +} + +$Archive::Tar::WARN = 0; # so new will shut up when reading a regular file for (my $count=0; $count < $file_count; $count++){ + my $logname = $files[$count]; # we moved access to the end of the list, so if its the first file skip it - if($file_count > 1 && $count == 0 && $skipFirstFile == 1){ - next; - } - $linesProcessed = 0; $lineBlockCount = 0; + if($file_count > 1 && $count == 0 && $skipFirstFile == 1){ + next; + } + $linesProcessed = 0; $lineBlockCount = 0; $logCount--; - my $logCountStr; - if($logCount < 10 ){ - # add a zero for formatting purposes - $logCountStr = "0" . $logCount; - } else { - $logCountStr = $logCount; - } - my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$atime,$mtime,$ctime,$blksize,$blocks); - ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$cursize, - $atime,$mtime,$ctime,$blksize,$blocks) = stat($files[$count]); - print sprintf "[%s] %-30s\tsize (bytes): %12s\n",$logCountStr, $files[$count], $cursize; - - open(LOG,"$files[$count]") or do { openFailed($!, $files[$count]) }; - my $firstline = "yes"; - while(){ - unless ($endFlag) { - if ($firstline eq "yes"){ - if (/^\[/) { - $logline = $_; - $firstline = "no"; + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$atime,$mtime,$ctime,$blksize,$blocks); + ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$cursize, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($logname); + print sprintf "[%03d] %-30s\tsize (bytes): %12s\n",$logCount, $logname, $cursize; + + my $tar = 0; + my $tariter = 0; + my $tarfile = 0; + my $comp = 0; + if (isTarArchive($logname)) { + $tar = Archive::Tar->new(); + $tariter = Archive::Tar->iter($logname); + if (!$tariter) { + print "$logname is not a valid tar archive, or compression is unrecognized: $!\n"; + next; + } + } elsif (isCompressed($logname)) { + $comp = 1; + } + + while (!$tariter or ($tarfile = $tariter->())) { + if ($tarfile) { + if ($tarfile->is_file) { + print sprintf "\t%-30s\tsize (bytes): %12s\n",$tarfile->name, $tarfile->size; + $cursize = $tarfile->size; + } else { + print "\tskipping non-file $tarfile->name\n"; + next; + } + if (isCompressed($tarfile->name)) { + $LOGFH = new IO::Uncompress::AnyUncompress \$tarfile->name or + do { openFailed($AnyUncompressError, $logname); next }; + # no way in general to know how big the uncompressed file is - so + # assume a factor of 10 inflation - only used for progress reporting + $cursize *= 10; + } else { + open(LOG,"<",\$tarfile->data) or do { openFailed($!, $tarfile->name) ; next }; + $LOGFH = \*LOG; + } + } elsif ($comp) { + $LOGFH = new IO::Uncompress::AnyUncompress $logname or + do { openFailed($AnyUncompressError, $logname); next }; + # no way in general to know how big the uncompressed file is - so + # assume a factor of 10 inflation - only used for progress reporting + $cursize *= 10; + } else { + open(LOG,$logname) or do { openFailed($!, $logname); next }; + $LOGFH = \*LOG; + } + my $firstline = "yes"; + while(<$LOGFH>){ + unless ($endFlag) { + if ($firstline eq "yes"){ + if (/^\[/) { + $logline = $_; + $firstline = "no"; + } + $linesProcessed++;$lineBlockCount++; + } elsif (/^\[/ && $firstline eq "no"){ + &parseLine(); + $logline = $_; + } else { + $logline = $logline . $_; + $logline =~ s/\n//; } - $linesProcessed++;$lineBlockCount++; - } elsif (/^\[/ && $firstline eq "no"){ - &parseLine(); - $logline = $_; - } else { - $logline = $logline . $_; - $logline =~ s/\n//; - } - } - } - &parseLine(); - close (LOG); - print_stats_block( $s_stats ); - print_stats_block( $m_stats ); - $totalLineCount = $totalLineCount + $linesProcessed; + } + } + &parseLine(); + close ($LOGFH); + print_stats_block( $s_stats ); + print_stats_block( $m_stats ); + $totalLineCount = $totalLineCount + $linesProcessed; statusreport(); + last if (!$tariter); + } } print "\n\nTotal Log Lines Analysed: " . ($totalLineCount - 1) . "\n";