IIS Log File Stripper
Tagged:  •    •  

The following script filters the IIS-format log files found in a given input directory and writes the filtered results to the specified output path. IIS log files are typically named exYYMMDD.log. Using the starts_with parameter, you can also select which files to process by name prefix. For example, suppose C:\IISLogs\ contains the following:

ex030715.log
ex030716.log
ex030717.log
ex030722.log
ex030723.log
ex030729.log
ex030802.log
ex030803.log
Running iisstripper.pl C:\Filtered\ C:\IISLogs\ ex0308 will result in the following filtered IIS log files in C:\Filtered\:
ex030802.log
ex030803.log

Filtering is done against the URI of each request. The filter patterns are not passed on the command line; they are configured inside the script itself (see the CONFIG section).

#!/usr/bin/perl
# Copyright (C) 2003 Andrew Loree
# $Id: iisstripper.pl,v 1.1.1.1 2003/10/05 21:51:29 andy Exp $
############################################################################
#
# iisstripper.pl - Filters IIS Log files based upon URIs
#
############################################################################
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#############################################################################
# Version History:
#   1.0  - Initial Release
#############################################################################
use File::stat;		# for tell
use File::Basename qw(basename);
use FileHandle;

# Script identity, interpolated into the usage text.
my $program = basename($0);
my $version = "1.0";

#### BEGIN CONFIG ####

### URL Includes
# A request is kept only if its URI matches one of these patterns.
@aInclude = (
	"/example/path/",
	"/example.asp",
);

#### URL Excludes
# A request is dropped if its URI matches any of these patterns.
@aExclude = (
	".gif",
	".jpg",
	".css",
	".js",
	"/images/",
);

#### END CONFIG ####

# Set to 1 to print matches to the screen instead of the output file.
$debug = 0;

# Command line: output_path input_path [starts_with]
# Both directories are required; show the usage text (which exits) when
# either one is missing or empty.
if (@ARGV < 2 || $ARGV[0] eq "" || $ARGV[1] eq "") {
	usage();
}

$outputpath = $ARGV[0];
$path = $ARGV[1];
# An omitted prefix selects every file in the input directory.
$filebeginswith = defined($ARGV[2]) ? $ARGV[2] : "";


# Upper-case every include and exclude pattern, including the last element
# of each list. The in-place foreach form leaves an unused list untouched.
$_ = uc($_) foreach @aExclude;
$_ = uc($_) foreach @aInclude;

# Both directories are later concatenated with a file name, so make sure
# each ends with a separator. A trailing "/" is accepted as well as "\".
foreach my $strDir ($path, $outputpath) {
	$strDir .= "\\" unless $strDir =~ m{[\\/]$};
}


# Process, in sorted order, every plain file whose name starts with the
# requested prefix. \Q..\E makes the prefix a literal string, not a regex.
opendir(my $dhInput, $path)
	or die "Unable to open input directory: $path\n$!";
foreach my $filename (sort readdir($dhInput)) {
	next unless $filename =~ /^\Q$filebeginswith\E/;

	my $strFullName = $path . $filename;
	# Skip ".", ".." and sub-directories; only regular files are logs.
	next unless -f $strFullName;

	# File::stat's stat() returns an object, or undef on failure.
	my $oStat = stat($strFullName);
	if (!$oStat) {
		warn "Unable to stat $strFullName: $!\n";
		next;
	}
	my $filesize = $oStat->size;

	# Start of the progress bar; StripLogfile prints the rest of it.
	print "$filename (";
	print "$filesize bytes) |" . chr(219);
	StripLogfile($path, $filename, $filesize);
}
closedir($dhInput);

# StripLogfile($strPath, $strFilename, $nFileSize)
#
# Filters one log file. Reads $strPath . $strFilename line by line and
# writes the surviving lines to a file of the same name under the global
# $outputpath.
#
#   - Lines containing " GET /uri " or " POST /uri " are kept when
#     PassFilterExcludes(uri) returns 1.
#   - Lines starting with "#" (log header comments) are always kept and are
#     not counted as requests.
#   - Every other line is dropped.
#
# $nFileSize is the input size in bytes and only drives the progress bar.
# When the global $debug is set, matches are printed to STDOUT instead of
# being written to the output file, and processing stops after roughly 100
# requests. A summary of the counts is printed to STDOUT at the end.
# Dies if either file cannot be opened or the output cannot be closed.
sub StripLogfile {
	my ($strPath, $strFilename, $nFileSize) = @_;
	my $strInput = $strPath . $strFilename;
	my $strOutput = $outputpath . $strFilename;
	my $nPrevPerct = 0;
	my $nNumFilteredRequests = 0;
	my $nNumRequests = 0;

	# Avoid a division by zero in the progress calculation on empty files.
	$nFileSize = 1 if !$nFileSize;

	open(my $fhInput, '<', $strInput)
		or die "Unable to open inputfile: $strInput\n$!";
	open(my $fhOutput, '>', $strOutput)
		or die "Unable to open output file: $strOutput\n$!";

	while (defined(my $cur_line = <$fhInput>)) {
		$nNumRequests++;

		# GET is tried before POST, as before; $1 holds the URI of
		# whichever match succeeded.
		if ($cur_line =~ m{ GET (/.*?) } || $cur_line =~ m{ POST (/.*?) }) {
			my $strURI = $1;
			if (PassFilterExcludes($strURI) == 1) {
				$nNumFilteredRequests++;
				if ($debug) {
					print "$strURI\n";
				}
				else {
					print {$fhOutput} $cur_line;
				}
			}
		}
		elsif ($cur_line =~ /^#/) {
			# Include all comments; they are not requests.
			$nNumRequests--;
			if ($debug) {
				print $cur_line;
			}
			else {
				print {$fhOutput} $cur_line;
			}
		}

		# In debug mode only sample the start of the file.
		last if $debug == 1 && $nNumRequests > 100;

		# Progress bar: ten steps per file, alternating two block
		# characters, emitting only the steps reached since the last line.
		my $nCurPerct = int((tell($fhInput) / $nFileSize) * 10);
		foreach my $nStep ($nPrevPerct .. $nCurPerct - 1) {
			print chr($nStep % 2 == 0 ? 178 : 219);
		}
		$nPrevPerct = $nCurPerct;
	}

	print "|\n";
	print "Total Requests: $nNumRequests\n";
	print "Total Filtered Requests: $nNumFilteredRequests\n";
	if ($nNumRequests > 0) {
		print "Percent Filtered: "
			. (int($nNumFilteredRequests / $nNumRequests * 10000) / 100)
			. "%\n\n";
	}
	else { # divide by zero
		print "Percent Filtered: 0%\n\n";
	}

	close($fhInput);
	# Buffered write errors only surface when the handle is closed.
	close($fhOutput)
		or die "Unable to close output file: $strOutput\n$!";
}


# PassFilterExcludes($strURL)
#
# Decides whether a request URI survives filtering. Returns 1 to keep the
# request and 0 to drop it.
#
#   - If the global @aInclude list is non-empty, the URI must match at
#     least one include pattern.
#   - The URI must not match any pattern in the global @aExclude list.
#
# Patterns are applied as case-insensitive regular expressions, so a "."
# in a pattern matches any character.
sub PassFilterExcludes {
	my ($strURL) = uc($_[0]);

	# An empty include list means "no URL filters, just excludes".
	if (@aInclude) {
		my $bIncluded = 0;
		foreach my $strInclude (@aInclude) {
			# (?:...) stops an empty pattern from silently re-using the
			# last successful regex.
			if ($strURL =~ /(?:$strInclude)/i) {
				$bIncluded = 1;
				last;
			}
		}
		return 0 unless $bIncluded;
	}

	# Check every exclusion, including the last element of the list.
	foreach my $strExclude (@aExclude) {
		return 0 if $strURL =~ /(?:$strExclude)/i;
	}
	return 1;
}

# usage()
#
# Writes the command-line help to STDERR and terminates the script with
# exit status 0. The text interpolates the file-scoped $program and
# $version values.
sub usage {
	# One entry per output line; each gets a trailing newline below.
	my @aHelpLines = (
		"",
		"NAME",
		"     $program -- Filters IIS Log files based upon URIs",
		"",
		"USAGE",
		"     $program output_path input_path starts_with",
		"",
		"DESCRIPTION",
		"     output_path  Directory to output filtered log files",
		"     input_path   Directory containing IIS log files",
		"     starts_with  Name of the files to filter",
		"",
		"VERSION",
		"\t$version",
		"",
	);

	print STDERR join("", map { "$_\n" } @aHelpLines);

	exit 0;
}
AttachmentSize
iisstripper.pl5.4 KB