# this module contains the main loop of the system. 
# the opened filehandle is provided to the run method
# and then all lines are read and processed.  

package SM::Loop;

use SM::Conf;
use SM::Defs;
use SM::GV qw(%log_record); # Global Variables
use SM::PP; # Pretty Print
use SM::Statistic;
use SM::Tools; 
use SM::Scrutinizer;
use SM::HashTable;
use Time::HiRes qw(gettimeofday);
require RRDs if ($SM::Conf::DO_RRD_STAT);
require SM::RRD if ($SM::Conf::DO_RRD_STAT);
use strict;
use warnings;


# run(FILEHANDLE)
#
# Sets the whole system in motion: reads FILEHANDLE line by line and, for
# every line accepted by SM::Tools::parse_line(), updates the per-IP node
# kept in the SM::HashTable buckets, the global statistics and (if
# enabled) the RRD data.
#
# Parameters:
#   FILEHANDLE - the opened input handle; it is assigned to the package
#                glob FH and read until EOF or until the record limit
#                ($SM::Conf::RECORD_LIMIT) is used up.
#
# Returns nothing meaningful. Reads the parsed fields from the global
# %log_record and writes to several $SM::GV::* / %SM::Statistic::*
# globals as a side effect.
sub run {
	# bind the caller's filehandle to the package glob FH
	*FH = shift();

	# accumulators for the RRD period that is currently being collected
	my ($rrd_requests, $rrd_data_transfered, $rrd_max_alert_level) =
	   (0, 0, 0);

	# this is the main loop:
	# read the file handle line by line
	while(<FH>) {

		# check if the line limit is reached;
		# if yes, skip the rest of the lines
		last if($SM::Conf::USE_RECORD_LIMIT && $SM::Conf::RECORD_LIMIT--<=0);

		# go for the next line if this line couldn't be parsed
		# (parse_line() is treated as successful when it returns 0)
		next if (SM::Tools::parse_line($_) != 0);

		# if this is the first run we remember the start time
		# based on the timestamp of the first entry.
		# will be used for RRD statistics.
		if($SM::GV::firstrun && $SM::Conf::DO_RRD_STAT) {
			# save the initial timestamp
			SM::RRD::init($log_record{timestamp});
			# unmark the firstrun variable
			$SM::GV::firstrun=0;
			# save the start time as global variable
			$SM::GV::system_start_time=$log_record{timestamp};

			# print debug information if enabled
			print(STDERR "debug starttime set to ", 
					$SM::GV::system_start_time, "\n") if ($SM::Conf::DEBUG>3);
		}

		# skip that entry if the ip address is defined
		# as a member of the white list (don't scrutinize them)
		next if (exists($SM::Conf::WHITELIST{$log_record{ip}}));

		# per-line state: the node belonging to this request and whether
		# it was already present in the hash table
		my $node;
		my $existing_node = 0;

		# start benchmarking 'hash_key'
		$SM::GV::perf{hash_key}{start} = gettimeofday if ($SM::Conf::BENCHMARK);

		# create "hash key" (index of the bucket for this ip)
		my $h_key=SM::Tools::ip_to_nr($log_record{ip})%$SM::Conf::HASH_TBL_SIZE;

		# stop benchmarking 'hash_key'
		$SM::GV::perf{hash_key}{sum}+=gettimeofday-$SM::GV::perf{hash_key}{start}
			if ($SM::Conf::BENCHMARK);

		# get a reference to the base element of the bucket
		my $head=\@{$SM::HashTable::htbl[$h_key]};

		# cur is now the first element of the linked list (LL)
		my $cur=$head->[NEXT];

		# start benchmarking 'linked list'
		$SM::GV::perf{ll}{start} = gettimeofday if ($SM::Conf::BENCHMARK);

		# walk the list until we are back at the base element.
		# the timeout test comes first on purpose: a timed-out entry is
		# removed even if it belongs to the current ip, in which case a
		# fresh node is created further down.
		while($cur != $head) {

			# check if an ip entry wasn't updated for the defined timeout
			if($log_record{timestamp}-$cur->[VAL]->{last_ts}>$SM::Conf::TIMEOUT_IP)
			{

				# print debug information if enabled
				# NOTE(review): uses $log_record{ts}, everything else uses
				# $log_record{timestamp} - confirm parse_line() sets 'ts'
				print "DELETE $cur $log_record{ts} ip: " . $cur->[VAL]->{ip} . 
					  "\t del entry\n" if ($SM::Conf::DEBUG>1); 

				# remove the record; the value returned by del_entry()
				# is the element we continue with
				$cur = SM::HashTable::del_entry($h_key, $cur);

			# check if we know this ip source already
			} elsif ($cur->[VAL]->{ip} eq $log_record{ip}) {

				# check whether there is more than one hit by this host
				# in a second (only do the calculations once per second)
				if($cur->[VAL]->{last_ts}==$log_record{timestamp}) {

					# a hit in the same second as the last request
					# of this ip is just counted.
					$cur->[VAL]->{hits_per_second}++;
				} else {

					# scrutinize current element
					SM::Scrutinizer::scrutinize($cur);
				}

				# if this element isn't on the first position
				# move it to the front of the LL. this should
				# speed up processing
				if($cur->[PREV] != $head) {
					SM::HashTable::move_entry($h_key, $cur);
				}

				# remember the record, mark it as existing
				# and stop searching
				$node = $cur->[VAL];
				$existing_node=1;
				last;

			} else {
				# move on... (next element)
				$cur=$cur->[NEXT];
			}
		}

		# stop benchmarking 'linked list'
		$SM::GV::perf{ll}{sum}+=gettimeofday-$SM::GV::perf{ll}{start}
			if ($SM::Conf::BENCHMARK);

		# start benchmarking 'update record'
		$SM::GV::perf{update_rec}{start} = gettimeofday
			if ($SM::Conf::BENCHMARK);

		# no node found for this ip: create a new one
		if(!$existing_node) {
			$node = {
					"ip"			=> $log_record{ip},
					"last_ts"		=> $log_record{timestamp},
					"hits_per_second"=> 1,
					"time_periodes"	=> 0,
					"req_st"		=> 0,
					"req_lt"		=> 0,
					"max_ma"		=> 0,
					"max_hit_ratio" => 0,
					"max_uri_hits"	=> 0,
					"alert"			=> 0,
					"chi"			=> -1,
			};
		}

		# save the requested URI
		my $uri = $log_record{request};

		# increase the hit counter of the node.
		$node->{count_sum}++;

		# increase the hit counter of this uri
		$node->{e}{$uri}{hits}++;

		# count the class of the status code (code/100, i.e. 2 for 2xx,
		# 3 for 3xx, ...). the bucket is stored under the key 'methods'.
		$node->{methods}{int($log_record{code}/100)}++;

		# increase the hitcount of a given timestamp
		$node->{ts}{$log_record{timestamp}}++;

		# count the "different" browsers. this could give
		# a hint for a proxy.
		$node->{browser}{$log_record{browser}}++;

		# do a statistic on the parameters supplied with the uri.
		# this is currently not evaluated because it can be fooled
		# very easily. however this can be an interesting information
		# for the administrator to decide if the client has a normal
		# behavior.
		if($log_record{param} ne "") {
			$node->{e}{$uri}{param}{$log_record{param}}++;
		} else {
			$node->{e}{$uri}{param}{'_no_param_'}++;
		}

		# do the same for the referer. currently also not rated.
		if($log_record{referer} ne "") {
			$node->{e}{$uri}{referer}{$log_record{referer}}++;
		} else {
			$node->{e}{$uri}{referer}{'_unknown_'}++;
		}

		# extract the file extension from the request
		if($uri=~m/(\.(\w+)|(\/))$/) {
			# $2 is the extension, $3 the trailing slash. test with
			# defined() instead of truth so that an extension of "0"
			# is not mistaken for a missing one.
			my $match=lc(defined($2) ? $2 : $3);

			# store the file type of this uri
			if(exists($SM::Conf::FILETYPES{$match})) {
				$node->{e}{$uri}{file_type}=$SM::Conf::FILETYPES{$match};
			} else {
				# print a warning
				print "warn don't know $match\n" if ($SM::Conf::DEBUG==10);

				# take the DEFAULT value
				$node->{e}{$uri}{file_type}=$SM::Conf::FILETYPES{DEFAULT};
			}

		} else {
			# if we couldn't detect a file extension at all
			# take the DEFAULT value
			$node->{e}{$uri}{file_type} = $SM::Conf::FILETYPES{DEFAULT};
			print "don't know file extension $uri\n" if ($SM::Conf::DEBUG>2);
		}

		# increase the hit counter of this filetype
		$node->{filetypes}{$node->{e}{$uri}{file_type}}++;

		# stop benchmarking 'update record'
		$SM::GV::perf{update_rec}{sum}+=gettimeofday-$SM::GV::perf{update_rec}{start}
			if ($SM::Conf::BENCHMARK);


		# if the statistics are enabled
		if($SM::Conf::DO_STATISTICS) {

			# do statistic on the different return codes of the server
			$SM::Statistic::stat{codes}[$log_record{code}]++;

			# do statistic on the amount of transfered data in steps of
			# 1000 bytes; a size of "-" is counted in slot 0
			my $bytes=$log_record{bytes} eq "-" ? -1 : $log_record{bytes};
			$SM::Statistic::stat{bytes}[int(1+$bytes/1000)]++;

			# do statistic on the methods used
			$SM::Statistic::methods{$log_record{method}}++;

		}


		# remember when this uri was last requested by this node
		$node->{e}{$uri}{last_ts} = $log_record{timestamp};


		# if this is not an existing node we have to add it
		if (!$existing_node) {
			$SM::GV::perf{add_rec}{start} = gettimeofday 
				if ($SM::Conf::BENCHMARK);

			SM::HashTable::add_entry($h_key, $node);

			# one-line debug message, same layout as the "del entry" one
			# NOTE(review): $log_record{ts} - see the note at "del entry"
			print "$log_record{ts} ip: $log_record{ip}\t add entry $log_record{timestamp}\n"
						if ($SM::Conf::DEBUG>1); 

			$SM::GV::perf{add_rec}{sum}+=gettimeofday-$SM::GV::perf{add_rec}{start}
				if ($SM::Conf::BENCHMARK);
		}


		# if we run in PLAY mode we sleep for a defined time if 
		# a change in the second is detected
		# (4-argument select() is used as a sub-second sleep)
		if($SM::Conf::MODE==PLAY &&
		   $log_record{timestamp}>$SM::GV::last_written_stat) {
			select(undef, undef, undef, $SM::Conf::PLAY_TIMEOUT);
		}

		# do the RRD stuff if we have to
		if($SM::Conf::DO_RRD_STAT) {

			# count how many requests occured in this period...
			$rrd_requests++;
			# how much data got transfered ("-" counts as 0 bytes)...
			$rrd_data_transfered+=($log_record{bytes} eq "-")?0:$log_record{bytes};
			# and the highest alert level
			if($node->{alert}>$rrd_max_alert_level) {
				$rrd_max_alert_level=$node->{alert};
			}

			# if the next second started, add the record
			if($log_record{timestamp}>$SM::GV::last_written_stat) {
				SM::RRD::update(
					$log_record{timestamp},
					$rrd_requests,
					$rrd_data_transfered,
					$rrd_max_alert_level,
					$SM::GV::nodes_in_system,
					$SM::GV::load_average,	
				);
				# reset the values
				$rrd_requests=$rrd_data_transfered=$rrd_max_alert_level=0;
			}
		}

		# update the timestamp if the last statistic was
		# done at least one second ago.
		# do it maximum once per second -> much more performant.
		# this has to stay behind the PLAY and RRD sections, both of
		# them compare against the old value of last_written_stat.
		if( $SM::GV::last_written_stat<$log_record{timestamp}) {
			# refresh the load average, but only if RRD_LOAD
			# is enabled
			SM::Scrutinizer::get_load_average() if ($SM::Conf::RRD_LOAD);	
			# update the timestamp of the statistics
			$SM::GV::last_written_stat=$log_record{timestamp};
			# clean up the blacklist and check if one
			# of the hosts on the list can be freed now.
			# do this also once per second
			# NOTE(review): SM::Blacklist is not loaded by this file;
			# presumably another module pulls it in - confirm
			SM::Blacklist::clean_up($SM::GV::last_written_stat);
		}
	}
	
}
1;
