# This is the configuration file of the analysis engine
# (scrutinizer) and the visualisation tools (scc, srrdv).


######## START - DON'T TOUCH SECTION  #########
# All settings in this file live in the SM::Conf package.
package SM::Conf;
# Project module; presumably the source of the barewords used further
# down (DEAD, DYNAMIC, PICTURE, STATIC) -- TODO confirm.
use SM::Defs;
use strict;
# Report questionable constructs (e.g. a typo introduced while editing
# a value) instead of accepting them silently.
use warnings;
# Loaded at run time; the rating section reads
# %SM::Parameters::param_alert, which is presumably defined there.
require SM::Parameters;

# Package hashes that are filled in key by key further down.
our %WHITELIST;
our %ALERT_CMD;
our %CMD;
our %FILETYPES;
########## END - DON'T TOUCH SECTION ##########

########## START - MAIN USER CONFIG ###########
# Directory in which the whole software is installed.
# Please do not add a trailing '/'.
#our $BASE_DIR = "/usr/local/scrutinizer";
our $BASE_DIR = '.';

# Mode the program runs in -- one of DEAD, LIVE or PLAY:
#   DEAD  used to train the system and to analyse old logfiles.
#   LIVE  used with File::Tail to read the logfile continuously.
#         You can achieve the same effect in DEAD mode by
#         starting scrutinizer like this:
#             tail -f <logfile> | ./scrutinizer
#   PLAY  mode with some "sleep" between requests. Good for
#         debugging and for watching what went on in old
#         logfiles with the visualisation tools.
our $MODE = DEAD;

# Format of the logfile:
#   1 => combined
#   0 => common
our $LOG_FORMAT = 1;

# Train the system?
# If you set this to 1 you also have to enable DO_STATISTICS.
# In training mode a new Parameters.pm file is generated in the
# ./SM directory; it holds the parameters of the fitted functions.
our $DO_TRAINING = 0;

# Do the statistics? Has to be enabled in training mode.
# Statistics without training are useful when you are only
# interested in how well your fitted functions describe the
# current behaviour of the clients on your system.
our $DO_STATISTICS = 1;


# Decide whether the alert commands are executed
# when an alert is detected.
our $DO_ALERT_CMD = 0;

# IP addresses that are on the whitelist (one "address => note" pair
# per line). Requests from these addresses are not used to train the
# system and do not get rated.
%WHITELIST = (
    %WHITELIST,
    '127.0.0.1' => 'example entry 1',
    '127.0.0.2' => 'example entry 2',
);

# Logfile that is analysed when none is specified on the command line.
our $DEF_LOGFILE = '/var/log/apache2/access_log';

# Do RRD statistics? This requires RRDs.pm to be installed
# (-> rrdtool).
# These statistics store the system's state over a short time
# at high resolution and over a very long time at a coarse
# resolution.
our $DO_RRD_STAT = 0;

# If you enable DO_RRD_STAT, this value currently has to be on
# as well, otherwise a bug will occur in the srrdv tool.
# Please just leave it as it is right now, thanks.
our $RRD_LOAD = $DO_RRD_STAT;

# Debug output? The higher the number, the more output.
our $DEBUG = 0;

# Do you have File::Tail installed?
our $HAVE_FILE_TAIL = 0;

# Enable this if you want to stop processing a logfile
# after $RECORD_LIMIT records.
# Useful if you have very, very big logfiles.
our $USE_RECORD_LIMIT = 0;
our $RECORD_LIMIT     = 200_000;

# External executables.
# Adjust this to the place where gnuplot is installed.
$CMD{'gnuplot'} = '/usr/bin/gnuplot';

########### END - MAIN USER CONFIG ############


############ START - ALERT CONFIG #############

# The different alert level stages.
# LEVEL0 : does not generate actual alerts, but writes
#          a logfile, so that you can see which requests
#          almost generated an alert.
# LEVEL1 : generates an alert with the defense
#          mechanisms defined for LEVEL1.
# LEVEL2 : the same as LEVEL1, with the mechanisms
#          defined for LEVEL2.

our $ALERT_LEVEL0 = 0.7;
our $ALERT_LEVEL1 = 1.0;
our $ALERT_LEVEL2 = 2.0;

# If you enable this, the actual content of the client's
# data structure is dumped into the LOG directory whenever
# an alert is generated.
our $ALERT_DEBUG_INFO = 1;

# Here you can add commands that get called when an alert
# occurs. You can define commands for two different alert
# stages. For each stage you can add as many commands as
# you like. Below you may define commands that do something
# when a client gets unblocked.

# commands that get called when an alert occurs begin with
# the prefix:
# -> $ALERT_CMD{BAN}{__X__}{CMDS}
# where X has the value LEVEL1 or LEVEL2
#
# for commands that get called on unbanning, the prefix is
# -> $ALERT_CMD{UNBAN}{__X__}{CMDS}
# where X has the value LEVEL1 or LEVEL2
#
# the suffix you add consists of two parts separated
# by a '='
#
# part I  -> {'description of what gets called'}
# =
# part II -> 'command that gets executed'
#
# have a look at the examples below :-)

# Since this config file has to be written in Perl syntax (which
# you probably do not like), you at least get all the advantages
# of Perl. Here the pipe to the apache module is defined, so that
# it can be used in the commands.
my $APACHE_PIPE = $BASE_DIR . '/run/bad_ip';

# In the commands you can use the following placeholders, which
# get substituted with the actual values when the command is
# executed:
#   %i -> ip address of the offender
#   %s -> time that he gets blocked
#   %d -> debug data structure of all requests of the client
# NOTE(review): the commented-out examples below also use %t, which
# is not described here -- confirm its meaning before relying on it.

# --- BAN, LEVEL1 ---
$ALERT_CMD{BAN}{LEVEL1}{CMDS}{'echo'} = 'echo block %i on level 1 for %s seconds';
#$ALERT_CMD{BAN}{LEVEL1}{CMDS}{'apache'}='echo "%i %s" > ' . $APACHE_PIPE;
#$ALERT_CMD{BAN}{LEVEL1}{CMDS}{'inform admin'}= 'echo -en "%t block %i on apache for %s sec\n\n%d" | mail -r scrutinizer@your.domain -s "scrutinized %i" you@your.domain';

# --- BAN, LEVEL2 ---
$ALERT_CMD{BAN}{LEVEL2}{CMDS}{'echo'} = 'echo block %i on level 2 for %s seconds';
#$ALERT_CMD{BAN}{LEVEL2}{CMDS}{'inform admin'}='echo %t block %i on iptables for %s sec | mail -r scrutinizer@your.domain -s "scrutinized %i" you@your.domain';

# --- UNBAN, LEVEL1 ---
$ALERT_CMD{UNBAN}{LEVEL1}{CMDS}{'echo'} = 'echo unblock %i on level 1';
#$ALERT_CMD{UNBAN}{LEVEL1}{CMDS}{'apache'}='echo %i > ' . $APACHE_PIPE;
#$ALERT_CMD{UNBAN}{LEVEL1}{CMDS}{'inform admin'}='echo %t UNblock %i on apache | mail -r scrutinizer@your.domain -s "unblock %i" you@your.domain';

# --- UNBAN, LEVEL2 ---
$ALERT_CMD{UNBAN}{LEVEL2}{CMDS}{'echo'} = 'echo unblock %i on level 2';
#$ALERT_CMD{UNBAN}{LEVEL2}{CMDS}{'inform admin'}='echo %t UNblock %i on iptables | mail -r scrutinizer@your.domain -s "unblock %i" you@your.domain';


# Default timeout: how long a user gets blocked
# on LEVEL1 and on LEVEL2.
# NOTE(review): presumably in seconds (the echo commands in this
# file print "for %s seconds") -- confirm.
$ALERT_CMD{BAN}{LEVEL1}{DEFAULT_TIMEOUT} = 300;
$ALERT_CMD{BAN}{LEVEL2}{DEFAULT_TIMEOUT} = 600;

############# END - ALERT CONFIG ##############



######## START - SYSTEM TUNING SECTION ########

# Number of elements stored in the hash table.
# A bigger number increases speed, but also memory usage.
our $HASH_TBL_SIZE = 10;

# Minimum time that an ip record stays in the system when no
# more requests are coming from this ip address.
# If you did not read the documentation, you probably should
# not touch these values :-)
our $TIMEOUT_IP  = 10;
our $TIMEOUT_URI = 20;

# Alpha values for the exponentially weighted moving average
# of the requests.
# ST = short time: an average in which the most current value
# has a big weight -> 0.5 = 50%
our $ALPHA_ST = 0.5;
# LT = long time: the history counts more, (1-0.1) = 90%
our $ALPHA_LT = 0.1;

# Print a benchmark report at the end, showing where
# the cpu time was used in the code...
our $BENCHMARK = 0;

# Boundary at which we switch from the simple weighted
# moving average to the exponential moving average.
our $WMA_2_EWMA = 6;

# How fast should the play mode run? The system sleeps
# for $PLAY_TIMEOUT when the next timestep occurs.
our $PLAY_TIMEOUT = '0.01';

# Maximum time we sleep on the logfile until we finally
# try to read new data. Only used with File::Tail.
# Do not edit this if you did not read the man page
# of File::Tail.
our $FH_MAXINTERVAL = 1;

######### END - SYSTEM TUNING SECTION #########



#### START - PATH & PID FILE SYSTEM SECTION ###

# Directory in which all the statistic logfiles
# are stored.
our $LOG_DIR = $BASE_DIR . '/LOG/';

# Prefix of the alert logfiles.
# A _I, _II or _III is appended, depending
# on the alert level.
our $ALERT_LOG_PREFIX = $BASE_DIR . '/LOG/alerts.log';

# The pid file for scrutinizer.
our $PID_FILE = $BASE_DIR . '/run/scrutinizer.pid';

# The two pipes the visualisation toolkit uses
# to communicate with scrutinizer.
our $PIPE_HANDLE_S2G = $BASE_DIR . '/run/scrut2gui';
our $PIPE_HANDLE_G2S = $BASE_DIR . '/run/gui2scrut';

# Home of the RRD database.
our $RRD_DB = $BASE_DIR . '/LOG/scrutinizer.rrd';

##### END - PATH & PID FILE SYSTEM SECTION ####


######### START - FILETYPE SECTION #########

# Here you can add the filetypes that occur on your pages.
# They are used to differentiate between dynamic content,
# static content and pictures.

# dynamic content
$FILETYPES{'php'} = DYNAMIC;
$FILETYPES{'cfm'} = DYNAMIC;
$FILETYPES{'asp'} = DYNAMIC;

# pictures
$FILETYPES{'gif'} = PICTURE;
$FILETYPES{'jpg'} = PICTURE;
$FILETYPES{'png'} = PICTURE;
$FILETYPES{'ico'} = PICTURE;

# static content
$FILETYPES{'/'}   = STATIC;
$FILETYPES{'css'} = STATIC;


# Used if the filetype could not be determined.
$FILETYPES{'DEFAULT'} = STATIC;

########## END - FILETYPE SECTION ##########


########## START - RATING SECTION ##########

# Decide which tests you want to run (1 = on, 0 = off).
our $DO_REQUEST_RATIO = 1;
our $DO_PERIODES      = 1;
our $DO_TIME_DIST     = 1;
our $DO_URI_SPREADING = 1;
our $DO_RET_STATUS    = 1;
our $DO_FILE_TYPE     = 0;

# here you can configure how much weight you
# want to give to every single test.
# all the rating functions output a value between
# 0 and 1. Now it's up to you to configure the
# system so that it gives values, for example,
# between 0.5 and 2.
# the base value gives the rating for the least
# suspicious behaviour. the stretch value then
# gives the range in which the client
# influences the total rating value, based on
# its behaviour. for the example given above,
# the values would be base=>0.5, stretch=>1.5.

# the entries which have a max value are a bit
# special. when you have done the training, have a look
# at the generated graphics. for all the graphics with
# an f(x) function this max value needs to be set.
# the max value cuts the graph into two parts.
# the left: this is the part where you accept
# that this is the normal behaviour on your server.
# the right: in this part, only a few events
# occurred and they are too much scattered
# from the other events.
# hint: set the max value at the point where
# the gradient of the curve has almost reached zero. this
# is the case when the cdf value has nearly reached 1.
# the idea behind this: at the max point the
# function curve is point-rotated by 180 degrees around
# the max point. have a look at the documentation!
# this way events around the max point will have
# a value of about 1. events that are farther on
# the right side of the max point will experience
# a high increase in the alert value.
 
# Rating weights of the tests that use a fitted function.
# Each hash holds:
#   base    - rating for the least suspicious behaviour
#   stretch - range in which the client influences the rating
#   max     - cut-off point of the fitted curve (where present)
#   p       - entry of %SM::Parameters::param_alert for this test

# request ratio test
our %f_request_ratio = (
    base    => 0.5,
    stretch => 0.5,
    max     => 60.0,
    p       => $SM::Parameters::param_alert{request_ratio},
);

# periodes test
our %f_periodes = (
    base    => 0.4,
    stretch => 0.6,
    max     => 300.0,
    p       => $SM::Parameters::param_alert{periodes},
);

# time distribution test (has no max value)
our %f_time_dist = (
    base    => 0.9,
    stretch => 0.2,
    p       => $SM::Parameters::param_alert{time_dist},
);


# uri spreading test
our %f_uri_spreading = (
    base    => 0.6,
    stretch => 0.5,
    max     => 60.0,
    p       => $SM::Parameters::param_alert{uri_spreading},
);

# logbase is used to rate the 4xx return status codes from
# apache. Take a look at the source code or the documentation.
# A higher value for logbase gives a smaller increase
# of the alert value.

our %f_ret_status = (
    base    => 0.6,
    stretch => 0.4,
    logbase => 10,
);

# Rating weights of the file type test.
our %f_file_type = (
    base    => 0.6,
    stretch => 0.4,
);
########### END - RATING SECTION ###########

# A file loaded with use/require has to end with a true value.
1;
