
Bots section
MIRBOT 1.0
by 
The Mystical Friend  
September 1999
courtesy of Fravia's pages of reverse engineering
I'm happy to present you another kind of perl-bot, that should be a valuable addition 
to our bot-section. I thank Bernd and/or The Mystical Friend  
for this excellent contribution.
Please send all feedback and amelioration proposals to both The Mystical Friend  (mystend(at)yahoo(point)com) and myself, 
I'll publish if worth..
#!/usr/local/bin/perl -I.  ## -I. stands for Include . (you understand later why)
## This is a very special kind of a Robot. You can start
## ---------------------------------------------------------------
##                    M I R B O T   1. 0
## ---------------------------------------------------------------
## this one from a free cgi-bin service like virtualave or netfirms remotly
## with a browser. You need only a start_cgi script, which opens a new shell
## e.g.    #!/usr/bin/perl -I.
##         system(./bot.cgi?xyz); #xyz stands for the parameters
## So and here is how it works:
##         You give the bot an URL (e.g. http://129.105.116.5/Fravia/index.html),
## Username and Password for a FTP-Account, and an E-mail Adress for statisical
## reasons. First the bot downloads the index.html, then he parses the doc. and
## after that, he loads the docoment up to the Ftp Server. This can be
## used for automatical mirroring. When the bot has finished, he sends you an
## E-mail (over sendmail). This Mail tells you about the errors, how many and 
## which kind of files has been uploaded.
## Installation: Upload all standard perl modules into the cgi-bin Directory,
## then upload the bot, and finally upload a little File and rename it to "live"
## (read the source and the comments why).
##
## Hmm the bot worked fine after programming; He is tested
## well, and no bugs has been found.
## Tips for newbies: Learn Perl, and you can write bots like this one.
##
##                              The Mystical Friend    mystend@yahoo.com
 
 
   
###########################################################################
#Include of packages
@INC = ($INC[0], $INC[1], $INC[2]);
# use CGI::Carp qw(fatalsToBrowser); # uncomment this, if the bot causes an Internal Server Error 500.
use strict;
use vars qw($FtpServer $Username $Password $Site2mirror $Email);
use LWP::RobotUA;  
use URI::URL;
use HTML::TreeBuilder;
use Net::FTP;
use File::Basename;
print "Content-type: text/html\n\n
"; 
###########################################################################
#Start of main routine
package main;
$| = 1;
print "\n\nScript $0 successfully initialized.\n\n";
print "Content-type: text/plain\nPragma: no-cache\n";
print '_' x 100;
print "@INC \n";   #only for debugging reasons, just to show you the Include pathes. If you are using the bot on a free service, you have to upload the whole modules-directroy to get it work
#($FtpServer, $Username, $Password, $Site2mirror, $Email) = GetFormData();  # you can use this, if you want to start the bot with a HTML
                                                                             ##page (POST!). This works with URL-Coding.
                                                                            ## e.g. bot.cgi?FtpServer .....
($FtpServer, $Username, $Password, $Site2mirror, $Email) = ("ftp.server", "username", "password", "site2mirror", "email\@adress.com");
my $domain = URI::URL -> new ($Site2mirror) -> host();
###########################################################################
#Initialization of FTP-connection
#print "\n\nInitializing FTP-connection to $FtpServer ..... ";
my $FtpAccess = Net::FTP -> new($FtpServer, Timeout => 200) || print
"ERROR while connecting to $FtpServer, $!"; 
$FtpAccess->login($Username, $Password) || print "LOGIN ERROR";
$FtpAccess->cwd("public_html") || print "Directory ERROR";
my @filelist = $FtpAccess -> dir();
print "DONE \n";                                                              t
###########################################################################
#Initialization of User Agent as RobotUA
print "Initializing robot ...... ";
my $MirBo = new LWP::RobotUA ("Mirror-Bot\/1\.0", "MirBo\@hotmail\.com"); # Change Mirror-Bot\/1/.0 to something like Mozilla
$MirBo -> delay(0,15); 
print "DONE. \n";
###########################################################################
#Initialization of global variables
#my %SEEN = (); # For Hash mode. But I think an Array is better.
my @URLs = ($Site2mirror);
my @SEEN = ($URLs[0]);
# $SEEN{$URLs[0]} = $URLs[0];
my ($TotalRequests, $HTMLDocs, $Binaries, $Uploads, $TotalBytes, $Errors);
print "\nStart main recursive routine ..... \n\n";
recursiv ();
print "\nMain routine finished.\n\n";
###########################################################################
#Some statistics
$Errors = $TotalRequests - ($HTMLDocs + $Binaries);
Sendmail ("$TotalRequests requests done \(Binaries: $Binaries, Docs: $HTMLDocs, Errors: $Errors\)\n\n $Uploads files retrieved and uploaded. \($TotalBytes bytes\).\n\n");
###########################################################################
#Program exit
print "Exit.... \n\n";
$FtpAccess -> quit();
exit 0;
###########################################################################
#Begin of main routine
sub recursiv {
    foreach (@URLs) {
        open(LIVE, "header("Content-Type"), $response->header("Content-Length"));	
	my $unchanged = grep {/$cl.{1,}$file/s} @filelist;
	print "\t$URL is being requested..... ";
      
      
   # debugging starts here
      #     open(ERRORF, ">>errors") || print "Cannot make a log-File";
      #     print ERRORF $response->code();
           
           
      #     close(ERRORF); #debugging ends here
           
             
            if ($response->is_success() && $ct =~ "text\/html" || $ct =~ "text\/plain") {
	    $HTMLDocs++;
	    print "SUCCESSFUL. \n";
	    my $response = SendRequest ("GET", $URL);		    
    	    my $doc = LinxParser ($response->content(), $URL);
	    
	    SaveDoc ($file, $doc);
	    FTPUpload ("ascii", $file, $cl);
	}
	elsif ($response->is_success() && $ct !~ "text\/html" && $ct !~ "text\/plain") {
	    $Binaries++;
	    print "SUCCESSFUL. \n";
	    	    
	    unless ($unchanged) {
		my $response = SendRequest ("GET", $URL, $file);	
	       	FTPUpload ("binary", $file, $cl);
	    } else {
	   	print "\t\t> We already have it!\n\n"; }
	}
	elsif ($response->is_error()) {
	    print "ERROR. \n";
	    print "\t\t> File not found!\n\n";
	}
	
    }
}
###########################################################################
#GetFilename routine
sub GetFilename {
    my $URL = shift;
    my $path = URI::URL -> new ($URL) -> path();
    $path =~ s,/$,/index.html,g;
    $path =~ s,^/,,g;    
    my $file = basename($path);
    
    
    
    return $file; 
}
###########################################################################
#FTPUpload routine
sub FTPUpload {
    my ($method, $file, $bytes) = @_;
    print "\t\t> Uploading $file via FTP to $FtpServer ..... ";
    $FtpAccess -> $method();
    $FtpAccess -> put ($file, $file) || print "ERROR while trying to upload $file";
    unlink $file;
   print "DONE. \n\n";
    $TotalBytes += $bytes;
    $Uploads++;
 }
###########################################################################
#SaveDoc routine
sub SaveDoc {
    my ($file, $doc) = @_;
    print "\t\t> Saving HTML-DOC $file ..... ";
    open (FILE, ">$file");
    print FILE "$doc";
    close (FILE);
   print "DONE. \n"; 
}
###########################################################################
#SendRequest routine
sub SendRequest {
    $TotalRequests++;
    my ($type, $URL, $file) = @_;
    my $req = HTTP::Request->new($type,$URL);
    my $res = $MirBo->request($req, $file);
    
    
    return $res;
 }
###########################################################################
#Linx-parser routine
sub LinxParser {
    my ($doc, $URL) = @_;
    print "\t\t> Substituting absolute links .....  ";
    my $tree = HTML::TreeBuilder->new->parse($doc);
    for (@{$tree->extract_links(qw/a area img/)}) {
	my $l = URI::URL->new($_->[0]);
    
	unless ($l =~ /javascript:/ || $l =~ /mailto:/) {
	    (my $s = $l->abs($URL)) =~ s/#.*//g;
	    my $host = URI::URL -> new ($s) -> host;
	    
             if ($host eq $domain) {
		# my @ALREADYSEEN = $SEEN {$s}; # this is for the Hash-Mode.
		my @ALREADYSEEN = grep { /$s/ } @SEEN;
                push (@URLs, $s) unless @ALREADYSEEN;			
		push (@SEEN, $s);
                # $SEEN {$s} = $s;  # Hash-Mode		
		
                my $linkfile = GetFilename($l);
	      if ($l =~ /:\/\// || $l =~ /www./) {
        	$doc =~ s/\=\"$l/\=\"$linkfile/;
	        }
           }
	}
    }
    $tree->delete;	
    print "DONE. \n";
    return $doc; 
}
###########################################################################
#Sendmail routine
sub Sendmail {
    my $BODY = shift;
    
    open(SENDMAIL, "|/usr/sbin/sendmail -t") || print "ERROR while opening sendmail\n";
    print SENDMAIL "From: Bill Gate\$ \n";       # Of course you can change this to what you want
    print SENDMAIL "To: Mirbot <$Email>\n";
    print SENDMAIL "Subject: Re:Status report\n\n";
    print SENDMAIL "$BODY";
    close(SENDMAIL);
}
###########################################################################
#Get_form_data routine
 
sub GetFormData {
    my ($buffer, @pairs, @FORM, $name, $value) = ();
    $buffer = $ENV{'QUERY_STRING'};
    @pairs = split(/&/, $buffer);
    foreach (@pairs) {
	my ($name, $value) = split(/=/, $_);
	$value =~ tr/+/ /;
	$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
	$value =~ s///g;	
	push (@FORM, $value);	
    }
    return @FORM;
}
#The end
###########################################################################


homepage 
links 
anonymity 
+ORC 
students' essays 
academy database 
bots wars
antismut 
tools 
cocktails 
javascript wars 
search_forms 
mail_Fravia 
Is reverse engineering illegal?