# Web Audit Library
#
# FILENAME      : wal.py
# CODER         : Simon Roses Femerling
# DATE          : 1/17/2004
# LAST UPDATE   : 11/21/2004
# ABSTRACT      : Python library that makes auditing Web/HTTP applications easier.
#
# - Roses Labs Innovations (RL+I)
# Roses Labs
# http://www.roseslabs.com
#
# Copyright (c) 2003-2004 Roses Labs.
#
# You may not distribute, transmit, repost this software for commercial 
# purposes without Roses Labs written permission. 
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, publish,
# distribute the Software, and to permit persons to whom the Software 
# is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#

import socket
import mimetools
import md5
from random import random, choice, uniform
import string
import re
import codecs
import base64
import cPickle
import os

# For our HTML parser
import htmllib

import formatter

# Import tlslite
try:
    from tlslite.api import *
except ImportError:
    print "Error: No SSL support, please install TLSLITE"    

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Module metadata. __version__ is also used by http_init to build the
# default User-Agent header.
__version__ = "0.4"
__author__ = "Simon Roses Femerling"
__mail__ = "sroses@roseslabs.com"

# Names exported by "from wal import *".
__all__ = [ 'http_init', 'http_new_request', 'http_new_response', 'http_do_request', 'http_fixup_request',
            'http_construct_headers', 'WalNetwork', 'utils_randstr', 'utils_get_dir', 'utils_port_open',
            'utils_lowercase_keys', 'utils_find_lowercase_key', 'utils_split_uri', 'utils_join_uri',
            'utils_save_page', 'utils_split_dir', 'utils_join_tag', 'utils_dmkdir', 'utils_pathjoin',
            'check_custom404', 'server_version', 'encode_base64', 'decode_base64', 'encode_md5',
            'encode_uri_hex', 'encode_uri_randomhex', 'encode_anti_ids', 'encode_unicode', 'auth_set',
            'auth_unset', 'auth_brute_force', 'encode_decode_rot13', 'cookie_read', 'cookie_parse',
            'cookie_write', 'cookie_get', 'cookie_set', 'get_page', 'get_page_to_file', 'brute_url',
            'WalParser', 'WalSerializer', 'YES', 'NO' ]

#############################################################################################
# Defines
#############################################################################################

# Integer flags used for on/off options and for YES/NO return values.
YES = 1
NO  = 0

# Default ports: HTTP_PORT is the default request port set by http_init,
# HTTPS_PORT is used for SSL requests that give no 'ssl_port'.
HTTP_PORT  = 80
HTTPS_PORT = 443

# Transport selectors understood by WalNetwork.open_socket.
TCP = 1
UDP = 2

# Marker for response-parsing state that has not been determined yet.
_UNKNOWN = 'UNKNOWN'

#############################################################################################
# Variables
#############################################################################################

# For internal use only!
# Module-wide state shared by http_do_request and wrap_readline.
Options = {
    'use_ssl'       : NO,               # set to YES by http_do_request when the request enables 'ssl'
    'debug'         : YES,              # http_do_request overwrites this from the request's 'debug' setting
    'debug_file'    : 'debugfile.txt'   # file the debug dump is appended to
    }    

#############################################################################################
# Our Functions
#############################################################################################

###############################################
#
# Socket Engine Functions
#
###############################################

# FUNC     : class WalNetwork
# PARAMS   : ...
# RETURN   : ...
# ABSTRACT : Wal Network class
class WalNetwork:

    # FUNC     : __init__
    # PARAMS   : proto (TCP or UDP), timeout (passed to settimeout()),
    #            do_ssl (YES to wrap the connection with tlslite)
    # RETURN   : ...
    # ABSTRACT : Store the connection settings. No socket is created here.
    def __init__(self, proto = TCP, timeout = 10, do_ssl = NO):
        self.proto = proto
        self.timeout = timeout
        self.dossl = do_ssl
        self.sock_fd = None         # raw socket, created by open_socket()
        self.dobind = NO            # YES once set_bind_data() has been called
        self.bind_host = ''
        self.bind_port = 33133

        self.ssl_con =  None        # tlslite connection, created by connect_socket()

    # EOF: def __init__

    # FUNC     : open_socket
    # PARAMS   : ...
    # RETURN   : YES on success, NO on failure (unknown proto)
    # ABSTRACT : Create a TCP or UDP socket and bind it locally if
    #            set_bind_data() was called before.
    def open_socket(self):
        if self.proto == TCP:
            self.sock_fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        elif self.proto == UDP:
            self.sock_fd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        else:
            return NO # Invalid proto, so error!

        if self.dobind == YES:
            self.sock_fd.bind((self.bind_host, self.bind_port))

        return YES # Success!
    # EOF: def open_socket

    # FUNC     : connect_socket
    # PARAMS   : host, port
    # RETURN   : the connect_ex() error indicator (0 means success)
    # ABSTRACT : Connect to host on port. When SSL is enabled the TLS
    #            handshake is done right after connecting.
    def connect_socket(self, host, port):
        self.sock_fd.settimeout(self.timeout)
        er = self.sock_fd.connect_ex((host,port))
        if self.dossl == YES:
            # NOTE: the handshake is attempted whatever connect_ex() returned,
            # so a failed connect surfaces as an exception from the handshake.
            self.ssl_con = TLSConnection(self.sock_fd)
            self.ssl_con.handshakeClientUnknown() # Unknown SSL client ;)
        return er
    # EOF: def connect_socket

    # FUNC     : close_socket
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : Close whatever is open (TLS connection and/or raw socket)
    #            and forget it. Calling it again is a no-op.
    def close_socket(self):
        tls = self.ssl_con
        raw = self.sock_fd
        # Drop the references first so the object is left in a clean state
        # even if one of the close() calls below raises.
        self.ssl_con = None
        self.sock_fd = None

        if tls is not None: # SSL Stuff
            try:
                tls.close()
            finally:
                tls.sock.close()
                if raw is not None and raw is not tls.sock:
                    raw.close()
        elif raw is not None:
            raw.close()
        return
    # EOF: def close_socket

    # FUNC     : send_socket_data
    # PARAMS   : data
    # RETURN   : number of bytes sent
    # ABSTRACT : Send data, retrying until everything has been handed over.
    def send_socket_data(self, data):
        if self.dossl == NO:
            conn = self.sock_fd
        else: # SSL Stuff
            conn = self.ssl_con

        dsize = 0
        # send() may accept only part of the buffer, so keep going until
        # the whole request has been written.
        while dsize < len(data):
            sent = conn.send(data[dsize:])
            if not sent:
                break   # nothing accepted; do not spin forever
            dsize += sent
        return dsize
    # EOF: def send_socket_data

    # FUNC     : recv_socket_data
    # PARAMS   : size (maximum number of bytes to read)
    # RETURN   : received data
    # ABSTRACT : Receive data from the socket (or the TLS connection).
    def recv_socket_data(self,size=10000):
        if self.dossl == NO:
            return self.sock_fd.recv(size)
        # SSL Stuff
        return self.ssl_con.recv(size)
    # EOF: def recv_socket_data

    # FUNC     : socket_makefile
    # PARAMS   : ...
    # RETURN   : file-like object
    # ABSTRACT : Return an unbuffered binary read file object on top of the
    #            socket (or the TLS connection).
    def socket_makefile(self):
        if self.dossl == NO:
            return self.sock_fd.makefile('rb', 0)
        # SSL Stuff
        return self.ssl_con.makefile('rb',0)
    # EOF: def socket_makefile

    # FUNC     : give_socket
    # PARAMS   : ...
    # RETURN   : socket
    # ABSTRACT : Return the raw socket, or the TLS connection when SSL is on.
    def give_socket(self):
        if self.dossl == NO:
            return self.sock_fd
        else:
            return self.ssl_con
    # EOF: def give_socket

    # FUNC     : set_bind_data
    # PARAMS   : host, port
    # RETURN   : ...
    # ABSTRACT : Remember the local host/port that open_socket() must bind to.
    def set_bind_data(self, host, port = 0):
        self.dobind = YES
        self.bind_host = host
        self.bind_port = port
    # EOF: set_bind_data

    # FUNC     : set_socket
    # PARAMS   : s
    # RETURN   : ...
    # ABSTRACT : Replace the raw socket, or the TLS connection when SSL is on.
    def set_socket(self, s):
        if self.dossl == NO:
            self.sock_fd = s
        else:
            self.ssl_con = s
    # EOF: def set_socket
    
# EOF: class WalNetwork 

###############################################
#
# Wrappers and Internal funcs
#
###############################################

# FUNC     : def wrap_readline
# PARAMS   : fp
# RETURN   : fp
# ABSTRACT : Internal wrapper around readline, used for debugging/logging purposes.
def wrap_readline(fp):
    """Read one line from fp and return it.

    When Options['debug'] is set to anything but NO the line is also
    appended to Options['debug_file'].  A failure to write the dump is
    reported on stdout and never prevents the line from being returned.
    """
    line = fp.readline()

    if 'debug' not in Options or Options['debug'] == NO:
        return line

    try:
        dump = open(Options['debug_file'], 'a')
        try:
            dump.write(line)
        finally:
            # Close the dump file even when the write itself fails.
            dump.close()
    except Exception:
        # Logging is best-effort: report the problem and carry on.
        print("Error in wal.py: Unable to write to dump file")

    return line
# EOF: wrap_readline
        
###############################################
#
# HTTP Engine Functions
#
###############################################

# FUNC     : def http_init
# PARAMS   : ireq (Input REQuest)
# RETURN   : Nothing
# ABSTRACT : Init our HTTP engine.
#            This code has been ported from perl to python. (libwhisker).
def http_init(ireq):
    """Load the engine defaults into ireq.

    ireq['wal'] is replaced by a fresh settings dict and the default
    'Connection' and 'User-Agent' headers are set.  ireq is modified in
    place and nothing is returned.
    """
    wal_defaults = dict(
        # Target and request line
        host='localhost',
        port=HTTP_PORT,
        method='GET',
        protocol='HTTP',
        version='1.1',
        uri='/',
        uri_prefix='',
        uri_postfix='',
        uri_param_sep='?',
        include_host_in_uri=NO,
        # Separators used when the request is serialized
        http_space1=' ',
        http_space2=' ',
        http_eol='\x0d\x0a',
        # Socket behaviour
        proto=TCP,
        timeout=10,
        bind_socket=NO,
        bind_host='',
        bind_port=33133,
        force_close=NO,
        force_open=NO,
        # Add the Wal identification header to requests
        wal_id=YES,
    )

    ireq['wal'] = wal_defaults
    ireq['Connection'] = 'Keep-Alive'
    ireq['User-Agent'] = 'Wal/%s' % __version__

# EOF: http_init

# FUNC     : def http_new_request
# PARAMS   : config
# RETURN   : A dict
# ABSTRACT : This function basically 'objectifies' the creation of wal
#            request hash objects. This code has been ported from
#            perl to python. (libwhisker).
def http_new_request(config = None):
    """Return a new request dict initialised by http_init().

    config -- optional dict whose entries override the defaults stored in
              the 'wal' settings of the new request.  None (the default)
              and an empty dict both mean "no overrides".
    """
    RET = {}

    http_init(RET)
    if config:
        RET['wal'].update(config)

    return RET
# EOF: def http_new_request    

# FUNC     : def http_new_response
# PARAMS   : ...
# RETURN   : A dict
# ABSTRACT : This function basically 'objectifies' the creation of wal
#            response hash objects. This code has been ported from
#            perl to python. (libwhisker).
def http_new_response():
    """Return a fresh response dict whose 'wal' entry holds an empty 'uri'."""
    return {'wal': {'uri': ''}}
# EOF: def http_new_response    

# FUNC     : http_do_request
# PARAMS   : ireq (Input REQuest), oreq (Output REQuest), config
# RETURN   : Return 0 on success, 1 on HTTP error, and 2 on network error. 
# ABSTRACT : Perform the HTTP request.
#            This code is a mix between perl code (libwhisker) ported to python
#            and python httplib module :)
def http_do_request(ireq, oreq, config=None):
    """Send the request described by ireq and store the response in oreq.

    ireq   -- request dict.  ireq['wal'] holds the engine settings; every
              other key with a non-empty name is sent as an HTTP header.
    oreq   -- response dict.  oreq['wal'] is replaced and receives 'uri',
              'code', 'message', 'version', 'data' and 'error', plus
              'cookies' for non-0.9 responses and, when the matching
              setting is present in ireq['wal'], 'request_fingerprint',
              'proxy_data', 'header_data' and 'header_list'.
              '100_continue' counts the 100 responses that were skipped.
    config -- optional dict merged into ireq['wal'] before anything else.

    Returns 0 on success and 2 on a network error (socket could not be
    created, or the body read timed out).  This implementation never
    returns 1.  Socket errors and timeouts raised while connecting or while
    reading the status line and headers are not caught here.

    Side effects: ireq may be modified (config merge, anti-IDS encoding,
    'WTag' header) and the module-level Options dict is updated.
    """
    data_buffer = []            # pieces of the raw request, joined before sending
    count_100 = 0               # number of received 100 responses (HTTP)

    msg = None
    chunked = _UNKNOWN         # is "chunked" being used?
    length = _UNKNOWN          # number of bytes left in response
    will_close = _UNKNOWN      # conn will close at end of response
    chunk_left = _UNKNOWN      # bytes left to read in current chunk

    # Modify 'wal' hash if necessary!
    if config is not None:
        for k, v in config.items():
            ireq['wal'][k] = v

    # Begin creating output request hash, put URI.
    oreq['wal'] = {}
    oreq['wal']['uri'] = ireq['wal']['uri']

    # Keep a reference to the settings, it is easier to work with!
    WA = ireq['wal']

    # Do Anti-IDS
    if 'anti_ids' in WA:
        encode_anti_ids(ireq, WA['anti_ids'])

    # Create HTTP request line and put it into buffer
    if 'raw' in WA and WA['raw'] != '':
        reqline = WA['raw']
    else:
        reqline = http_create_req(ireq)
    data_buffer.append(reqline)

    # Set Wal ID
    if 'wal_id' in WA and WA['wal_id'] == YES:
        ireq['WTag'] = "x77x61x6c"

    # Set Debug Option
    if 'debug' in WA and WA['debug'] == YES:
        Options['debug'] = YES
        if 'debug_file' in WA:
            Options['debug_file'] = WA['debug_file']
    else:
        Options['debug'] = NO

    # If HTTP Version is not 0.9 (1.0/1.1) put necessary info inside buffer
    if WA['version'] != "0.9":
        eol = WA['http_eol']

        # Headers defined directly on ireq (defaults and add-ons)
        header_lines = []
        for kn, vl in ireq.items():
            if kn == "wal" or kn == '':
                continue
            if kn == "Content-Length":
                # Content-Length is stored as a number
                header_lines.append(("%s: " % kn) + ("%d" % vl) + eol)
                continue
            header_lines.append(("%s: " % kn) + vl + eol)
        data_buffer.append("".join(header_lines))

        # Extra headers defined in header_data (user defined)
        extra_lines = []
        if 'header_data' in WA:
            for k, v in WA['header_data'].items():
                extra_lines.append(("%s: %s" % (k, v)) + eol)
        data_buffer.append("".join(extra_lines))

        # Have we some data? put into buffer!
        if 'data' in WA:
            data_buffer.append(eol + WA['data'])

        # Done with HTTP request creation. Put two end-of-line!
        data_buffer.append(eol + eol)

    # The complete request as it goes on the wire
    wal_str = "".join(data_buffer)

    # Do MD5 of HTTP request if requested!
    if 'request_fingerprint' in WA:
        oreq['wal']['request_fingerprint'] = encode_md5(wal_str)

    # Done with the HTTP request, create socket.
    # use_ssl is decided per request; Options['use_ssl'] only mirrors it, so
    # an earlier SSL request can no longer leak into a later plain one.
    if 'ssl' in WA and WA['ssl'] == YES:
        use_ssl = YES
        sock_class = WalNetwork(WA['proto'], WA['timeout'], WA['ssl'])
    else:
        use_ssl = NO
        sock_class = WalNetwork(WA['proto'], WA['timeout'])
    Options['use_ssl'] = use_ssl

    # Set bind to port if necessary!
    if 'bind_socket' in WA and WA['bind_socket'] == YES:
        if 'bind_host' not in WA or WA['bind_host'] == '':
            h = ''
        else:
            h = WA['bind_host']
        p = WA.get('bind_port', 33133)
        sock_class.set_bind_data(h, p)

    # Open socket
    if sock_class.open_socket() != YES:
        oreq['wal']['error'] = "Network Error: Invalid protocol"
        oreq['wal']['data'] = ''
        return 2

    # Port of the target host for this request
    if use_ssl == YES:
        target_port = WA.get('ssl_port', HTTPS_PORT)
    else:
        target_port = WA['port']

    # Proxy Stuff
    if 'proxy_host' in WA:
        h = WA['host']
        if use_ssl == YES:
            p = target_port
        else:
            p = WA.get('port', 80)
        pp = WA.get('proxy_port', 80)

        if 'proxy_version' in WA:
            pstr = "CONNECT %s:%s HTTP/%s" % (h, p, WA['proxy_version'])
        else:
            pstr = "CONNECT %s:%s HTTP/1.0" % (h, p)
        pstr += WA['http_eol']
        if 'proxy_authorization' in WA:
            # No blank is allowed between the header name and the colon.
            pstr += "Proxy-Authorization: Basic " + encode_base64(WA['proxy_user'] + ':' + WA['proxy_pass'])
            pstr += WA['http_eol']
        pstr += WA['http_eol']

        # Connect to the proxy itself. The CONNECT exchange is done in
        # clear text; TLS with the target starts only once the tunnel is up.
        sock_class.dossl = NO
        sock_class.connect_socket(WA['proxy_host'], pp)

        sock_class.send_socket_data(pstr) # Send data
        resp = sock_class.recv_socket_data() # Read

        if 'proxy_data' in WA: # Save proxy response data if user request it!
            oreq['wal']['proxy_data'] = resp

        fields = resp.split()
        if len(fields) > 1:
            stat = fields[1]
        else:
            stat = ''           # empty or malformed proxy reply
        if stat != '200': # Check for HTTP 200 code
            print("Proxy error (HTTP code): " + stat)

        if use_ssl == YES:
            # Start TLS with the target host through the tunnel.
            sock_class.ssl_con = TLSConnection(sock_class.sock_fd)
            sock_class.ssl_con.handshakeClientUnknown()
            sock_class.dossl = YES

    else: # No proxy support
        # Connect socket to host:port
        sock_class.connect_socket(WA['host'], target_port)

    # dump the HTTP request to the DEBUG_FILE if requested
    if Options.get('debug') == YES:
        try:
            dump = open(Options['debug_file'], 'a')
            try:
                dump.write('>>>> REQUEST BEGINS >>>>\n')
                dump.write(wal_str)
            finally:
                dump.close()
        except Exception:
            # Logging is best-effort: report the problem and carry on.
            print("Error in wal.py: Unable to write to dump file")

    # Send data!
    sock_class.send_socket_data(wal_str)

    # read data through socket
    fp = sock_class.socket_makefile()

    # OK, data was sent, now read response!
    while 1:    # Get HTTP Response
        version, status, reason = _read_status(fp)
        if status != "100":
            break
        # Catch 100 continue responses
        count_100 = count_100 + 1
        oreq['wal']['100_continue'] = count_100
        # skip the header from the 100 response
        while 1:
            skip = wrap_readline(fp).strip()
            if not skip:
                break

    reason = reason.strip()
    oreq['wal']['code'] = status
    oreq['wal']['message'] = reason

    if version == 'HTTP/1.0':           # HTTP/1.0
        oreq['wal']['version'] = "1.0"
    elif version.startswith('HTTP/1.'): # HTTP/1.1
        oreq['wal']['version'] = "1.1"
    elif version == 'HTTP/0.9':         # HTTP/0.9
        oreq['wal']['version'] = "0.9"
    else:                               # What the hell did we get ??
        oreq['wal']['version'] = version

    if version == "0.9":        # HTTP/0.9 response
        chunked = 0
        will_close = 1
        msg = HTTPMessage(StringIO())
    else:                       # HTTP/1.x response or something!
        msg = HTTPMessage(fp, 0)

        # Save all the HTTP headers if requested!
        if 'save_headers' in WA:
            oreq['wal']['header_data'] = {}
            oreq['wal']['header_list'] = [] + msg.headers
            for hds in msg.headers:
                # Split on the first ": " only so values keep any later
                # ": "; lines without the separator (continuation lines)
                # are kept in header_list but have no entry here.
                s1 = hds.split(": ", 1)
                if len(s1) == 2:
                    oreq['wal']['header_data'][s1[0]] = s1[1]

        # don't let the msg keep an fp
        msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            chunked = 1
            chunk_left = None
        else:
            chunked = 0

        # Get all Cookies
        mycookies = []
        for cookie_header in ('set-cookie', 'set-cookie2'):
            myc = msg.getheader(cookie_header)
            if myc:
                mycookies.append(myc)
        oreq['wal']['cookies'] = mycookies

        # will the connection close at the end of the response?
        conn = msg.getheader('connection')
        if conn:
            conn = conn.lower()
            # a "Connection: close" will always close the connection. if we
            # don't see that and this is not HTTP/1.1, then the connection will
            # close unless we see a Keep-Alive header.
            will_close = conn.find('close') != -1 or \
                         ((version != "HTTP/1.1" and
                           not msg.getheader('keep-alive'))
                          and WA['force_open'] != YES)
        else:
            # for HTTP/1.1, the connection will always remain open
            # otherwise, it will remain open IF we see a Keep-Alive header
            will_close = version != "HTTP/1.1" and \
                         not msg.getheader('keep-alive') \
                         and WA['force_open'] != YES

        if WA['force_close'] > 0:
            will_close = YES

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = msg.getheader('content-length')
        if length and not chunked:
            try:
                length = int(length)
            except ValueError:
                length = None
        else:
            length = None

        # does the body have a fixed length? (of zero)
        if (status == "204" or            # No Content
            status == "304" or            # Not Modified
            100 <= int(status) < 200):    # 1xx codes
            length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not will_close and \
           not chunked and \
           length is None:
            will_close = 1
    # Done with our big else HTTP/1.x response

    # -> Begin reading data
    amt = None
    data = ''

    try: # Catch timeout error, this needs python 2.3 or later!!
        data = read(fp, chunked, chunk_left, will_close, length, amt)
    except socket.timeout:
        oreq['wal']['error'] = "Network Error: Timeout"
        # read() returns nothing when it is interrupted, so no partial
        # body is available here.
        oreq['wal']['data'] = data
        try:
            sock_class.close_socket()
        except socket.error:
            pass    # best-effort cleanup; the timeout is what gets reported
        return 2    # oops! return network error!

    oreq['wal']['data'] = data

    # -> End reading data

    # Close socket
    sock_class.close_socket()

    # Are we here? everything is fine :)
    oreq['wal']['error'] = ""
    return 0 # Return success!!
# EOF: http_do_request

# FUNC     : http_create_req
# PARAMS   : ireq (Input REQuest), flag 
# RETURN   : Return the created HTTP request, aka URI.
# ABSTRACT : Create the HTTP URI and return it.
#            This code has been ported from perl to python. (libwhisker).
def http_create_req(ireq, flag=NO):
    """Build the request line (or only its URI part) from ireq['wal'].

    ireq -- request dict; only its 'wal' settings are read.
    flag -- when YES only the URI part (prefix + uri + postfix and the
            parameters) is returned; otherwise the method, the optional
            "http://[user[:password]@]host:port" part, the protocol/version
            (skipped for version '0.9') and the end-of-line are included.

    If 'full_request_override' is set it is returned untouched.
    """
    wal = ireq['wal']

    if 'full_request_override' in wal:
        return wal['full_request_override']

    whole_line = (flag != YES)
    pieces = []

    if whole_line:
        pieces.extend([wal['method'], wal['http_space1']])

        if wal['include_host_in_uri'] > 0:
            pieces.append('http://')
            if 'uri_user' in wal:
                pieces.append(wal['uri_user'])
                if 'uri_password' in wal:
                    pieces.extend([':', wal['uri_password']])
                pieces.append('@')
            pieces.extend([wal['host'], ':', str(wal['port'])])

    pieces.extend([wal['uri_prefix'], wal['uri'], wal['uri_postfix']])

    # Parameters are appended only when present and not empty
    if wal.get('parameters', '') != '':
        pieces.extend([wal['uri_param_sep'], wal['parameters']])

    if whole_line:
        if wal['version'] != '0.9':
            pieces.extend([wal['http_space2'], wal['protocol'] + '/', wal['version']])
        pieces.append(wal['http_eol'])

    return "".join(pieces)
# EOF: http_create_req    

# FUNC     : http_fixup_request
# PARAMS   : ireq (Input REQuest)
# RETURN   : Nothing.
# ABSTRACT : Fill in what the user left out of the HTTP request, like "Host" and "Content-Length", etc.
#            This code has been ported from perl to python. (libwhisker).
def http_fixup_request(ireq = None):
    """Fill in the parts of a request the caller left out.

    ireq is modified in place; nothing is returned.  With ireq None the
    call is a no-op.

    - an empty 'uri' becomes '/'
    - for version "1.1" a missing 'Host' header is set from the 'host'
      setting and a missing 'Connection' header is set to 'Keep-Alive'
    - when 'data' is set, 'Content-Length' is set to its length and a
      missing 'Content-Type' defaults to form encoding
    - when 'proxy_host' is set, 'include_host_in_uri' is switched on
    """
    if ireq is None:
        return

    wal = ireq['wal']

    if wal['uri'] == '':
        wal['uri'] = '/'

    # Headers live on ireq itself (see 'Host'), not in the 'wal' settings,
    # so that is where their presence has to be checked.
    if wal['version'] == "1.1":
        if 'Host' not in ireq:
            ireq['Host'] = wal['host']
        if 'Connection' not in ireq:
            ireq['Connection'] = 'Keep-Alive'

    if 'data' in wal:
        # Always recomputed, so a request object that is reused with new
        # data never carries the length of the previous body.
        ireq['Content-Length'] = len(wal['data'])
        if 'Content-Type' not in ireq:
            ireq['Content-Type'] = 'application/x-www-form-urlencoded'

    if 'proxy_host' in wal:
        wal['include_host_in_uri'] = YES
# EOF: def http_fixup_request

# FUNC     : http_construct_headers
# PARAMS   : rhin
# RETURN   : 
# ABSTRACT : 
def http_construct_headers(rhin = {}):
    """Placeholder: not implemented, rhin is ignored and None is returned."""
    return None
# EOF: def http_construct_headers

# FUNC     : _read_status
# PARAMS   : fp
# RETURN   : version, status, reason
# ABSTRACT : Read a line and extract the HTTP version, HTTP code and HTTP message (reason).
#            This code is a modified version of the function in the python httplib module.
def _read_status(fp):
    """Read the status line from fp and return (version, status, reason).

    version, status and reason are returned as strings.  reason keeps the
    trailing end-of-line of the line that was read and is "" when the
    status line carries no reason phrase.  A line that does not start with
    "HTTP/", or that has fewer than two fields, is treated as an HTTP/0.9
    response and reported as ("0.9", "200", "").
    """
    line = wrap_readline(fp)

    # At most three fields: version, status code and the free-text reason.
    # With only two fields present, split() drops the trailing end-of-line,
    # so the status code never carries "\r\n".
    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        version, status = fields
        reason = ""
    else:
        # empty version makes the next test fail, so the line is treated
        # as a 0.9 response.
        version = ""

    if not version.startswith('HTTP/'): # The response is 0.9, return the info.
        return "0.9", "200", ""

    return version, status, reason
# EOF: def _read_status

# FUNC     : read
# PARAMS   : fp, chunked, chunk_left, will_close, length, amt
# RETURN   : Return an empty string on error, or the data that was read.
# ABSTRACT : Read data.
#            This code is a modified version of the function in the python httplib module.
def read(fp, chunked, chunk_left, will_close, length, amt=None):
    """Read a response body from fp and return it as a string.

    fp         -- file-like object to read from; '' is returned when None
    chunked    -- true when the body uses chunked transfer encoding
    chunk_left -- bytes left in the current chunk (None: read a chunk header)
    will_close -- true when the body ends when the connection is closed
    length     -- number of body bytes, or None when unknown
    amt        -- maximum number of bytes to read, None for the whole body
    """
    if fp is None:
        return ''

    if chunked:
        return _read_chunked(chunked, chunk_left, fp, amt)

    if amt is None:
        # unbounded read
        if will_close:
            # This can have some trouble with webservers that do not signal
            # end-of-file for the read() call.
            return fp.read()
        if length is None:
            return ''       # nothing is known to be readable
        return _safe_read(fp, length)

    if length is not None and amt > length:
        # clip the read to the "end of response"
        amt = length

    # we do not use _safe_read() here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks)
    return fp.read(amt)
# EOF: read

# FUNC     : _read_chunked
# PARAMS   : chunked, chunk_l, fp, amt
# RETURN   : value
# ABSTRACT : Read HTTP chunked data.
#            This code is a modify version of the function in python httplib module.
def _read_chunked(chunked, chunk_l, fp, amt=None):
    """Read a chunked body from fp and return the decoded data.

    chunked -- chunked flag of the response; must not be _UNKNOWN
    chunk_l -- bytes left in the current chunk, None to start by reading
               a chunk-size line
    fp      -- file-like object to read from
    amt     -- maximum number of bytes to return, None for the whole body

    A chunk-size line that is not valid hexadecimal raises ValueError.
    """
    assert chunked != _UNKNOWN
    pieces = []                 # chunks collected so far, joined on return

    chunk_left = chunk_l

    while 1:
        if chunk_left is None:
            line = wrap_readline(fp)
            i = line.find(';')
            if i >= 0:
                line = line[:i] # strip chunk-extensions
            chunk_left = int(line, 16)
            if chunk_left == 0:
                break
        if amt is None:
            pieces.append(_safe_read(fp, chunk_left))
        elif amt < chunk_left:
            pieces.append(_safe_read(fp, amt))
            return ''.join(pieces)
        elif amt == chunk_left:
            pieces.append(_safe_read(fp, amt))
            _safe_read(fp, 2)  # toss the CRLF at the end of the chunk
            return ''.join(pieces)
        else:
            pieces.append(_safe_read(fp, chunk_left))
            amt -= chunk_left

        # we read the whole chunk, get another
        _safe_read(fp, 2)      # toss the CRLF at the end of the chunk
        chunk_left = None

    # read and discard trailer up to the CRLF terminator
    ### note: we shouldn't have any trailers!
    while 1:
        line = wrap_readline(fp)
        # An empty string means end-of-file: stop instead of looping
        # forever when the peer closes without sending the final CRLF.
        if not line or line in ('\r\n', '\n'):
            break

    return ''.join(pieces)
# EOF: def _read_chunked

# FUNC     : _safe_read
# PARAMS   : fp, amt
# RETURN   : string
# ABSTRACT : Perform a safe read.
#            This code is a modify version of the function in python httplib module.
def _safe_read(fp, amt):
    s = ''
    while amt > 0:
        chunk = fp.read(amt)
        if not chunk:
            pass
        s = s + chunk
        amt = amt - len(chunk)
    return s
# EOF: _safe_read

# FUNC     : class HTTPMessage
# PARAMS   : mimetools.Message
# RETURN   : ...
# ABSTRACT : A HTTP message class.
#            Ripped from python httplib module.
class HTTPMessage(mimetools.Message):
    """HTTP header block parser built on mimetools.Message.

    Repeated headers are merged into one comma separated value and every
    line is read through wrap_readline() so it can be logged.
    """

    def addheader(self, key, value):
        """Store value under key, appending with ", " if key already exists."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Append a continuation line to the value already stored for key."""
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines from self.fp until the blank line or EOF.

        Fills self.dict (parsed values), self.headers (raw lines),
        self.unixfrom and self.status (empty when no problem was found).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = header_lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = wrap_readline(self.fp)
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                header_lines.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                header_lines.append(line)
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
# EOF: class HTTPMessage

###############################################
#
# Crawler/Spider Engine Functions
#
###############################################


###############################################
#
# Utils Functions
#
###############################################

# FUNC     : utils_randstr
# PARAMS   : drift - number of characters to generate (default 10)
#            chars - pool of characters to draw from
#                    (default: a-z, A-Z and 0-9)
# RETURN   : A string
# ABSTRACT : Build a random string of 'drift' characters, each one picked
#            with random.choice() from 'chars'.
def utils_randstr(drift = 10, chars = None):
    if chars is None:
        chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    picked = []
    remaining = drift
    # One choice() call per character; a zero or negative count gives "".
    while remaining > 0:
        picked.append(choice(chars))
        remaining -= 1

    return "".join(picked)
# EOF: def utils_randstr

# FUNC     : utils_get_dir
# PARAMS   : mydir - URI (or path) to reduce to its directory part
# RETURN   : string, or None when no URI is given
# ABSTRACT : Strip the fragment ('#...') and the query ('?...') from a URI
#            and cut everything after the last '/'. A URI without any '/'
#            is returned as is (minus fragment and query).
def utils_get_dir(mydir = None):
    if mydir is None:
        return None

    # The fragment goes first, then the query string.
    for marker in ('#', '?'):
        cut = mydir.find(marker)
        if cut >= 0:
            mydir = mydir[:cut]

    # rfind() answers -1 when there is no '/', where rindex() would raise
    # ValueError and make the check below pointless.
    cut = mydir.rfind('/')
    if cut >= 0:
        mydir = mydir[:cut + 1]

    return mydir
# EOF: def utils_get_dir    

# FUNC     : utils_port_open
# PARAMS   : host, port
# RETURN   : YES if the connection succeeds, NO if it fails,
#            0 when host or port is missing
# ABSTRACT : Check if a port is open in host by trying a TCP connect.
def utils_port_open(host = None, port = None):

    # Both values are required; bail out when either one is missing.
    if host is None or port is None:
        return 0

    sock = WalNetwork(TCP,10,0) # Use TCP, timeout 10, no SSL!
    sock.open_socket()
    er = sock.connect_socket(host, port)
    if er == 0: # Success!
        sock.close_socket()
        return YES

    # NOTE(review): the socket is not closed on a failed connect, same as
    # before -- confirm whether WalNetwork needs an explicit close here.
    return NO

# EOF: utils_port_open    

# FUNC     : utils_split_uri
# PARAMS   : uri - URI to split
#            hr  - optional request dict; its 'wal' sub-dict is updated
# RETURN   : None if no URI is given, otherwise a list:
#              [0] path (or the untouched URI when nothing was parsed)
#              [1] scheme ('http' / 'https') or None
#              [2] host or None
#              [3] port (int; 0 when unknown)
#              [4] query or None
#              [5] fragment or None
#              [6] user or None
#              [7] password or None
# ABSTRACT : Split the URI in parts and put them in a list. Only http and
#            https URIs are parsed; anything else comes back unsplit with
#            port 0 and 'hr' is left alone. When a port is known and 'hr'
#            is given, uri/ssl/host/port/parameters/uri_user/uri_password
#            are written to (or removed from) hr['wal'].
def utils_split_uri(uri = None, hr = None):
    if uri is None:
        return None

    scheme = None
    netloc = None
    user = None
    passw = None
    query = None
    fragment = None
    port = 0

    colon = uri.find(':')
    if colon > 0:
        candidate = uri[:colon].lower()
        if candidate in ('http', 'https'):
            scheme = candidate
            uri = uri[colon+1:]
            if uri[:2] == '//':
                # The authority runs up to the next '/', else up to '#',
                # else to the end of the string.
                end = uri.find('/', 2)
                if end < 0:
                    end = uri.find('#')
                    if end < 0:
                        end = len(uri)
                netloc = uri[2:end]
                uri = uri[end:]
            if '#' in uri:
                uri, fragment = uri.split('#', 1)
            if '?' in uri:
                uri, query = uri.split('?', 1)

    # Parse netloc for user:passwd and :port
    if netloc is not None:
        if '@' in netloc:
            userinfo, netloc = netloc.split('@', 1)
            if ':' in userinfo:
                user, passw = userinfo.split(':', 1)
        if ':' in netloc:
            netloc, port_text = netloc.split(':', 1)
            if port_text.isdigit():
                # Same type as the default ports below.
                port = int(port_text)
            elif port_text != "":
                # Not a number: hand it back untouched.
                port = port_text

    # Fall back to the default port of the scheme.
    if port == 0 and scheme is not None:
        if scheme == 'http':
            port = 80
        elif scheme == 'https':
            port = 443

    res = [uri, scheme, netloc, port, query, fragment, user, passw]

    if port == 0:
        return res

    if hr is not None:
        wal = hr['wal']
        wal['uri'] = uri
        if scheme == 'https':
            wal['ssl'] = YES
        if netloc is not None:
            wal['host'] = netloc
        wal['port'] = port

        # Optional parts: store them when present, otherwise make sure a
        # value left over from an earlier request does not survive.
        optional = (('parameters', query),
                    ('uri_user', user),
                    ('uri_password', passw))
        for key, value in optional:
            if value is not None:
                wal[key] = value
            elif key in wal:
                del wal[key]

    return res
# EOF: utils_split_uri    

# FUNC     : utils_lowercase_keys
# PARAMS   : href - dict whose keys are lower cased in place
# RETURN   : None
# ABSTRACT : Lower case the headers (the dict keys); values are kept.
def utils_lowercase_keys(href = None):

    if href is None:
        return None

    # Walk a snapshot: deleting and inserting keys while iterating the
    # live dict is not safe.
    for key, value in list(href.items()):
        lowered = key.lower()
        if lowered != key:
            del href[key]
            href[lowered] = value
# EOF: def utils_lowercase_keys

# FUNC     : utils_find_lowercase_key
# PARAMS   : href - dict to search
#            key  - key to look for, compared case-insensitively
# RETURN   : None on error or when nothing matches, and a list on success
# ABSTRACT : Return a list with the values of every key in 'href' that
#            equals 'key' once both are lower cased.
def utils_find_lowercase_key(href = None, key = None):

    if href is None or key is None:
        return None

    wanted = key.lower()
    data = [v for k, v in href.items() if k.lower() == wanted]

    if data:
        return data
    return None
# EOF: def utils_find_lowercase_key    

# FUNC     : utils_join_uri
# PARAMS   : res - list laid out as
#                  [path, scheme, host, port, query, fragment, user, password]
# RETURN   : None on error (no list, or fewer than 8 items), and on
#            success a string
# ABSTRACT : Join to a string the given splitted URI. Port 80 for http and
#            port 443 for https are left out; the port may be an int or a
#            numeric string.
#            This code has been ported from perl to python. (libwhisker).
def utils_join_uri(res = None):
    if not res or len(res) < 8:
        return None

    path, scheme, host, port, query, fragment, user, passw = res[:8]
    parts = []

    if scheme is not None:
        parts.append(scheme + ':')

    if host is not None:
        parts.append("//")
        if user is not None:
            parts.append(user)
            if passw is not None:
                parts.append(':' + passw)
            # The '@' belongs to the user, with or without a password.
            parts.append('@')
        parts.append(host)

    # Anything that is not a number counts as "no port".
    try:
        port_num = int(port)
    except (TypeError, ValueError):
        port_num = 0

    if port_num > 0:
        is_default = ((port_num == 80 and scheme == 'http') or
                      (port_num == 443 and scheme == 'https'))
        if not is_default:
            parts.append(':%d' % port_num)

    if path is not None:
        parts.append(path)
    if query is not None:
        parts.append('?' + query)
    if fragment is not None:
        parts.append('#' + fragment)

    return "".join(parts)
# EOF: def utils_join_uri

# FUNC     : utils_save_page
# PARAMS   : file - path of the file to write
#            hr   - response dict; hr['wal']['data'] holds the page
# RETURN   : NO when file or hr is missing, YES otherwise (also when the
#            response carries no 'data' and nothing is written)
# ABSTRACT : Save to file the HTML page
def utils_save_page(file = None, hr = None):

    # Both arguments are needed; either one missing is an error.
    if hr is None or file is None:
        return NO

    if 'data' not in hr['wal']:
        return YES

    fp = open(file,"w")
    try:
        fp.write(hr['wal']['data'])
    finally:
        # Release the handle even when the write fails.
        fp.close()

    return YES
# EOF: def utils_save_page

# FUNC     : utils_split_dir
# PARAMS   : uri  - URI to split
#            mode - 1: append '/' to each dir, 2: prepend '/' to each dir,
#                   anything else: bare dir names
# RETURN   : None when no URI is given, a list on success
# ABSTRACT : Split the URI into a list of dirs. Empty pieces are skipped
#            and whatever follows the last '/' is not reported.
def utils_split_dir(uri = None, mode = 1):
    if uri is None:
        return None

    dirs = []
    # Every piece in front of a '/' is a candidate; the text after the
    # final '/' is left out.
    for name in uri.split('/')[:-1]:
        if name == "":
            continue
        if mode == 1:       # Put / after dir
            name = name + '/'
        elif mode == 2:     # Put / in front of dir
            name = '/' + name
        dirs.append(name)

    return dirs
# EOF: def utils_split_dir    

# FUNC     : utils_join_tag
# PARAMS   : name - tag name
#            href - attributes: a sequence of (name, value) pairs, or a dict
# RETURN   : A string, or None when name or href is missing
# ABSTRACT : Join a HTML tag and return it as a string, e.g.
#            <a href="x.html" disabled>. Attributes with an empty name are
#            skipped; attributes with an empty value are written bare.
def utils_join_tag(name = None, href = None):

    if name is None or href is None:
        return None

    # Accept a dict as well as a list of pairs.
    if hasattr(href, 'items'):
        pairs = href.items()
    else:
        pairs = href

    out = ["<" + name]
    for k, v in pairs:
        if k == '':
            continue
        if v is None or v == '':
            out.append(" " + k)
        else:
            # name="value" -- the '=' is required for a valid attribute.
            out.append(" " + k + "=\"" + v + "\"")
    out.append('>')

    return "".join(out)
# EOF: def utils_join_tag 

# FUNC     : utils_dmkdir
# PARAMS   : newdir - directory to create. The function may be called as
#                     utils_dmkdir(newdir) or, for compatibility with the
#                     old two argument form, utils_dmkdir(ignored, newdir).
# RETURN   : ...
# ABSTRACT : Create a directory.
#           Ripped from spike proxy, and spike proxy ripped from
#           http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/82465
def utils_dmkdir(self, newdir = None):

    """works the way a good mkdir should :)
    - already exists, silently complete
    - regular file in the way, raise an exception
    - parent directory(ies) does not exist, make them as well
    """
    # 'self' is a leftover of the method this was copied from. With a
    # single argument that argument is the directory.
    if newdir is None:
        newdir = self

    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired " \
                      "dir, '%s', already exists." % newdir)

    head, tail = os.path.split(newdir)
    # Build the missing parents first.
    if head and not os.path.isdir(head):
        utils_dmkdir(head)
    if tail:
        os.mkdir(newdir)
# EOF: def utils_dmkdir

# FUNC     : utils_pathjoin
# PARAMS   : *paths - path components
# RETURN   : A string ("" when no component is given)
# ABSTRACT : Join the path. A leading '/' or '\' is removed from every
#            component so that none of them restarts the path; when the
#            first component began with '/', the result is made absolute
#            again. Empty components are skipped.
#            Ripped from spike proxy
def utils_pathjoin(*paths):
    temp = ""

    for path in paths:
        if path != "":
            if path[0] == "/" or path[0] == "\\":
                #we are windoze compliant!
                path = path[1:]
            temp = os.path.join(temp, path)

    #if the first was an absolute path, add the '/' back.
    #Slicing instead of indexing keeps this safe for no arguments and for
    #an empty first component.
    if paths and paths[0][:1] == "/":
        temp = "/" + temp

    return temp
# EOF: def utils_pathjoin

###############################################
#
# Code/Encode Functions
#
###############################################

# FUNC     : encode_base64
# PARAMS   : str - data to encode (text is encoded as UTF-8 first)
# RETURN   : base64 encode string
# ABSTRACT : Encode string to base64. The output is MIME style: a newline
#            is added after every 76 characters of output and at the end,
#            so strip it before using the result in a header.
def encode_base64(str = ""):
    # encodestring() is the old name of encodebytes(); use whichever the
    # running interpreter provides.
    encoder = getattr(base64, 'encodebytes', None)
    if encoder is None:
        encoder = base64.encodestring

    data = str
    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    encoded = encoder(data)
    # Hand back a native string so callers can concatenate it.
    if not isinstance(encoded, type("")):
        encoded = encoded.decode('ascii')
    return encoded
    
# EOF: def encode_base64

# FUNC     : decode_base64
# PARAMS   : str - base64 text to decode
# RETURN   : decode string (a byte string)
# ABSTRACT : Decode a string in base64
def decode_base64(str = ""):
    # decodestring() is the old name of decodebytes(); use whichever the
    # running interpreter provides.
    decoder = getattr(base64, 'decodebytes', None)
    if decoder is None:
        decoder = base64.decodestring

    data = str
    if not isinstance(data, bytes):
        data = data.encode('ascii')
    return decoder(data)
# EOF: def decode_base64

# FUNC     : encode_md5
# PARAMS   : str - data to hash (text is encoded as UTF-8 first)
# RETURN   : MD5 digest as a hexadecimal string
# ABSTRACT : Encode string to MD5
def encode_md5(str = ""):
    # hashlib supersedes the old 'md5' module.
    import hashlib

    data = str
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    return hashlib.md5(data).hexdigest()
# EOF: def encode_md5

# FUNC     : encode_uri_hex
# PARAMS   : str - URI to encode
# RETURN   : encoded string
# ABSTRACT : Encode a URI: every character except '/' is replaced by its
#            %xx hex escape; '/' is kept so the path structure survives.
def encode_uri_hex(str = ""):
    out = []

    for c in str:
        if c == '/':
            out.append(c)
        else:
            out.append("%%%02x" % ord(c))

    return "".join(out)
# EOF: encode_uri_hex

# FUNC     : encode_uri_randomhex
# PARAMS   : str - URI to encode
# RETURN   : encode string
# ABSTRACT : Encode a URI randomly: each character except '/' is either
#            replaced by its %xx hex escape or kept, picked at random;
#            '/' is always kept.
def encode_uri_randomhex(str = ""):
    out = []

    for c in str:
        if c == '/':
            out.append(c)
        elif int(uniform(0,10)) % 2 == 1:
            out.append("%%%02x" % ord(c))
        else:
            out.append(c)

    return "".join(out)
# EOF: encode_uri_randomhex

# FUNC     : encode_decode_rot13
# PARAMS   : str - text to transform
# RETURN   : encode/decode string
# ABSTRACT : Encode or decode a string in ROT13. Only a-z and A-Z are
#            rotated, everything else is copied; applying the function
#            twice gives the input back.
def encode_decode_rot13(str = ""):
    rotated = 'nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM'
    plain = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # A plain dict lookup replaces string.maketrans()/string.translate().
    table = dict(zip(rotated, plain))

    return "".join([table.get(c, c) for c in str])
# EOF: def encode_decode_rot13    

# FUNC     : encode_anti_ids
# PARAMS   : rhin - request dict; rhin['wal']['uri'] is rewritten in place
#            mode - string of digits selecting the techniques:
#                   1 random hex encoding      2 directory self-reference
#                   3 premature URL ending     4 prepend long random string
#                   5 fake parameter           6 TAB as request spacer
#                   7 random case              8 windows directory separator
# RETURN   : ...
# ABSTRACT : Do anti-IDS techniques on URI. The untouched URI is saved in
#            rhin['wal']['uri_orig']. Techniques are applied in the fixed
#            order 4, 7, 2, 8, 1, 5, 3, 6, whatever the order in 'mode'.
#            Some of this code has been ported from perl to python. (libwhisker)
def encode_anti_ids(rhin = None, mode = ""):
    # Check first: indexing a missing request would raise.
    if rhin is None:
        return

    WA = rhin['wal']
    WA['uri_orig'] = WA['uri']

    # mode 4 - prepend long random string
    if '4' in mode:
        if WA['uri'].startswith('/'):
            s = ''
            while len(s) < 512:
                s += utils_randstr()
            WA['uri'] = "/" + s + "/.." + WA['uri']

    # mode 7  - (windows) random case sensitivity
    if '7' in mode:
        mixed = []
        for c in WA['uri']:
            # Slashes are left alone; any other character is upper cased
            # about half of the time.
            if c != '/' and int(uniform(0,10)) % 2 == 1:
                c = c.upper()
            mixed.append(c)
        WA['uri'] = "".join(mixed)

    # mode 2 - directory self-reference (/./)
    if '2' in mode:
        WA['uri'] = WA['uri'].replace('/', '/./')

    # mode 8 - windows directory separator (\)
    if '8' in mode:
        uri = WA['uri'].replace('/', '\\')
        # The leading separator stays a '/'.
        if uri.startswith('\\'):
            uri = '/' + uri[1:]
        # Restore the '//' that follows a scheme.
        p = re.compile(r'^(http|file|ftp|nntp|news|telnet):\\{1,2}', re.IGNORECASE)
        uri = p.sub(r'\1://', uri)
        # A trailing separator stays a '/' as well.
        if uri.endswith('\\'):
            uri = uri[:-1] + '/'
        WA['uri'] = uri

    # mode 1 - random URI (non-UTF8) encoding
    if '1' in mode:
        WA['uri'] = encode_uri_randomhex(WA['uri'])

    # mode 5 - fake parameter
    if '5' in mode:
        s = utils_randstr()
        y = utils_randstr()
        WA['uri'] = "/" + s + ".html%3f" + y + "=/../" + WA['uri']

    # mode 3 - premature URL ending
    if '3' in mode:
        s = utils_randstr()
        WA['uri'] = "/%20HTTP/1.1%0D%0A%0D%0AAccept%3A%20" + s + "/../.." + WA['uri']

    # mode 6 - TAB as request spacer
    if '6' in mode:
        WA['http_space'] = "\t"
    
# EOF: def encode_anti_ids    

# FUNC     : encode_unicode
# PARAMS   : str, mode
# RETURN   : None (always)
# ABSTRACT : DO NOT WORK. Placeholder only: both arguments are ignored and
#            nothing is encoded.
def encode_unicode(str = "", mode = 1):
    # Unfinished draft, kept for reference:
    #
    #     if mode == 1:
    #         uni = str.decode('utf-8')
    #     if mode == 2:
    #         uni = str.decode('utf-16')
    #
    #     return uni
    return None
# EOF: encode_unicode

###############################################
#
# Auth Functions
#
###############################################

# FUNC     : auth_set
# PARAMS   : method - 'basic' or 'proxy-basic' (anything else is ignored)
#            href   - header dict that receives the auth header
#            user, passw - credentials
# RETURN   : ...
# ABSTRACT : Set auth header: 'Authorization' for 'basic' and
#            'Proxy-Authorization' for 'proxy-basic', both holding
#            'Basic ' + base64(user:passw). Nothing is done when href is
#            missing or when user and passw are both empty.
def auth_set(method = "", href = None, user = "", passw = ""):

    if href is None:
        return

    if user == "" and passw == "":
        return

    headers = {'basic': 'Authorization',
               'proxy-basic': 'Proxy-Authorization'}
    if method not in headers:
        return

    token = encode_base64(user + ':' + passw)
    # The encoder output may carry newlines (line wrapping and a trailing
    # one); a header value must stay on a single line.
    token = "".join(token.split())
    href[headers[method]] = 'Basic ' + token
# EOF: def auth_set

# FUNC     : auth_unset
# PARAMS   : href - header dict to clean
# RETURN   : ...
# ABSTRACT : Removes the values set by auth_set(): the 'Authorization' and
#            'Proxy-Authorization' headers. Headers that are not present
#            are skipped.
def auth_unset(href = None):

    if href is None:
        return

    # auth_set() stores only one of the two headers per call, so either
    # one may legitimately be absent.
    for name in ('Authorization', 'Proxy-Authorization'):
        if name in href:
            del href[name]
    # The callback/data keys of the perl original (auth_callback,
    # auth_proxy_callback, auth_data, auth_proxy_data) are not handled.
# EOF: def auth_unset    

# FUNC     : auth_brute_force
# PARAMS   : method    - auth method handed to auth_set()
#            hin       - request dict
#            user      - user name to try
#            passw     - iterable of candidate passwords
#            fail_code - HTTP code that means "wrong password" (default 401)
# RETURN   : None in error, a string on success
# ABSTRACT : Perform a brute force auth: every candidate is set with
#            auth_set() and the request is sent; the first candidate whose
#            response code differs from fail_code is returned. A failed
#            request stops the run.
def auth_brute_force(method = None, hin = None, user = "", passw = "", fail_code = 401):
    if method is None or hin is None:
        return None

    if user == "" or passw == "":
        return None

    response = {}
    for candidate in passw:
        auth_set(method, hin, user, candidate)

        if http_do_request(hin, response) != 0:
            return None
        if int(response['wal']['code']) != fail_code:
            return candidate

    return None
# EOF: auth_brute_force

###############################################
#
# Cookies Functions
#
###############################################

#
# The cookie functions need to be tested!!
# 

# FUNC     : cookie_read
# PARAMS   : jarref - cookie jar (dict) to fill
#            href   - response dict; href['wal']['cookies'] lists the
#                     cookies to parse
# RETURN   : NO on failure, number of cookies processed
# ABSTRACT : Read cookies and parse them: every entry of
#            href['wal']['cookies'] is handed to cookie_parse().
#            This code has been ported from perl to python. (libwhisker).
def cookie_read(jarref = None, href = None):
    if jarref is None or href is None:
        return NO

    if 'cookies' not in href['wal']:
        return NO

    count = 0
    for c in href['wal']['cookies']:
        cookie_parse(jarref, c)
        count = count + 1

    return count
# EOF: def cookie_read    

# FUNC     : cookie_parse
# PARAMS   : jarref - cookie jar (dict) to update
#            header - cookie header text, or a list/tuple whose first item
#                     is that text
# RETURN   : Nothing
# ABSTRACT : Parse a cookie and put values into a list stored in the jar
#            under the cookie name: [value, domain, path, expire, secure].
#            The first name=value pair names the cookie; domain, path and
#            expires fill their slots, a 'secure' flag sets slot 4 to 1,
#            other attributes are ignored. A cookie with an empty value is
#            removed from the jar.
#            This code has been ported from perl to python. (libwhisker).
def cookie_parse(jarref = None, header = None):
    if jarref is None or header is None:
        return

    # Take the text itself, or the first element of a list/tuple.
    if isinstance(header, (list, tuple)):
        if not header:
            return
        hdr = header[0]
    else:
        hdr = header

    construct = ["", "", "", "", 0] # Cookie: value, domain, path, expire, secure
    cookie_name = ""
    dodel = 0

    pair_re = re.compile(r'^[ \t]*(.+?)=(.*)$')
    scheme_re = re.compile('^http://', re.IGNORECASE)
    tail_re = re.compile('/.*$')

    for part in hdr.split(';'):
        found = pair_re.match(part)
        if not found:
            # Flag without a value; search() copes with the blank that
            # follows the ';' separator.
            if re.search('secure', part, re.IGNORECASE):
                construct[4] = 1
            continue

        name = found.group(1)
        val = found.group(2)
        attr = name.lower()

        if attr == 'domain':
            # Keep the bare host: no scheme, nothing from the first '/'.
            val = scheme_re.sub('', val)
            val = tail_re.sub('', val)
            construct[1] = val

        elif attr == 'path':
            if val != '/':
                val = re.sub('/$', '', val)
            construct[2] = val

        elif attr == 'expires':
            construct[3] = val

        elif cookie_name == "":
            # Only the first unrecognised pair is the cookie itself; later
            # ones (unknown attributes) must not rename it.
            cookie_name = name
            if val == '':
                dodel = 1
            else:
                construct[0] = val

    if cookie_name == "":
        return

    if dodel:
        if cookie_name in jarref:
            del jarref[cookie_name]
    else:
        jarref[cookie_name] = construct
     
# EOF: cookie_parse    

# FUNC     : cookie_write
# PARAMS   : jarref   - cookie jar (dict: name -> [value, domain, path,
#                       expire, secure])
#            hin      - request dict that receives the 'Cookie' header
#            override - YES to send every cookie regardless of domain/path
# RETURN   : Nothing
# ABSTRACT : Write cookie to header dict. A cookie is sent when the request
#            host ends with the cookie domain and the request URI starts
#            with the cookie path (both compared case-insensitively), or
#            when override is YES. Secure cookies are skipped on non SSL
#            requests. hin['Cookie'] is only set when something is sent.
#            This code has been ported from perl to python. (libwhisker).
def cookie_write(jarref = None, hin = None, override = NO):
    if jarref is None or hin is None:
        return

    wal = hin['wal']
    if 'ssl' not in wal:
        wal['ssl'] = NO

    pieces = []
    for name, val in jarref.items():
        if name == '':
            continue
        # The secure slot is 0 or "" for ordinary cookies, so test its
        # truth value instead of comparing it with a number.
        if wal['ssl'] == NO and val[4]:
            continue
        if override == YES:
            wanted = 1
        else:
            # Domain is matched as a suffix of the host, path as a prefix
            # of the URI; an empty domain or path matches everything.
            wanted = (wal['host'].lower().endswith(val[1].lower()) and
                      wal['uri'].lower().startswith(val[2].lower()))
        if wanted:
            pieces.append(name + "=" + val[0] + ";")

    if pieces:
        hin['Cookie'] = "".join(pieces)
# EOF: def cookie_write

# FUNC     : cookie_get
# PARAMS   : jarref - cookie jar (dict)
#            name   - cookie name
# RETURN   : None on failure, cookie on success
# ABSTRACT : Get cookie: return the jar entry stored under 'name'.
#            This code has been ported from perl to python. (libwhisker).
def cookie_get(jarref = None, name = None):

    if jarref is None:
        return None

    # dict.get() answers None for an unknown name.
    return jarref.get(name)
# EOF: def cookie_get     

# FUNC     : cookie_set
# PARAMS   : jarref - cookie jar (dict) to update
#            name   - cookie name (required)
#            value  - cookie value; "" removes the cookie from the jar
#            domain, path, expire, secure - remaining cookie fields;
#                     path defaults to "/" and secure to 0
# RETURN   : Nothing
# ABSTRACT : Construct cookie and store it in the jar as
#            [value, domain, path, expire, secure].
#            This code has been ported from perl to python. (libwhisker).
def cookie_set(jarref = None,name = "",value = "" ,domain = "", path = "", expire = "", secure = ""):
    if jarref is None:
        return

    if name == "":
        return

    if value == "":
        # An empty value means "forget this cookie"; an unknown name is
        # not an error.
        if name in jarref:
            del jarref[name]
        return

    if path == "":
        path = "/"

    if secure == "":
        secure = 0

    jarref[name] = [value, domain, path, expire, secure]
# EOF: cookie_set

###############################################
#
# Misc Functions
#
###############################################

# FUNC     : get_page
# PARAMS   : url - URI to fetch
#            hin - optional request dict; when missing a fresh one is
#                  prepared with http_init()
# RETURN   : None and string on failure, code and data on success
# ABSTRACT : Get HTTP code and HTML data from URI
def get_page(url = "", hin = None):
    if url == "":
        return None,"No URL supplied"

    # Work on the caller's request, or on a new one.
    if hin is None:
        request = {}
        http_init(request)
    else:
        request = hin

    utils_split_uri(url, request)
    http_fixup_request(request)

    response = {}
    if http_do_request(request, response) != 0:
        return (None,response['wal']['error'])

    wal = response['wal']
    return (wal['code'], wal['data'])
# EOF: def get_page    

# FUNC     : get_page_to_file
# PARAMS   : url      - URI to fetch
#            filepath - file that receives the page data
#            hin      - optional request dict; when missing a fresh one is
#                       prepared with http_init()
# RETURN   : None on failure (missing url or filepath, failed request),
#            code on success
# ABSTRACT : Save HTML page to file
def get_page_to_file(url = "", filepath = "", hin = None):
    if url == "":
        return None
    # The default is "", so test for that as well as for None.
    if filepath is None or filepath == "":
        return None

    if hin is None:
        request = {}
        http_init(request)
    else:
        request = hin

    utils_split_uri(url, request)
    http_fixup_request(request)

    response = {}
    if http_do_request(request, response) != 0:
        # A plain None, as documented: a tuple here would look like a
        # valid (true) code to the caller.
        return None

    fp = open(filepath,"w")
    try:
        fp.write(response['wal']['data'])
    finally:
        fp.close()

    return response['wal']['code']
# EOF : get_page_to_file

# FUNC     : brute_url
# PARAMS   : hin   - request dict
#            pre   - text put in front of every candidate
#            post  - text put after every candidate
#            arin  - list of candidates to try
#            arout - list that receives the candidates that were found
# RETURN   : Nothing
# ABSTRACT : Perform a URI brute force: request pre+candidate+post for each
#            candidate and append the candidate to arout when the response
#            code is 200 or 403. Nothing happens if any argument is
#            missing.
def brute_url(hin = None, pre = None, post = None, arin = None, arout = None):
    for required in (hin, arin, arout, pre, post):
        if required is None:
            return

    http_fixup_request(hin)

    hout = {}
    for name in arin:
        override = {'uri': pre+name+post}
        if http_do_request(hin, hout,override) != 0:
            continue
        if int(hout['wal']['code']) in (200, 403):
            arout.append(name)

# EOF: def brute_url      

# FUNC     : check_custom404
# PARAMS   : data - HTML data to inspect
#            sig  - signature, used as a regular expression
# RETURN   : NO when the signature is not found, YES when it is
# ABSTRACT : Check if HTML data has been customized, nice for HTTP 404.
#            The signature is searched case-insensitively anywhere in data.
def check_custom404(data = "", sig = ""):
    signature = re.compile(sig, re.IGNORECASE)
    if signature.search(data):
        return YES
    return NO
# EOF: def check_custom404    

# FUNC     : server_version
# PARAMS   : ireq - request dict
# RETURN   : None on failure, server string on success
# ABSTRACT : Get server version: send the request and return the 'Server'
#            entry of the response header data.
def server_version(ireq = None):
    if ireq is None:
        return None

    # Make sure the 'save_headers' entry exists before sending.
    wal = ireq['wal']
    if not wal.has_key('save_headers'):
        wal['save_headers'] = ''

    oreq = {}
    if http_do_request(ireq,oreq) != 0:
        return None

    # NOTE(review): the type of header_data is not visible from here, so
    # its has_key() lookup is kept as it was.
    headers = oreq['wal']['header_data']
    if headers.has_key('Server'):
        return headers['Server']

    return None
# EOF: def server_version    

###############################################
#
# HTML Parser Functions
#
###############################################

# FUNC     : WalParser
# PARAMS   : htmllib.HTMLParser (base class)
# RETURN   : ...
# ABSTRACT : Wal HTML class. While a page is fed to it, it collects mail
#            addresses, the page title, META tags, links, images and FORM
#            related tags; the results are read back with the Get*()
#            methods and cleared with wreset().
class WalParser(htmllib.HTMLParser):

    # FUNC     : __init__
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : Init HTML parser
    def __init__(self):

        # initialize the master class; formatted output is thrown away

        htmllib.HTMLParser.__init__(self,formatter.NullFormatter())
        self.wreset()
        self.IMG_LIST = r'(\.jpg|\.gif|\.png)'   # Image pattern
    # EOF: def __init__

    # FUNC     : wreset
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : Reset values inside HTML parser (the collected results
    #            only; the state of the base parser is not touched)
    def wreset(self):
        self.maillist = []              # store mails
        self.titlestr = ""              # store page title
        self.metalist = []              # store meta tags info
        self.linklist = []              # store URI links
        self.imglist = []               # store img links

        self.formdict = {}              # store FORM info
        self.inputlist = []
        self.selectlist = []
        self.optionlist = []
    # EOF: def wreset

    # FUNC     : _attr_pairs
    # PARAMS   : attrs
    # RETURN   : List
    # ABSTRACT : Copy the attributes of a tag into a list of (name, value)
    #            tuples.
    def _attr_pairs(self, attrs):
        return [(d, v) for d, v in attrs]
    # EOF: def _attr_pairs

    # FUNC     : _collect_attr
    # PARAMS   : attrs, wanted, target
    # RETURN   : ...
    # ABSTRACT : Append to 'target' the value of every attribute whose
    #            lower cased name equals 'wanted'.
    def _collect_attr(self, attrs, wanted, target):
        for d,v in attrs:
            if d.lower() == wanted:
                target.append(v)
    # EOF: def _collect_attr

    # FUNC     : GetMails
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of mails.
    def GetMails(self):
        return self.maillist
    # EOF: def GetMails

    # FUNC     : GetTitle
    # PARAMS   : ...
    # RETURN   : String
    # ABSTRACT : Return a string with page title.
    def GetTitle(self):
        return self.titlestr
    # EOF: def GetTitle

    # FUNC     : GetMeta
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of meta.
    def GetMeta(self):
        return self.metalist
    # EOF: def GetMeta

    # FUNC     : GetLinks
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of links.
    def GetLinks(self):
        return self.linklist
    # EOF: def GetLinks

    # FUNC     : GetImg
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of images.
    def GetImg(self):
        return self.imglist
    # EOF: def GetImg

    # FUNC     : GetForm
    # PARAMS   : ...
    # RETURN   : Dict
    # ABSTRACT : Return a dict of the parser form.
    def GetForm(self):
        return self.formdict
    # EOF: def GetForm

    # FUNC     : GetFormInput
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of input tags of the form.
    def GetFormInput(self):
        return self.inputlist
    # EOF: def GetFormInput

    # FUNC     : GetFormSelect
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of select tags of the form.
    def GetFormSelect(self):
        return self.selectlist
    # EOF: def GetFormSelect

    # FUNC     : GetFormOption
    # PARAMS   : ...
    # RETURN   : List
    # ABSTRACT : Return a list of option tags of the form.
    def GetFormOption(self):
        return self.optionlist
    # EOF: def GetFormOption

    # FUNC     : start_title
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Start parsing HTML TITLE tag (start saving its text)
    def start_title(self, attrs):

        self.save_bgn()
    # EOF: def start_title

    # FUNC     : end_title
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : End parsing HTML TITLE tag; the saved text, stripped,
    #            becomes the page title
    def end_title(self):

        self.titlestr = self.save_end().strip()
    # EOF: def end_title

    # FUNC     : do_meta
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML META tags. Each tag adds one list to the meta
    #            list: a (name, value) tuple per attribute, or the bare
    #            name when the value is empty.
    def do_meta(self, attrs):
        l = []
        for d,v in attrs:
            if not v == "":
                l.append((d,v))
            else: # We have only one value
                l.append(d)
        self.metalist.append(l)
    # EOF: def do_meta

    # FUNC     : start_a
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML A tags. The href goes to the mail list
    #            (mailto:), the image list (matches IMG_LIST) or the link
    #            list; links inside the same document ('#...') are skipped.
    def start_a(self, attrs):
        for d,v in attrs:
            if d.lower() != 'href':
                continue
            # NOTE(review): the whole value is lower cased, as before;
            # this also changes the case of URL paths -- confirm that
            # callers want this.
            v = v.lower()
            if v.startswith('mailto:'):                     # Mail found!!
                # Split once only: the rest may hold further ':'
                self.maillist.append(v.split(':', 1)[1])    # Put in mail list
            elif v.startswith('#'):                         # Skip links in the same doc
                continue
            elif re.search(self.IMG_LIST, v, re.IGNORECASE): # Match images in link!
                self.imglist.append(v)                      # Put in img list
            else:
                self.linklist.append(v)                     # Put in link list
    # EOF: start_a

    # FUNC     : end_a
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : End parsing HTML A tags
    def end_a(self):
        pass
    # EOF: def end_a

    # FUNC     : do_img
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML IMG tags
    def do_img(self, attrs):
        self._collect_attr(attrs, 'src', self.imglist)      # Save img src
    # EOF: def do_img

    # FUNC     : do_frame
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML FRAME tags
    def do_frame(self, attrs):
        self._collect_attr(attrs, 'src', self.linklist)     # Save link src
    # EOF: def do_frame

    # FUNC     : start_iframe
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML IFRAME tags
    def start_iframe(self, attrs):
        self._collect_attr(attrs, 'src', self.linklist)     # Save link src
    # EOF: def start_iframe

    # FUNC     : end_iframe
    # PARAMS   : attrs (optional, ignored)
    # RETURN   : ...
    # ABSTRACT : End parsing HTML IFRAME tags. Like every other end_*
    #            handler of this class it can be called without
    #            arguments; 'attrs' is only accepted for old callers.
    def end_iframe(self, attrs = None):
        pass
    # EOF: def end_iframe

    # FUNC     : start_script
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML SCRIPT tags
    def start_script(self, attrs):
        self._collect_attr(attrs, 'src', self.linklist)     # Save link src
    # EOF: start_script

    # FUNC     : end_script
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : End parsing HTML SCRIPT tags
    def end_script(self):
        pass
    # EOF: end_script

    # FUNC     : do_applet
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML APPLET tags. The 'code' value is saved as a
    #            link, prefixed with 'codebase' + '/' when a codebase
    #            attribute came before it.
    def do_applet(self, attrs):
        dir = ""
        for d,v in attrs:
            d = d.lower()
            if d == 'codebase':       # Save dir
                dir = v
            if d == 'code':           # Save link src
                if dir == "":
                    self.linklist.append(v)
                else:
                    self.linklist.append(dir + '/' + v)
    # EOF: def do_applet

    # FUNC     : do_link
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML LINK tags
    def do_link(self, attrs):
        self._collect_attr(attrs, 'href', self.linklist)    # Save link src
    # EOF: def do_link

    # FUNC     : start_form
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML FORM tags; the attributes are stored in the
    #            form dict
    def start_form(self, attrs):
        for d,v in attrs:
            self.formdict[d] = v
    # EOF: def start_form

    # FUNC     : end_form
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : End parsing HTML FORM tags
    def end_form(self):
        pass
    # EOF: def end_form

    # FUNC     : do_input
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML INPUT tags
    def do_input(self, attrs):
        self.inputlist.append(self._attr_pairs(attrs))
    # EOF: def do_input

    # FUNC     : start_select
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML SELECT tags
    def start_select(self, attrs):
        self.selectlist.append(self._attr_pairs(attrs))
    # EOF: def start_select

    # FUNC     : do_option
    # PARAMS   : attrs
    # RETURN   : ...
    # ABSTRACT : Parse HTML OPTION tags
    def do_option(self, attrs):
        self.optionlist.append(self._attr_pairs(attrs))
    # EOF: do_option

    # FUNC     : end_select
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : End parsing HTML SELECT tags
    def end_select(self):
        pass
    # EOF: def end_select

# EOF : class WALParser    

###############################################
#
# WalSerializer Class
#
###############################################

# FUNC     : WalSerializer
# PARAMS   : ...
# RETURN   : ...
# ABSTRACT : Wal serializer class: pickles data to files kept in one
#            directory ("wal_data" unless changed) and loads them back.
class WalSerializer:

    # FUNC     : __init__
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : Init stuff
    def __init__(self):
        self.serializer_dir = "wal_data"
    # EOF: def __init__

    # FUNC     : SetSerializerDir
    # PARAMS   : d
    # RETURN   : ...
    # ABSTRACT : Set serializer dir.
    def SetSerializerDir(self, d):
        self.serializer_dir = d
    # EOF: def SetSerializerDir

    # FUNC     : GetSerializerDir
    # PARAMS   : ...
    # RETURN   : A string
    # ABSTRACT : Get serializer dir.
    def GetSerializerDir(self):
        return self.serializer_dir
    # EOF: def GetSerializerDir

    # FUNC     : CreateSerializerDir
    # PARAMS   : ...
    # RETURN   : ...
    # ABSTRACT : Create serializer dir below the current working directory;
    #            the full path is kept in self.seridir.
    def CreateSerializerDir(self):
        self.seridir = utils_pathjoin(os.getcwd(), self.serializer_dir)
        # utils_dmkdir() takes the directory as its second argument; the
        # first one is an unused leading parameter.
        utils_dmkdir(None, self.seridir)
    # EOF: def CreateSerializerDir

    # FUNC     : SetSerializeData
    # PARAMS   : filename, data
    # RETURN   : ...
    # ABSTRACT : Save serializer data to file (binary pickle) inside the
    #            serializer dir.
    def SetSerializeData(self, filename, data):
        try:
            import cPickle as pickler
        except ImportError:
            import pickle as pickler

        fil = utils_pathjoin(self.serializer_dir, filename)
        filesave = open(fil ,"wb")
        try:
            # Protocol 1 is the binary pickle format.
            pickler.dump(data, filesave, 1)
        finally:
            filesave.close()
    # EOF: def SetSerializeData

    # FUNC     : GetSerializeData
    # PARAMS   : filename
    # RETURN   : Data
    # ABSTRACT : Get serializer data from file.
    #            WARNING: unpickling can run arbitrary code; only load
    #            files this program wrote itself.
    def GetSerializeData(self, filename):
        try:
            import cPickle as pickler
        except ImportError:
            import pickle as pickler

        fil = utils_pathjoin(self.serializer_dir, filename)
        filesave = open(fil ,"rb")
        try:
            data = pickler.load(filesave)
        finally:
            filesave.close()
        return data
    # EOF: def GetSerializeData

# EOF: class WalSerializer

# RL+I E0F