//Please refer to http://dansguardian.org/?page=copyright2
//for the license for this code.
//Written by Daniel Barron (daniel@jadeb.com).
//For support go to http://groups.yahoo.com/group/dansguardian

//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

//This file contains modifications suggested and mostly provided by
//Daniel Robbins 13/4/01 drobbins@gento.org
//Modifications include, but not limited to, getcontenttype(), << , >>

#include "HTTPHeader.hpp"
#include "Socket.hpp"
#include "syslog.h"
#include <string>
#include <unistd.h>
#include <sys/socket.h>
#include <exception>
#include <fstream>
#include "RegExp.hpp"


void HTTPHeader::checkheader() {  // this is used to fix bugs in certain
                                  // web servers that don't use the standards
    for (int i = 0; i < (signed)header.size(); i++) {  // check each line in
                                                     // the header
        if (header[i].startsWith("Content-length:")) {
            header[i] = "Content-Length:" + header[i].after("Content-length:");
        }
        if (header[i].startsWith("x-forwarded-for:")) {
            header[i] = "X-Forwarded-For:" + header[i].after("x-forwarded-for:");
        }
        if (header[i].startsWith("Content-type:")) {
            header[i] = "Content-Type:" + header[i].after("Content-type:");
        }
        if (header[i].startsWith("Content-disposition:")) {
            header[i] = "Content-Disposition:" + header[i].after("Content-disposition:");
        }
        if (header[i].startsWith("Proxy-Connection: keep-alive")) {
            header[i] = "Proxy-Connection: Keep-Alive" + header[i].after("Proxy-Connection: keep-alive");
        }
        if (header[i].startsWith("Content-encoding:")) {
            header[i] = "Content-Encoding:" + header[i].after("Content-encoding:");
        }
        if (header[i].startsWith("Accept-encoding:")) {
            header[i] = "Accept-Encoding:" + header[i].after("Accept-encoding:");
        }
        if (header[i].startsWith("Accept-Encoding:")) {
            header[i] = modifyEncodings(header[i]);
        }

        if (header[i].startsWith("Connection: keep-alive")) {
            header[i] = "Connection: Keep-Alive" + header[i].after("Connection: keep-alive");
        }
        if (header[i].startsWith("Connection: Keep-alive")) {
            header[i] = "Connection: Keep-Alive" + header[i].after("Connection: Keep-alive");
        }
        if (header[i].startsWith("Proxy-Connection: Keep-Alive")) {
            header[i] = "Proxy-Connection: Close";  // Need to force HTTP/1.1
                    // clients and servers to use non persistant connections
        }
        if (header[i].startsWith("Connection: Keep-Alive")) {
            header[i] = "Connection: Close";  // ditto
        }
        if (header[i].startsWith("Proxy-authorization:")) {
            header[i] = "Proxy-Authorization:" + header[i].after("Proxy-authorization:");
        }

        #ifdef DGDEBUG
            std::cout << header[i] << std::endl;
        #endif
    }
}

int HTTPHeader::contentlength() {  // all self explanetory
    String temp;
    for (int i = 0; i < (signed)header.size(); i++) {  // check each line in
                                                     // the header
        if (header[i].startsWith("Content-Length:")) {
            temp = header[i].after(" ");
            return temp.toInteger();
        }
    }
    return 0;  // it finds the length of the POST data
}


// Returns the filename proposed by the first Content-Disposition header, or
// an empty string when there is no such header.  Both forms are handled:
//   Content-Disposition: attachment; filename="filename.ext"
//   Content-Disposition: attachment; filename=filename.ext
String HTTPHeader::disposition() {
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        if (!header[idx].startsWith("Content-Disposition:")) {
            continue;
        }
        String parameters = header[idx].after(";");
        String filename = parameters.after("=");
        filename.removeWhiteSpace();  // in case of trailing space
        if (!filename.contains("\"")) {
            return filename;  // unquoted form
        }
        // Quoted form: take the text between the first pair of quotes.
        return parameters.after("\"").before("\"");
    }
    return "";
}


// Returns the lower-cased MIME type from the first Content-Type header with
// any trailing parameters (e.g. "; charset=...") removed.  Returns "-" when
// the header is missing or the type is empty.
String HTTPHeader::getcontenttype() {
    String mimetype;
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        if (!header[idx].startsWith("Content-Type:")) {
            continue;
        }
        mimetype = header[idx].after(" ");
        // Cut the value at the first blank, semicolon or control character;
        // everything from there on is extra info that is not needed.
        const int valueLen = static_cast<int>(mimetype.length());
        for (int pos = 0; pos < valueLen; ++pos) {
            const unsigned char ch = mimetype[pos];
            if (ch == ' ' || ch == ';' || ch < 32) {
                mimetype = mimetype.subString(0, pos);
                break;
            }
        }
        break;  // only the first Content-Type header is considered
    }
    mimetype.toLower();
    if (mimetype.length() < 1) {
        mimetype = "-";
    }
    return mimetype;
}

// Reports whether the MIME type returned by getcontenttype() begins with
// `t`.  getcontenttype() lower-cases its result, so `t` should be lower
// case as well.
bool HTTPHeader::iscontenttype(String t) {
    String actual = getcontenttype();
    return actual.startsWith(t);
}

// Modification based on a submitted patch by
// Jimmy Myrick (jmyrick@tiger1.tiger.org)
std::string HTTPHeader::getXForwardedForIP() {
    String line;
    for (int i = 0; i < (signed)header.size(); i++) {
        if (header[i].startsWith("X-Forwarded-For:")) {
            line = header[i].after("or: ");
            break;
        }
    }
    line.chop();
    return std::string(line.toCharArray());
}



// Sanity-checks the host part of `url` (the text between "://" and the next
// "/").  Returns true when the host is shorter than two characters, contains
// "..", or contains anything other than letters, digits, hyphens and dots.
bool HTTPHeader::malformedURL(String url) {
    String host = url.after("://");
    if (host.contains("/")) {
        host = host.before("/");
    }
    if (host.length() < 2 || host.contains("..")) {
        return true;
    }
    const int hostLen = host.length();
    for (int pos = 0; pos < hostLen; ++pos) {
        const unsigned char ch = static_cast<unsigned char>(host[pos]);
        const bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        const bool isDigit = (ch >= '0' && ch <= '9');
        const bool isPunct = (ch == '.' || ch == '-');
        if (!(isLetter || isDigit || isPunct)) {
            return true;  // character not allowed in a host name
        }
    }
    return false;
}

// A request may be in the form:
//  GET http://foo.bar:80/ HTTP/1.0 (if :80 is omitted, 80 is assumed)
// or:
//  GET / HTTP/1.0
//  Host: foo.bar (optional header in HTTP/1.0 but, as in HTTP/1.1, we require it!)
//  Port: 80 (not a standard header; do any clients send it?)
// or:
//  CONNECT foo.bar:443  HTTP/1.1
// So we need to handle all 3

// Rebuilds the requested URL from the first header line (and, for requests
// that only carry a path, from the Host: header).  Any ":port" suffix on the
// host is dropped, and CONNECT targets are given an "https://" prefix.
// Reads header[0] unconditionally, so the header must already hold a line.
String HTTPHeader::url() {
    String hostname;
    // Everything after the request method, e.g. "http://foo.bar/ HTTP/1.0".
    String answer = header[0].after(" ");
    // Strip the trailing protocol version, whichever case it was sent in.
    if (answer.after(" ").startsWith("HTTP/")) {
        answer = answer.before(" HTTP/");
    }
    else {
        answer = answer.before(" http/");  // just in case!
    }
    // CONNECT requests only carry host:port, so give them a scheme.
    if (requesttype() == "CONNECT") {
        if (!answer.startsWith("https://")) {
            answer = "https://" + answer;
        }
    }

    if (answer.length()) {
        int i;
        if (answer[0] == '/') {  // path only: the host comes from Host:
            // Start at 1: line 0 is the request line itself.
            for (i = 1; i < (signed)header.size(); i++) {
                if (header[i].startsWith("Host:")) {
                    hostname = header[i].after(" ");
                    if (hostname.contains(":")) {
                        hostname = hostname.before(":");  // chop off the port bit
                    }
                    hostname.removeWhiteSpace();  // remove rubbish like
                                                  // ^M and blanks
                    hostname = "http://" + hostname;
                    answer = hostname + answer;
                    break;
                }
            }
            // If no Host: header was found, answer is left as the bare path.
        }
        else {  // must be in the form GET http://foo.bar:80/ HTTP/1.0
            if (!answer.after("://").contains("/")) {
                answer += "/";  // needed later on so correct host is extracted
            }
            // Split into protocol, host[:port] and the path after the host.
            String protocol = answer.before("://");
            hostname = answer.after("://");
            String url = hostname.after("/");
            url.removeWhiteSpace();  // remove rubbish like ^M and blanks
            if (url.length() > 0) {
                url = "/" + url;  // put back the slash consumed by after("/")
            }
            hostname = hostname.before("/");  // the extra / was added for this
            if (hostname.contains(":")) {
                hostname = hostname.before(":");  // chop off the port bit
            }
            answer = protocol + "://" + hostname + url;
        }
    }
    // NOTE(review): chop() presumably removes the last character, turning a
    // trailing "//" into "/" - confirm against String.
    if (answer.endsWith("//")) {
        answer.chop();
    }
    return answer;
}

// if you did not understand the last four functions, then read an RFC on
// HTTP and a book on C++.  It's simple really.

// Helper for decode(): true when `c` is one of 0-9, a-f or A-F.
static bool isUrlHexDigit(char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
           || (c >= 'A' && c <= 'F');
}

// Decodes the %XX escapes in `s`.  Every '%' that is followed by two hex
// digits is handed to hexToChar(), which decides whether the escape is
// replaced by the character it encodes or kept as it is; all other text is
// copied through unchanged.
//
// The previous version declared a RegExp but never compiled a pattern or
// ran it against `s`, so it only ever tested the match state of an unused
// object and the decoding branch was presumably never reached.  The escapes
// are now located with a direct scan.
String HTTPHeader::decode(String s) {
    const int size = s.length();
    if (size < 3) {
        return s;  // too short to hold a %XX escape
    }
    #ifdef DGDEBUG
        std::cout << "decoding url" << std::endl;
    #endif
    String result;
    int pos = 0;  // start of the literal text not yet copied to result
    int i = 0;
    while (i + 2 < size) {
        if (s[i] == '%' && isUrlHexDigit(s[i + 1]) && isUrlHexDigit(s[i + 2])) {
            if (i > pos) {
                result += s.subString(pos, i - pos);  // text before the escape
            }
            result += hexToChar(s.subString(i + 1, 2));  // the two hex digits
            i += 3;
            pos = i;
        }
        else {
            i++;
        }
    }
    if (size > pos) {
        result += s.subString(pos, size - pos);  // whatever is left over
    }
    return result;
}

// Helper for hexToChar(): converts one hex digit character to its value
// (0-15).  Anything that is not a hex digit is passed through unchanged.
static unsigned int hexDigitValue(unsigned int digit) {
    if (digit >= 'a' && digit <= 'f') {
        return digit - 87;  // 'a' (97) -> 10
    }
    if (digit >= 'A' && digit <= 'F') {
        return digit - 55;  // 'A' (65) -> 10
    }
    if (digit >= '0' && digit <= '9') {
        return digit - 48;  // '0' (48) -> 0
    }
    return digit;
}

// Converts the two hex digits at the start of `n` into the character they
// encode.  Only letters, digits and '-' are actually decoded; for any other
// character the original escape is returned with its '%' put back.
// Strings shorter than two characters are returned unchanged.
String HTTPHeader::hexToChar(String n) {
    if (n.length() < 2) {
        return n;
    }
    // A stack buffer is used here; the earlier heap allocation
    // (new char[2]) was never freed and leaked on every call.
    char buf[2];
    const unsigned int a = hexDigitValue(n[0]);
    const unsigned int b = hexDigitValue(n[1]);
    const unsigned char c = a * 16 + b;
    if ( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
         || (c >= '0' && c <= '9') || (c == '-') ) {
        buf[0] = c;
        buf[1] = '\0';
        n = buf;
    }
    else {
        n = "%" + n;  // leave the escape as it was
    }
    return n;
}


// Returns the request method (GET, HEAD, PUT, ...): the part of the first
// header line that comes before the first space.
String HTTPHeader::requesttype() {
    String& requestLine = header[0];
    return requestLine.before(" ");
}


// Writes the header to `sock`: each stored line followed by "\n", then the
// blank "\r\n" line that ends the header, and finally the POST data when
// the request method is POST.  Throws exception() as soon as a write to the
// socket reports failure.
void HTTPHeader::out(Socket sock) throw(exception) {
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        // Line and terminator are joined first so they go out in one write.
        String wireLine = header[idx] + "\n";
        if (!sock.writeToSocket(wireLine.toCharArray(), wireLine.length(), 0, timeout)) {
            throw exception();
        }
    }

    // End-of-header marker.
    if (!sock.writeToSocket("\r\n", 2, 0, timeout)) {
        throw exception();
    }

    String firstLine = header[0];
    if (firstLine.before(" ") == "POST") {
        postdata.out(sock);
    }
}


// Reads an HTTP header from `sock` one line at a time until a line of three
// characters or fewer (the blank line that ends a header) is seen, then
// normalises it with checkheader().  If the request is a POST, the POST
// data is read into postdata as well.
//
// sock.getline() will throw an exception if there is an error; it is not
// caught here and will only be caught by HandleConnection().
void HTTPHeader::in(Socket sock) {
    // The RFCs don't specify a max header line length so this should be
    // dynamic really.  Pointed out (well reminded actually) by Daniel Robbins
    char buff[8192];  // buffer to hold the incoming HTTP line
    String line;  // temp store to hold the line after processing
    line = "----";  // so we get past the first while test
    while (line.length() > 3) {
        sock.getline(buff, sizeof(buff), timeout);  // get a line of header
        line = buff;  // convert the line to a String
        header.push_back(line);  // stick the line in the deque that
                                 // holds the header
    }
    header.pop_back();  // remove the final blank line of a header

    checkheader();  // sort out a few bits in the header

    // If the very first line read was already the terminator, the header is
    // now empty and there is no request line to look at; indexing header[0]
    // in that state would be undefined behaviour.
    if (header.size() < 1) {
        return;
    }

    // If the HTTP request is a POST (e.g. from a form on a web page), we
    // need to grab the POST data and store it.
    if (header[0].before(" ") == "POST") {
        int length = contentlength();  // find the length of the POST data
                            // so it can test and handle the stream better
        postdata.read(sock, length);  // get the DataBuffer to read the data
    }
}

bool HTTPHeader::isRedirection() {
    // The 1st line of the header for a redirection is thus:
    // HTTP/1.(0|1) 3xx
    if (header.size() < 1) {return false;}  // sometimes get called b 4 read
    String answer = header[0].after(" ").before(" ");
    if (answer[0] == '3' && answer.length() == 3) {
        return true;
    }
    return false;
}


// Appends an "X-Forwarded-For: <clientip>" line to the header.  Squid adds
// this header, so if more software comes to support it, it may be useful
// one day.
void HTTPHeader::addXForwardedFor(std::string clientip) {
    std::string line("X-Forwarded-For: ");
    line += clientip;
    line += "\r";
    header.push_back(String(line.c_str()));
}

void HTTPHeader::setTimeout(int t) {
    timeout = t;
}

// If basic authentication is enabled DG is able to decode the username
// and password from the header - however we are only interested in the
// username
std::string HTTPHeader::getauthuser() {
    String t = getauth();
    t = t.before(":");
    t.toLower();
    return std::string(t.toCharArray());
}

// Returns the base64-decoded credentials from the first
// "Proxy-Authorization: Basic ..." header (either capitalisation of
// "Basic"), or an empty String when there is none.
String HTTPHeader::getauth() {
    String credentials;
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        String& line = header[idx];
        if (!line.startsWith("Proxy-Authorization: Basic ")
            && !line.startsWith("Proxy-Authorization: basic ")) {
            continue;
        }
        // "asic " matches the end of both "Basic " and "basic "; what
        // follows is the base64 MIME encoded credentials.
        credentials = decodeb64(line.after("asic ")).c_str();
        break;
    }
    return credentials;
}

// Decodes a block of base64 (MIME) text.  The input is processed in groups
// of four characters, each yielding up to three bytes; characters left over
// after the last complete group (such as a trailing carriage return) are
// ignored.
//
// Two changes from the earlier version:
//  - the loop bound was length() - 4, which silently dropped the final
//    group whenever the input length was an exact multiple of four;
//  - '=' padding now shortens the output as base64 requires, instead of
//    appending NUL bytes, and ends the decoding.
std::string HTTPHeader::decodeb64(String line) {
    std::string result;
    const int len = line.length();
    for (int i = 0; i + 3 < len; i += 4) {
        // Pack the four 6-bit values into one 24-bit number.
        long four = 0;
        for (int k = 0; k < 4; k++) {
            four = (four << 6) | decode1b64(line[i + k]);
        }

        // Padding can only be the last one or two characters of a group.
        int padding = 0;
        if (line[i + 3] == '=') {
            padding = (line[i + 2] == '=') ? 2 : 1;
        }

        result += static_cast<char>((four & 0xFF0000) >> 16);
        if (padding < 2) {
            result += static_cast<char>((four & 0xFF00) >> 8);
        }
        if (padding < 1) {
            result += static_cast<char>(four & 0xFF);
        }
        if (padding > 0) {
            break;  // padding marks the end of the encoded data
        }
    }
    return result;
}

// Maps one base64 character to its 6-bit value:
//   A-Z -> 0-25, a-z -> 26-51, 0-9 -> 52-61, '+' -> 62, '/' -> 63.
// The padding character '=' maps to 0.  Any other character is invalid and
// yields 0x80 (high bit set).
//
// The earlier version classified characters by testing only the upper bound
// of each range, so characters below '0' and those between the ranges
// (e.g. '\r', ' ', ':', '[') were treated as digits or letters.  Both
// bounds are now checked.
int HTTPHeader::decode1b64(char c) {
    if (c >= 'A' && c <= 'Z') {
        return c - 'A';
    }
    if (c >= 'a' && c <= 'z') {
        return c - 'a' + 26;
    }
    if (c >= '0' && c <= '9') {
        return c - '0' + 52;
    }
    if (c == '+') {
        return 62;
    }
    if (c == '/') {
        return 63;
    }
    if (c == '=') {
        return 0;
    }
    return 0x80;  // not a base64 character
}


// Reports whether the body is encoded with something other than clear
// text: true when the first Content-Encoding header does not mention
// "identity", false when it does or when there is no such header.
bool HTTPHeader::isCompressed() {
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        if (!header[idx].startsWith("Content-Encoding:")) {
            continue;
        }
        // HTTP/1.1 says "identity" should not appear here, but not that it
        // must not, so it has to be allowed for.
        const bool isIdentity = (header[idx].indexOf("identity") != -1);
        #ifdef DGDEBUG
            if (!isIdentity) {
                std::cout << "is compressed" << std::endl;
            }
        #endif
        return !isIdentity;
    }
    return false;
}


// Returns the lower-cased value of the first Content-Encoding header, or
// an empty string as the default when the header is absent.
String HTTPHeader::contentEncoding() {
    const int lineCount = static_cast<int>(header.size());
    for (int idx = 0; idx < lineCount; ++idx) {
        if (!header[idx].startsWith("Content-Encoding:")) {
            continue;
        }
        String encoding = header[idx].after("Content-Encoding: ");
        encoding.toLower();
        return encoding;
    }
    return "";
}



// Builds a replacement Accept-Encoding header from the one in `e`.
//
// There are 4 types of encoding: gzip, deflate (zlib format), compress
// (unix compress format) and identity (uncompressed, supported by all
// browsers).  compress is not supported here, so the result always offers
// identity and adds gzip and/or deflate only if `e` mentions them.
String HTTPHeader::modifyEncodings(String e) {
    static const char* const supported[] = { "gzip", "deflate" };
    const int supportedCount = sizeof(supported) / sizeof(supported[0]);

    e.toLower();
    String accepted("Accept-Encoding: identity");
    for (int k = 0; k < supportedCount; ++k) {
        if (e.contains(supported[k])) {
            accepted += ",";
            accepted += supported[k];
        }
    }
    return accepted;
}

void HTTPHeader::removeEncoding(int newlen) {
    for (int i = 0; i < (signed)header.size(); i++) {
        if (header[i].startsWith("Content-Length:")) {
            header[i] = "Content-Length: " + String(newlen);
        }
        if (header[i].startsWith("Content-Encoding:")) {
            header[i] = "X-DansGuardian-Removed: Content-Encoding";
        }
    }
}

void HTTPHeader::setContentLength(int newlen) {
    for (int i = 0; i < (signed)header.size(); i++) {
        if (header[i].startsWith("Content-Length:")) {
            header[i] = "Content-Length: " + String(newlen);
        }
    }
}



// Reports whether the POST data looks like a file upload, which is taken
// to be the case when it contains "content-type: " in any letter case.
// Returns false if the data is shorter than that marker, or if an
// exception is raised while copying or searching the data.
bool HTTPHeader::isPostUpload() {
    // The marker lives on the stack; it used to be copied by hand into a
    // separately allocated heap buffer guarded by its own try/catch.
    char marker[] = "content-type: ";  // signifies file upload
    const int markerlen = sizeof(marker) - 1;  // without the trailing '\0'

    const int postlen = postdata.buffer_length;
    if (postlen < markerlen) {  // min length for there to be a match
        return false;
    }

    bool answer = false;
    char* postdatablock = new char[postlen + 64];  // extra 64 for search
    try {
        postdata.copytomemory(postdatablock);
        for (int i = 0; i < postlen; i++) {  // make lowercase char by char
            if ((postdatablock[i] >= 'A') && (postdatablock[i] <= 'Z')) {
                postdatablock[i] = 'a' + postdatablock[i] - 'A';
            }
        }
        RegExp mysearch;
        char* postdatablockend = postdatablock + postlen;
        // search() hands back the end pointer when the marker is not found.
        char* res = mysearch.search(postdatablock, postdatablockend,
                                    marker, marker + markerlen);
        if (res != postdatablockend) {
            answer = true;
        }
    } catch (exception&) {
        // Deliberately best-effort: a failure here means "not an upload".
    }
    delete[] postdatablock;
    return answer;
}
