//Please refer to http://dansguardian.org/?page=copyright2
//for the license for this code.
//Written by Daniel Barron (daniel@jadeb.com).
//For support go to http://groups.yahoo.com/group/dansguardian

//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#include "RegExp.hpp"

#include <vector>

// Construct a matcher in the "nothing matched yet" state, so matched()
// reports false until a call to match() succeeds.
RegExp::RegExp()
    : imatched(false)
{
}

// Compile `exp` as a case-insensitive POSIX extended regular expression and
// record every successive, non-overlapping match of it in `text`.
//
// For each match one entry is stored for the whole match, followed by one
// entry for every parenthesised sub-expression that took part in it.  The
// entries are retrieved afterwards with result(i), offset(i) and length(i);
// numberOfMatches() gives the total entry count.
//
//   exp   NUL-terminated pattern.
//   text  NUL-terminated subject string.
//
// Returns true (and makes matched() return true) if at least one match was
// found.  Returns false if the pattern fails to compile or does not match;
// the stored results are empty in that case.
//
// Stored offsets are measured from the start of `text`, for the first match
// and for all later ones.
bool RegExp::match(const char* exp, const char* text) {
    results.clear();
    offsets.clear();
    lengths.clear();
    imatched = false;

    regex_t reg;
    if (regcomp(&reg, exp, REG_ICASE | REG_EXTENDED) != 0) {
        // POSIX leaves `reg` undefined after a failed regcomp(), so it must
        // not be passed to regfree().
        return false;  // need exception?
    }

    // Frees the compiled pattern on every exit path, including an exception
    // thrown while storing results.
    struct RegexReleaser {
        regex_t* compiled;
        explicit RegexReleaser(regex_t* r) : compiled(r) {}
        ~RegexReleaser() { regfree(compiled); }
    } releaser(&reg);

    // Slot 0 receives the whole match, slots 1..re_nsub the sub-expressions.
    const size_t groups = reg.re_nsub + 1;
    std::vector<regmatch_t> pmatch(groups);

    const char* pos = text;  // where the next search starts
    int eflags = 0;          // the first search may anchor '^' at text start
    while (regexec(&reg, pos, groups, &pmatch[0], eflags) == 0) {
        // regexec() reports offsets relative to `pos`; `base` converts them
        // back to offsets relative to `text`.
        const size_t base = static_cast<size_t>(pos - text);
        size_t consumed = 0;  // end of the furthest-reaching group

        for (size_t i = 0; i < groups; i++) {
            if (pmatch[i].rm_so == -1) {
                continue;  // this sub-expression did not participate
            }
            const size_t start = static_cast<size_t>(pmatch[i].rm_so);
            const size_t matchlen =
                static_cast<size_t>(pmatch[i].rm_eo - pmatch[i].rm_so);

            results.push_back(std::string(pos + start, matchlen));
            offsets.push_back(base + start);
            lengths.push_back(matchlen);

            if (start + matchlen > consumed) {
                consumed = start + matchlen;
            }
        }
        imatched = true;

        if (consumed == 0) {
            break;  // empty match at `pos`: searching again would not advance
        }
        pos += consumed;
        eflags = REG_NOTBOL;  // `pos` is no longer the beginning of a line
    }
    return imatched;
}


// Text of the i-th entry stored by match().  An index outside
// [0, numberOfMatches()) yields an empty string instead of an error.
std::string RegExp::result(int i) {
    const bool inRange = (i >= 0) && (i < (signed)results.size());
    if (inRange) {
        return results[i];
    }
    return std::string();  // maybe exception?
}

// Offset recorded by match() for the i-th stored entry.  An index outside
// the stored range yields 0 instead of an error.
unsigned int RegExp::offset(int i) {
    const bool inRange = (i >= 0) && (i < (signed)offsets.size());
    if (inRange) {
        return offsets[i];
    }
    return 0;  // maybe exception?
}

// Length in bytes recorded by match() for the i-th stored entry.  An index
// outside the stored range yields 0 instead of an error.
unsigned int RegExp::length(int i) {
    const bool inRange = (i >= 0) && (i < (signed)lengths.size());
    if (inRange) {
        return lengths[i];
    }
    return 0;  // maybe exception?
}

// Number of entries currently stored by match().  Every stored result is
// counted, so captured sub-expressions are included alongside whole matches.
int RegExp::numberOfMatches() {
    return (signed)results.size();
}

bool RegExp::matched() {
    return imatched;  // regexp matches only
}


// Find the first occurrence of the byte sequence [phrase, phraseend) inside
// [file, fileend) using the Quick Search variant of Boyer-Moore.  This is a
// hand-rolled replacement for std::search() which the original author found
// to be 5-6 times faster.
//
// Returns a pointer to the start of the first occurrence, or fileend if the
// phrase does not occur, is longer than the file, or is longer than 126
// bytes.  An empty phrase matches at `file`.
//
// WARNING: for speed a copy of the phrase is written to
// fileend[0 .. phrase length) so that the scan always terminates on a match
// without per-step bounds checks.  The caller must therefore provide that
// many writable bytes after fileend.  This relies on NaughtyFilter.cpp
// reserving an extra 127 bytes and on OptionContainer rejecting phrases
// longer than 126 characters.
char* RegExp::search(char* file, char* fileend, char* phrase, char* phraseend) {
    const int pl = static_cast<int>(phraseend - phrase);  // phrase length
    const int fl = static_cast<int>(fileend - file);      // file length

    if (pl <= 0) return file;      // empty phrase matches immediately
    if (pl > 126) return fileend;  // would overrun the spare bytes
    if (fl < pl) return fileend;   // phrase cannot fit in the file

    // Append the phrase as a sentinel so the scan below is guaranteed to
    // stop at fileend at the latest.
    for (int j = 0; j < pl; j++) {
        fileend[j] = phrase[j];
    }

    // Quick Search shift table: how far the window may move given the byte
    // just past it.  Bytes absent from the phrase allow a jump of the whole
    // window plus one; otherwise align with the byte's last occurrence.
    int qsBc[256];
    for (int c = 0; c < 256; c++) {
        qsBc[c] = pl + 1;
    }
    for (int j = 0; j < pl; j++) {
        qsBc[(unsigned char)phrase[j]] = pl - j;
    }

    // Last position at which an occurrence lies entirely within the file.
    char* const lastStart = fileend - pl;

    for (char* k = file;; k += qsBc[(unsigned char)k[pl]]) {
        // Equivalent to, but faster than, memcmp() for short phrases.
        int l = 0;
        while (l < pl && k[l] == phrase[l]) {
            l++;
        }
        if (l == pl) {
            // A hit beyond lastStart is the sentinel itself, or a window
            // that runs from the file's tail into the sentinel.  Neither is
            // a real occurrence, and none can follow it.
            return (k <= lastStart) ? k : fileend;
        }
    }
}

