/***************************************************************************
 *   Copyright (C) 2006 Meni Livne <livne@kde.org>                         *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#ifndef WIN32
#include <ctype.h>
#endif

#include "phish.h"
#include "phish_util_url.h"

/*
 * Checks whether a string consists solely of decimal digits.
 *
 * str: NUL-terminated string to examine.
 *
 * Returns 1 if every character of str is a decimal digit (this includes the
 * empty string, which has no characters to reject), 0 otherwise.
 */
static int isNum(const char *str)
{
  while (*str != '\0')
  {
    /* isdigit() is only defined for EOF and values representable as
       unsigned char; a plain char may be negative, so convert first */
    if (!isdigit((unsigned char) *str))
      return 0;

    str++;
  }

  return 1;
}

static phish_result_t parseURL(const char *str, phish_util_url_t *url)
{
  int state = 0;
  unsigned int i = 0;
  unsigned int begin = 0;
  int user_read = 0;
  char *tmp = NULL; /* either user name or host, before we know which one */
  char *port = NULL;
  char c;

  while ((c = str[i]) != '\0')
  {
    switch(state)
    {
      case -2:
        /* memory allocation error occured */
        free(port);
        phish_util_deleteURL(url);
        return PHISH_ERR_MEMORY;

      case -1:
        /* error in format of URL */
        free(port);
        phish_util_deleteURL(url);
        return PHISH_ERR_MALFORMED_URL;

      case 0:
        /* reading protocol - only alphabetic characters are allowed */
        if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
        {
          if (c == ':' && str[i + 1] == '/' && str[i + 2] == '/')
          {
            url->protocol = malloc(i + 1);
            if (url->protocol == NULL)
            {
              state = -2;
            }
            else
            {
              if (i == begin)
              {
                /* must contain at least one character */
                state = -1;
              }
              else
              {
                strncpy(url->protocol, str, i);
                url->protocol[i] = '\0';
                i += 2;
                begin = i + 1;
                state = 1; /* move to reading user name or host */
              }
            }
          }
          else
          {
            state = -1;
          }
        }
        break;

      case 1:
        /* reading user name or host */
        if (c == '@')
        {
          if (user_read)
          {
            state = -1; /* @ character illegal in host */
          }
          else
          {
            url->user = malloc(i - begin + 1);
            if (url->user == NULL)
            {
              state = -2;
            }
            else
            {
              if (i == begin)
              {
                /* must contain at least one character */
                state = -1;
              }
              else
              {
                strncpy(url->user, str + begin, i - begin);
                url->user[i - begin] = '\0';
                begin = i + 1;
                user_read = 1; /* remain at same state and read host */
              }
            }
          }
        }
        else if (c == ':')
        {
          tmp = malloc(i - begin + 1);
          if (tmp == NULL)
          {
            state = -2;
          }
          else
          {
            if (i == begin)
            {
              /* must contain at least one character */
              free(tmp);
              state = -1;
            }
            else
            {
              strncpy(tmp, str + begin, i - begin);
              tmp[i - begin] = '\0';
              begin = i + 1;
              state = 2; /* move to reading password or port */
            }
          }
        }
        else if (c == '/')
        {
          url->host = malloc(i - begin + 1);
          if (url->host == NULL)
          {
            state = -2;
          }
          else
          {
            if (i == begin)
            {
              /* must contain at least one character */
              state = -1;
            }
            else
            {
              strncpy(url->host, str + begin, i - begin);
              url->host[i - begin] = '\0';
              begin = i;
              state = 3; /* move to reading path */
            }
          }
        }
        break;

      case 2:
        /* reading password or port */
        if (c == '@')
        {
          url->user = tmp;
          user_read = 1;

          url->password = malloc(i - begin + 1);
          if (url->password == NULL)
          {
            state = -2;
          }
          else
          {
            if (i == begin)
            {
              /* must contain at least one character */
              state = -1;
            }
            else
            {
              strncpy(url->password, str + begin, i - begin);
              url->password[i - begin] = '\0';
              begin = i + 1;
              state = 1; /* move to reading host */
            }
          }
        }
        else if (c == '/')
        {
          url->host = tmp;

          port = malloc(i - begin + 1);
          if (port == NULL)
          {
            state = -2;
          }
          else
          {
            if (i == begin)
            {
              /* must contain at least one character */
              state = -1;
            }
            else
            {
              strncpy(port, str + begin, i - begin);
              port[i - begin] = '\0';
              if (!isNum(port))
              {
                /* port string must evaluate to a number */
                state = -1;
              }
              else
              {
                url->port = atoi(port);
                begin = i;
                state = 3; /* move to reading path */
              }
            }
          }
        }
        break;
        
      case 3:
        /* reading path */
        if (c == '#')
        {
          url->path = malloc(i - begin + 1);
          if (url->path == NULL)
          {
            state = -2;
          }
          else
          {
            if (i == begin)
            {
              /* must contain at least one character */
              state = -1;
            }
            else
            {
              strncpy(url->path, str + begin, i - begin);
              url->path[i - begin] = '\0';
              begin = i + 1;
              state = 4; /* move to reading anchor */
            }
          }
        }
        break;

      case 4:
        /* reading anchor  - read characters until end of string */
        break;
    }

    if (state >= 0)
      i++;
  }

  free(port);

  if (state == 0)
  {
    /* URL can't contain only protocol */
    phish_util_deleteURL(url);
    return PHISH_ERR_MALFORMED_URL;
  }
  else if (state == 1)
  {
    /* URL ended in host */
    url->host = malloc(i - begin + 1);
    if (url->host == NULL)
    {
      phish_util_deleteURL(url);
      return PHISH_ERR_MEMORY;
    }
    else
    {
      if (i == begin)
      {
        /* must contain at least one character */
        phish_util_deleteURL(url);
        return PHISH_ERR_MALFORMED_URL;
      }
      else
      {
        strncpy(url->host, str + begin, i - begin);
        url->host[i - begin] = '\0';
      }
    }
  }
  else if (state == 2)
  {
    /* URL ended in port */
    url->host = tmp;

    port = malloc(i - begin + 1);
    if (port == NULL)
    {
      phish_util_deleteURL(url);
      return PHISH_ERR_MEMORY;
    }
    else
    {
      if (i == begin)
      {
        /* must contain at least one character */
        free(port);
        phish_util_deleteURL(url);
        return PHISH_ERR_MALFORMED_URL;
      }
      else
      {
        strncpy(port, str + begin, i - begin);
        port[i - begin] = '\0';
        if (!isNum(port))
        {
          /* port string must evaluate to a number */
          free(port);
          phish_util_deleteURL(url);
          return PHISH_ERR_MALFORMED_URL;
        }
        else
        {
          url->port = atoi(port);
          free(port);
        }
      }
    }
  }
  else if (state == 3)
  {
    /* URL ended in path */
    url->path = malloc(i - begin + 1);
    if (url->path == NULL)
    {
      phish_util_deleteURL(url);
      return PHISH_ERR_MEMORY;
    }
    else
    {
      if (i == begin)
      {
        /* must contain at least one character */
        phish_util_deleteURL(url);
        return PHISH_ERR_MALFORMED_URL;
      }
      else
      {
        strncpy(url->path, str + begin, i - begin);
        url->path[i - begin] = '\0';
      }
    }
  }
  else if (state == 4)
  {
    /* URL ended in anchor */
    url->anchor = malloc(i - begin + 1);
    if (url->anchor == NULL)
    {
      phish_util_deleteURL(url);
      return PHISH_ERR_MEMORY;
    }
    else
    {
      strncpy(url->anchor, str + begin, i - begin);
      url->anchor[i - begin] = '\0';
    }
  }

  return PHISH_SUCCESS;
}

/*
 * Parses a URL string into url, making sure a path is always present.
 *
 * str: NUL-terminated URL string, handed to parseURL().
 * url: structure receiving the components of the URL.
 *
 * If parsing succeeds but leaves url->path as NULL, the path is set to a
 * newly allocated "/".
 *
 * Returns PHISH_SUCCESS on success, the error reported by parseURL() if
 * parsing fails, or PHISH_ERR_MEMORY (after passing url to
 * phish_util_deleteURL()) if the default path can't be allocated.
 */
phish_result_t phish_util_strToURL(const char *str, phish_util_url_t *url)
{
  phish_result_t status = parseURL(str, url);

  /* nothing more to do on a parse error or when a path is already set */
  if (status != PHISH_SUCCESS || url->path != NULL)
    return status;

  url->path = malloc(2);
  if (url->path == NULL)
  {
    phish_util_deleteURL(url);
    return PHISH_ERR_MEMORY;
  }

  url->path[0] = '/';
  url->path[1] = '\0';

  return PHISH_SUCCESS;
}

/*
 * Fills in the URL scheme related fields of results.
 *
 * url:     parsed URL; url->host must be a valid NUL-terminated string.
 * results: receives the outcome:
 *          - user_scheme is set to 1 if the URL carries a user name,
 *            0 otherwise;
 *          - suspicious_host is set to 1 if the host contains any character
 *            other than ASCII letters, digits, '.' and '-', 0 otherwise.
 */
void phish_util_checkURLScheme(phish_util_url_t *url,
                               phish_url_data_t *results)
{
  const char *p;

  results->user_scheme = (url->user != NULL) ? 1 : 0;

  results->suspicious_host = 0;
  for (p = url->host ; *p != '\0' ; p++)
  {
    /* isdigit() is only defined for values representable as unsigned char;
       bytes above 0x7f may be negative in a plain char, and those are
       precisely the ones that have to be flagged here */
    unsigned char c = (unsigned char) *p;

    if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
          c == '.' || c == '-' || isdigit(c)))
    {
      results->suspicious_host = 1;
      break;
    }
  }
}

/*
 * Puts a URL structure into its empty state: all string members are set to
 * NULL and the port is set to -1.
 *
 * url: structure to initialise; its previous contents are overwritten
 *      without being freed.
 */
void phish_util_initURL(phish_util_url_t *url)
{
  /* no port */
  url->port = -1;

  /* no string components */
  url->anchor = NULL;
  url->path = NULL;
  url->host = NULL;
  url->password = NULL;
  url->user = NULL;
  url->protocol = NULL;
}

/*
 * Releases the strings held by a URL structure.
 *
 * url: structure whose string members are freed. Each member must be either
 *      NULL or a pointer obtained from malloc(). The structure itself is
 *      not freed and the port is left unchanged.
 *
 * Every freed member is reset to NULL, so calling this function again on
 * the same structure is harmless. This matters because
 * phish_util_strToURL() already calls it when it fails, and the caller may
 * then delete the URL once more.
 */
void phish_util_deleteURL(phish_util_url_t *url)
{
  free(url->protocol);
  url->protocol = NULL;

  free(url->user);
  url->user = NULL;

  free(url->password);
  url->password = NULL;

  free(url->host);
  url->host = NULL;

  free(url->path);
  url->path = NULL;

  free(url->anchor);
  url->anchor = NULL;
}

