// read Mach-O symbols

#include <pro.h>
#include <fpro.h>
#include <kernwin.hpp>
#include <diskio.hpp>
#include "../../ldr/mach-o/common.cpp"
//#include "../../ldr/mach-o/h/mach-o/nlist.h"
#include "symmacho.hpp"

//--------------------------------------------------------------------------
// Read the Mach-O header from the current position of 'li' and load the raw
// load-command bytes that follow it into 'commands'.
// If the input starts with a fat header, the fat arch table is scanned for
// the first CPU_TYPE_I386 entry and the Mach-O header is read from there.
//   li       - input to read from
//   p_off    - if not NULL, receives the offset of the selected image
//              (0 for a non-fat input); written only on success
//   commands - receives mh.sizeofcmds bytes of load commands
//   ncmds    - receives the number of load commands (0 on failure)
// Returns false on a short read, if no i386 slice is found, or if the
// header is not MH_MAGIC.
bool read_macho_commands(linput_t *li, uint32 *p_off, bytevec_t &commands, int *ncmds)
{
  *ncmds = 0;

  mach_header hdr;
  if ( qlread(li, &hdr, sizeof(hdr)) != sizeof(hdr) )
    return false;

  uint32 image_off = 0;
  const bool is_fat = hdr.magic == FAT_MAGIC || hdr.magic == FAT_CIGAM;
  if ( is_fat )
  {
    // the fat header must be byte-swapped when its magic reads as FAT_CIGAM
    const bool need_swap = hdr.magic == FAT_CIGAM;

    // re-read the beginning of the input as a fat header
    qlseek(li, 0, SEEK_SET);
    fat_header fat;
    if ( qlread(li, &fat, sizeof(fat)) != sizeof(fat) )
      return false;
    if ( need_swap )
      swap_fat_header(&fat);

    // the arch descriptors follow the fat header; take the first i386 one
    for ( int idx=0; idx < fat.nfat_arch; idx++ )
    {
      fat_arch arch;
      if ( qlread(li, &arch, sizeof(arch)) != sizeof(arch) )
        return false;
      if ( need_swap )
        swap_fat_arch(&arch);
      if ( arch.cputype != CPU_TYPE_I386 )
        continue;
      image_off = arch.offset;
      break;
    }

    // offset 0 means "no suitable slice"
    if ( image_off == 0 )
      return false;

    // now read the real Mach-O header of the selected slice
    qlseek(li, image_off, SEEK_SET);
    if ( qlread(li, &hdr, sizeof(hdr)) != sizeof(hdr) )
      return false;
  }

  if ( hdr.magic != MH_MAGIC )
    return false;
  if ( hdr.sizeofcmds <= 0 )
    return false;

  // the load commands immediately follow the header
  commands.resize(hdr.sizeofcmds);
  if ( qlread(li, &commands[0], commands.size()) != commands.size() )
    return false;

  *ncmds = hdr.ncmds;
  if ( p_off != NULL )
    *p_off = image_off;
  return true;
}

//--------------------------------------------------------------------------
// Returns true if the segment has vmaddr, fileoff and initprot all equal to
// zero (presumably the inaccessible zero page placeholder, as the name says).
inline bool is_zeropage(const segment_command &sg)
{
  if ( sg.vmaddr != 0 )
    return false;
  if ( sg.fileoff != 0 )
    return false;
  return sg.initprot == 0;
}

//--------------------------------------------------------------------------
// Returns true if the segment looks like a text segment: it is not the zero
// page and its name, after skipping any leading underscores, starts with
// "TEXT" (case-insensitive).
inline bool is_text_segment(const segment_command &sg)
{
  if ( is_zeropage(sg) )
    return false;

  // skip the leading underscores, never leaving the segname array
  const size_t namelen = sizeof(sg.segname);
  size_t pos = 0;
  while ( pos < namelen && sg.segname[pos] == '_' )
    pos++;

  // segname is a fixed-size field that need not be NUL-terminated:
  // make sure the 4 characters we compare are all inside it
  if ( namelen - pos < 4 )
    return false;
  return strnicmp(sg.segname + pos, "TEXT", 4) == 0;
}

//--------------------------------------------------------------------------
// Returns true if the segment name starts with SEG_LINKEDIT
// (case-insensitive prefix comparison).
inline bool is_linkedit_segment(const segment_command &sg)
{
  // sizeof()-1: presumably SEG_LINKEDIT is a string literal macro, so this
  // is its length without the terminating zero
  const size_t prefix_len = sizeof(SEG_LINKEDIT)-1;
  const int diff = strnicmp(sg.segname, SEG_LINKEDIT, prefix_len);
  return diff == 0;
}

//parse Mach-O loader commands and fill in requested information
//return expected image base (start of __TEXT segment)
//  li            - input used to read the symbol and string tables
//  off           - offset of the Mach-O image inside 'li'
//  load_commands - raw load command bytes
//  ncmds         - number of load commands to walk
//  symbols       - if not NULL, receives the nlist entries of LC_SYMTAB
//  strings       - if not NULL, receives the string table of LC_SYMTAB
//  seg_infos     - if not NULL, receives one entry per section (or one per
//                  segment for segments without sections)
//  in_mem        - true if 'li' is a memory image: the link-edit data is then
//                  looked up relative to the __LINKEDIT vmaddr, not its fileoff
//Parsing stops at the first LC_SYMTAB command. The returned value is then the
//vmaddr of the first text segment seen so far, or 0 if there was none.
//If no LC_SYMTAB is found (or a malformed command is hit first), -1 is returned.
//--------------------------------------------------------------------------
ea_t parse_mach_commands(
        linput_t *li,
        uint32 off,
        const bytevec_t &load_commands,
        int ncmds,
        nlists_t *symbols,
        bytevec_t *strings,
        seg_infos_t* seg_infos,
        bool in_mem)
{
  QASSERT(li != NULL);

  const uchar *ptr = load_commands.begin();
  const uchar *end = load_commands.end();
  sval_t expected_base = -1;      // -1: no text segment seen yet
  sval_t linkedit_shift = 0;      // added to symoff/stroff when seeking
  bool got_linkedit = false;
  if ( seg_infos != NULL )
    seg_infos->qclear();

  for ( int i=0; i < ncmds; i++ )
  {
    // the command header must fit into the remaining bytes before we read it
    const size_t left = end - ptr;
    if ( left < sizeof(load_command) )
      break;
    const load_command lc = *(const load_command*)ptr;
    // reject commands that are too small to advance (including cmdsize == 0)
    // or that run past the end of the buffer
    if ( lc.cmdsize < sizeof(load_command) || lc.cmdsize > left )
      break;

    if ( lc.cmd == LC_SEGMENT )
    {
      if ( lc.cmdsize < sizeof(segment_command) )
        break; // truncated segment command
      const segment_command &sg = *(const segment_command*)ptr;
      if ( expected_base == -1 && is_text_segment(sg) )
        expected_base = sg.vmaddr;
      if ( !got_linkedit && is_linkedit_segment(sg) )
      {
        got_linkedit = true;
        if ( in_mem )
        {
          // if no text segment has been seen yet, use base 0: this is the
          // same value the function reports as the image base in that case
          const sval_t base = expected_base == -1 ? 0 : expected_base;
          linkedit_shift = sg.vmaddr - base - sg.fileoff;
        }
      }

      if ( seg_infos != NULL )
      {
        if ( sg.nsects == 0 )
        {
          // no sections: describe the segment itself
          seg_info_t &si = seg_infos->push_back();
          si.name  = sg.segname;
          si.size  = sg.vmsize;
          si.start = sg.vmaddr;
        }
        else
        {
          // the section headers follow the segment command;
          // never trust nsects beyond what the command size can hold
          const size_t room = (lc.cmdsize - sizeof(segment_command)) / sizeof(struct section);
          const size_t nsects = sg.nsects < room ? sg.nsects : room;
          const struct section *sects = (const struct section*)(ptr + sizeof(segment_command));
          for ( size_t j=0; j < nsects; j++ )
          {
            seg_info_t &si = seg_infos->push_back();
            // NOTE(review): sectname/segname are fixed-size fields; a name
            // that fills the whole field is presumably not NUL-terminated
            si.name  = sects[j].sectname;
            si.size  = sects[j].size;
            si.start = sects[j].addr;
          }
        }
      }
    }
    else if ( lc.cmd == LC_SYMTAB )
    {
      if ( lc.cmdsize < sizeof(symtab_command) )
        break; // truncated symtab command
      const symtab_command &st = *(const symtab_command*)ptr;
      if ( st.nsyms > 0 && symbols != NULL )
      {
        size_t nbytes = st.nsyms*sizeof(struct nlist);
        symbols->resize(st.nsyms);
        memset(symbols->begin(), 0, nbytes);
        qlseek(li, off + linkedit_shift + st.symoff, SEEK_SET);
        // we do not check the error code, if fails, we will have zeroes
        qlread(li, symbols->begin(), nbytes);
      }
      if ( st.strsize > 0 && strings != NULL)
      {
        strings->resize(st.strsize, 0);
        qlseek(li, off + linkedit_shift + st.stroff, SEEK_SET);
        // we do not check the error code, if fails, we will have zeroes
        qlread(li, strings->begin(), st.strsize);
      }
      return expected_base == -1 ? 0 : expected_base;
    }
    ptr += lc.cmdsize;
  }
  return -1;
}

//parse a mach-o file image in memory and enumerate its segments and symbols
//  start - address of the Mach-O header in memory
//  sv    - visitor: visit_segment() is called for every segment/section and
//          visit_symbol() for every named N_SECT/N_INDR symbol that is either
//          exported or (N_SECT only) belongs to a section
//All reported addresses are adjusted by the difference between 'start' and
//the image base expected by the load commands.
//Returns false if the memory input can not be created, the header can not
//be read, or the load commands could not be parsed up to LC_SYMTAB.
//--------------------------------------------------------------------------
bool parse_macho_mem(ea_t start, symbol_visitor_t &sv)
{
  linput_t *li = create_memory_linput(start, 0);
  if ( li == NULL )
    return false;

  uint32 off;
  int ncmds;
  bytevec_t commands;
  if ( !read_macho_commands(li, &off, commands, &ncmds) )
  {
    close_linput(li); // do not leak the input on failure
    return false;
  }

  seg_infos_t seg_infos;
  nlists_t symbols;
  bytevec_t strings;
  ea_t expected_base = parse_mach_commands(li, off, commands, ncmds, &symbols, &strings, &seg_infos, true);
  close_linput(li);

  // parse_mach_commands() returns -1 when it could not reach LC_SYMTAB;
  // a slide computed from that value would be meaningless
  if ( expected_base == BADADDR )
    return false;
  sval_t slide = start - expected_base;

  for ( size_t i=0; i < seg_infos.size(); i++ )
  {
    const seg_info_t &si = seg_infos[i];
    sv.visit_segment(si.start + slide, si.size, si.name.c_str());
  }

  // append a terminating zero so that a name at the very end of the string
  // table can not run past the buffer; indexes are still validated against
  // the original table size
  const size_t strsize = strings.size();
  strings.resize(strsize + 1, 0);

  for ( size_t i=0; i < symbols.size(); i++ )
  {
    const struct nlist &nl = symbols[i];
    const size_t strx = nl.n_un.n_strx;
    if ( strx >= strsize )
      continue;
    const char *name = (const char*)&strings[0] + strx;

    int type = nl.n_type & N_TYPE;
    switch ( type )
    {
      case N_UNDF:
      case N_PBUD:
      case N_ABS:
        break;
      case N_SECT:
      case N_INDR:
        if ( name[0] != '\0' )
        {
          ea_t ea = nl.n_value + slide;
          if ( (nl.n_type & (N_EXT|N_PEXT)) == N_EXT ) // exported
          {
            sv.visit_symbol(ea, name);
          }
          else if ( type == N_SECT && nl.n_sect != NO_SECT ) // private symbols
          {
            sv.visit_symbol(ea, name);
          }
        }
        break;
      default:
        break;
    }
  }
  return true;
}

//--------------------------------------------------------------------------
// Calculate the size of the text part of a Mach-O image.
//   li     - input positioned at the Mach-O (or fat) header
//   p_base - if not NULL, receives the vmaddr of the first text segment,
//            or BADADDR if there is none
// Returns the distance from the first text segment's vmaddr to the highest
// end address among all text segments; 0 if 'li' is NULL, the header can not
// be read, or no text segment is found.
asize_t calc_macho_image_size(linput_t *li, ea_t *p_base)
{
  if ( li == NULL )
    return 0;
  if ( p_base != NULL )
    *p_base = BADADDR;

  uint32 off;
  bytevec_t commands;
  int ncmds;
  if ( !read_macho_commands(li, &off, commands, &ncmds) )
    return 0;

  const uchar *ptr = commands.begin();
  const uchar *cmds_end = commands.end();
  ea_t base = BADADDR;
  ea_t maxea = 0;
  for ( int i=0; i < ncmds; i++ )
  {
    // the command header must fit into the remaining bytes before we read it
    const size_t left = cmds_end - ptr;
    if ( left < sizeof(load_command) )
      break;
    const load_command lc = *(const load_command*)ptr;
    // reject commands that can not advance (cmdsize == 0 included) or that
    // run past the end of the buffer
    if ( lc.cmdsize < sizeof(load_command) || lc.cmdsize > left )
      break;

    if ( lc.cmd == LC_SEGMENT )
    {
      if ( lc.cmdsize < sizeof(segment_command) )
        break; // truncated segment command
      const segment_command &sg = *(const segment_command*)ptr;
      // since mac os x scatters application segments over the memory
      // we calculate only the text segment size
      if ( is_text_segment(sg) )
      {
        if ( base == BADADDR )
          base = sg.vmaddr;
        // widen before adding so the sum is computed in ea_t
        ea_t seg_end = ea_t(sg.vmaddr) + sg.vmsize;
        if ( maxea < seg_end )
          maxea = seg_end;
      }
    }
    ptr += lc.cmdsize;
  }

  // no text segment: report an empty image, *p_base stays BADADDR
  if ( base == BADADDR )
    return 0;

  if ( p_base != NULL )
    *p_base = base;
  return maxea - base;
}

//--------------------------------------------------------------------------
// Check if there is a suitable Mach-O header at 'base' and retrieve its name.
// Note that, despite the function name, only images with
// filetype == MH_DYLINKER are accepted.
//   base     - address of the presumed Mach-O header
//   read_mem - callback used to read memory; it is expected to return the
//              number of bytes read
//   filename - buffer for the name found in the first LC_ID_DYLIB or
//              LC_ID_DYLINKER command; empty string if none was found.
//              May be NULL if the name is not needed.
//   namesize - size of 'filename' in bytes
// Returns true if the header matches, even if no name could be retrieved.
bool is_dylib_header(ea_t base, read_memory_t read_mem, char *filename, size_t namesize)
{
  mach_header mh;
  if ( read_mem(base, &mh, sizeof(mh)) != sizeof(mh) )
    return false;

  if ( mh.magic != MH_MAGIC || mh.filetype != MH_DYLINKER )
    return false;

  // the header matches; nothing more to do if the caller wants no name
  if ( filename == NULL || namesize == 0 )
    return true;

  // find its file name
  // start from an all-zero buffer so that a failed or short read of the
  // name leaves a well-formed (possibly empty) string
  memset(filename, 0, namesize);
  ea_t ea = base + sizeof(mh);
  for ( int i=0; i < mh.ncmds; i++ )
  {
    struct load_command lc;
    if ( read_mem(ea, &lc, sizeof(lc)) != sizeof(lc) )
      break; // can not read the command: stop looking

    bool is_id_cmd = false;
    bool got_offset = false;
    uint32 name_off = 0;
    if ( lc.cmd == LC_ID_DYLIB )
    {
      is_id_cmd = true;
      struct dylib_command dcmd;
      if ( read_mem(ea, &dcmd, sizeof(dcmd)) == sizeof(dcmd) )
      {
        name_off = dcmd.dylib.name.offset;
        got_offset = true;
      }
    }
    else if ( lc.cmd == LC_ID_DYLINKER )
    {
      is_id_cmd = true;
      struct dylinker_command dcmd;
      if ( read_mem(ea, &dcmd, sizeof(dcmd)) == sizeof(dcmd) )
      {
        name_off = dcmd.name.offset;
        got_offset = true;
      }
    }

    if ( is_id_cmd )
    {
      if ( got_offset )
      {
        // the name is stored inside the command, at the given offset
        read_mem(ea+name_off, filename, namesize);
        filename[namesize-1] = '\0'; // the name may be longer than the buffer
      }
      break; // only the first identification command is considered
    }

    // a command smaller than its own header can not be stepped over
    if ( lc.cmdsize < sizeof(lc) )
      break;
    ea += lc.cmdsize;
  }
  return true;
}
