/*
 *      Interactive disassembler (IDA).
 *      Copyright (c) 1990-99 by Ilfak Guilfanov, <ig@datarescue.com>
 *      ALL RIGHTS RESERVED.
 *
 */

/*


              V I R T U A L   M I C R O   M A C H I N E


Some comments:

        The virtual micro machine has a huge register file.
        Each register is addressed by its address. The register
        address is 16bit (i.e. we can have up to 65536 registers)
        Each register is 8bit wide. Processor status word is like ordinary
        registers and can have its own address. You can define as many
        registers as you want and you can define their meanings.

        Each micro-instruction has 0..3 operands. Possible types of
        operands:
                - immediate value
                - register
                - memory reference

        Each instruction has a size specifier. There are the following
        sizes:
                - byte        (char)
                - word        (short)
                - double word (long)
                - quad word   (long long)

        Actually each processor defines a size of a smallest addressable
        quantity itself. It may be a byte or a nibble.
        All operands of an instruction have the same size.
        Exception: the second operand of shift instructions is a byte.

        A memory is considered to consist of several segments.
        We don't make any assumptions about the segments: do they overlap or
        not, etc. A memory reference is made using a selector, offset pair.
        A selector is always 2 bytes long. An offset may be 2 or 4 bytes long.
        There are 2 sets of commands that refer to memory: ldx2, stx2, jmp2, jif2
        use 2 byte offsets, while ldx4,stx4,jmp4,jif4 use 4 byte offsets.

        Usually the first operand is the source and the second operand
        is destination of the operation.

        If the instruction size is bigger than a byte then
        all register references use block of registers. For example:

                ldc.d   #1,, r8

        loads a constant 1 to registers 8,9,10,11:

                 #1  ->  r8
                 #0  ->  r9
                 #0  ->  r10
                 #0  ->  r11

        (This example uses little-endian ordering of bytes)

*/

#ifndef __MICRO_HPP
#define __MICRO_HPP

#include <limits.h>
#pragma pack(push, 1)   // IDA uses 1 byte alignments!

// Forward declarations.
// The class-key used here matches the key used by the definition later in
// this header (mlist_t is a class; mop_t, minsn_t, mblock_t and mbl_array_t
// are structs). Mixing 'class' and 'struct' for the same type is legal C++
// but triggers mismatched-tag warnings on several compilers.
class insn_t;           // not defined in this header
class mlist_t;
struct mop_t;
struct minsn_t;
struct mblock_t;
struct mbl_array_t;
typedef uchar mcode_t;  // type of minsn_t::opcode
typedef ushort mreg_t;  // micro register number (register address)

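        // We need to have two different DIV instructions (signed and unsigned)
        // because the same operand bits can give different quotients. Example:
        //       2 / -2         -> -1   (signed division, sdiv)
        //       2 / FFFE       -> 0    (unsigned division, udiv; FFFE is 16-bit -2)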

// Micro opcodes.
// The numeric value of each enumerator is its position in this list (shown
// as the hex number in its comment), so entries must not be reordered.
// Operand letters in the comments: l - left, r - right, d - destination
// operand of the micro instruction. [x=sel, y=off] denotes a memory
// reference made of a selector and an offset.
enum mcodes_t {
m_nop,          // 00 nop
m_ext,          // 01 ext                       // external. not microcode.
m_use,          // 02 use  l                    // mark a register as used. (most probably this is a cast)
m_jmp2,         // 03 jmp2          [r=sel, d=off] // Always, 16 bit
m_jmp4,         // 04 jmp4          [r=sel, d=off] // Always, 32 bit
m_jif2,         // 05 jif2  l=cond, [r=sel, d=off] // Conditional jump
m_jif4,         // 06 jif4  l=cond, [r=sel, d=off] // Conditional jump
m_stx2,         // 07 stx2  l,     [r=sel, d=off]
m_stx4,         // 08 stx4  l,     [r=sel, d=off]
m_ldx2,         // 09 ldx2  [l=sel,r=off], d
m_ldx4,         // 0A ldx4  [l=sel,r=off], d
m_ldc,          // 0B ldc  l=const,     d       // load a constant
m_mov,          // 0C mov  l,           d
m_neg,          // 0D neg  l,           d
m_xds,          // 0E xds  l,           d       // extend sign to the specified size
m_xdb,          // 0F xdb  l=byte,      d       // extend sign to the specified size
m_setz,         // 10 setz l            d=flag  // flag register is always a byte
m_setnz,        // 11 setnz l           d=flag  // flag register is always a byte
m_sets,         // 12 sets l            d=flag  // flag register is always a byte
m_not,          // 13 not  l,           d
m_add,          // 14 add  l,   r,      d
m_sub,          // 15 sub  l,   r,      d       // r - l -> dst
m_mul,          // 16 mul  l,   r,      d
m_udiv,         // 17 udiv l,   r,      d       // r / l -> dst (unsigned)
m_sdiv,         // 18 sdiv l,   r,      d       // r / l -> dst (signed)
m_umod,         // 19 umod l,   r,      d
m_smod,         // 1A smod l,   r,      d
m_or,           // 1B or   l,   r,      d
m_and,          // 1C and  l,   r,      d
m_xor,          // 1D xor  l,   r,      d
m_shl,          // 1E shl  l,   r=byte, d    // shift logical
m_sha,          // 1F sha  l,   r=byte, d    // shift arithmetic
m_rcl,          // 20 rcl  l,   r=carry,d    // rotate 1 time thru carry
m_mkcadd,       // 21 mkcadd l,   r,    d=carry // carry register is always a byte
m_mkoadd,       // 22 mkoadd l,   r,    d=overf // overf register is always a byte
m_mkcsub,       // 23 mkcsub l,   r,    d=carry // carry register is always a byte
m_mkosub,       // 24 mkosub l,   r,    d=overf // overf register is always a byte
m_mkcshl,       // 25 mkcshl l,  r=byte,d=carry // carry register is always a byte
m_mkcrcl,       // 26 mkcrcl l,  r=byte,d=carry // carry register is always a byte
m_und,          // 27 und               d    // undefine
m_push,         // 28 push   l
m_pop,          // 29 pop               d
m_goto,         // 2A goto   l                  // l is mop_v or mop_b
m_gcnd,         // 2B gcnd   l, r=cond          // l is mop_v or mop_b
m_call,         // 2C call   l                  // l is mop_v or mop_b
m_ret,          // 2D ret    l                  // l is mop_n.

m_max,          // number of opcodes (one past the last valid opcode)

};

// Let's map the processor flags to the first virtual registers.
// Registers 0..4 are reserved by the kernel; processor modules number
// their own registers starting from mr_first.
// NOTE(review): the flag meanings below are read off the macro names
// (carry, zero, sign, overflow) - confirm against the kernel sources.

#define mr_none         mreg_t(-1)      // "no such register" marker
#define mr_cf           mreg_t(0)       // carry flag
#define mr_zf           mreg_t(1)       // zero flag
#define mr_sf           mreg_t(2)       // sign flag
#define mr_of           mreg_t(3)       // overflow flag
#define mr_mf           mreg_t(4)       // memory reference
#define mr_first        mreg_t(5)       // the first IDP defined register

//-------------------------------------------------------------------------
// 'byte' is a 'mvm_t.charbit' quantity in the following definitions.
// For microprocessors, 'byte' actually may be a nibble
// For 64-bit processors, 'byte' is a byte.

//-------------------------------------------------------------------------
// One word of a register bitmap.
typedef int mbitmap_t;
// Number of bits stored in a single bitmap word.
const int mlist_width = (int)(sizeof(mbitmap_t) * CHAR_BIT);
// Index of the last bit inside a bitmap word (always mlist_width - 1).
const int mlist_align = mlist_width - 1;

// List (set) of micro registers, stored as a bitmap with one bit per
// byte-wide register: 1 - present, 0 - absent.
// Members taking a 'width' operate on a block of consecutive byte
// registers (a multi-byte register); the single-argument forms operate on
// one byte register.
// NOTE(review): the meaning of the 'int' results of the add() family is
// not visible in this header - presumably an allocation success indicator;
// confirm in the implementation.
class mlist_t {         // list of registers
  mreg_t high;          // highest number of register+1 (multiple of mlist_width)
  mbitmap_t *bitmap;    // bitmap of registers, 1-present,0-absent
  int alloc_bitmap(mreg_t reg);         // takes a register number, like all other members
  void truncate(void);  // make 'high' as low as possible
public:
  static void init_mlists(void);
  mlist_t(void) { init(); }             // construct an empty list
  mlist_t(const mlist_t &m);            // copy
  ~mlist_t(void){ kill(); }
  // Reset the fields without releasing anything. Only the constructor
  // calls it in this header; calling it on a list that owns a bitmap
  // would drop the pointer.
  void init(void) { high = 0; bitmap = NULL; }
  void kill(void);
  mlist_t &operator=(const mlist_t &m); // copy
  int add(mreg_t reg);                  // add a byte register
  int add(mreg_t reg, int width);       // add a register
  int add(const mlist_t &ml);           // add another list
  void sub(mreg_t reg);                 // delete a register
  void sub(mreg_t reg, int width);      // delete a register
  void sub(const mlist_t &ml);          // delete another list
  int has(mreg_t reg) const;            // test presence of a register
  int has(mreg_t reg, int width) const; // test presence of a register
  char *print(char *buf,size_t size) const;
  int is_empty(void) const;             // is empty?
  // make empty: only 'high' is reset, the bitmap pointer is left as is
  void zero(void) { high = 0; }
  int has_common(const mlist_t &ml) const; // has common elements?
  void intersect(const mlist_t &ml);    // intersect lists
  int is_subset_of(const mlist_t &ml) const; // is subset of?
  int is_equal(const mlist_t &ml) const;
};

//-------------------------------------------------------------------------

// Information about a function prolog. This structure should be used
// as a base class for module-specific prolog info structure.
// In the base class we define attributes common to all processors.

// Processor-independent part of the function prolog description.
// Processor modules derive their own structure from it.
struct prolog_info_t {
  char flags;                   // combination of PROLOG_... bits below
#define PROLOG_HLL      0x01    // high level language prolog is present
#define PROLOG_CHECKSP  0x02    // stack overflow is checked
#define PROLOG_SAVEFP   0x04    // frame pointer is saved
#define PROLOG_KERNEL   0x08    // the prolog is found by the kernel
#define PROLOG_FAR      0x10    // far function return
  int lvars_size;               // size of local variables
                                // (actually this is the frame size)
  mreg_t frame;                 // frame pointer. if does not exist, then mr_none
  long frame_offset;            // frame pointer offset from frame start
  mlist_t sregs;                // saved registers. these registers are saved
                                // in the prolog and should be considered
                                // as unchanged by the function even if
                                // they are changed in the function.
  ulong purged;                 // number of bytes purged from the stack
};


// For example, IBM PC specific prolog information is described below.
// Nontrivial constructors/destructors are not allowed!
// All these fields are used only by the processor module itself.

// IBM PC specific prolog information: the common prolog_info_t part
// extended with one word of processor-specific flags.
struct intel_prolog_info_t : public prolog_info_t {
  int iflags;                   // combination of the bits below
#define PROLOG_LOADDS   0x0001  // DS register is loaded
#define PROLOG_LOADES   0x0002  // ES register is loaded
#define PROLOG_SAVEREG  0x0004  // All registers are saved
#define PROLOG_INCBP    0x0008  // near function in an overlaid program
#define PROLOG_WINDS    0x0010  // windows ds loading scheme is used
};


// micro virtual machine description.
// Describes one micro virtual machine: the numbers of its well-known
// registers, the width of its 'char', and a table of function pointers.
struct mvm_t {
  mreg_t ss;                    // number of SS micro register
                                // If it does not exist, place mr_none
  mreg_t sp;                    // number of SP micro register
                                // If it does not exist, place mr_none
  mreg_t cs;                    // number of CS micro register
                                // This register should exist
  mreg_t fp;                    // number of frame pointer register
                                // This information is used when the kernel
                                // tries to find a function prolog
                                // if no fixed frame pointer register
                                // exists, then place mr_none here.
  mreg_t maxreg;                // the maximal micro register number
  int charbit;                  // number of bits in a 'char'
  int (*resolve_memref)(ulong ea,const mop_t &sel,const mop_t &off,int offsize,mop_t &op);
                                // check if the instruction refers to a known
                                // memory location. If so, returns its
                                // name in "op" (type will be mop_l, mop_v)
                                // If not, "op" should not be modified.
                                // returns: 1-ok, 0-failed
  char *(*get_mreg_name)(mreg_t reg,int width,char *buf,size_t bufsize);
                                // NOTE(review): undocumented in the original;
                                // presumably writes the name of register 'reg'
                                // of the given width into 'buf' - confirm.


// Create prolog information structure.
// This function returns a pointer to a structure derived from prolog_info_t
// For example, IBM PC module returns pointer to intel_prolog_info_t.
// The structure should be allocated in heap.

  prolog_info_t *(*create_prolog_info)(void);


// Try to find & make prolog/epilog of the function.
// If found:
//      fill 'pi' structure
//      remove all minsns which are part of prolog/epilog
//      return PROLOG_FOUND
// otherwise return PROLOG_LAZY or PROLOG_NONE.
// If this function returns PROLOG_LAZY, the kernel will try to determine
//   the function prolog itself.

  int (*make_prolog)(mbl_array_t *mba,prolog_info_t *pi);
#define PROLOG_LAZY     0       // did not find, the kernel should try to find
#define PROLOG_FOUND    1       // the prolog is found, ok
#define PROLOG_NONE     2       // no prolog can exist here.


// Get list of temporary registers.
// The list should include registers that can not pass block boundaries.
// This function may be absent.

  mlist_t &(*get_temp_regs)(void);


// Get list of registers which may hold function return values
// (i.e. the registers that may be used to pass information to callers)
//      func_ea - the current function start linear address
// This function may be absent.

  mlist_t &(*get_func_ret_regs)(ulong func_ea);


};

//-------------------------------------------------------------------------
// Printf-style callback type taken by minsn_t::show() and
// mblock_t::showbody(): an indentation level followed by a format string
// and its arguments.
typedef bool (*minsn_print_func_t)(int indent,const char *format,...);
// A function with the minsn_print_func_t signature.
// NOTE(review): presumably prints to the message window - confirm.
bool msg_insn(int indent,const char *format,...);

// micro instruction operand types:

// Type tag stored in mop_t::t. Values are assigned by position, starting
// with mop_z == 0.
enum mopt_t {
  mop_z,                // none
  mop_r,                // register
  mop_n,                // immediate
  mop_d,                // result of another instruction
  mop_l,                // stack variable
  mop_v,                // global variable
  mop_b,                // micro basic block (mblock_t)
  mop_s,                // memory pointed by register: selector
  mop_o,                // memory pointed by register: offset
};

// micro instruction operand:

// Micro instruction operand: a type tag plus a union that holds the value
// belonging to that type.
struct mop_t {
  mopt_t t;             // operand type
  union {
    mreg_t r;           // register number
    ulong n;            // immediate value
    minsn_t *d;         // result (destination) of another instruction
    long l;             // offset of local variable (in the stack frame)
    ulong g;            // global variable
    mblock_t *b;        // (used in jmp,call instructions)
  };
  mop_t(void) { t = mop_z; }                    // empty operand
  mop_t(mreg_t rg) { t = mop_r; r = rg; }       // register operand
  mop_t(const mop_t &);
  ~mop_t(void) { erase(); }
  void erase(void);
  char *print(ulong ea,int width,char *buf,size_t bufsize) const;
  int append_use_list(mlist_t &lst,int width) const;
  int append_def_list(mlist_t &lst,int width) const;
  // Immediate-value predicates. They only read the operand, so they are
  // const-qualified and can be called through the 'const mop_t &'
  // references used elsewhere in this header.
  int is_zero() const { return t == mop_n && n == 0; }          // immediate 0?
  int is_one() const  { return t == mop_n && n == 1; }          // immediate 1?
  int is_mone() const { return t == mop_n && n == ulong(-1); }  // immediate -1 (all bits set)?
  int operator == (const mop_t &r) const;
  mop_t &operator = (const mop_t &r);
  int first_half(int width);
  int second_half(int width);
};

// micro instruction:

// A single micro instruction: a node of a doubly linked list that carries
// an opcode, an operand width and up to three operands.
struct minsn_t {
  // --- data (order is significant: the header is byte-packed) ---
  minsn_t *next;                // following instruction
  minsn_t *prev;                // preceding instruction
  int n;                        // temp
  ulong ea;                     // address associated with the instruction
  mcode_t opcode;               // micro opcode
  char width;                   // width of operands (1,2,4,8)
  mop_t l;                      // left
  mop_t r;                      // right
  mop_t d;                      // destination

  // --- life cycle ---
  minsn_t(void) { init(); }
  minsn_t(const minsn_t &m);
  ~minsn_t(void) { term(); }
  void init(void);
  void term(void);

  // --- output ---
  // generate insn text. returns 1 if overflow
  int show(minsn_print_func_t = NULL) const;

  // --- analysis and transformation ---
  int build_use_list(mlist_t &lst) const;
  int build_def_list(mlist_t &lst) const;
  int is_canon(void) const;             // is in canonical form?
  int optimize(void);
  int get_left_width(void) const;
  int get_right_width(void) const;
  int get_dest_width(void) const;
  int operator == (const minsn_t &r) const;
  int perform_prop(const mlist_t &used, const minsn_t &di);
  int for_using_ops(int (*action)(minsn_t &ui, mop_t &uop, int width, void *ud), void *ud);
  void make_nop(void);

  // --- searches among the instructions of the list ---
  minsn_t *find_prev_use(mreg_t reg, mlist_t &use) const;
  minsn_t *find_prev_use(const mlist_t &reg, mlist_t &use) const;
  minsn_t *find_prev_def(mreg_t reg, mlist_t &def) const;
  minsn_t *find_prev_def(const mlist_t &reg, mlist_t &def) const;
  minsn_t *prev_defining(mlist_t &use, mlist_t &def) const;
  minsn_t *prev_using(mlist_t &use, mlist_t &def) const;
  minsn_t *find_prev_ud(mreg_t reg) const;
  minsn_t *find_prev_ud(mlist_t &reg) const;
};

//-------------------------------------------------------------------------
// How a micro basic block ends, i.e. where execution goes after it.
// Stored in mblock_t::type.
enum mblock_type_t {
  blt_none,                     // unknown block type
  blt_stop,                     // stops execution
  blt_ret,                      // returns execution to the caller
  blt_call,                     // passes execution to another function
  blt_1way,                     // passes execution to one block
  blt_2way,                     // passes execution to two blocks
  blt_nway,                     // passes execution to many blocks
};

struct mblock_t {
  uchar flags;
#define MBL_ENTRY       0x01    // this block is the entry point to the function
#define MBL_USE32       0x02    // 32bit block
#define MBL_COMB        0x04    // needs "combine" pass
#define MBL_LIST        0x08    // use/def lists are built
  int get_sp_width(void) { return (flags & MBL_USE32) ? 4 : 2; }
  int size;                     // number of instructions
  ulong start,end;              // start & end linear addresses
  minsn_t *head;                // pointer to the first instruction
  minsn_t *tail;                // pointer to the last instruction

  mlist_t use;                  // used registers
  mlist_t def;                  // defined registers
  mlist_t dnu;                  // defined but not used registers

  mblock_type_t type;           // type of block
  union {
    mblock_t *b1;               // blt_1way, blt_2way: pointer to first block
    int n_ways;                 // blt_nway: number of ways
  };
  union {
    mblock_t *b2;               // blt_2way: pointer to the second block
    mblock_t **bn;              // blt_nway: ptr to array of ptrs to blocks
  };

  int n_inb;                    // number of inbound blocks
  union {
    mblock_t *inb;              // n_inb==1: pointer to inbound block
    mblock_t **inbs;            // n_inb>1: ptr to array of ptrs of blocks
  };

  mblock_t *next;               // next block in the chain

  mblock_t(void) { init(); }
  ~mblock_t(void) { term(); }
  int emit(mcode_t code,int width,ulong l,ulong r,ulong d);
  int emit(mcode_t code,int width,const mop_t *l,const mop_t *r,const mop_t *d);
  int showbody(minsn_print_func_t f = NULL) const; // generate insn text. return 1 if overflow
  int add_inbound(mblock_t *b);         // returns 1-ok, 0-no memory
  int is_inbound(const mblock_t *b) const;
  int optimize(void);           // build lists, propogate and combine
                                // returns: error code or number of modifications
private:
  void init(void);
  void term(void);
  int put(minsn_t &m,ulong ea,mcode_t code,int width,ulong op1,ulong op2,ulong op3);
  minsn_t *replace(minsn_t *m,mcode_t code,int width,const mop_t *l,const mop_t *r,const mop_t *d);
  minsn_t *insert(minsn_t *m,mcode_t code,int width,const mop_t *l,const mop_t *r,const mop_t *d);
  minsn_t *remove(minsn_t *m);
  minsn_t *insert_used(minsn_t *m,mlist_t &dead);
  minsn_t *disintegrate(minsn_t *m);
  void eliminate_dead_regs(minsn_t *m,mlist_t &dead);
  int is_redefined(const mlist_t &regs,const minsn_t *i1,const minsn_t *i2) const;
  int is_used(const mlist_t &regs,const minsn_t *i1,const minsn_t *i2) const;
  int is_clear(const mlist_t &use,const mlist_t &def,const minsn_t *i1,const minsn_t *i2) const;
  void link_at_tail(minsn_t *m);
  void correct_dnu(void);
  int propogate(void);
  int build_lists(void);        // build def-use lists and eliminate deads
                                // returns: <0 - error code
                                // otherwise number of eliminated registers
  int combine(minsn_t *m);      // 1-found and combined
  int combine(void);            // returns number of combined insns
};

//-------------------------------------------------------------------------
// array of mblocks - usual representation for functions.
// the first mblock is at the entry point

// Collection of the micro blocks that make up one function.
struct mbl_array_t {
  int qty;                              // number of blocks
  mblock_t *blocks;                     // the blocks themselves

  // construction / destruction
  mbl_array_t(void) { init(); }
  ~mbl_array_t(void) { term(); }
  void init(void)                       // start with no blocks
  {
    blocks = NULL;
    qty = 0;
  }
  void term(void);

  // building and querying
  mblock_t *addblock(int use32);
  void show(void);
  mblock_t *find_block(ulong ea);
  mblock_t *make_mop_b(mop_t *op);

  // analysis passes
  int optimize_local(void);
  int optimize_global(void);
  int calc_block_types(void);
  int calc_inbounds(void);              // returns 1-ok, 0-no memory
  prolog_info_t *make_prolog(void);

private:
  int kernel_make_prolog(prolog_info_t *pi);
  int kernel_make_epilogs(prolog_info_t *pi);
};

//-------------------------------------------------------------------------
// Generate microcode for an instruction
// This function should be provided by IDP. The kernel calls it like this:
//      ph.ana();
//      ph.notify(IDP_MAKEMICRO,&mb);
//      ins - the instruction to convert
//      _mb - the micro block that receives the generated micro instructions
//            (NOTE(review): inferred from the parameter types - confirm)
// returns: one of the MICRO_... codes below
//          (non-negative: status, negative: error)

int make_micro(insn_t &ins,mblock_t *_mb);
#define MICRO_OK        0       // ok
#define MICRO_NONE      1       // make_micro() is not implemented
#define MICRO_INSN      (-1)    // can't convert to microcode
#define MICRO_MEM       (-2)    // not enough memory
#define MICRO_ARG       (-3)    // bad argument(s)
#define MICRO_INTERR    (-4)    // internal error


// Display a warning about the error
//      code - error code (NOTE(review): presumably one of MICRO_... - confirm)
// returns: error code

int display_micro_error(int code);


// This function uses information from 'cmd' structure
// It generates text representation of microcode.
// NOTE(review): the meaning of the int result is not documented here -
// confirm in the implementation.

int show_micro(void);

// temporary:
// Placeholder constant; it is not referenced anywhere in this header.

#define ZZ      0x12345678L     // not important

#pragma pack(pop)
#endif // define __MICRO_HPP
