Instruction parser finished for MOV instructions with corner cases handled
This commit is contained in:
parent
ca0742de3c
commit
a0ed11416e
156
decode.c
156
decode.c
@ -34,7 +34,18 @@ typedef struct Register
|
|||||||
u8 code;
|
u8 code;
|
||||||
} Register;
|
} Register;
|
||||||
|
|
||||||
enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE};
|
Register registers[8] = {
|
||||||
|
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
||||||
|
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
||||||
|
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
|
||||||
|
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
|
||||||
|
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
|
||||||
|
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
|
||||||
|
{.code = 0b110, .fullname = "si", .bytename = "dh"},
|
||||||
|
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
||||||
|
};
|
||||||
|
|
||||||
|
enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE, OPR_T_DIRADDR};
|
||||||
|
|
||||||
typedef struct Operand
|
typedef struct Operand
|
||||||
{
|
{
|
||||||
@ -53,19 +64,41 @@ typedef struct Operand
|
|||||||
i16 value;
|
i16 value;
|
||||||
u8 direct;
|
u8 direct;
|
||||||
} imm;
|
} imm;
|
||||||
|
struct DirAddr {
|
||||||
|
i16 value;
|
||||||
|
} dir_addr;
|
||||||
};
|
};
|
||||||
} Operand;
|
} Operand;
|
||||||
|
|
||||||
Register registers[8] = {
|
enum ParseRegType { P_REG_NONE, P_REG_MASK, P_REG_FIXED };
|
||||||
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
typedef struct ParseReg
|
||||||
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
{
|
||||||
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
|
enum ParseRegType tag;
|
||||||
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
|
union {
|
||||||
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
|
u8 none;
|
||||||
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
|
u8 mask;
|
||||||
{.code = 0b110, .fullname = "si", .bytename = "dh"},
|
u8 fixed;
|
||||||
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
};
|
||||||
};
|
} ParseReg;
|
||||||
|
|
||||||
|
typedef struct InstFormat
|
||||||
|
{
|
||||||
|
u16 id;
|
||||||
|
char *name;
|
||||||
|
ParseReg parse_reg;
|
||||||
|
u8 inst_enc;
|
||||||
|
u8 mask_inst;
|
||||||
|
u8 mask_w;
|
||||||
|
bool has_operands;
|
||||||
|
bool has_displacement;
|
||||||
|
bool has_data;
|
||||||
|
bool has_d;
|
||||||
|
bool has_w;
|
||||||
|
bool has_mod;
|
||||||
|
bool has_rm;
|
||||||
|
bool has_s;
|
||||||
|
bool has_SR;
|
||||||
|
} InstFormat;
|
||||||
|
|
||||||
typedef struct ParsedInstruction
|
typedef struct ParsedInstruction
|
||||||
{
|
{
|
||||||
@ -80,6 +113,7 @@ typedef struct ParsedInstruction
|
|||||||
u8_opt reg;
|
u8_opt reg;
|
||||||
u8_opt rm;
|
u8_opt rm;
|
||||||
u8_opt SR;
|
u8_opt SR;
|
||||||
|
u8 is_data_addr;
|
||||||
u8 bytes_read;
|
u8 bytes_read;
|
||||||
} ParsedInstruction;
|
} ParsedInstruction;
|
||||||
|
|
||||||
@ -92,8 +126,6 @@ typedef struct Instruction
|
|||||||
u16 id;
|
u16 id;
|
||||||
} Instruction;
|
} Instruction;
|
||||||
|
|
||||||
char *memory[65536];
|
|
||||||
|
|
||||||
/// Get Effective Address Calculation Registers
|
/// Get Effective Address Calculation Registers
|
||||||
char* get_eac_register(char rm)
|
char* get_eac_register(char rm)
|
||||||
{
|
{
|
||||||
@ -137,45 +169,25 @@ static u8 mask_and_shift(u8 value, u8 mask)
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct InstFormat
|
|
||||||
{
|
|
||||||
u16 id;
|
|
||||||
char *name;
|
|
||||||
u8 inst_enc;
|
|
||||||
u8 mask_inst;
|
|
||||||
u8 mask_w;
|
|
||||||
u8 mask_reg;
|
|
||||||
bool has_operands;
|
|
||||||
bool has_displacement;
|
|
||||||
bool has_data;
|
|
||||||
bool has_d;
|
|
||||||
bool has_w;
|
|
||||||
bool has_reg;
|
|
||||||
bool has_mod;
|
|
||||||
bool has_rm;
|
|
||||||
bool has_s;
|
|
||||||
bool has_SR;
|
|
||||||
} InstFormat;
|
|
||||||
|
|
||||||
InstFormat inst_formats[] =
|
InstFormat inst_formats[] =
|
||||||
{
|
{
|
||||||
////////
|
////////
|
||||||
// MOV
|
// MOV
|
||||||
////////
|
////////
|
||||||
// Register/memory to/from register
|
// Register/memory to/from register
|
||||||
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
|
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1,
|
||||||
.mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
|
.has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
|
||||||
.has_reg=true, .has_mod=true, .has_rm=true},
|
.has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} },
|
||||||
// Immediate to register/memory
|
// Immediate to register/memory
|
||||||
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
||||||
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
||||||
// Immediate to register
|
// Immediate to register
|
||||||
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
||||||
.mask_reg=0b00000111, .has_reg=true, .has_data=true, .has_w=true},
|
.parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true},
|
||||||
// Memory to accumulator | Accumulator to memory using the `d` bit
|
// Memory to accumulator | Accumulator to memory using the `d` bit
|
||||||
// even though the manual doesn't specify it
|
// even though the manual doesn't specify it
|
||||||
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true,
|
||||||
.has_data=true, .has_w=true, .has_d=true},
|
.has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
|
||||||
// Register/memory to segment register and inverse using the `d` bit
|
// Register/memory to segment register and inverse using the `d` bit
|
||||||
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
|
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
|
||||||
.has_displacement=true, .has_mod=true, .has_rm=true},
|
.has_displacement=true, .has_mod=true, .has_rm=true},
|
||||||
@ -183,14 +195,14 @@ InstFormat inst_formats[] =
|
|||||||
// ADD
|
// ADD
|
||||||
////////
|
////////
|
||||||
// Reg/memory with register or either
|
// Reg/memory with register or either
|
||||||
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1,
|
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true,
|
||||||
.has_operands=true, .has_displacement=true, .has_w=true,
|
.mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true,
|
||||||
.has_d=true, .has_reg=true, .has_mod=true, .has_rm=true},
|
.has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}},
|
||||||
// Immediate to register/memory
|
// Immediate to register/memory
|
||||||
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
||||||
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
||||||
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1,
|
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1,
|
||||||
.has_data=true, .has_w=true},
|
.has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
ParsedInstruction parse_instruction(u8* buf)
|
ParsedInstruction parse_instruction(u8* buf)
|
||||||
@ -218,6 +230,7 @@ ParsedInstruction parse_instruction(u8* buf)
|
|||||||
u8_opt rm_opt = none_u8();
|
u8_opt rm_opt = none_u8();
|
||||||
u16_opt data_opt = none_u16();
|
u16_opt data_opt = none_u16();
|
||||||
u16_opt displacement_opt = none_u16();
|
u16_opt displacement_opt = none_u16();
|
||||||
|
u8 is_data_addr = false;
|
||||||
|
|
||||||
u16 bytes_read = 1;
|
u16 bytes_read = 1;
|
||||||
bytes_read += fmt.has_operands ? 1 : 0;
|
bytes_read += fmt.has_operands ? 1 : 0;
|
||||||
@ -227,10 +240,15 @@ ParsedInstruction parse_instruction(u8* buf)
|
|||||||
if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
|
if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
|
||||||
if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
|
if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
|
||||||
if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
|
if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
|
||||||
if (fmt.has_reg)
|
if (fmt.parse_reg.tag == P_REG_MASK)
|
||||||
{
|
{
|
||||||
u8 reg = fmt.has_operands ? buf[1] : buf[0];
|
u8 reg = fmt.has_operands ? buf[1] : buf[0];
|
||||||
reg_opt = some_u8(mask_and_shift(reg, fmt.mask_reg));
|
reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask));
|
||||||
|
}
|
||||||
|
else if (fmt.parse_reg.tag == P_REG_FIXED)
|
||||||
|
{
|
||||||
|
reg_opt = some_u8(fmt.parse_reg.fixed);
|
||||||
|
is_data_addr = true;
|
||||||
}
|
}
|
||||||
if (fmt.has_data)
|
if (fmt.has_data)
|
||||||
{
|
{
|
||||||
@ -251,6 +269,11 @@ ParsedInstruction parse_instruction(u8* buf)
|
|||||||
displacement_opt = some_u16(disp);
|
displacement_opt = some_u16(disp);
|
||||||
bytes_read += mod_opt.value % 3;
|
bytes_read += mod_opt.value % 3;
|
||||||
}
|
}
|
||||||
|
else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6)
|
||||||
|
{
|
||||||
|
displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]);
|
||||||
|
bytes_read += 2;
|
||||||
|
}
|
||||||
|
|
||||||
return (ParsedInstruction) {
|
return (ParsedInstruction) {
|
||||||
.id = fmt.id,
|
.id = fmt.id,
|
||||||
@ -263,6 +286,7 @@ ParsedInstruction parse_instruction(u8* buf)
|
|||||||
.mod = mod_opt,
|
.mod = mod_opt,
|
||||||
.reg = reg_opt,
|
.reg = reg_opt,
|
||||||
.rm = rm_opt,
|
.rm = rm_opt,
|
||||||
|
.is_data_addr = is_data_addr,
|
||||||
.bytes_read = bytes_read,
|
.bytes_read = bytes_read,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -272,36 +296,56 @@ Instruction decode_instruction(ParsedInstruction inst)
|
|||||||
Operand opr1 , opr2 = {0};
|
Operand opr1 , opr2 = {0};
|
||||||
i16 payload = 0;
|
i16 payload = 0;
|
||||||
|
|
||||||
IF_LET_SOME(u16, data, inst.data) payload = data;
|
|
||||||
IF_LET_SOME(u8, mod, inst.mod)
|
IF_LET_SOME(u8, mod, inst.mod)
|
||||||
|
{
|
||||||
|
IF_LET_SOME(u8, reg, inst.reg)
|
||||||
{
|
{
|
||||||
opr1.tag = OPR_T_REGISTER;
|
opr1.tag = OPR_T_REGISTER;
|
||||||
opr1.reg.value = registers[(size_t)inst.reg.value];
|
opr1.reg.value = registers[(size_t)reg];
|
||||||
opr1.reg.wide = inst.w.value;
|
opr1.reg.wide = inst.w.value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
opr1.tag = OPR_T_IMMEDIATE;
|
||||||
|
opr1.imm.value = inst.data.value;
|
||||||
|
// TODO: This is dumb, we shouldn't do it this way
|
||||||
|
opr1.imm.direct = inst.w.value + 1;
|
||||||
|
}
|
||||||
if (mod == MODE_RGSTR_MODE)
|
if (mod == MODE_RGSTR_MODE)
|
||||||
{
|
{
|
||||||
opr2.tag = OPR_T_REGISTER;
|
opr2.tag = OPR_T_REGISTER;
|
||||||
opr2.reg.value = registers[(size_t)inst.rm.value];
|
opr2.reg.value = registers[(size_t)inst.rm.value];
|
||||||
opr2.reg.wide = inst.w.value;
|
opr2.reg.wide = inst.w.value;
|
||||||
}
|
}
|
||||||
|
else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6)
|
||||||
|
{
|
||||||
|
opr2.tag = OPR_T_DIRADDR;
|
||||||
|
opr2.dir_addr.value = inst.displacement.value;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
opr2.tag = OPR_T_MEMORY;
|
opr2.tag = OPR_T_MEMORY;
|
||||||
opr2.mem.eac_name = get_eac_register(inst.rm.value);
|
opr2.mem.eac_name = get_eac_register(inst.rm.value);
|
||||||
opr2.mem.mode = mod;
|
opr2.mem.mode = mod;
|
||||||
if (mod == MODE_MEM_DIS_08 || mod == MODE_MEM_DIS_16)
|
|
||||||
opr2.mem.displacement = (i16)inst.displacement.value;
|
opr2.mem.displacement = (i16)inst.displacement.value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
IF_LET_SOME(u16, data, inst.data)
|
IF_LET_SOME(u16, data, inst.data)
|
||||||
|
{
|
||||||
|
if (inst.is_data_addr)
|
||||||
|
{
|
||||||
|
opr1.tag = OPR_T_DIRADDR;
|
||||||
|
opr1.dir_addr.value = (i16)data;
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
opr1.tag = OPR_T_IMMEDIATE;
|
opr1.tag = OPR_T_IMMEDIATE;
|
||||||
opr1.imm.value = (i16)data;
|
opr1.imm.value = (i16)data;
|
||||||
// TODO: Have to fix this
|
|
||||||
opr1.imm.direct = 0;
|
opr1.imm.direct = 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
IF_LET_SOME(u8, reg, inst.reg)
|
IF_LET_SOME(u8, reg, inst.reg)
|
||||||
{
|
{
|
||||||
opr2.tag = OPR_T_REGISTER;
|
opr2.tag = OPR_T_REGISTER;
|
||||||
@ -345,6 +389,10 @@ void get_operand_string(char* str_buf, Operand oprnd)
|
|||||||
size = oprnd.imm.direct == 1 ? "byte " : "word ";
|
size = oprnd.imm.direct == 1 ? "byte " : "word ";
|
||||||
sprintf(str_buf, "%s%d", size, oprnd.imm.value);
|
sprintf(str_buf, "%s%d", size, oprnd.imm.value);
|
||||||
}
|
}
|
||||||
|
else if (oprnd.tag == OPR_T_DIRADDR)
|
||||||
|
{
|
||||||
|
sprintf(str_buf, "[%d]", oprnd.dir_addr.value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_instr_string(char* str_buf, Instruction inst)
|
void get_instr_string(char* str_buf, Instruction inst)
|
||||||
@ -352,7 +400,7 @@ void get_instr_string(char* str_buf, Instruction inst)
|
|||||||
char src_str[32], dst_str[32];
|
char src_str[32], dst_str[32];
|
||||||
get_operand_string(src_str, inst.src_opr);
|
get_operand_string(src_str, inst.src_opr);
|
||||||
get_operand_string(dst_str, inst.dst_opr);
|
get_operand_string(dst_str, inst.dst_opr);
|
||||||
sprintf(str_buf, "%s %s, %s ; Inst id->%d", inst.operation, dst_str, src_str, inst.id);
|
sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
||||||
@ -508,6 +556,10 @@ bool add_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
return bytes_read > 0;
|
return bytes_read > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *memory[65536];
|
||||||
|
// Keep this global for debugging purposes
|
||||||
|
u16 inst_count = 1;
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
if (argc < 2)
|
if (argc < 2)
|
||||||
@ -565,6 +617,10 @@ int main(int argc, char** argv)
|
|||||||
bytes_processed += parsed.bytes_read;
|
bytes_processed += parsed.bytes_read;
|
||||||
// printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
|
// printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
|
||||||
printf("%s", inst_str_buf);
|
printf("%s", inst_str_buf);
|
||||||
|
int len = strlen(inst_str_buf);
|
||||||
|
for (int i = 0; i < 32 - len; i++)
|
||||||
|
printf(" ");
|
||||||
|
printf("; %d, %d", inst_count++, inst.id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user