From a0ed11416efa7ea5f876dc782901d2305a4a2250 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Tue, 16 Jan 2024 21:44:03 +0700 Subject: [PATCH] Instruction parser finished for MOV instructions with corner cases handled --- decode.c | 168 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 112 insertions(+), 56 deletions(-) diff --git a/decode.c b/decode.c index 9e40a80..367f8dd 100644 --- a/decode.c +++ b/decode.c @@ -34,7 +34,18 @@ typedef struct Register u8 code; } Register; -enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE}; +Register registers[8] = { + {.code = 0b000, .fullname = "ax", .bytename = "al"}, + {.code = 0b001, .fullname = "cx", .bytename = "cl"}, + {.code = 0b010, .fullname = "dx", .bytename = "dl"}, + {.code = 0b011, .fullname = "bx", .bytename = "bl"}, + {.code = 0b100, .fullname = "sp", .bytename = "ah"}, + {.code = 0b101, .fullname = "bp", .bytename = "ch"}, + {.code = 0b110, .fullname = "si", .bytename = "dh"}, + {.code = 0b111, .fullname = "di", .bytename = "bh"}, +}; + +enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE, OPR_T_DIRADDR}; typedef struct Operand { @@ -53,19 +64,41 @@ typedef struct Operand i16 value; u8 direct; } imm; + struct DirAddr { + i16 value; + } dir_addr; }; } Operand; -Register registers[8] = { - {.code = 0b000, .fullname = "ax", .bytename = "al"}, - {.code = 0b001, .fullname = "cx", .bytename = "cl"}, - {.code = 0b010, .fullname = "dx", .bytename = "dl"}, - {.code = 0b011, .fullname = "bx", .bytename = "bl"}, - {.code = 0b100, .fullname = "sp", .bytename = "ah"}, - {.code = 0b101, .fullname = "bp", .bytename = "ch"}, - {.code = 0b110, .fullname = "si", .bytename = "dh"}, - {.code = 0b111, .fullname = "di", .bytename = "bh"}, -}; +enum ParseRegType { P_REG_NONE, P_REG_MASK, P_REG_FIXED }; +typedef struct ParseReg +{ + enum ParseRegType tag; + union { + u8 none; + u8 mask; + u8 fixed; + }; +} ParseReg; + +typedef struct InstFormat +{ + u16 id; + char *name; + ParseReg parse_reg; + u8 inst_enc; + u8 mask_inst; + u8 mask_w; + bool has_operands; + bool has_displacement; + bool has_data; + bool has_d; + bool has_w; + bool has_mod; + bool has_rm; + bool has_s; + bool has_SR; +} InstFormat; typedef struct ParsedInstruction { @@ -80,6 +113,7 @@ typedef struct ParsedInstruction u8_opt reg; u8_opt rm; u8_opt SR; + u8 is_data_addr; u8 bytes_read; } ParsedInstruction; @@ -92,8 +126,6 @@ typedef struct Instruction u16 id; } Instruction; -char *memory[65536]; - /// Get Effective Address Calculation Registers char* get_eac_register(char rm) { @@ -137,45 +169,25 @@ static u8 mask_and_shift(u8 value, u8 mask) return value; } -typedef struct InstFormat -{ - u16 id; - char *name; - u8 inst_enc; - u8 mask_inst; - u8 mask_w; - u8 mask_reg; - bool has_operands; - bool has_displacement; - bool has_data; - bool has_d; - bool has_w; - bool has_reg; - bool has_mod; - bool has_rm; - bool has_s; - bool has_SR; -} InstFormat; - InstFormat inst_formats[] = { //////// // MOV //////// // Register/memory to/from register - {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000, - .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true, - .has_reg=true, .has_mod=true, .has_rm=true}, + {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1, + .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true, + .has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} }, // Immediate to register/memory {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, // Immediate to register {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, - .mask_reg=0b00000111, .has_reg=true, .has_data=true, .has_w=true}, + .parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true}, // Memory to accumulator | Accumulator to memory using the `d` bit // even though the manual doesn't specify it - {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, - .has_data=true, .has_w=true, .has_d=true}, + {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true, + .has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, // Register/memory to segment register and inverse using the `d` bit {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true, .has_displacement=true, .has_mod=true, .has_rm=true}, @@ -183,14 +195,14 @@ InstFormat inst_formats[] = // ADD //////// // Reg/memory with register or either - {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1, - .has_operands=true, .has_displacement=true, .has_w=true, - .has_d=true, .has_reg=true, .has_mod=true, .has_rm=true}, + {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true, + .mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true, + .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}}, // Immediate to register/memory {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, .has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true}, {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, - .has_data=true, .has_w=true}, + .has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, }; ParsedInstruction parse_instruction(u8* buf) @@ -218,6 +230,7 @@ ParsedInstruction parse_instruction(u8* buf) u8_opt rm_opt = none_u8(); u16_opt data_opt = none_u16(); u16_opt displacement_opt = none_u16(); + u8 is_data_addr = false; u16 bytes_read = 1; bytes_read += fmt.has_operands ? 1 : 0; @@ -227,10 +240,15 @@ ParsedInstruction parse_instruction(u8* buf) if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); - if (fmt.has_reg) + if (fmt.parse_reg.tag == P_REG_MASK) { u8 reg = fmt.has_operands ? buf[1] : buf[0]; - reg_opt = some_u8(mask_and_shift(reg, fmt.mask_reg)); + reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); + } + else if (fmt.parse_reg.tag == P_REG_FIXED) + { + reg_opt = some_u8(fmt.parse_reg.fixed); + is_data_addr = true; } if (fmt.has_data) { @@ -251,6 +269,11 @@ ParsedInstruction parse_instruction(u8* buf) displacement_opt = some_u16(disp); bytes_read += mod_opt.value % 3; } + else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) + { + displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); + bytes_read += 2; + } return (ParsedInstruction) { .id = fmt.id, @@ -263,6 +286,7 @@ ParsedInstruction parse_instruction(u8* buf) .mod = mod_opt, .reg = reg_opt, .rm = rm_opt, + .is_data_addr = is_data_addr, .bytes_read = bytes_read, }; } @@ -272,35 +296,55 @@ Instruction decode_instruction(ParsedInstruction inst) Operand opr1 , opr2 = {0}; i16 payload = 0; - IF_LET_SOME(u16, data, inst.data) payload = data; IF_LET_SOME(u8, mod, inst.mod) { - opr1.tag = OPR_T_REGISTER; - opr1.reg.value = registers[(size_t)inst.reg.value]; - opr1.reg.wide = inst.w.value; + IF_LET_SOME(u8, reg, inst.reg) + { + opr1.tag = OPR_T_REGISTER; + opr1.reg.value = registers[(size_t)reg]; + opr1.reg.wide = inst.w.value; + } + else + { + opr1.tag = OPR_T_IMMEDIATE; + opr1.imm.value = inst.data.value; + // TODO: This is dumb, we shouldn't do it this way + opr1.imm.direct = inst.w.value + 1; + } if (mod == MODE_RGSTR_MODE) { opr2.tag = OPR_T_REGISTER; opr2.reg.value = registers[(size_t)inst.rm.value]; opr2.reg.wide = inst.w.value; } + else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6) + { + opr2.tag = OPR_T_DIRADDR; + opr2.dir_addr.value = inst.displacement.value; + } else { opr2.tag = OPR_T_MEMORY; opr2.mem.eac_name = get_eac_register(inst.rm.value); opr2.mem.mode = mod; - if (mod == MODE_MEM_DIS_08 || mod == MODE_MEM_DIS_16) - opr2.mem.displacement = (i16)inst.displacement.value; + opr2.mem.displacement = (i16)inst.displacement.value; } } else { IF_LET_SOME(u16, data, inst.data) { - opr1.tag = OPR_T_IMMEDIATE; - opr1.imm.value = (i16)data; - // TODO: Have to fix this - opr1.imm.direct = 0; + if (inst.is_data_addr) + { + opr1.tag = OPR_T_DIRADDR; + opr1.dir_addr.value = (i16)data; + } + else + { + opr1.tag = OPR_T_IMMEDIATE; + opr1.imm.value = (i16)data; + opr1.imm.direct = 0; + } } IF_LET_SOME(u8, reg, inst.reg) { @@ -345,6 +389,10 @@ void get_operand_string(char* str_buf, Operand oprnd) size = oprnd.imm.direct == 1 ? "byte " : "word "; sprintf(str_buf, "%s%d", size, oprnd.imm.value); } + else if (oprnd.tag == OPR_T_DIRADDR) + { + sprintf(str_buf, "[%d]", oprnd.dir_addr.value); + } } void get_instr_string(char* str_buf, Instruction inst) @@ -352,7 +400,7 @@ void get_instr_string(char* str_buf, Instruction inst) char src_str[32], dst_str[32]; get_operand_string(src_str, inst.src_opr); get_operand_string(dst_str, inst.dst_opr); - sprintf(str_buf, "%s %s, %s ; Inst id->%d", inst.operation, dst_str, src_str, inst.id); + sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str); } bool mov_inst(FILE* f, unsigned char* buf, char inst) @@ -508,6 +556,10 @@ bool add_inst(FILE* f, unsigned char* buf, char inst) return bytes_read > 0; } +char *memory[65536]; +// Keep this global for debugging purposes +u16 inst_count = 1; + int main(int argc, char** argv) { if (argc < 2) @@ -565,6 +617,10 @@ int main(int argc, char** argv) bytes_processed += parsed.bytes_read; // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read); printf("%s", inst_str_buf); + int len = strlen(inst_str_buf); + for (int i = 0; i < 32 - len; i++) + printf(" "); + printf("; %d, %d", inst_count++, inst.id); } else {