From ca0742de3cb2e916c113d212dc8d10a70680f926 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Tue, 16 Jan 2024 18:27:19 +0700 Subject: [PATCH] WIP Refactor: Instruction parsing, decoding and printing split --- decode.c | 184 +++++++++++++++++++++++++++++++++++++++++++------------ lib.h | 6 +- 2 files changed, 148 insertions(+), 42 deletions(-) diff --git a/decode.c b/decode.c index d2c7be0..9e40a80 100644 --- a/decode.c +++ b/decode.c @@ -2,6 +2,8 @@ #include #include "lib.h" #include +#include +#include enum InstructionType { @@ -20,7 +22,6 @@ enum Mode typedef struct Register { - char code; char* fullname; char* bytename; union { @@ -30,8 +31,31 @@ typedef struct Register }; u16 full; } value; + u8 code; } Register; +enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE}; + +typedef struct Operand +{ + enum OperandType tag; + union { + struct Mem { + char *eac_name; + i16 displacement; + u8 mode; + } mem; + struct Reg { + Register value; + bool wide; + } reg; + struct Imm { + i16 value; + u8 direct; + } imm; + }; +} Operand; + Register registers[8] = { {.code = 0b000, .fullname = "ax", .bytename = "al"}, {.code = 0b001, .fullname = "cx", .bytename = "cl"}, @@ -43,7 +67,7 @@ Register registers[8] = { {.code = 0b111, .fullname = "di", .bytename = "bh"}, }; -typedef struct Instruction +typedef struct ParsedInstruction { u16 id; char *name; @@ -57,12 +81,21 @@ typedef struct Instruction u8_opt rm; u8_opt SR; u8 bytes_read; +} ParsedInstruction; + +typedef struct Instruction +{ + Operand src_opr; + Operand dst_opr; + i16 data; + char *operation; + u16 id; } Instruction; char *memory[65536]; /// Get Effective Address Calculation Registers -char* get_eac_registers(char rm) +char* get_eac_register(char rm) { char* reg_name; switch (rm) @@ -138,7 +171,7 @@ InstFormat inst_formats[] = .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, // Immediate to register {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, - .mask_reg=0b00000111, .has_data=true, .has_w=true}, + .mask_reg=0b00000111, .has_reg=true, .has_data=true, .has_w=true}, // Memory to accumulator | Accumulator to memory using the `d` bit // even though the manual doesn't specify it {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, @@ -156,10 +189,11 @@ InstFormat inst_formats[] = // Immediate to register/memory {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, .has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true}, - {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true}, + {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, + .has_data=true, .has_w=true}, }; -Instruction parse_instruction(u8* buf) +ParsedInstruction parse_instruction(u8* buf) { u8 inst = buf[0]; InstFormat fmt; @@ -175,7 +209,7 @@ Instruction parse_instruction(u8* buf) } } if (!matched_inst) - return (Instruction){.bytes_read = 0}; + return (ParsedInstruction){.bytes_read = 0}; u8_opt d_opt = none_u8(); u8_opt s_opt = none_u8(); u8_opt w_opt = none_u8(); @@ -185,6 +219,9 @@ Instruction parse_instruction(u8* buf) u16_opt data_opt = none_u16(); u16_opt displacement_opt = none_u16(); + u16 bytes_read = 1; + bytes_read += fmt.has_operands ? 1 : 0; + if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); @@ -199,25 +236,23 @@ Instruction parse_instruction(u8* buf) { u8 idx = 1; if (fmt.has_operands) idx += 1; + // This is a trick because mod == 1 and mod == 2 will displace one and two bytes + // respectively but mod == 3 wraps to 0 since it doesn't displace if (fmt.has_displacement) idx += mod_opt.value % 3; - u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[0] : (sbyte)buf[idx]; + u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; data_opt = some_u16(data); + bytes_read += w_opt.value == 0 ? 1 : 2; } if (fmt.has_displacement && mod_opt.value % 3 > 0) { u16 disp = mod_opt.value == MODE_MEM_DIS_16 ? (i16)buf[3] << 8 | buf[2] - : (sbyte)buf[3]; + : (sbyte)buf[2]; displacement_opt = some_u16(disp); + bytes_read += mod_opt.value % 3; } - u16 bytes_read = 1; - bytes_read += fmt.has_operands ? 1 : 0; - // This is a trick because mod == 1 and mod == 2 will displace one and two bytes - // respectively but mod == 3 wraps to 0 since it doesn't displace - if (fmt.has_displacement) bytes_read += mod_opt.value % 3; - if (fmt.has_data) bytes_read += w_opt.value == 0 ? 1 : 2; - return (Instruction) { + return (ParsedInstruction) { .id = fmt.id, .name = fmt.name, .data = data_opt, @@ -232,25 +267,92 @@ Instruction parse_instruction(u8* buf) }; } -void decode_instruction(char* str_buf, Instruction inst) +Instruction decode_instruction(ParsedInstruction inst) { + Operand opr1 , opr2 = {0}; + i16 payload = 0; + + IF_LET_SOME(u16, data, inst.data) payload = data; IF_LET_SOME(u8, mod, inst.mod) { + opr1.tag = OPR_T_REGISTER; + opr1.reg.value = registers[(size_t)inst.reg.value]; + opr1.reg.wide = inst.w.value; if (mod == MODE_RGSTR_MODE) { - Register reg = registers[(size_t)inst.reg.value]; - Register rm = registers[(size_t)inst.rm.value]; - Register src_reg = inst.d.value == 0 ? reg : rm; - Register dst_reg = inst.d.value == 0 ? rm : reg; - char *src_name = reg_name(src_reg, inst.w.value); - char *dst_name = reg_name(dst_reg, inst.w.value); - sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id); + opr2.tag = OPR_T_REGISTER; + opr2.reg.value = registers[(size_t)inst.rm.value]; + opr2.reg.wide = inst.w.value; + } + else + { + opr2.tag = OPR_T_MEMORY; + opr2.mem.eac_name = get_eac_register(inst.rm.value); + opr2.mem.mode = mod; + if (mod == MODE_MEM_DIS_08 || mod == MODE_MEM_DIS_16) + opr2.mem.displacement = (i16)inst.displacement.value; } } else { - sprintf(str_buf, "%s ;%d", inst.name, inst.id); + IF_LET_SOME(u16, data, inst.data) + { + opr1.tag = OPR_T_IMMEDIATE; + opr1.imm.value = (i16)data; + // TODO: Have to fix this + opr1.imm.direct = 0; + } + IF_LET_SOME(u8, reg, inst.reg) + { + opr2.tag = OPR_T_REGISTER; + opr2.reg.value = registers[(size_t)reg]; + opr2.reg.wide = inst.w.value; + } } + if (inst.d.tag == SOME && inst.d.value == 1) + { + Operand temp = opr1; + opr1 = opr2; + opr2 = temp; + } + return (Instruction) { + .id = inst.id, + .data = payload, + .operation = inst.name, + .src_opr = opr1, + .dst_opr = opr2, + }; +} + +void get_operand_string(char* str_buf, Operand oprnd) +{ + if (oprnd.tag == OPR_T_REGISTER) + { + strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide)); + } + else if (oprnd.tag == OPR_T_MEMORY) + { + char disp_str[16] = {'\0'}; + i16 disp = oprnd.mem.displacement; + if (disp != 0) + sprintf(disp_str, " %s %d", (disp > 0 ? "+" : "-"), abs(disp)); + sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, disp_str); + } + else if (oprnd.tag == OPR_T_IMMEDIATE) + { + char *size = ""; + if (oprnd.imm.direct > 0) + size = oprnd.imm.direct == 1 ? "byte " : "word "; + sprintf(str_buf, "%s%d", size, oprnd.imm.value); + } +} + +void get_instr_string(char* str_buf, Instruction inst) +{ + char src_str[32], dst_str[32]; + get_operand_string(src_str, inst.src_opr); + get_operand_string(dst_str, inst.dst_opr); + sprintf(str_buf, "%s %s, %s ; Inst id->%d", inst.operation, dst_str, src_str, inst.id); } bool mov_inst(FILE* f, unsigned char* buf, char inst) @@ -278,7 +380,7 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst) bool is_direct_addr = mod == 0 && rm == 0b110; int bytes_to_read = is_direct_addr ? 2 : mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); - char* eac_name = is_direct_addr ? "" : get_eac_registers(rm); + char* eac_name = is_direct_addr ? "" : get_eac_register(rm); char disp_buf[16] = {'\0'}; if (bytes_to_read > 0) { @@ -303,7 +405,7 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst) // Same trick from earlier, see comment bytes_to_read += mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); - char *eac_name = get_eac_registers(rm); + char *eac_name = get_eac_register(rm); i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w); char *word_str = w == 0 ? "byte" : "word"; char disp_str[16] = {'\0'}; @@ -372,13 +474,13 @@ bool add_inst(FILE* f, unsigned char* buf, char inst) } else if (mod == MODE_MEM_NO_DIS) { - if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_registers(rm)); - else printf("add [%s], %s ;10", get_eac_registers(rm), reg_name(rgstr, w)); + if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_register(rm)); + else printf("add [%s], %s ;10", get_eac_register(rm), reg_name(rgstr, w)); } else { - if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_registers(rm)); - else printf("add [%s], %s ;12", get_eac_registers(rm), reg_name(rgstr, w)); + if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_register(rm)); + else printf("add [%s], %s ;12", get_eac_register(rm), reg_name(rgstr, w)); } } else if ((inst & ~0x3) == (char)0b10000000) @@ -392,7 +494,7 @@ bool add_inst(FILE* f, unsigned char* buf, char inst) // Same trick from earlier, see comment bytes_to_read += mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); - char *eac_name = get_eac_registers(rm); + char *eac_name = get_eac_register(rm); i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w); char *word_str = w == 0 ? "byte" : "word"; char disp_str[16] = {'\0'}; @@ -454,25 +556,29 @@ int main(int argc, char** argv) u32 bytes_processed = 0; while (bytes_processed < bytes_read) { - Instruction inst = parse_instruction(buffer + bytes_processed); - // char inst = buffer[0]; - // if (mov_inst(f, buffer, inst)) goto handled; - // if (add_inst(f, buffer, inst)) goto handled; + ParsedInstruction parsed = parse_instruction(buffer + bytes_processed); - if (inst.bytes_read > 0) + if (parsed.bytes_read > 0) { - decode_instruction(inst_str_buf, inst); + Instruction inst = decode_instruction(parsed); + get_instr_string(inst_str_buf, inst); + bytes_processed += parsed.bytes_read; + // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read); printf("%s", inst_str_buf); - bytes_processed += inst.bytes_read; } else { bytes_processed += 1; fprintf(stderr, "___Unrecognized Instruction___"); } + // char inst = buffer[0]; + // if (mov_inst(f, buffer, inst)) goto handled; + // if (add_inst(f, buffer, inst)) goto handled; // handled: printf("\n"); } free(inst_str_buf); free(buffer); + + return 0; } diff --git a/lib.h b/lib.h index be259a9..40909ca 100644 --- a/lib.h +++ b/lib.h @@ -23,7 +23,7 @@ enum OptionTag {NONE, SOME}; enum OptionTag tag; \ union { \ char none; \ - type value; \ + type value; \ }; \ } type##_opt; \ \ @@ -34,13 +34,13 @@ enum OptionTag {NONE, SOME}; \ static inline type##_opt some_##type(type value) \ { \ - return (type##_opt){ .tag = SOME, .value = value }; \ + return (type##_opt){ .tag = SOME, .value = value }; \ } \ \ static inline int get_some_##type(type##_opt opt, type* out_value) \ { \ if (opt.tag != SOME) return 0; \ - *out_value = opt.value; \ + *out_value = opt.value; \ return 1; \ }