From c5e9cfac44116605f190119b18fa145cf3a45efb Mon Sep 17 00:00:00 2001
From: Joseph Ferano
Date: Fri, 14 Feb 2025 13:08:46 +0700
Subject: [PATCH] Parse instruction function

---
 decode.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 decode.h | 71 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/decode.c b/decode.c
index d59d2ae..469573c 100644
--- a/decode.c
+++ b/decode.c
@@ -43,10 +43,104 @@ static u8 mask_and_shift(u8 value, u8 mask)
     return value;
 }
+ParsedInstruction parse_instruction_ids(u8* buf) // WIP table-driven counterpart of parse_instruction; as written it prints the inst_funcs table and always returns a zeroed ParsedInstruction
+{
+    u8 inst = buf[0]; // first byte at buf; only read by the unreachable code after the early return
+    InstFormat fmt = {0}; // never assigned a format in this function, so every fmt field read below is zero
+    bool matched_inst = false; // never set to true in this function
+    // TODO: This might be a good time to learn how to make a hashtable in C
+    for (u16 i = 0; i < sizeof(inst_funcs) / (sizeof(inst_parser_f)*6*4); i++) // i: row of inst_funcs (total size divided by the size of one [6][4] row)
+        for (int j = 0; j < 6; j++) // j: one of the 6 groups in the row
+            for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++) // k: slot in the group; stops at the first NULL (unfilled) slot
+            {
+                printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]); // debug output: one line per callback address
+                // TODO: apply the callback here (the typedef added in decode.h is inst_parser_f; no inst_func_t appears in this patch)
+            }
+    // for (int j = 0; j < 4 || ;)
+    return (ParsedInstruction){0}; // NOTE(review): unconditional return; every statement below it in this function is unreachable
+    if (!matched_inst)
+        return (ParsedInstruction){.bytes_read = 0};
+    u8_opt d_opt = none_u8(); // presumably an empty optional; none_*/some_* are defined outside this patch
+    u8_opt s_opt = none_u8();
+    u8_opt w_opt = none_u8();
+    u8_opt reg_opt = none_u8();
+    u8_opt mod_opt = none_u8();
+    u8_opt rm_opt = none_u8();
+    u16_opt data_opt = none_u16();
+    u16_opt displacement_opt = none_u16();
+    u8 is_data_addr = false;
+
+    u16 bytes_read = 1; // the first byte is always counted
+    bytes_read += fmt.has_operands ? 1 : 0; // plus a second byte when the format has operands
+
+    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); // bit 1 of the first byte
+    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); // same bit as d: bit 1 of the first byte
+    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); // low 3 bits of the second byte
+    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); // top 2 bits of the second byte
+    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); // extracted from the first byte with the format's own mask_w
+    if (fmt.parse_reg.tag == P_REG_MASK)
+    {
+        u8 reg = fmt.has_operands ? buf[1] : buf[0]; // reg bits come from the second byte when the format has operands, otherwise from the first
+        reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask));
+    }
+    else if (fmt.parse_reg.tag == P_REG_FIXED)
+    {
+        reg_opt = some_u8(fmt.parse_reg.fixed); // register value comes from the format, not from the instruction bytes
+        is_data_addr = true;
+    }
+    if (fmt.has_data)
+    {
+        u8 idx = 1; // index in buf of the first data byte
+        if (fmt.has_operands) idx += 1;
+        // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
+        // respectively but mod == 3 wraps to 0 since it doesn't displace
+        if (fmt.has_displacement) idx += mod_opt.value % 3; // NOTE(review): adds 0 when mod is 0; if MODE_MEM_NO_DIS is 0, the rm == 0x6 branch below still reads two bytes at buf[2..3] -- confirm
+        u16 data;
+        if (fmt.has_s && s_opt.value == 1)
+        {
+            data = (sbyte)buf[idx]; // one data byte, converted through sbyte (presumably a signed 8-bit type)
+            bytes_read += 1;
+        }
+        else
+        {
+            data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; // w set: two bytes, low byte first; otherwise one byte through sbyte
+            bytes_read += w_opt.value == 0 ? 1 : 2;
+        }
+        data_opt = some_u16(data);
+    }
+    if (fmt.has_displacement && mod_opt.value % 3 > 0) // holds for mod values 1 and 2
+    {
+        u16 disp = mod_opt.value == MODE_MEM_DIS_16
+            ? (i16)buf[3] << 8 | buf[2] // two bytes, low byte first
+            : (sbyte)buf[2]; // one byte through sbyte
+        displacement_opt = some_u16(disp);
+        bytes_read += mod_opt.value % 3; // 1 or 2 displacement bytes
+    }
+    else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6)
+    {
+        displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); // this mod/rm combination still carries a 16-bit value in buf[2..3]
+        bytes_read += 2;
+    }
+
+    return (ParsedInstruction) {
+        .id = fmt.id,
+        .name = fmt.name,
+        .data = data_opt,
+        .displacement = displacement_opt,
+        .w = w_opt,
+        .d = d_opt,
+        .s = s_opt,
+        .mod = mod_opt,
+        .reg = reg_opt,
+        .rm = rm_opt,
+        .is_data_addr = is_data_addr,
+        .bytes_read = bytes_read,
+    };
+}
 ParsedInstruction parse_instruction(u8* buf)
 {
     u8 inst = buf[0];
-    InstFormat fmt;
+    InstFormat fmt = {0}; // zero-initialized so fmt holds defined values before any format is copied into it
     bool matched_inst = false;
     // TODO: This might be a good time to learn how to make a hashtable in C
     for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
@@ -303,6 +397,8 @@ int main(int argc, char** argv)
     u32 bytes_processed = 0;
     while (bytes_processed < bytes_read)
     {
+        ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed); // NOTE(review): runs the WIP parser for every instruction; its result is discarded but its debug printf still writes to stdout
+        (void)_; // marks the otherwise unused result as intentionally ignored
         ParsedInstruction parsed = parse_instruction(buffer + bytes_processed);
         if (parsed.bytes_read > 0)
diff --git a/decode.h b/decode.h
index 645ab1f..a21b2d2 100644
--- a/decode.h
+++ b/decode.h
@@ -114,6 +114,77 @@ typedef struct Instruction
     u16 id;
 } Instruction;
+enum InstructionIdentifier // tags used by the inst_ids table below
+{
+    _PREFIX_2, // NOTE(review): names starting with '_' followed by an uppercase letter are reserved identifiers in C
+    _PREFIX_3, // not referenced by the tables in this patch
+    _PREFIX_6,
+    _NAME,
+    _D,
+    _W,
+    _S,
+    _MOD,
+    _REGISTER,
+    _ACC, // not referenced by the tables in this patch
+    _RM,
+    _DISP_LO,
+    _DISP_HI,
+    _DATA_W0,
+    _DATA_W1,
+} InstructionIdentifier; // NOTE(review): there is no typedef, so this declares a variable named InstructionIdentifier rather than a type alias
+
+typedef struct ParsedInst // value passed to and returned by every inst_parser_f callback
+{
+    u64 progress;
+    u8 something; // presumably a placeholder field -- TODO confirm
+} ParsedInst;
+
+typedef ParsedInst (*inst_parser_f)(ParsedInst); // callback type: takes a ParsedInst by value and returns one
+
+ParsedInst pre_2(ParsedInst pi) {return pi;} // stub: this and every callback below returns its argument unchanged. NOTE(review): non-static definitions in a header collide if decode.h is included from more than one .c file
+ParsedInst pre_3(ParsedInst pi) {return pi;} // not referenced by inst_funcs in this patch
+ParsedInst pre_6(ParsedInst pi) {return pi;}
+ParsedInst name(ParsedInst pi) {return pi;}
+ParsedInst reg(ParsedInst pi) {return pi;}
+ParsedInst w(ParsedInst pi) {return pi;}
+ParsedInst d(ParsedInst pi) {return pi;}
+ParsedInst s(ParsedInst pi) {return pi;}
+
+ParsedInst mod(ParsedInst pi) {return pi;}
+ParsedInst inst(ParsedInst pi) {return pi;} // not referenced by inst_funcs in this patch
+ParsedInst rm(ParsedInst pi) {return pi;}
+
+ParsedInst disp_lo(ParsedInst pi) {return pi;}
+ParsedInst disp_hi(ParsedInst pi) {return pi;}
+
+ParsedInst data_w0(ParsedInst pi) {return pi;}
+ParsedInst data_w1(ParsedInst pi) {return pi;}
+
+inst_parser_f inst_funcs[][6][4] = // each row: up to 6 groups of up to 4 callbacks (presumably one group per instruction byte -- confirm); omitted slots are zero-initialized, i.e. the NULL that parse_instruction_ids stops at
+{
+    {{pre_2, name, d, w}, {mod, reg, rm}, {disp_lo}, {disp_hi}},
+    {{pre_6, s, w}, {mod, name, rm}, {disp_lo}, {disp_hi}, {data_w0}, {data_w1}},
+    {{pre_6, w}, {data_w0}, {data_w1}},
+};
+
+enum InstructionIdentifier inst_ids[][6][4] = // same layout as inst_funcs with enum tags in place of callbacks. NOTE(review): omitted slots are 0, which is also the value of _PREFIX_2
+{
+    {{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}},
+    {{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}},
+    {{_PREFIX_6, _W}, {_DATA_W0}, {_DATA_W1}},
+};
+
+typedef struct InstructionParser // wraps one [6][4] grid of tags; within this patch it is used only by the commented-out table below
+{
+    enum InstructionIdentifier inst_ids[6][4];
+} InstructionParser;
+
+// InstructionParser inst_formats[] =
+// {
+// {{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}},
+// {{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}},
+// };
+
 InstFormat inst_formats[] =
 {
 ////////