diff --git a/decode.c b/decode.c
index 73e3ad8..d2c7be0 100644
--- a/decode.c
+++ b/decode.c
@@ -3,8 +3,6 @@
 #include "lib.h"
 #include
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 enum InstructionType
 {
     INST_MOV_REG_REG = 0b10001000,
@@ -92,9 +90,19 @@ static i16 get_data(unsigned char* buf, char wide)
     // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
     return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
 }
-#define IS_INST(value, m, inst) ((value & ~m) == (char)inst)
 
-typedef struct InstMask {char *name; u8 mask; u8 code;} InstMask;
+static u8 mask_and_shift(u8 value, u8 mask) // returns the bits of `value` selected by `mask`, right-aligned
+{
+    value &= mask;
+    int count = 0;
+    while ((mask & 0x1) == 0 && count < 8) // `count < 8` ends the loop when mask has no set bit
+    {
+        value >>= 1;
+        mask >>= 1;
+        count++;
+    }
+    return value;
+}
 
 typedef struct InstFormat
 {
@@ -103,7 +111,7 @@ typedef struct InstFormat
     u8 inst_enc;
     u8 mask_inst;
     u8 mask_w;
-    u64 mask_reg;
+    u8 mask_reg;
     bool has_operands;
     bool has_displacement;
     bool has_data;
@@ -122,14 +130,15 @@ InstFormat inst_formats[] =
     // MOV
     ////////
     // Register/memory to/from register
-    {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true,
-     .has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true},
+    {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
+     .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
+     .has_reg=true, .has_mod=true, .has_rm=true},
     // Immediate to register/memory
     {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1,
      .has_operands=true, .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
     // Immediate to register
     {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
-     .mask_reg=0x0b00000111, .has_data=true, .has_w=true},
+     .mask_reg=0b00000111, .has_data=true, .has_w=true},
     // Memory to accumulator | Accumulator to memory using the `d` bit
     // even though the manual doesn't specify it
     {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
@@ -153,35 +162,97 @@ InstFormat inst_formats[] =
 Instruction parse_instruction(u8* buf)
 {
     u8 inst = buf[0];
-    InstFormat format;
+    InstFormat fmt;
     bool matched_inst = false;
     // TODO: This might be a good time to learn how to make a hashtable in C
     for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
     {
         if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
         {
-            format = inst_formats[i];
+            fmt = inst_formats[i];
             matched_inst = true;
             break;
         }
     }
     if (!matched_inst) return (Instruction){.bytes_read = 0};
+    u8_opt d_opt = none_u8(); // every field starts as "none" and is filled only if the format declares it
+    u8_opt s_opt = none_u8();
+    u8_opt w_opt = none_u8();
+    u8_opt reg_opt = none_u8();
+    u8_opt mod_opt = none_u8();
+    u8_opt rm_opt = none_u8();
+    u16_opt data_opt = none_u16();
+    u16_opt displacement_opt = none_u16();
+
+    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); // d and s are both taken from bit 1 of the first byte
+    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
+    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
+    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
+    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
+    if (fmt.has_reg)
+    {
+        u8 reg = fmt.has_operands ? buf[1] : buf[0]; // reg is read from the second byte when the format has one
+        reg_opt = some_u8(mask_and_shift(reg, fmt.mask_reg));
+    }
+    if (fmt.has_data)
+    {
+        u8 idx = 1; // index of the first data byte: after the opcode, the operand byte and any displacement
+        if (fmt.has_operands) idx += 1;
+        if (fmt.has_displacement) idx += mod_opt.value % 3;
+        u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; // low byte is buf[idx], not the opcode byte buf[0]
+        data_opt = some_u16(data);
+    }
+    if (fmt.has_displacement && mod_opt.value % 3 > 0)
+    {
+        u16 disp = mod_opt.value == MODE_MEM_DIS_16
+            ? (i16)buf[3] << 8 | buf[2]
+            : (sbyte)buf[2]; // a one-byte displacement is the byte right after the mod/rm byte
+        displacement_opt = some_u16(disp);
+    }
+
+    u16 bytes_read = 1;
+    bytes_read += fmt.has_operands ? 1 : 0;
+    // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
+    // respectively but mod == 3 wraps to 0 since it doesn't displace
+    if (fmt.has_displacement) bytes_read += mod_opt.value % 3; // NOTE(review): mov_inst reads 2 bytes when mod == 0 && rm == 0b110; that case is not counted here
+    if (fmt.has_data) bytes_read += w_opt.value == 0 ? 1 : 2;
     return (Instruction)
     {
-        .id = format.id,
-        .name = format.name,
-        .data = none_u16(),
-        .displacement = none_u16(),
-        .w = none_u8(),
-        .d = none_u8(),
-        .s = none_u8(),
-        .mod = none_u8(),
-        .reg = none_u8(),
-        .rm = none_u8(),
-        .bytes_read = 2,
+        .id = fmt.id,
+        .name = fmt.name,
+        .data = data_opt,
+        .displacement = displacement_opt,
+        .w = w_opt,
+        .d = d_opt,
+        .s = s_opt,
+        .mod = mod_opt,
+        .reg = reg_opt,
+        .rm = rm_opt,
+        .bytes_read = bytes_read,
     };
 }
 
+void decode_instruction(char* str_buf, Instruction inst) // writes the textual form of `inst` into str_buf with sprintf (unbounded)
+{
+    IF_LET_SOME(u8, mod, inst.mod)
+    {
+        if (mod == MODE_RGSTR_MODE)
+        {
+            Register reg = registers[(size_t)inst.reg.value];
+            Register rm = registers[(size_t)inst.rm.value];
+            Register src_reg = inst.d.value == 0 ? reg : rm; // d == 0: reg is the source; otherwise reg is the destination
+            Register dst_reg = inst.d.value == 0 ? rm : reg;
+            char *src_name = reg_name(src_reg, inst.w.value);
+            char *dst_name = reg_name(dst_reg, inst.w.value);
+            sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id);
+        }
+    }
+    else
+    {
+        sprintf(str_buf, "%s ;%d", inst.name, inst.id);
+    }
+}
+
 bool mov_inst(FILE* f, unsigned char* buf, char inst)
 {
     size_t bytes_read;
@@ -205,8 +276,6 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
     else
     {
         bool is_direct_addr = mod == 0 && rm == 0b110;
-        // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
-        // respectively but mod == 3 wraps to 0 since it doesn't displace
         int bytes_to_read = is_direct_addr ? 2 : mod % 3;
         bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
         char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
@@ -381,21 +450,29 @@ int main(int argc, char** argv)
     printf("; Decoded 8086 Assembly Instructions\n\n");
     printf("bits 16\n\n");
 
+    char *inst_str_buf = malloc(sizeof(char) * 256); // scratch buffer that decode_instruction formats into
     u32 bytes_processed = 0;
     while (bytes_processed < bytes_read)
     {
-        Instruction inst = parse_instruction(buffer+bytes_processed);
-        bytes_processed += inst.bytes_read;
+        Instruction inst = parse_instruction(buffer + bytes_processed);
         // char inst = buffer[0];
         // if (mov_inst(f, buffer, inst)) goto handled;
         // if (add_inst(f, buffer, inst)) goto handled;
 
-        if (inst.name != NULL)
-            printf("%s ;%d", inst.name, inst.id);
+        if (inst.bytes_read > 0)
+        {
+            decode_instruction(inst_str_buf, inst);
+            printf("%s", inst_str_buf);
+            bytes_processed += inst.bytes_read;
+        }
         else
+        {
+            bytes_processed += 1; // skip one byte so an unrecognized opcode cannot stall the loop
             fprintf(stderr, "___Unrecognized Instruction___");
+        }
         // handled:
         printf("\n");
     }
+    free(inst_str_buf);
     free(buffer);
 }
diff --git a/lib.h b/lib.h
index 8323aea..be259a9 100644
--- a/lib.h
+++ b/lib.h
@@ -23,7 +23,7 @@ enum OptionTag {NONE, SOME};
         enum OptionTag tag; \
         union { \
             char none; \
-            type some; \
+            type value; \
         }; \
     } type##_opt; \
     \
@@ -34,13 +34,13 @@ enum OptionTag {NONE, SOME};
     \
     static inline type##_opt some_##type(type value) \
     { \
-        return (type##_opt){ .tag = SOME, .some = value }; \
+        return (type##_opt){ .tag = SOME, .value = value }; \
     } \
     \
     static inline int get_some_##type(type##_opt opt, type* out_value) \
     { \
         if (opt.tag != SOME) return 0; \
-        *out_value = opt.some; \
+        *out_value = opt.value; \
         return 1; \
     }
 