#include #include #include #include #include #include "lib.h" #include "decode.h" /// Get Effective Address Calculation Registers char* get_eac_register(char rm) { char* reg_name; switch (rm) { case 0b000: reg_name = "bx + si"; break; case 0b001: reg_name = "bx + di"; break; case 0b010: reg_name = "bp + si"; break; case 0b011: reg_name = "bp + di"; break; case 0b100: reg_name = "si"; break; case 0b101: reg_name = "di"; break; case 0b110: reg_name = "bp"; break; case 0b111: reg_name = "bx"; break; default: perror("Invalid R/M value"); exit(1); } return reg_name; } static char* reg_name(Register reg, char wide) { return wide == 1 ? reg.fullname : reg.bytename; } static u8 mask_and_shift(u8 value, u8 mask) { value &= mask; int count = 0; while ((mask & 0x1) == 0 && count < 8) { value >>= 1; mask >>= 1; count++; } return value; } ParsedInstruction parse_instruction_ids(u8* buf) { u8 inst = buf[0]; InstFormat fmt = {0}; bool matched_inst = false; // TODO: This might be a good time to learn how to make a hashtable in C for (u16 i = 0; i < sizeof(inst_funcs) / (sizeof(inst_parser_f)*6*4); i++) for (int j = 0; j < 6; j++) for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++) { printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]); // Apply inst_func_t } // for (int j = 0; j < 4 || ;) return (ParsedInstruction){0}; if (!matched_inst) return (ParsedInstruction){.bytes_read = 0}; u8_opt d_opt = none_u8(); u8_opt s_opt = none_u8(); u8_opt w_opt = none_u8(); u8_opt reg_opt = none_u8(); u8_opt mod_opt = none_u8(); u8_opt rm_opt = none_u8(); u16_opt data_opt = none_u16(); u16_opt displacement_opt = none_u16(); u8 is_data_addr = false; u16 bytes_read = 1; bytes_read += fmt.has_operands ? 1 : 0; if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); if (fmt.parse_reg.tag == P_REG_MASK) { u8 reg = fmt.has_operands ? buf[1] : buf[0]; reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); } else if (fmt.parse_reg.tag == P_REG_FIXED) { reg_opt = some_u8(fmt.parse_reg.fixed); is_data_addr = true; } if (fmt.has_data) { u8 idx = 1; if (fmt.has_operands) idx += 1; // This is a trick because mod == 1 and mod == 2 will displace one and two bytes // respectively but mod == 3 wraps to 0 since it doesn't displace if (fmt.has_displacement) idx += mod_opt.value % 3; u16 data; if (fmt.has_s && s_opt.value == 1) { data = (sbyte)buf[idx]; bytes_read += 1; } else { data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; bytes_read += w_opt.value == 0 ? 1 : 2; } data_opt = some_u16(data); } if (fmt.has_displacement && mod_opt.value % 3 > 0) { u16 disp = mod_opt.value == MODE_MEM_DIS_16 ? (i16)buf[3] << 8 | buf[2] : (sbyte)buf[2]; displacement_opt = some_u16(disp); bytes_read += mod_opt.value % 3; } else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) { displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); bytes_read += 2; } return (ParsedInstruction) { .id = fmt.id, .name = fmt.name, .data = data_opt, .displacement = displacement_opt, .w = w_opt, .d = d_opt, .s = s_opt, .mod = mod_opt, .reg = reg_opt, .rm = rm_opt, .is_data_addr = is_data_addr, .bytes_read = bytes_read, }; } ParsedInstruction parse_instruction(u8* buf) { u8 inst = buf[0]; InstFormat fmt = {0}; bool matched_inst = false; // TODO: This might be a good time to learn how to make a hashtable in C for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++) { if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc) { fmt = inst_formats[i]; matched_inst = true; break; } } if (!matched_inst) return (ParsedInstruction){.bytes_read = 0}; u8_opt d_opt = none_u8(); u8_opt s_opt = none_u8(); u8_opt w_opt = none_u8(); u8_opt reg_opt = none_u8(); u8_opt mod_opt = none_u8(); u8_opt rm_opt = none_u8(); u16_opt data_opt = none_u16(); u16_opt displacement_opt = none_u16(); u8 is_data_addr = false; u16 bytes_read = 1; bytes_read += fmt.has_operands ? 1 : 0; if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); if (fmt.parse_reg.tag == P_REG_MASK) { u8 reg = fmt.has_operands ? buf[1] : buf[0]; reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); } else if (fmt.parse_reg.tag == P_REG_FIXED) { reg_opt = some_u8(fmt.parse_reg.fixed); is_data_addr = true; } if (fmt.has_data) { u8 idx = 1; if (fmt.has_operands) idx += 1; // This is a trick because mod == 1 and mod == 2 will displace one and two bytes // respectively but mod == 3 wraps to 0 since it doesn't displace if (fmt.has_displacement) idx += mod_opt.value % 3; u16 data; if (fmt.has_s && s_opt.value == 1) { data = (sbyte)buf[idx]; bytes_read += 1; } else { data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; bytes_read += w_opt.value == 0 ? 1 : 2; } data_opt = some_u16(data); } if (fmt.has_displacement && mod_opt.value % 3 > 0) { u16 disp = mod_opt.value == MODE_MEM_DIS_16 ? (i16)buf[3] << 8 | buf[2] : (sbyte)buf[2]; displacement_opt = some_u16(disp); bytes_read += mod_opt.value % 3; } else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) { displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); bytes_read += 2; } return (ParsedInstruction) { .id = fmt.id, .name = fmt.name, .data = data_opt, .displacement = displacement_opt, .w = w_opt, .d = d_opt, .s = s_opt, .mod = mod_opt, .reg = reg_opt, .rm = rm_opt, .is_data_addr = is_data_addr, .bytes_read = bytes_read, }; } Instruction decode_instruction(ParsedInstruction inst) { Operand opr1 , opr2 = {0}; i16 payload = 0; IF_LET_SOME(u8, mod, inst.mod) { IF_LET_SOME(u8, reg, inst.reg) { opr1.tag = OPR_T_REGISTER; opr1.reg.value = registers[(size_t)reg]; opr1.reg.wide = inst.w.value; } else { opr1.tag = OPR_T_IMMEDIATE; opr1.imm.value = inst.data.value; // TODO: This is dumb, we shouldn't do it this way if (inst.s.value == 1) opr1.imm.direct = 0; else opr1.imm.direct = inst.w.value + 1; } if (mod == MODE_RGSTR_MODE) { opr2.tag = OPR_T_REGISTER; opr2.reg.value = registers[(size_t)inst.rm.value]; opr2.reg.wide = inst.w.value; } else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6) { opr2.tag = OPR_T_DIRADDR; opr2.dir_addr.value = inst.displacement.value; } else { opr2.tag = OPR_T_MEMORY; opr2.mem.eac_name = get_eac_register(inst.rm.value); opr2.mem.mode = mod; opr2.mem.displacement = (i16)inst.displacement.value; } } else { IF_LET_SOME(u16, data, inst.data) { if (inst.is_data_addr) { opr1.tag = OPR_T_DIRADDR; opr1.dir_addr.value = (i16)data; } else { opr1.tag = OPR_T_IMMEDIATE; opr1.imm.value = (i16)data; opr1.imm.direct = 0; } } IF_LET_SOME(u8, reg, inst.reg) { opr2.tag = OPR_T_REGISTER; opr2.reg.value = registers[(size_t)reg]; opr2.reg.wide = inst.w.value; } } if (inst.d.tag == SOME && inst.d.value == 1) { Operand temp = opr1; opr1 = opr2; opr2 = temp; } return (Instruction) { .id = inst.id, .data = payload, .operation = inst.name, .src_opr = opr1, .dst_opr = opr2, }; } void get_operand_string(char* str_buf, Operand oprnd) { if (oprnd.tag == OPR_T_REGISTER) { strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide)); } else if (oprnd.tag == OPR_T_MEMORY) { char disp_str[16] = {'\0'}; i16 disp = oprnd.mem.displacement; if (disp != 0) sprintf(disp_str, " %s %d", (disp > 0 ? "+" : "-"), abs(disp)); sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, disp_str); } else if (oprnd.tag == OPR_T_IMMEDIATE) { char *size = ""; if (oprnd.imm.direct > 0) size = oprnd.imm.direct == 1 ? "byte " : "word "; sprintf(str_buf, "%s%d", size, oprnd.imm.value); } else if (oprnd.tag == OPR_T_DIRADDR) { sprintf(str_buf, "[%d]", oprnd.dir_addr.value); } } void get_instr_string(char* str_buf, Instruction inst) { char src_str[32], dst_str[32]; get_operand_string(src_str, inst.src_opr); get_operand_string(dst_str, inst.dst_opr); sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str); } char *memory[65536]; // Keep this global for debugging purposes u16 inst_count = 1; int main(int argc, char** argv) { if (argc < 2) { printf("Usage: Please provide assembled instructions as input\n"); exit(0); } struct stat st; if (stat(argv[1], &st) == -1) { perror("Unable to get file size\n"); return EXIT_FAILURE; } unsigned char* buffer = malloc(st.st_size); if (!buffer) { perror("Unable to allocate memory for binary file"); return EXIT_FAILURE; } FILE *f = fopen(argv[1], "r"); if (!f) { perror("fopen\n"); free(buffer); return EXIT_FAILURE; } size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f); if (bytes_read != (size_t)st.st_size) { fprintf(stderr, "Read of binary file to memory incomplete.\n"); free(buffer); fclose(f); return EXIT_FAILURE; } fclose(f); printf("; Decoded 8086 Assembly Instructions\n\n"); printf("bits 16\n\n"); char *inst_str_buf = malloc(sizeof(char) * 256); u32 bytes_processed = 0; while (bytes_processed < bytes_read) { ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed); (void)_; ParsedInstruction parsed = parse_instruction(buffer + bytes_processed); if (parsed.bytes_read > 0) { Instruction inst = decode_instruction(parsed); get_instr_string(inst_str_buf, inst); bytes_processed += parsed.bytes_read; // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read); printf("%s", inst_str_buf); int len = strlen(inst_str_buf); for (int i = 0; i < 32 - len; i++) printf(" "); printf("; %d, %d", inst_count++, inst.id); } else { bytes_processed += 1; fprintf(stderr, "___Unrecognized Instruction___"); } // char inst = buffer[0]; // if (mov_inst(f, buffer, inst)) goto handled; // if (add_inst(f, buffer, inst)) goto handled; // handled: printf("\n"); } free(inst_str_buf); free(buffer); return 0; }