#include #include #include "lib.h" #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) enum Instruction { INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000, }; enum Mode { MODE_MEM_NO_DIS = 0b00, MODE_MEM_DIS_08 = 0b01, MODE_MEM_DIS_16 = 0b10, MODE_RGSTR_MODE = 0b11, }; typedef struct Register { char code; char* fullname; char* bytename; union { struct { char low; char high; }; u16 full; } value; } Register; Register registers[8] = { {.code = 0b000, .fullname = "ax", .bytename = "al"}, {.code = 0b001, .fullname = "cx", .bytename = "cl"}, {.code = 0b010, .fullname = "dx", .bytename = "dl"}, {.code = 0b011, .fullname = "bx", .bytename = "bl"}, {.code = 0b100, .fullname = "sp", .bytename = "ah"}, {.code = 0b101, .fullname = "bp", .bytename = "ch"}, {.code = 0b110, .fullname = "si", .bytename = "dh"}, {.code = 0b111, .fullname = "di", .bytename = "bh"}, }; char* memory[65536]; // void inst_mov_rgmm_reg() /// Get Effective Address Calculation Registers char* get_eac_registers(char rm) { char* reg_name; switch (rm) { case 0b000: reg_name = "bx + si"; break; case 0b001: reg_name = "bx + di"; break; case 0b010: reg_name = "bp + si"; break; case 0b011: reg_name = "bp + di"; break; case 0b100: reg_name = "si"; break; case 0b101: reg_name = "di"; break; case 0b110: reg_name = "bp"; break; case 0b111: reg_name = "bx"; break; default: perror("Invalid R/M value"); exit(1); } return reg_name; } static inline char* reg_name(Register reg, char wide) { return wide == 1 ? reg.fullname : reg.bytename; } static inline i16 get_data(unsigned char* buf, char wide) { // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0]; } int main(int argc, char** argv) { if (argc < 2) { printf("Usage: Please provide assembled instructions as input\n"); exit(0); } unsigned char buf[256]; FILE *f = fopen(argv[1], "r"); if (!f) { perror("fopen\n"); return EXIT_FAILURE; } size_t bytes_read; printf("; Decoded 8086 Assembly Instructions\n\n"); printf("bits 16\n\n"); while ((bytes_read = fread(buf, sizeof(char), 1, f)) > 0) { char inst = buf[0]; // Instruction instruction = 0; // Register/memory to/from register if ((inst & ~0x3) == (char)0b10001000) { // TODO: We should add some form of error handling here bytes_read = fread(buf, sizeof(char), 1, f); char next_byte = buf[0]; char w = inst & 0b00000001; char d = (inst & 0b00000010) >> 1; char mod = (next_byte & 0b11000000) >> 6; char reg = (next_byte & 0b00111000) >> 3; char rm = (next_byte & 0b00000111); size_t reg_idx = reg; size_t rm_idx = rm; if (mod == MODE_RGSTR_MODE) { Register src_reg = d == 0 ? registers[reg_idx] : registers[rm_idx]; Register dst_reg = d == 0 ? registers[rm_idx] : registers[reg_idx]; printf("mov %s, %s ;0", reg_name(dst_reg, w), reg_name(src_reg, w)); } else { // This is a trick because mod == 1 and mod == 2 will displace one and two bytes // respectively but mod == 3 wraps to 0 since it doesn't displace bool is_direct_addr = mod == 0 && rm == 0b110; int bytes_to_read = is_direct_addr ? 2 : mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); char* eac_name = is_direct_addr ? "" : get_eac_registers(rm); char disp_buf[16] = {'\0'}; if (bytes_to_read > 0) { i16 disp = get_data(buf, bytes_to_read - 1); if (is_direct_addr) sprintf(disp_buf, "%d", abs(disp)); else sprintf(disp_buf, " %s %d", disp >= 0 ? "+" : "-", abs(disp)); } Register rgstr = registers[reg_idx]; if (d) printf("mov %s, [%s%s] ;1", reg_name(rgstr, w), eac_name, disp_buf); else printf("mov [%s%s], %s ;2", eac_name, disp_buf, reg_name(rgstr, w)); } } // Immediate to register/memory else if ((inst & ~0x1) == (char)0b11000110) { char w = inst & 0b00000001; bytes_read = fread(buf, sizeof(char), 1, f); char mod = (buf[0] & 0b11000000) >> 6; char rm = (buf[0] & 0b00000111); int bytes_to_read = 1; bytes_to_read += w == 0 ? 0 : 1; // Same trick from earlier, see comment bytes_to_read += mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); char *eac_name = get_eac_registers(rm); char *data_ptr = (char*)buf + (char)bytes_to_read - (w == 0 ? 1 : 2); i16 data = w == 0 ? data_ptr[0] : (i16)data_ptr[1] << 8 | data_ptr[0]; char *word = w == 0 ? "byte" : "word"; char disp_str[16]; if (mod > 0 && mod < 3) { if (mod == 1) { sprintf(disp_str, " + %d", buf[1]); } else { i16 disp = (i16)buf[1] << 8 | buf[0]; sprintf(disp_str, " + %d", disp); } } else { disp_str[0] = '\0'; } printf("mov [%s%s], %s %d ;w %d mod %d rm %d", eac_name, disp_str, word, data, w, mod, rm); } // Immediate to register else if ((inst & ~0xF) == (char)0b10110000) { char w = (inst & 0b00001000) >> 3; Register reg = registers[(size_t)inst & 0b00000111]; char bytes_to_read = w == 1 ? 2 : 1; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); printf("mov %s, %hd ; Immediate to register", reg_name(reg, w), get_data(buf, w)); } // Memory to accumulator else if ((inst & ~0x1) == (char)0b10100000) { printf("mov mem to acc"); } // Accumulator to memory else if ((inst & ~0x1) == (char)0b10100010) { printf("mov acc to mem"); } // Register/memory to segment register else if (inst == (char)0b10001110) { printf("mov regmem to segreg"); } // Segment register to register/memory else if (inst == (char)0b10001100) { printf("mov segreg to regmem"); } else { fprintf(stderr, "Unrecognized Instruction"); } printf("\n"); } }