#include #include #include "lib.h" #include #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) enum InstructionType { INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000, }; enum Mode { MODE_MEM_NO_DIS = 0b00, MODE_MEM_DIS_08 = 0b01, MODE_MEM_DIS_16 = 0b10, MODE_RGSTR_MODE = 0b11, }; typedef struct Register { char code; char* fullname; char* bytename; union { struct { char low; char high; }; u16 full; } value; } Register; Register registers[8] = { {.code = 0b000, .fullname = "ax", .bytename = "al"}, {.code = 0b001, .fullname = "cx", .bytename = "cl"}, {.code = 0b010, .fullname = "dx", .bytename = "dl"}, {.code = 0b011, .fullname = "bx", .bytename = "bl"}, {.code = 0b100, .fullname = "sp", .bytename = "ah"}, {.code = 0b101, .fullname = "bp", .bytename = "ch"}, {.code = 0b110, .fullname = "si", .bytename = "dh"}, {.code = 0b111, .fullname = "di", .bytename = "bh"}, }; typedef struct Instruction { u16 id; char *name; u16_opt data; u16_opt displacement; u8_opt w; u8_opt d; u8_opt s; u8_opt mod; u8_opt reg; u8_opt rm; u8_opt SR; u8 bytes_read; } Instruction; char *memory[65536]; /// Get Effective Address Calculation Registers char* get_eac_registers(char rm) { char* reg_name; switch (rm) { case 0b000: reg_name = "bx + si"; break; case 0b001: reg_name = "bx + di"; break; case 0b010: reg_name = "bp + si"; break; case 0b011: reg_name = "bp + di"; break; case 0b100: reg_name = "si"; break; case 0b101: reg_name = "di"; break; case 0b110: reg_name = "bp"; break; case 0b111: reg_name = "bx"; break; default: perror("Invalid R/M value"); exit(1); } return reg_name; } static char* reg_name(Register reg, char wide) { return wide == 1 ? reg.fullname : reg.bytename; } static i16 get_data(unsigned char* buf, char wide) { // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0]; } #define IS_INST(value, m, inst) ((value & ~m) == (char)inst) typedef struct InstMask {char *name; u8 mask; u8 code;} InstMask; typedef struct InstFormat { u16 id; char *name; u8 inst_enc; u8 mask_inst; u8 mask_w; u64 mask_reg; bool has_operands; bool has_displacement; bool has_data; bool has_d; bool has_w; bool has_reg; bool has_mod; bool has_rm; bool has_s; bool has_SR; } InstFormat; InstFormat inst_formats[] = { //////// // MOV //////// // Register/memory to/from register {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true}, // Immediate to register/memory {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, // Immediate to register {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, .mask_reg=0x0b00000111, .has_data=true, .has_w=true}, // Memory to accumulator | Accumulator to memory using the `d` bit // even though the manual doesn't specify it {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true, .has_w=true, .has_d=true}, // Register/memory to segment register and inverse using the `d` bit {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true, .has_displacement=true, .has_mod=true, .has_rm=true}, //////// // ADD //////// // Reg/memory with register or either {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_w=true, .has_d=true, .has_reg=true, .has_mod=true, .has_rm=true}, // Immediate to register/memory {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, .has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true}, {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true}, }; Instruction parse_instruction(u8* buf) { u8 inst = buf[0]; InstFormat format; bool matched_inst = false; // TODO: This might be a good time to learn how to make a hashtable in C for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++) { if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc) { format = inst_formats[i]; matched_inst = true; break; } } if (!matched_inst) return (Instruction){.bytes_read = 0}; return (Instruction) { .id = format.id, .name = format.name, .data = none_u16(), .displacement = none_u16(), .w = none_u8(), .d = none_u8(), .s = none_u8(), .mod = none_u8(), .reg = none_u8(), .rm = none_u8(), .bytes_read = 2, }; } bool mov_inst(FILE* f, unsigned char* buf, char inst) { size_t bytes_read; // Register/memory to/from register if ((inst & ~0x3) == (char)0b10001000) { // TODO: We should add some form of error handling here bytes_read = fread(buf, sizeof(char), 1, f); char next_byte = buf[0]; char w = inst & 0b00000001; char d = (inst & 0b00000010) >> 1; char mod = (next_byte & 0b11000000) >> 6; char reg = (next_byte & 0b00111000) >> 3; char rm = (next_byte & 0b00000111); if (mod == MODE_RGSTR_MODE) { Register src_reg = d == 0 ? registers[(size_t)reg] : registers[(size_t)rm]; Register dst_reg = d == 0 ? registers[(size_t)rm] : registers[(size_t)reg]; printf("mov %s, %s ;0", reg_name(dst_reg, w), reg_name(src_reg, w)); } else { bool is_direct_addr = mod == 0 && rm == 0b110; // This is a trick because mod == 1 and mod == 2 will displace one and two bytes // respectively but mod == 3 wraps to 0 since it doesn't displace int bytes_to_read = is_direct_addr ? 2 : mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); char* eac_name = is_direct_addr ? "" : get_eac_registers(rm); char disp_buf[16] = {'\0'}; if (bytes_to_read > 0) { i16 disp = get_data(buf, bytes_to_read - 1); if (is_direct_addr) sprintf(disp_buf, "%d", abs(disp)); else sprintf(disp_buf, " %s %d", disp >= 0 ? "+" : "-", abs(disp)); } Register rgstr = registers[(size_t)reg]; if (d) printf("mov %s, [%s%s] ;1", reg_name(rgstr, w), eac_name, disp_buf); else printf("mov [%s%s], %s ;2", eac_name, disp_buf, reg_name(rgstr, w)); } } // Immediate to register/memory else if ((inst & ~0x1) == (char)0b11000110) { bytes_read = fread(buf, sizeof(char), 1, f); char w = inst & 0b00000001; char mod = (buf[0] & 0b11000000) >> 6; char rm = (buf[0] & 0b00000111); int bytes_to_read = 1; bytes_to_read += w == 0 ? 0 : 1; // Same trick from earlier, see comment bytes_to_read += mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); char *eac_name = get_eac_registers(rm); i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w); char *word_str = w == 0 ? "byte" : "word"; char disp_str[16] = {'\0'}; if (mod % 3 > 1) sprintf(disp_str, " + %d", get_data(buf, (mod % 3) - 1)); printf("mov [%s%s], %s %d ;3", eac_name, disp_str, word_str, data); } // Immediate to register else if ((inst & ~0xF) == (char)0b10110000) { char w = (inst & 0b00001000) >> 3; Register reg = registers[(size_t)inst & 0b00000111]; char bytes_to_read = w == 1 ? 2 : 1; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); printf("mov %s, %d ;4", reg_name(reg, w), get_data(buf, w)); } // Memory/accumulator to accumulator/memory else if ((inst & ~0x3) == (char)0b10100000) { // This instruction uses AX/AL register exclusively Register ax_al = registers[0]; char w = (inst & 0b00000001); // The manual doesn't refer to this as `d` but it acts similarly in that this bit // swaps the accumulator's src/dst position char d = (inst & 0b00000010) >> 1; char bytes_to_read = w == 1 ? 2 : 1; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); if (d) printf("mov [%d], %s ;5", get_data(buf, w), reg_name(ax_al, w)); else printf("mov %s, [%d] ;6", reg_name(ax_al, w), get_data(buf, w)); } // Register/memory to segment register or segment register to register/memory else if ((inst & ~0x3) == (char)0b10001100) { // Manual doesn't refer to this as `d` but swaps like in the previous instruction char d = (inst & 0b00000010) >> 1; (void)d; printf("mov regmem to segreg"); } else { return false; } return bytes_read > 0; } bool add_inst(FILE* f, unsigned char* buf, char inst) { size_t bytes_read; if ((inst & ~0x3) == (char)0b00000000) { bytes_read = fread(buf, sizeof(char), 1, f); char next_byte = buf[0]; char w = inst & 0b00000001; char d = (inst & 0b00000010) >> 1; char mod = (next_byte & 0b11000000) >> 6; char reg = (next_byte & 0b00111000) >> 3; char rm = (next_byte & 0b00000111); // Same trick from earlier, see comment int bytes_to_read = mod % 3; if (bytes_to_read > 0) bytes_read = fread(buf, sizeof(char), bytes_to_read, f); Register rgstr = registers[(size_t)reg]; (void)rm; if (mod == MODE_RGSTR_MODE) { if (d) printf("add %s, [%d] ;7", reg_name(rgstr, w), get_data(buf, w)); else printf("add [%d], %s ;8", get_data(buf, w), reg_name(rgstr, w)); } else if (mod == MODE_MEM_NO_DIS) { if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_registers(rm)); else printf("add [%s], %s ;10", get_eac_registers(rm), reg_name(rgstr, w)); } else { if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_registers(rm)); else printf("add [%s], %s ;12", get_eac_registers(rm), reg_name(rgstr, w)); } } else if ((inst & ~0x3) == (char)0b10000000) { bytes_read = fread(buf, sizeof(char), 1, f); char w = inst & 0b00000001; char mod = (buf[0] & 0b11000000) >> 6; char rm = (buf[0] & 0b00000111); int bytes_to_read = 1; bytes_to_read += w == 0 ? 1 : 2; // Same trick from earlier, see comment bytes_to_read += mod % 3; bytes_read = fread(buf, sizeof(char), bytes_to_read, f); char *eac_name = get_eac_registers(rm); i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w); char *word_str = w == 0 ? "byte" : "word"; char disp_str[16] = {'\0'}; if (mod % 3 > 1) sprintf(disp_str, " + %d", get_data(buf, (mod % 3) - 1)); printf("add [%s%s], %s %d ;13", eac_name, disp_str, word_str, data); } else { return false; } return bytes_read > 0; } int main(int argc, char** argv) { if (argc < 2) { printf("Usage: Please provide assembled instructions as input\n"); exit(0); } struct stat st; if (stat(argv[1], &st) == -1) { perror("Unable to get file size\n"); return EXIT_FAILURE; } unsigned char* buffer = malloc(st.st_size); if (!buffer) { perror("Unable to allocate memory for binary file"); return EXIT_FAILURE; } FILE *f = fopen(argv[1], "r"); if (!f) { perror("fopen\n"); free(buffer); return EXIT_FAILURE; } size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f); if (bytes_read != (size_t)st.st_size) { fprintf(stderr, "Read of binary file to memory incomplete.\n"); free(buffer); fclose(f); return EXIT_FAILURE; } fclose(f); printf("; Decoded 8086 Assembly Instructions\n\n"); printf("bits 16\n\n"); u32 bytes_processed = 0; while (bytes_processed < bytes_read) { Instruction inst = parse_instruction(buffer+bytes_processed); bytes_processed += inst.bytes_read; // char inst = buffer[0]; // if (mov_inst(f, buffer, inst)) goto handled; // if (add_inst(f, buffer, inst)) goto handled; if (inst.name != NULL) printf("%s ;%d", inst.name, inst.id); else fprintf(stderr, "___Unrecognized Instruction___"); // handled: printf("\n"); } free(buffer); }