diff --git a/.gitignore b/.gitignore index 46c75fa..2bef33f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /8086_family_Users_Manual_1_.pdf /decoder8086 /performance-aware +/sim8086 diff --git a/LICENSE_CE b/asm_files/LICENSE_CE similarity index 100% rename from LICENSE_CE rename to asm_files/LICENSE_CE diff --git a/decode.c b/decode.c deleted file mode 100644 index 469573c..0000000 --- a/decode.c +++ /dev/null @@ -1,431 +0,0 @@ -#include -#include -#include -#include -#include -#include "lib.h" -#include "decode.h" - -/// Get Effective Address Calculation Registers -char* get_eac_register(char rm) -{ - char* reg_name; - switch (rm) - { - case 0b000: reg_name = "bx + si"; break; - case 0b001: reg_name = "bx + di"; break; - case 0b010: reg_name = "bp + si"; break; - case 0b011: reg_name = "bp + di"; break; - case 0b100: reg_name = "si"; break; - case 0b101: reg_name = "di"; break; - case 0b110: reg_name = "bp"; break; - case 0b111: reg_name = "bx"; break; - default: perror("Invalid R/M value"); exit(1); - } - return reg_name; -} - -static char* reg_name(Register reg, char wide) -{ - return wide == 1 ? reg.fullname : reg.bytename; -} - -static u8 mask_and_shift(u8 value, u8 mask) -{ - value &= mask; - int count = 0; - while ((mask & 0x1) == 0 && count < 8) - { - value >>= 1; - mask >>= 1; - count++; - } - return value; -} - -ParsedInstruction parse_instruction_ids(u8* buf) -{ - u8 inst = buf[0]; - InstFormat fmt = {0}; - bool matched_inst = false; - // TODO: This might be a good time to learn how to make a hashtable in C - for (u16 i = 0; i < sizeof(inst_funcs) / (sizeof(inst_parser_f)*6*4); i++) - for (int j = 0; j < 6; j++) - for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++) - { - printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]); - // Apply inst_func_t - } - // for (int j = 0; j < 4 || ;) - return (ParsedInstruction){0}; - if (!matched_inst) - return (ParsedInstruction){.bytes_read = 0}; - u8_opt d_opt = none_u8(); - u8_opt s_opt = none_u8(); - u8_opt w_opt = none_u8(); - u8_opt reg_opt = none_u8(); - u8_opt mod_opt = none_u8(); - u8_opt rm_opt = none_u8(); - u16_opt data_opt = none_u16(); - u16_opt displacement_opt = none_u16(); - u8 is_data_addr = false; - - u16 bytes_read = 1; - bytes_read += fmt.has_operands ? 1 : 0; - - if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); - if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); - if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); - if (fmt.parse_reg.tag == P_REG_MASK) - { - u8 reg = fmt.has_operands ? buf[1] : buf[0]; - reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); - } - else if (fmt.parse_reg.tag == P_REG_FIXED) - { - reg_opt = some_u8(fmt.parse_reg.fixed); - is_data_addr = true; - } - if (fmt.has_data) - { - u8 idx = 1; - if (fmt.has_operands) idx += 1; - // This is a trick because mod == 1 and mod == 2 will displace one and two bytes - // respectively but mod == 3 wraps to 0 since it doesn't displace - if (fmt.has_displacement) idx += mod_opt.value % 3; - u16 data; - if (fmt.has_s && s_opt.value == 1) - { - data = (sbyte)buf[idx]; - bytes_read += 1; - } - else - { - data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; - bytes_read += w_opt.value == 0 ? 1 : 2; - } - data_opt = some_u16(data); - } - if (fmt.has_displacement && mod_opt.value % 3 > 0) - { - u16 disp = mod_opt.value == MODE_MEM_DIS_16 - ? (i16)buf[3] << 8 | buf[2] - : (sbyte)buf[2]; - displacement_opt = some_u16(disp); - bytes_read += mod_opt.value % 3; - } - else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) - { - displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); - bytes_read += 2; - } - - return (ParsedInstruction) { - .id = fmt.id, - .name = fmt.name, - .data = data_opt, - .displacement = displacement_opt, - .w = w_opt, - .d = d_opt, - .s = s_opt, - .mod = mod_opt, - .reg = reg_opt, - .rm = rm_opt, - .is_data_addr = is_data_addr, - .bytes_read = bytes_read, - }; -} -ParsedInstruction parse_instruction(u8* buf) -{ - u8 inst = buf[0]; - InstFormat fmt = {0}; - bool matched_inst = false; - // TODO: This might be a good time to learn how to make a hashtable in C - for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++) - { - if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc) - { - fmt = inst_formats[i]; - matched_inst = true; - break; - } - } - if (!matched_inst) - return (ParsedInstruction){.bytes_read = 0}; - u8_opt d_opt = none_u8(); - u8_opt s_opt = none_u8(); - u8_opt w_opt = none_u8(); - u8_opt reg_opt = none_u8(); - u8_opt mod_opt = none_u8(); - u8_opt rm_opt = none_u8(); - u16_opt data_opt = none_u16(); - u16_opt displacement_opt = none_u16(); - u8 is_data_addr = false; - - u16 bytes_read = 1; - bytes_read += fmt.has_operands ? 1 : 0; - - if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); - if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); - if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); - if (fmt.parse_reg.tag == P_REG_MASK) - { - u8 reg = fmt.has_operands ? buf[1] : buf[0]; - reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); - } - else if (fmt.parse_reg.tag == P_REG_FIXED) - { - reg_opt = some_u8(fmt.parse_reg.fixed); - is_data_addr = true; - } - if (fmt.has_data) - { - u8 idx = 1; - if (fmt.has_operands) idx += 1; - // This is a trick because mod == 1 and mod == 2 will displace one and two bytes - // respectively but mod == 3 wraps to 0 since it doesn't displace - if (fmt.has_displacement) idx += mod_opt.value % 3; - u16 data; - if (fmt.has_s && s_opt.value == 1) - { - data = (sbyte)buf[idx]; - bytes_read += 1; - } - else - { - data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; - bytes_read += w_opt.value == 0 ? 1 : 2; - } - data_opt = some_u16(data); - } - if (fmt.has_displacement && mod_opt.value % 3 > 0) - { - u16 disp = mod_opt.value == MODE_MEM_DIS_16 - ? (i16)buf[3] << 8 | buf[2] - : (sbyte)buf[2]; - displacement_opt = some_u16(disp); - bytes_read += mod_opt.value % 3; - } - else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) - { - displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); - bytes_read += 2; - } - - return (ParsedInstruction) { - .id = fmt.id, - .name = fmt.name, - .data = data_opt, - .displacement = displacement_opt, - .w = w_opt, - .d = d_opt, - .s = s_opt, - .mod = mod_opt, - .reg = reg_opt, - .rm = rm_opt, - .is_data_addr = is_data_addr, - .bytes_read = bytes_read, - }; -} - -Instruction decode_instruction(ParsedInstruction inst) -{ - Operand opr1 , opr2 = {0}; - i16 payload = 0; - - IF_LET_SOME(u8, mod, inst.mod) - { - IF_LET_SOME(u8, reg, inst.reg) - { - opr1.tag = OPR_T_REGISTER; - opr1.reg.value = registers[(size_t)reg]; - opr1.reg.wide = inst.w.value; - } - else - { - opr1.tag = OPR_T_IMMEDIATE; - opr1.imm.value = inst.data.value; - // TODO: This is dumb, we shouldn't do it this way - if (inst.s.value == 1) opr1.imm.direct = 0; - else opr1.imm.direct = inst.w.value + 1; - } - if (mod == MODE_RGSTR_MODE) - { - opr2.tag = OPR_T_REGISTER; - opr2.reg.value = registers[(size_t)inst.rm.value]; - opr2.reg.wide = inst.w.value; - } - else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6) - { - opr2.tag = OPR_T_DIRADDR; - opr2.dir_addr.value = inst.displacement.value; - } - else - { - opr2.tag = OPR_T_MEMORY; - opr2.mem.eac_name = get_eac_register(inst.rm.value); - opr2.mem.mode = mod; - opr2.mem.displacement = (i16)inst.displacement.value; - } - } - else - { - IF_LET_SOME(u16, data, inst.data) - { - if (inst.is_data_addr) - { - opr1.tag = OPR_T_DIRADDR; - opr1.dir_addr.value = (i16)data; - } - else - { - opr1.tag = OPR_T_IMMEDIATE; - opr1.imm.value = (i16)data; - opr1.imm.direct = 0; - } - } - IF_LET_SOME(u8, reg, inst.reg) - { - opr2.tag = OPR_T_REGISTER; - opr2.reg.value = registers[(size_t)reg]; - opr2.reg.wide = inst.w.value; - } - } - if (inst.d.tag == SOME && inst.d.value == 1) - { - Operand temp = opr1; - opr1 = opr2; - opr2 = temp; - } - return (Instruction) { - .id = inst.id, - .data = payload, - .operation = inst.name, - .src_opr = opr1, - .dst_opr = opr2, - }; -} - -void get_operand_string(char* str_buf, Operand oprnd) -{ - if (oprnd.tag == OPR_T_REGISTER) - { - strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide)); - } - else if (oprnd.tag == OPR_T_MEMORY) - { - char disp_str[16] = {'\0'}; - i16 disp = oprnd.mem.displacement; - if (disp != 0) - sprintf(disp_str, " %s %d", (disp > 0 ? "+" : "-"), abs(disp)); - sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, disp_str); - } - else if (oprnd.tag == OPR_T_IMMEDIATE) - { - char *size = ""; - if (oprnd.imm.direct > 0) - size = oprnd.imm.direct == 1 ? "byte " : "word "; - sprintf(str_buf, "%s%d", size, oprnd.imm.value); - } - else if (oprnd.tag == OPR_T_DIRADDR) - { - sprintf(str_buf, "[%d]", oprnd.dir_addr.value); - } -} - -void get_instr_string(char* str_buf, Instruction inst) -{ - char src_str[32], dst_str[32]; - get_operand_string(src_str, inst.src_opr); - get_operand_string(dst_str, inst.dst_opr); - sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str); -} - -char *memory[65536]; -// Keep this global for debugging purposes -u16 inst_count = 1; - -int main(int argc, char** argv) -{ - if (argc < 2) - { - printf("Usage: Please provide assembled instructions as input\n"); - exit(0); - } - - struct stat st; - if (stat(argv[1], &st) == -1) - { - perror("Unable to get file size\n"); - return EXIT_FAILURE; - } - - unsigned char* buffer = malloc(st.st_size); - if (!buffer) - { - perror("Unable to allocate memory for binary file"); - return EXIT_FAILURE; - } - - FILE *f = fopen(argv[1], "r"); - if (!f) - { - perror("fopen\n"); - free(buffer); - return EXIT_FAILURE; - } - - size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f); - if (bytes_read != (size_t)st.st_size) - { - fprintf(stderr, "Read of binary file to memory incomplete.\n"); - free(buffer); - fclose(f); - return EXIT_FAILURE; - } - - fclose(f); - - printf("; Decoded 8086 Assembly Instructions\n\n"); - printf("bits 16\n\n"); - - char *inst_str_buf = malloc(sizeof(char) * 256); - u32 bytes_processed = 0; - while (bytes_processed < bytes_read) - { - ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed); - (void)_; - ParsedInstruction parsed = parse_instruction(buffer + bytes_processed); - - if (parsed.bytes_read > 0) - { - Instruction inst = decode_instruction(parsed); - get_instr_string(inst_str_buf, inst); - bytes_processed += parsed.bytes_read; - // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read); - printf("%s", inst_str_buf); - int len = strlen(inst_str_buf); - for (int i = 0; i < 32 - len; i++) - printf(" "); - printf("; %d, %d", inst_count++, inst.id); - } - else - { - bytes_processed += 1; - fprintf(stderr, "___Unrecognized Instruction___"); - } - // char inst = buffer[0]; - // if (mov_inst(f, buffer, inst)) goto handled; - // if (add_inst(f, buffer, inst)) goto handled; - // handled: - printf("\n"); - } - free(inst_str_buf); - free(buffer); - - return 0; -} diff --git a/decode.h b/decode.h deleted file mode 100644 index a21b2d2..0000000 --- a/decode.h +++ /dev/null @@ -1,223 +0,0 @@ -#include "lib.h" - -enum Mode -{ - MODE_MEM_NO_DIS = 0b00, - MODE_MEM_DIS_08 = 0b01, - MODE_MEM_DIS_16 = 0b10, - MODE_RGSTR_MODE = 0b11, -}; - -typedef struct Register -{ - char* fullname; - char* bytename; - union { - struct { - char low; - char high; - }; - u16 full; - } value; - u8 code; -} Register; - -Register registers[8] = { - {.code = 0b000, .fullname = "ax", .bytename = "al"}, - {.code = 0b001, .fullname = "cx", .bytename = "cl"}, - {.code = 0b010, .fullname = "dx", .bytename = "dl"}, - {.code = 0b011, .fullname = "bx", .bytename = "bl"}, - {.code = 0b100, .fullname = "sp", .bytename = "ah"}, - {.code = 0b101, .fullname = "bp", .bytename = "ch"}, - {.code = 0b110, .fullname = "si", .bytename = "dh"}, - {.code = 0b111, .fullname = "di", .bytename = "bh"}, -}; - -enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE, OPR_T_DIRADDR}; - -typedef struct Operand -{ - enum OperandType tag; - union { - struct Mem { - char *eac_name; - i16 displacement; - u8 mode; - } mem; - struct Reg { - Register value; - bool wide; - } reg; - struct Imm { - i16 value; - u8 direct; - } imm; - struct DirAddr { - i16 value; - } dir_addr; - }; -} Operand; - -enum ParseRegType { P_REG_NONE, P_REG_MASK, P_REG_FIXED }; -typedef struct ParseReg -{ - enum ParseRegType tag; - union { - u8 none; - u8 mask; - u8 fixed; - }; -} ParseReg; - -typedef struct InstFormat -{ - u16 id; - char *name; - ParseReg parse_reg; - u8 inst_enc; - u8 mask_inst; - u8 mask_w; - bool has_operands; - bool has_displacement; - bool has_data; - bool has_d; - bool has_w; - bool has_mod; - bool has_rm; - bool has_s; - bool has_SR; -} InstFormat; - -typedef struct ParsedInstruction -{ - u16 id; - char *name; - u16_opt data; - u16_opt displacement; - u8_opt w; - u8_opt d; - u8_opt s; - u8_opt mod; - u8_opt reg; - u8_opt rm; - u8_opt SR; - u8 is_data_addr; - u8 bytes_read; -} ParsedInstruction; - -typedef struct Instruction -{ - Operand src_opr; - Operand dst_opr; - i16 data; - char *operation; - u16 id; -} Instruction; - -enum InstructionIdentifier -{ - _PREFIX_2, - _PREFIX_3, - _PREFIX_6, - _NAME, - _D, - _W, - _S, - _MOD, - _REGISTER, - _ACC, - _RM, - _DISP_LO, - _DISP_HI, - _DATA_W0, - _DATA_W1, -} InstructionIdentifier; - -typedef struct ParsedInst -{ - u64 progress; - u8 something; -} ParsedInst; - -typedef ParsedInst (*inst_parser_f)(ParsedInst); - -ParsedInst pre_2(ParsedInst pi) {return pi;} -ParsedInst pre_3(ParsedInst pi) {return pi;} -ParsedInst pre_6(ParsedInst pi) {return pi;} -ParsedInst name(ParsedInst pi) {return pi;} -ParsedInst reg(ParsedInst pi) {return pi;} -ParsedInst w(ParsedInst pi) {return pi;} -ParsedInst d(ParsedInst pi) {return pi;} -ParsedInst s(ParsedInst pi) {return pi;} - -ParsedInst mod(ParsedInst pi) {return pi;} -ParsedInst inst(ParsedInst pi) {return pi;} -ParsedInst rm(ParsedInst pi) {return pi;} - -ParsedInst disp_lo(ParsedInst pi) {return pi;} -ParsedInst disp_hi(ParsedInst pi) {return pi;} - -ParsedInst data_w0(ParsedInst pi) {return pi;} -ParsedInst data_w1(ParsedInst pi) {return pi;} - -inst_parser_f inst_funcs[][6][4] = -{ - {{pre_2, name, d, w}, {mod, reg, rm}, {disp_lo}, {disp_hi}}, - {{pre_6, s, w}, {mod, name, rm}, {disp_lo}, {disp_hi}, {data_w0}, {data_w1}}, - {{pre_6, w}, {data_w0}, {data_w1}}, -}; - -enum InstructionIdentifier inst_ids[][6][4] = -{ - {{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}, - {{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}, - {{_PREFIX_6, _W}, {_DATA_W0}, {_DATA_W1}}, -}; - -typedef struct InstructionParser -{ - enum InstructionIdentifier inst_ids[6][4]; -} InstructionParser; - -// InstructionParser inst_formats[] = -// { -// {{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}}, -// {{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}}, -// }; - -InstFormat inst_formats[] = -{ - //////// - // MOV - //////// - // Register/memory to/from register - {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1, - .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true, - .has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} }, - // Immediate to register/memory - {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true, - .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, - // Immediate to register - {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, - .parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true}, - // Memory to accumulator | Accumulator to memory using the `d` bit - // even though the manual doesn't specify it - {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true, - .has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, - // Register/memory to segment register and inverse using the `d` bit - {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true, - .has_displacement=true, .has_mod=true, .has_rm=true}, - //////// - // ADD - //////// - // Reg/memory with register or either - {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true, - .mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true, - .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}}, - // Immediate to register/memory - {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, - .has_s=true, .has_operands=true, .has_displacement=true, - .has_data=true, .has_mod=true, .has_rm=true}, - {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, - .has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, -}; diff --git a/decoder8086.odin b/decoder8086.odin deleted file mode 100644 index 78ecdbe..0000000 --- a/decoder8086.odin +++ /dev/null @@ -1,611 +0,0 @@ -package decoder_8086 - -import "core:os" -import "core:fmt" -import "core:math" -import "core:strings" - -Register :: struct { - fullname: string, - bytename: string, - value: struct #raw_union { - using _: struct { - low, high: byte, - }, - full: u16, - }, - code: u8, -} - -WordSize :: enum { - None, - LastBit, - FourthBit, - Always8, - Always16, -} - -None :: struct {} - -Disp8 :: i8 -Disp16 :: i16 -Displacement :: union { - None, - Disp8, - Disp16 -} - -RegisterId :: distinct u8 -Immediate8 :: distinct i8 -Immediate16 :: distinct i16 -ImmediateU8 :: distinct u8 -MemoryAddr :: struct { - addr_id: u8, - displacement: Displacement, -} -DirectAddress :: distinct i16 -SegmentRegister :: distinct i8 -Jump :: distinct i8 -VariablePort :: struct {} -ShiftRotate :: distinct bool -Repeat :: string -Intersegment :: struct { - ip: i16, - cs: i16, -} -DirectWithinSegment :: distinct u16 - -Operand :: union { - None, - RegisterId, - Immediate8, - ImmediateU8, - Immediate16, - MemoryAddr, - DirectAddress, - SegmentRegister, - Jump, - VariablePort, - ShiftRotate, - Repeat, - DirectWithinSegment, - Intersegment, -} - -OperandInfo :: enum { - None, - Register, - SegmentRegister, - RegisterMemory, - Immediate, - ImmediateUnsigned, - Accumulator, - DirectAddress, - Jump, - VariablePort, - ShiftRotate, - Repeat, - DirectWithinSegment, - Intersegment, -} - -RegisterEncodingBits :: enum { - None, - FirstByteLast3, - SecondByteMiddle3, - SecondByteLast3, - FirstByteMiddle3, -} - -InstructionInfo :: struct { - mask: u8, - encoding: u8, - opname: OpName, - desc: string, - src: OperandInfo, - dst: OperandInfo, - word_size: WordSize, - reg_info: RegisterEncodingBits, - has_flip: bool, - has_sign_extension: bool, - check_second_encoding: bool, - consume_extra_bytes: int, - shift_rotate_flag: bool, -} - -RIGHT_ALIGN_AMOUNT := 35 - -registers := [8]Register { - {fullname = "ax", bytename = "al", code = 0b000}, - {fullname = "cx", bytename = "cl", code = 0b001}, - {fullname = "dx", bytename = "dl", code = 0b010}, - {fullname = "bx", bytename = "bl", code = 0b011}, - {fullname = "sp", bytename = "ah", code = 0b100}, - {fullname = "bp", bytename = "ch", code = 0b101}, - {fullname = "si", bytename = "dh", code = 0b110}, - {fullname = "di", bytename = "bh", code = 0b111}, -} - -segment_registers := [4]Register { - {fullname = "es", code = 0b000}, - {fullname = "cs", code = 0b001}, - {fullname = "ss", code = 0b010}, - {fullname = "ds", code = 0b011}, -} - -variable_port := registers[2] - -total_bytes_processed := 0 - -instruction_builder := strings.builder_make() - -get_i16 :: proc(data: []u8) -> i16 { - return (i16)(data[1]) << 8 | (i16)(data[0]) -} - -operand_is :: proc($T: typeid, opr: Operand) -> bool { - _, ok := opr.(T) - return ok -} - -calculate_effective_address :: proc(r_m: u8) -> string { - val: string - switch r_m { - case 0b000: - val = "bx + si" - case 0b001: - val = "bx + di" - case 0b010: - val = "bp + si" - case 0b011: - val = "bp + di" - case 0b100: - val = "si" - case 0b101: - val = "di" - case 0b110: - val = "bp" - case 0b111: - val = "bx" - } - return val -} - -get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string { - disp: string - switch value in memoryAddr.displacement { - case None: - disp = "" - case Disp8: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - case Disp16: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - } - seg_string: string - if segreg, ok := has_segment.?; ok { - seg_string = fmt.aprintf("%s:", segreg.fullname) - } - text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp) - return text -} - -parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) { - mod := (data[0] & 0b11000000) >> 6 - disp: Displacement = None{} - amount: int - switch mod { - case 1: - disp = (i8)(data[1]) - amount = 1 - case 2: - disp = get_i16(data[1:]) - amount = 2 - } - return disp, amount -} - -get_displacement_string :: proc(displacement: Displacement) -> string { - disp := "" - #partial switch value in displacement { - case i8: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - case i16: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - } - return disp -} - -get_repeat_op :: proc(data: u8) -> Repeat { - bits := (data & 0b1110) >> 1 - w := (data & 0b1) == 1 ? "w" : "b" - rep: string - switch bits { - case 0b010: rep = "movs" - case 0b011: rep = "cmps" - case 0b101: rep = "stos" - case 0b110: rep = "lods" - case 0b111: rep = "scas" - } - return Repeat(fmt.aprintf("%s%s", rep, w)) -} - -try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { - for inst in instructions { - if inst.encoding == (b & inst.mask) { - return inst, true - } - } - return InstructionInfo{}, false -} - -get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) { - name: string - interseg: bool - if opname == .TBD2 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "inc" - case 0b001: name = "dec" - case 0b010: name = "call" - case 0b011: name = "call"; interseg = true - case 0b100: name = "jmp" - case 0b101: name = "jmp"; interseg = true - case 0b110: name = "push" - } - } else if opname == .TBD5 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "test" - case 0b001: name = "dec" - case 0b010: name = "not" - case 0b011: name = "neg" - case 0b100: name = "mul" - case 0b101: name = "imul" - case 0b110: name = "div" - case 0b111: name = "idiv" - } - } else if opname == .TBD6 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "rol" - case 0b001: name = "ror" - case 0b010: name = "rcl" - case 0b011: name = "rcr" - case 0b100: name = "shl" - case 0b101: name = "shr" - case 0b111: name = "sar" - } - } else { - bits: u8 - if opname == .TBD1 || opname == .TBD3 { - bits = data[0] & 0b00111000 >> 3 - } else { - bits = data[1] & 0b00111000 >> 3 - } - switch bits { - case 0b000: name = "add" - case 0b001: name = "or" - case 0b010: name = "adc" - case 0b011: name = "sbb" - case 0b100: name = "and" - case 0b101: name = "sub" - case 0b110: name = "xor" - case 0b111: name = "cmp" - } - } - return name, interseg -} - -parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand { - operand: Operand = None{} - switch opinfo { - case .None: - case .Register: - // rm: u8 = data[1] & 0b111 - // dst_opr = (RegisterId)(registers[rm].code) - reg: u8 - // Read the RegisterEncodingBits - switch inst.reg_info { - case .None: - // panic("Register is required but the encoded location is not provided") - case .FirstByteLast3: - reg = data[0] & 0b111 - case .FirstByteMiddle3: - reg = (data[0] >> 3) & 0b111 - case .SecondByteMiddle3: - reg = (data[1] >> 3) & 0b111 - case .SecondByteLast3: - reg = data[1] & 0b111 - } - operand = (RegisterId)(registers[reg].code) - case .SegmentRegister: - reg: u8 - switch inst.reg_info { - case .None: - // panic("Register is required but the encoded location is not provided") - case .FirstByteLast3: - reg = data[0] & 0b111 - case .FirstByteMiddle3: - reg = (data[0] >> 3) & 0b111 - case .SecondByteMiddle3: - reg = (data[1] >> 3) & 0b111 - case .SecondByteLast3: - reg = data[1] & 0b111 - } - operand = (SegmentRegister)(segment_registers[reg].code) - case .RegisterMemory: - mod := data[1] >> 6 - rm := data[1] & 0b111 - processed^ += 1 - op: Operand - if mod == 0 { - if rm == 0b110 { - op = (DirectAddress)(get_i16(data[2:])) - processed^ += 2 - } else { - op = MemoryAddr{ addr_id = rm , displacement = None{} } - } - } else if mod == 1 { - op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) } - processed^ += 1 - } else if mod == 2 { - op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) } - processed^ += 2 - } else if mod == 3 { - op = (RegisterId)(registers[rm].code) - } - operand = op - case .Immediate: - data_idx := processed^ - word_signed := word - if inst.has_sign_extension { - word_signed &&= data[0] & 0b0000_0010 == 0 - } - operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) - processed^ += word_signed ? 2 : 1 - case .ImmediateUnsigned: - operand = (ImmediateU8)(data[processed^]) - processed^ += 1 - case .Accumulator: - operand = (RegisterId)(registers[0].code) - case .DirectAddress: - operand = (DirectAddress)(get_i16(data[1:])) - processed^ += 2 - case .Jump: - processed^ += 1 - // NOTE: In order to mimic the label offset, you have to take the value you got and add two - operand = (Jump)((i8)(data[1]) + 2) - case .VariablePort: - operand = VariablePort{} - case .ShiftRotate: - v_flag := data[0] & 0b10 != 0 - operand = (ShiftRotate)(v_flag) - case .Repeat: - operand = get_repeat_op(data[1]) - processed^ += 1 - case .DirectWithinSegment: - value := (int)(get_i16(data[1:])) + total_bytes_processed + 3 - operand = (DirectWithinSegment)(value) - processed^ += 2 - case .Intersegment: - operand = Intersegment { - ip = get_i16(data[1:]), - cs = get_i16(data[3:]), - } - processed^ += 4 - } - return operand -} - -get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string { - string_val: string - switch val in operand { - case None: - string_val = "" - case RegisterId: - string_val = is_word ? registers[val].fullname : registers[val].bytename - case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment: - string_val = fmt.aprintf("%d", val) - case MemoryAddr: - string_val = get_memory_string(val, has_segment) - case DirectAddress: - seg_string: string - if segreg, ok := has_segment.?; ok { - seg_string = fmt.aprintf("%s:", segreg.fullname) - } - string_val = fmt.aprintf("%s[%d]", seg_string, val) - case SegmentRegister: - string_val = segment_registers[val].fullname - case Jump: - string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val) - case VariablePort: - string_val = variable_port.fullname - case ShiftRotate: - string_val = val ? registers[1].bytename : "1" - case Repeat: - string_val = (string)(val) - case Intersegment: - string_val = fmt.aprintf("%d:%d", val.cs, val.ip) - } - return string_val -} - -main :: proc() { - f,err := os.open(os.args[1]) - if err != os.ERROR_NONE { - fmt.eprintln("ERROR:", err) - os.exit(1) - } - defer os.close(f) - - data := make([]u8, 1024) - bytes_read, err2 := os.read(f, data) - if err2 != nil { - // ... - os.exit(1) - } - - if false { - os.exit(0) - } - - // asdf :u16 = 0b00000110_11011101 - // asdf2 :i16 = (i16)(asdf) - // fmt.printfln("%d", asdf2) - print_at_end := false - idx := 0 - line_count := 0 - has_lock: bool - has_segment: Maybe(Register) - last_opname: [3]byte - repeating_op_count := 0 - instruction_list := make([dynamic]string, 512) - fmt.println("bits 16\n") - for idx < bytes_read { - processed := 1 - curr_byte := data[idx] - - inst, ok := try_find_instruction(curr_byte) - if !ok { - txt := "unknown instruction" - if print_at_end { - line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - instruction_list[line_count] = line - line_count += 1 - } else { - fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - } - idx += 1 - continue - } - - // Here we check if the instruction affects the next instruction - if inst.opname == .LOCK { - has_lock = true - idx += 1 - continue - } else if inst.opname == .SEGMENT { - reg := (curr_byte & 0b11000) >> 3 - has_segment = segment_registers[reg] - idx += 1 - continue - } else if inst.opname == .AAM { - processed += 1 - } - - src_opr: Operand - dst_opr: Operand - - word: bool - flip: bool - indirect_intersegment: bool - op: Operand - - if inst.has_flip { - flip = curr_byte & 2 != 0 - } - - #partial switch inst.word_size { - case .LastBit: word = curr_byte & 1 == 1 - case .FourthBit: word = curr_byte & 0b0000_1000 != 0 - case .Always16: word = true - } - - opname: string - if inst.check_second_encoding { - opname,indirect_intersegment = get_opname(inst.opname, data[idx:]) - // NOTE: This is a special case because it matches the bit pattern of .TBD5, - // but the instruction itself is different - if opname == "test" && (curr_byte & 0xFF) == 0b11110110 { - inst = test_inst - } - } else { - opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) - } - - dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment) - src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment) - - src_is_imm := operand_is(Immediate8, src_opr) || operand_is(Immediate16, src_opr) - dst_is_bracketed := operand_is(MemoryAddr, dst_opr) || operand_is(DirectAddress, dst_opr) - src_is_bracketed := operand_is(MemoryAddr, src_opr) || operand_is(DirectAddress, src_opr) - shiftrot := inst.src == .ShiftRotate - size_string := "" - if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, dst_opr)) { - size_string = word ? "word " : "byte " - } - - if flip { - src_opr, dst_opr = dst_opr, src_opr - } - - dst_str := get_operand_string(dst_opr, word, has_segment) - src_str := get_operand_string(src_opr, word, has_segment) - full_inst: string - if dst_str == "" { - interseg_string: string - if indirect_intersegment { - interseg_string = " far" - } - full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str) - } else { - // NOTE: I don't know why this is the case, but only the move has the word/byte - // keyword next to the immediate, but other instructions have it on the memory address - if opname == "mov" { - full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str) - } else { - full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str) - } - } - - processed += inst.consume_extra_bytes - - lock_string: string - if has_lock { - lock_string = "lock " - } - fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") - if has_lock { - fmt.sbprintf(&instruction_builder, " lock") - } - if _,ok := has_segment.?; ok { - fmt.sbprintf(&instruction_builder, " segment") - } - for i in 0.. 0 { - fmt.println() - } - repeating_op_count = 0 - } else { - repeating_op_count += 1 - } - copy(last_opname[:], op2[0:3]) - fmt.println(op2) - - idx += processed - strings.builder_reset(&instruction_builder) - has_lock = false - has_segment = nil - total_bytes_processed = idx - } - if print_at_end { - for i in 0.. Operand { + operand: Operand = None{} + switch opinfo { + case .None: + case .Register: + reg: u8 + switch inst.reg_info { + case .None: + panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (RegisterId)(registers[reg].code) + case .SegmentRegister: + reg: u8 + switch inst.reg_info { + case .None: + panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (SegmentRegister)(segment_registers[reg].code) + case .RegisterMemory: + mod := data[1] >> 6 + rm := data[1] & 0b111 + processed^ += 1 + op: Operand + if mod == 0 { + if rm == 0b110 { + op = (DirectAddress)(get_i16(data[2:])) + processed^ += 2 + } else { + op = MemoryAddr{ addr_id = rm , displacement = None{} } + } + } else if mod == 1 { + op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) } + processed^ += 1 + } else if mod == 2 { + op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) } + processed^ += 2 + } else if mod == 3 { + op = (RegisterId)(registers[rm].code) + } + operand = op + case .Immediate: + data_idx := processed^ + word_signed := word + if inst.has_sign_extension { + word_signed &&= data[0] & 0b0000_0010 == 0 + } + operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) + processed^ += word_signed ? 2 : 1 + case .ImmediateUnsigned: + operand = (ImmediateU8)(data[processed^]) + processed^ += 1 + case .Accumulator: + operand = (RegisterId)(registers[0].code) + case .DirectAddress: + operand = (DirectAddress)(get_i16(data[1:])) + processed^ += 2 + case .Jump: + processed^ += 1 + // NOTE: In order to mimic the label offset, you have to take the value you got and add two + operand = (Jump)((i8)(data[1]) + 2) + case .VariablePort: + operand = VariablePort{} + case .ShiftRotate: + v_flag := data[0] & 0b10 != 0 + operand = (ShiftRotate)(v_flag) + case .Repeat: + operand = get_repeat_op(data[1]) + processed^ += 1 + case .DirectWithinSegment: + value := (int)(get_i16(data[1:])) + total_bytes_processed + 3 + operand = (DirectWithinSegment)(value) + processed^ += 2 + case .Intersegment: + operand = Intersegment { + ip = get_i16(data[1:]), + cs = get_i16(data[3:]), + } + processed^ += 4 + } + return operand +} + +decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read: int) { + idx := 0 + has_segment: Maybe(Register) + has_lock: bool + for idx < bytes_to_read { + instruction: Instruction + processed := 1 + curr_byte := data[idx] + + inst, ok := try_find_instruction(curr_byte) + if !ok { + instruction = { + opname = .UNKNOWN, + bytes_read = 1, + raw_data = data[idx:idx+1], + } + append(inst_list, instruction) + idx += 1 + continue + } + + // Here we check if the instruction affects the next instruction + if inst.opname == .LOCK { + has_lock = true + idx += 1 + continue + } else if inst.opname == .SEGMENT { + reg := (curr_byte & 0b11000) >> 3 + has_segment = segment_registers[reg] + idx += 1 + continue + } else if inst.opname == .AAM { + processed += 1 + } + + debug_str: string + // NOTE: This is a special case because it matches the bit pattern of .TBD5, + // but the instruction itself is different + if inst.opname == .TBD5 && (data[idx] & 0xFF) == 0b11110110 && (data[idx+1] & 0b00111000) == 0 { + inst = test_inst + } + + src_opr: Operand + dst_opr: Operand + + word: bool + flip: bool + indirect_intersegment: bool + op: Operand + + if inst.has_flip { + flip = curr_byte & 2 != 0 + } + + #partial switch inst.word_size { + case .LastBit: word = curr_byte & 1 == 1 + case .FourthBit: word = curr_byte & 0b0000_1000 != 0 + case .Always16: word = true + } + + dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment) + src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment) + + if flip { + src_opr, dst_opr = dst_opr, src_opr + } + + processed += inst.consume_extra_bytes + + instruction.opname = inst.opname + instruction.src = src_opr + instruction.dst = dst_opr + instruction.is_word = word + instruction.bytes_read = processed + instruction.raw_data = data[idx:idx+processed] + instruction.debug_msg = debug_str + instruction.info = inst + instruction.has_lock = has_lock + instruction.has_segment = has_segment + + // fmt.println(parsed_inst) + append(inst_list, instruction) + + idx += processed + + has_lock = false + has_segment = nil + total_bytes_processed = idx + } +} diff --git a/instructions.odin b/instructions.odin index f556adf..2dac6cf 100644 --- a/instructions.odin +++ b/instructions.odin @@ -1,6 +1,7 @@ -package decoder_8086 +package sim_8086 -OpName :: enum { +Op :: enum { + UNKNOWN, TBD1, TBD2, TBD3, @@ -100,8 +101,12 @@ OpName :: enum { // isn't that great; we return a string with the instruction name, but ideally we have all // the instructions accounted for, because eventually we will need the final parsed // instruction to contain all the information related to it +// test_inst := InstructionInfo { +// opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110, +// dst = .RegisterMemory, src = .Immediate, word_size = .LastBit +// } test_inst := InstructionInfo { - opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110, + opname = .TEST, desc = "", mask = 0b11111110, encoding = 0b11110110, dst = .RegisterMemory, src = .Immediate, word_size = .LastBit } diff --git a/lib.h b/lib.h deleted file mode 100644 index 40909ca..0000000 --- a/lib.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include -#include - -typedef uint8_t u8; -typedef uint16_t u16; -typedef int16_t i16; -typedef int32_t i32; -typedef uint32_t u32; -typedef uint64_t u64; -typedef float f32; -typedef double f64; -typedef uintptr_t uptr; -typedef char sbyte; -typedef ptrdiff_t size; -typedef size_t usize; - -enum OptionTag {NONE, SOME}; - -#define OPTION(type) \ - typedef struct \ - { \ - enum OptionTag tag; \ - union { \ - char none; \ - type value; \ - }; \ - } type##_opt; \ - \ - static inline type##_opt none_##type(void) \ - { \ - return (type##_opt){ .tag = NONE, .none = 0 }; \ - } \ - \ - static inline type##_opt some_##type(type value) \ - { \ - return (type##_opt){ .tag = SOME, .value = value }; \ - } \ - \ - static inline int get_some_##type(type##_opt opt, type* out_value) \ - { \ - if (opt.tag != SOME) return 0; \ - *out_value = opt.value; \ - return 1; \ - } - -#define IF_LET_SOME(type, var, opt) \ - type var; \ - if (get_some_##type(opt, &var)) - - -OPTION(u8) -OPTION(u16) diff --git a/printing.odin b/printing.odin new file mode 100644 index 0000000..667cd2b --- /dev/null +++ b/printing.odin @@ -0,0 +1,242 @@ +package sim_8086 + +import "core:fmt" +import "core:math" +import "core:strings" + +instruction_builder := strings.builder_make() + +calculate_effective_address :: proc(r_m: u8) -> string { + val: string + switch r_m { + case 0b000: + val = "bx + si" + case 0b001: + val = "bx + di" + case 0b010: + val = "bp + si" + case 0b011: + val = "bp + di" + case 0b100: + val = "si" + case 0b101: + val = "di" + case 0b110: + val = "bp" + case 0b111: + val = "bx" + } + return val +} + +get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string { + disp: string + switch value in memoryAddr.displacement { + case None: + disp = "" + case Disp8: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + case Disp16: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + } + seg_string: string + if segreg, ok := has_segment.?; ok { + seg_string = fmt.aprintf("%s:", segreg.fullname) + } + text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp) + return text +} + +get_displacement_string :: proc(displacement: Displacement) -> string { + disp := "" + #partial switch value in displacement { + case i8: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + case i16: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + } + return disp +} + +get_opname :: proc(inst: Instruction) -> (string, bool) { + name: string + interseg: bool + if inst.opname == .TBD2 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "inc" + case 0b001: name = "dec" + case 0b010: name = "call" + // TODO: We really have to fix this because we shouldn't be figuring out if this + // is an intersegment here + case 0b011: name = "call"; interseg = true + case 0b100: name = "jmp" + case 0b101: name = "jmp"; interseg = true + case 0b110: name = "push" + } + } else if inst.opname == .TBD5 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "test" + case 0b001: name = "dec" + case 0b010: name = "not" + case 0b011: name = "neg" + case 0b100: name = "mul" + case 0b101: name = "imul" + case 0b110: name = "div" + case 0b111: name = "idiv" + } + } else if inst.opname == .TBD6 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "rol" + case 0b001: name = "ror" + case 0b010: name = "rcl" + case 0b011: name = "rcr" + case 0b100: name = "shl" + case 0b101: name = "shr" + case 0b111: name = "sar" + } + } else { + bits: u8 + if inst.opname == .TBD1 || inst.opname == .TBD3 { + bits = inst.raw_data[0] & 0b00111000 >> 3 + } else { + bits = inst.raw_data[1] & 0b00111000 >> 3 + } + switch bits { + case 0b000: name = "add" + case 0b001: name = "or" + case 0b010: name = "adc" + case 0b011: name = "sbb" + case 0b100: name = "and" + case 0b101: name = "sub" + case 0b110: name = "xor" + case 0b111: name = "cmp" + } + } + return name, interseg +} + +get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string { + string_val: string + switch val in operand { + case None: + string_val = "" + case RegisterId: + string_val = is_word ? registers[val].fullname : registers[val].bytename + case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment: + string_val = fmt.aprintf("%d", val) + case MemoryAddr: + string_val = get_memory_string(val, has_segment) + case DirectAddress: + seg_string: string + if segreg, ok := has_segment.?; ok { + seg_string = fmt.aprintf("%s:", segreg.fullname) + } + string_val = fmt.aprintf("%s[%d]", seg_string, val) + case SegmentRegister: + string_val = segment_registers[val].fullname + case Jump: + string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val) + case VariablePort: + string_val = variable_port.fullname + case ShiftRotate: + string_val = val ? registers[1].bytename : "1" + case Repeat: + string_val = (string)(val) + case Intersegment: + string_val = fmt.aprintf("%d:%d", val.cs, val.ip) + } + return string_val +} + +get_unknown_inst_string :: proc(inst: Instruction) -> string { + print_at_end := false + txt := "unknown instruction" + line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", inst.raw_data[0]) + return line +} + +get_instruction_string :: proc(inst_info: InstructionInfo, instruction: Instruction) { + inst := instruction + src_is_imm := operand_is(Immediate8, inst.src) || operand_is(Immediate16, inst.src) + dst_is_bracketed := operand_is(MemoryAddr, inst.dst) || operand_is(DirectAddress, inst.dst) + src_is_bracketed := operand_is(MemoryAddr, inst.src) || operand_is(DirectAddress, inst.src) + shiftrot := operand_is(ShiftRotate, inst.src) + size_string := "" + if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, inst.dst)) { + size_string = inst.is_word ? "word " : "byte " + } + + if inst.has_lock { + fmt.sbprint(&instruction_builder, "lock ") + } + + dst_str := get_operand_string(inst.dst, inst.is_word, inst.has_segment) + src_str := get_operand_string(inst.src, inst.is_word, inst.has_segment) + opname: string + is_interseg: bool + if inst_info.check_second_encoding { + opname,is_interseg = get_opname(inst) + } else { + // TODO: Do the RTTI thing here with reflection + opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) + } + + if dst_str == "" { + interseg_string: string + if is_interseg { + interseg_string = " far" + } + fmt.sbprintf(&instruction_builder, "%s%s %s%s", opname, interseg_string, size_string, src_str) + } else { + // note: i don't know why this is the case, but only the move has the word/byte + // keyword next to the immediate, but other instructions have it on the memory address + if opname == "mov" { + fmt.sbprintf(&instruction_builder, "%s %s, %s%s", opname, dst_str, size_string, src_str) + } else { + fmt.sbprintf(&instruction_builder, "%s %s%s, %s", opname, size_string, dst_str, src_str) + } + } + + // Prepare padding and comment to add debug info + b_len := strings.builder_len(instruction_builder) + fmt.sbprintf(&instruction_builder, "%*[0]s", RIGHT_ALIGN_AMOUNT - b_len, ";;") + + if inst.has_lock { + fmt.sbprintf(&instruction_builder, " lock") + } + if _,ok := inst.has_segment.?; ok { + fmt.sbprintf(&instruction_builder, " segment") + } + for i in 0.. 0 { + fmt.println() + } + repeating_op_count = 0 + } else { + repeating_op_count += 1 + } + copy(last_opname[:], op2[0:3]) + fmt.println(op2) + } +} diff --git a/sim8086.odin b/sim8086.odin new file mode 100644 index 0000000..49bddfe --- /dev/null +++ b/sim8086.odin @@ -0,0 +1,98 @@ +package sim_8086 + +import "core:os" +import "core:fmt" +import "core:math" +import "core:strings" + +RIGHT_ALIGN_AMOUNT := 35 + +registers := [8]Register { + {fullname = "ax", bytename = "al", code = 0b000}, + {fullname = "cx", bytename = "cl", code = 0b001}, + {fullname = "dx", bytename = "dl", code = 0b010}, + {fullname = "bx", bytename = "bl", code = 0b011}, + {fullname = "sp", bytename = "ah", code = 0b100}, + {fullname = "bp", bytename = "ch", code = 0b101}, + {fullname = "si", bytename = "dh", code = 0b110}, + {fullname = "di", bytename = "bh", code = 0b111}, +} + +segment_registers := [4]Register { + {fullname = "es", code = 0b000}, + {fullname = "cs", code = 0b001}, + {fullname = "ss", code = 0b010}, + {fullname = "ds", code = 0b011}, +} + +variable_port := registers[2] + +total_bytes_processed := 0 + +get_i16 :: proc(data: []u8) -> i16 { + return (i16)(data[1]) << 8 | (i16)(data[0]) +} + +operand_is :: proc($T: typeid, opr: Operand) -> bool { + _, ok := opr.(T) + return ok +} + +get_repeat_op :: proc(data: u8) -> Repeat { + bits := (data & 0b1110) >> 1 + w := (data & 0b1) == 1 ? "w" : "b" + rep: string + switch bits { + case 0b010: rep = "movs" + case 0b011: rep = "cmps" + case 0b101: rep = "stos" + case 0b110: rep = "lods" + case 0b111: rep = "scas" + } + return Repeat(fmt.aprintf("%s%s", rep, w)) +} + +try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { + for inst in instructions { + if inst.encoding == (b & inst.mask) { + return inst, true + } + } + return InstructionInfo{}, false +} + +main :: proc() { + f,err := os.open(os.args[1]) + if err != os.ERROR_NONE { + fmt.eprintln("ERROR:", err) + os.exit(1) + } + defer os.close(f) + + data := make([]u8, 1024) + bytes_read, err2 := os.read(f, data) + if err2 != nil { + // ... + os.exit(1) + } + + if false { + os.exit(0) + } + + // asdf :u16 = 0b00000110_11011101 + // asdf2 :i16 = (i16)(asdf) + // fmt.printfln("%d", asdf2) + print_at_end := false + line_count := 0 + instruction_list := make([dynamic]string, 0, 512) + instructions_list := make([dynamic]Instruction, 0, 512) + + decode_data(&instructions_list, data[:], bytes_read) + // for inst in instructions_list { + // fmt.println(inst) + // } + if true { + print_instructions_stdout(instructions_list[:]) + } +} diff --git a/test_asm.sh b/test_asm.sh index 042bf78..25ba9df 100755 --- a/test_asm.sh +++ b/test_asm.sh @@ -6,9 +6,14 @@ NC='\033[0m' make asm_files > /dev/null +if [ ! "$(command -v ./sim8086)" ]; then + echo -e "\nError: 'sim8086' executable not found" + exit 1 +fi + for ASM_BIN in asm_files/*.bin; do - ./decoder8086 "$ASM_BIN" > output.asm 2> /dev/null + ./sim8086 "$ASM_BIN" > output.asm 2> /dev/null nasm output.asm -o output.bin 2> /dev/null ASM_FILE=${ASM_BIN%.*}.asm if [ ! -e output.bin ]; then diff --git a/types.odin b/types.odin new file mode 100644 index 0000000..b1f2f81 --- /dev/null +++ b/types.odin @@ -0,0 +1,123 @@ +package sim_8086 + +Register :: struct { + fullname: string, + bytename: string, + value: struct #raw_union { + using _: struct { + low, high: byte, + }, + full: u16, + }, + code: u8, +} + +WordSize :: enum { + None, + LastBit, + FourthBit, + Always8, + Always16, +} + +None :: struct {} + +Disp8 :: i8 +Disp16 :: i16 +Displacement :: union { + None, + Disp8, + Disp16 +} + +RegisterId :: distinct u8 +Immediate8 :: distinct i8 +Immediate16 :: distinct i16 +ImmediateU8 :: distinct u8 +MemoryAddr :: struct { + addr_id: u8, + displacement: Displacement, +} +DirectAddress :: distinct i16 +SegmentRegister :: distinct i8 +Jump :: distinct i8 +VariablePort :: struct {} +ShiftRotate :: distinct bool +Repeat :: string +Intersegment :: struct { + ip: i16, + cs: i16, +} +DirectWithinSegment :: distinct u16 + +Operand :: union { + None, + RegisterId, + Immediate8, + ImmediateU8, + Immediate16, + MemoryAddr, + DirectAddress, + SegmentRegister, + Jump, + VariablePort, + ShiftRotate, + Repeat, + DirectWithinSegment, + Intersegment, +} + +OperandInfo :: enum { + None, + Register, + SegmentRegister, + RegisterMemory, + Immediate, + ImmediateUnsigned, + Accumulator, + DirectAddress, + Jump, + VariablePort, + ShiftRotate, + Repeat, + DirectWithinSegment, + Intersegment, +} + +RegisterEncodingBits :: enum { + None, + FirstByteLast3, + SecondByteMiddle3, + SecondByteLast3, + FirstByteMiddle3, +} + +InstructionInfo :: struct { + mask: u8, + encoding: u8, + opname: Op, + desc: string, + src: OperandInfo, + dst: OperandInfo, + word_size: WordSize, + reg_info: RegisterEncodingBits, + has_flip: bool, + has_sign_extension: bool, + check_second_encoding: bool, + consume_extra_bytes: int, + shift_rotate_flag: bool, +} + +Instruction :: struct { + opname: Op, + src: Operand, + dst: Operand, + info: InstructionInfo, + is_word: bool, + indirect_intersegment: bool, + has_segment: Maybe(Register), + has_lock: bool, + bytes_read: int, + raw_data: []u8, + debug_msg: string, +}