From 32ddb518e969722a2e910f10d980c72d5f5d652c Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Sat, 15 Mar 2025 17:28:51 +0700 Subject: [PATCH] Break code up into files, delete C code, organize things better --- .gitignore | 1 + LICENSE_CE => asm_files/LICENSE_CE | 0 decode.c | 431 -------------------- decode.h | 223 ----------- decoder8086.odin | 611 ----------------------------- decoding.odin | 194 +++++++++ instructions.odin | 11 +- lib.h | 53 --- printing.odin | 242 ++++++++++++ sim8086.odin | 98 +++++ test_asm.sh | 7 +- types.odin | 123 ++++++ 12 files changed, 672 insertions(+), 1322 deletions(-) rename LICENSE_CE => asm_files/LICENSE_CE (100%) delete mode 100644 decode.c delete mode 100644 decode.h delete mode 100644 decoder8086.odin create mode 100644 decoding.odin delete mode 100644 lib.h create mode 100644 printing.odin create mode 100644 sim8086.odin create mode 100644 types.odin diff --git a/.gitignore b/.gitignore index 46c75fa..2bef33f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /8086_family_Users_Manual_1_.pdf /decoder8086 /performance-aware +/sim8086 diff --git a/LICENSE_CE b/asm_files/LICENSE_CE similarity index 100% rename from LICENSE_CE rename to asm_files/LICENSE_CE diff --git a/decode.c b/decode.c deleted file mode 100644 index 469573c..0000000 --- a/decode.c +++ /dev/null @@ -1,431 +0,0 @@ -#include -#include -#include -#include -#include -#include "lib.h" -#include "decode.h" - -/// Get Effective Address Calculation Registers -char* get_eac_register(char rm) -{ - char* reg_name; - switch (rm) - { - case 0b000: reg_name = "bx + si"; break; - case 0b001: reg_name = "bx + di"; break; - case 0b010: reg_name = "bp + si"; break; - case 0b011: reg_name = "bp + di"; break; - case 0b100: reg_name = "si"; break; - case 0b101: reg_name = "di"; break; - case 0b110: reg_name = "bp"; break; - case 0b111: reg_name = "bx"; break; - default: perror("Invalid R/M value"); exit(1); - } - return reg_name; -} - -static char* reg_name(Register reg, char wide) -{ - return wide == 1 ? reg.fullname : reg.bytename; -} - -static u8 mask_and_shift(u8 value, u8 mask) -{ - value &= mask; - int count = 0; - while ((mask & 0x1) == 0 && count < 8) - { - value >>= 1; - mask >>= 1; - count++; - } - return value; -} - -ParsedInstruction parse_instruction_ids(u8* buf) -{ - u8 inst = buf[0]; - InstFormat fmt = {0}; - bool matched_inst = false; - // TODO: This might be a good time to learn how to make a hashtable in C - for (u16 i = 0; i < sizeof(inst_funcs) / (sizeof(inst_parser_f)*6*4); i++) - for (int j = 0; j < 6; j++) - for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++) - { - printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]); - // Apply inst_func_t - } - // for (int j = 0; j < 4 || ;) - return (ParsedInstruction){0}; - if (!matched_inst) - return (ParsedInstruction){.bytes_read = 0}; - u8_opt d_opt = none_u8(); - u8_opt s_opt = none_u8(); - u8_opt w_opt = none_u8(); - u8_opt reg_opt = none_u8(); - u8_opt mod_opt = none_u8(); - u8_opt rm_opt = none_u8(); - u16_opt data_opt = none_u16(); - u16_opt displacement_opt = none_u16(); - u8 is_data_addr = false; - - u16 bytes_read = 1; - bytes_read += fmt.has_operands ? 1 : 0; - - if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); - if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); - if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); - if (fmt.parse_reg.tag == P_REG_MASK) - { - u8 reg = fmt.has_operands ? buf[1] : buf[0]; - reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); - } - else if (fmt.parse_reg.tag == P_REG_FIXED) - { - reg_opt = some_u8(fmt.parse_reg.fixed); - is_data_addr = true; - } - if (fmt.has_data) - { - u8 idx = 1; - if (fmt.has_operands) idx += 1; - // This is a trick because mod == 1 and mod == 2 will displace one and two bytes - // respectively but mod == 3 wraps to 0 since it doesn't displace - if (fmt.has_displacement) idx += mod_opt.value % 3; - u16 data; - if (fmt.has_s && s_opt.value == 1) - { - data = (sbyte)buf[idx]; - bytes_read += 1; - } - else - { - data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; - bytes_read += w_opt.value == 0 ? 1 : 2; - } - data_opt = some_u16(data); - } - if (fmt.has_displacement && mod_opt.value % 3 > 0) - { - u16 disp = mod_opt.value == MODE_MEM_DIS_16 - ? (i16)buf[3] << 8 | buf[2] - : (sbyte)buf[2]; - displacement_opt = some_u16(disp); - bytes_read += mod_opt.value % 3; - } - else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) - { - displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); - bytes_read += 2; - } - - return (ParsedInstruction) { - .id = fmt.id, - .name = fmt.name, - .data = data_opt, - .displacement = displacement_opt, - .w = w_opt, - .d = d_opt, - .s = s_opt, - .mod = mod_opt, - .reg = reg_opt, - .rm = rm_opt, - .is_data_addr = is_data_addr, - .bytes_read = bytes_read, - }; -} -ParsedInstruction parse_instruction(u8* buf) -{ - u8 inst = buf[0]; - InstFormat fmt = {0}; - bool matched_inst = false; - // TODO: This might be a good time to learn how to make a hashtable in C - for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++) - { - if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc) - { - fmt = inst_formats[i]; - matched_inst = true; - break; - } - } - if (!matched_inst) - return (ParsedInstruction){.bytes_read = 0}; - u8_opt d_opt = none_u8(); - u8_opt s_opt = none_u8(); - u8_opt w_opt = none_u8(); - u8_opt reg_opt = none_u8(); - u8_opt mod_opt = none_u8(); - u8_opt rm_opt = none_u8(); - u16_opt data_opt = none_u16(); - u16_opt displacement_opt = none_u16(); - u8 is_data_addr = false; - - u16 bytes_read = 1; - bytes_read += fmt.has_operands ? 1 : 0; - - if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1); - if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7); - if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6); - if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w)); - if (fmt.parse_reg.tag == P_REG_MASK) - { - u8 reg = fmt.has_operands ? buf[1] : buf[0]; - reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask)); - } - else if (fmt.parse_reg.tag == P_REG_FIXED) - { - reg_opt = some_u8(fmt.parse_reg.fixed); - is_data_addr = true; - } - if (fmt.has_data) - { - u8 idx = 1; - if (fmt.has_operands) idx += 1; - // This is a trick because mod == 1 and mod == 2 will displace one and two bytes - // respectively but mod == 3 wraps to 0 since it doesn't displace - if (fmt.has_displacement) idx += mod_opt.value % 3; - u16 data; - if (fmt.has_s && s_opt.value == 1) - { - data = (sbyte)buf[idx]; - bytes_read += 1; - } - else - { - data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx]; - bytes_read += w_opt.value == 0 ? 1 : 2; - } - data_opt = some_u16(data); - } - if (fmt.has_displacement && mod_opt.value % 3 > 0) - { - u16 disp = mod_opt.value == MODE_MEM_DIS_16 - ? (i16)buf[3] << 8 | buf[2] - : (sbyte)buf[2]; - displacement_opt = some_u16(disp); - bytes_read += mod_opt.value % 3; - } - else if (fmt.has_displacement && mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6) - { - displacement_opt = some_u16((i16)buf[3] << 8 | buf[2]); - bytes_read += 2; - } - - return (ParsedInstruction) { - .id = fmt.id, - .name = fmt.name, - .data = data_opt, - .displacement = displacement_opt, - .w = w_opt, - .d = d_opt, - .s = s_opt, - .mod = mod_opt, - .reg = reg_opt, - .rm = rm_opt, - .is_data_addr = is_data_addr, - .bytes_read = bytes_read, - }; -} - -Instruction decode_instruction(ParsedInstruction inst) -{ - Operand opr1 , opr2 = {0}; - i16 payload = 0; - - IF_LET_SOME(u8, mod, inst.mod) - { - IF_LET_SOME(u8, reg, inst.reg) - { - opr1.tag = OPR_T_REGISTER; - opr1.reg.value = registers[(size_t)reg]; - opr1.reg.wide = inst.w.value; - } - else - { - opr1.tag = OPR_T_IMMEDIATE; - opr1.imm.value = inst.data.value; - // TODO: This is dumb, we shouldn't do it this way - if (inst.s.value == 1) opr1.imm.direct = 0; - else opr1.imm.direct = inst.w.value + 1; - } - if (mod == MODE_RGSTR_MODE) - { - opr2.tag = OPR_T_REGISTER; - opr2.reg.value = registers[(size_t)inst.rm.value]; - opr2.reg.wide = inst.w.value; - } - else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6) - { - opr2.tag = OPR_T_DIRADDR; - opr2.dir_addr.value = inst.displacement.value; - } - else - { - opr2.tag = OPR_T_MEMORY; - opr2.mem.eac_name = get_eac_register(inst.rm.value); - opr2.mem.mode = mod; - opr2.mem.displacement = (i16)inst.displacement.value; - } - } - else - { - IF_LET_SOME(u16, data, inst.data) - { - if (inst.is_data_addr) - { - opr1.tag = OPR_T_DIRADDR; - opr1.dir_addr.value = (i16)data; - } - else - { - opr1.tag = OPR_T_IMMEDIATE; - opr1.imm.value = (i16)data; - opr1.imm.direct = 0; - } - } - IF_LET_SOME(u8, reg, inst.reg) - { - opr2.tag = OPR_T_REGISTER; - opr2.reg.value = registers[(size_t)reg]; - opr2.reg.wide = inst.w.value; - } - } - if (inst.d.tag == SOME && inst.d.value == 1) - { - Operand temp = opr1; - opr1 = opr2; - opr2 = temp; - } - return (Instruction) { - .id = inst.id, - .data = payload, - .operation = inst.name, - .src_opr = opr1, - .dst_opr = opr2, - }; -} - -void get_operand_string(char* str_buf, Operand oprnd) -{ - if (oprnd.tag == OPR_T_REGISTER) - { - strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide)); - } - else if (oprnd.tag == OPR_T_MEMORY) - { - char disp_str[16] = {'\0'}; - i16 disp = oprnd.mem.displacement; - if (disp != 0) - sprintf(disp_str, " %s %d", (disp > 0 ? "+" : "-"), abs(disp)); - sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, disp_str); - } - else if (oprnd.tag == OPR_T_IMMEDIATE) - { - char *size = ""; - if (oprnd.imm.direct > 0) - size = oprnd.imm.direct == 1 ? "byte " : "word "; - sprintf(str_buf, "%s%d", size, oprnd.imm.value); - } - else if (oprnd.tag == OPR_T_DIRADDR) - { - sprintf(str_buf, "[%d]", oprnd.dir_addr.value); - } -} - -void get_instr_string(char* str_buf, Instruction inst) -{ - char src_str[32], dst_str[32]; - get_operand_string(src_str, inst.src_opr); - get_operand_string(dst_str, inst.dst_opr); - sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str); -} - -char *memory[65536]; -// Keep this global for debugging purposes -u16 inst_count = 1; - -int main(int argc, char** argv) -{ - if (argc < 2) - { - printf("Usage: Please provide assembled instructions as input\n"); - exit(0); - } - - struct stat st; - if (stat(argv[1], &st) == -1) - { - perror("Unable to get file size\n"); - return EXIT_FAILURE; - } - - unsigned char* buffer = malloc(st.st_size); - if (!buffer) - { - perror("Unable to allocate memory for binary file"); - return EXIT_FAILURE; - } - - FILE *f = fopen(argv[1], "r"); - if (!f) - { - perror("fopen\n"); - free(buffer); - return EXIT_FAILURE; - } - - size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f); - if (bytes_read != (size_t)st.st_size) - { - fprintf(stderr, "Read of binary file to memory incomplete.\n"); - free(buffer); - fclose(f); - return EXIT_FAILURE; - } - - fclose(f); - - printf("; Decoded 8086 Assembly Instructions\n\n"); - printf("bits 16\n\n"); - - char *inst_str_buf = malloc(sizeof(char) * 256); - u32 bytes_processed = 0; - while (bytes_processed < bytes_read) - { - ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed); - (void)_; - ParsedInstruction parsed = parse_instruction(buffer + bytes_processed); - - if (parsed.bytes_read > 0) - { - Instruction inst = decode_instruction(parsed); - get_instr_string(inst_str_buf, inst); - bytes_processed += parsed.bytes_read; - // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read); - printf("%s", inst_str_buf); - int len = strlen(inst_str_buf); - for (int i = 0; i < 32 - len; i++) - printf(" "); - printf("; %d, %d", inst_count++, inst.id); - } - else - { - bytes_processed += 1; - fprintf(stderr, "___Unrecognized Instruction___"); - } - // char inst = buffer[0]; - // if (mov_inst(f, buffer, inst)) goto handled; - // if (add_inst(f, buffer, inst)) goto handled; - // handled: - printf("\n"); - } - free(inst_str_buf); - free(buffer); - - return 0; -} diff --git a/decode.h b/decode.h deleted file mode 100644 index a21b2d2..0000000 --- a/decode.h +++ /dev/null @@ -1,223 +0,0 @@ -#include "lib.h" - -enum Mode -{ - MODE_MEM_NO_DIS = 0b00, - MODE_MEM_DIS_08 = 0b01, - MODE_MEM_DIS_16 = 0b10, - MODE_RGSTR_MODE = 0b11, -}; - -typedef struct Register -{ - char* fullname; - char* bytename; - union { - struct { - char low; - char high; - }; - u16 full; - } value; - u8 code; -} Register; - -Register registers[8] = { - {.code = 0b000, .fullname = "ax", .bytename = "al"}, - {.code = 0b001, .fullname = "cx", .bytename = "cl"}, - {.code = 0b010, .fullname = "dx", .bytename = "dl"}, - {.code = 0b011, .fullname = "bx", .bytename = "bl"}, - {.code = 0b100, .fullname = "sp", .bytename = "ah"}, - {.code = 0b101, .fullname = "bp", .bytename = "ch"}, - {.code = 0b110, .fullname = "si", .bytename = "dh"}, - {.code = 0b111, .fullname = "di", .bytename = "bh"}, -}; - -enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE, OPR_T_DIRADDR}; - -typedef struct Operand -{ - enum OperandType tag; - union { - struct Mem { - char *eac_name; - i16 displacement; - u8 mode; - } mem; - struct Reg { - Register value; - bool wide; - } reg; - struct Imm { - i16 value; - u8 direct; - } imm; - struct DirAddr { - i16 value; - } dir_addr; - }; -} Operand; - -enum ParseRegType { P_REG_NONE, P_REG_MASK, P_REG_FIXED }; -typedef struct ParseReg -{ - enum ParseRegType tag; - union { - u8 none; - u8 mask; - u8 fixed; - }; -} ParseReg; - -typedef struct InstFormat -{ - u16 id; - char *name; - ParseReg parse_reg; - u8 inst_enc; - u8 mask_inst; - u8 mask_w; - bool has_operands; - bool has_displacement; - bool has_data; - bool has_d; - bool has_w; - bool has_mod; - bool has_rm; - bool has_s; - bool has_SR; -} InstFormat; - -typedef struct ParsedInstruction -{ - u16 id; - char *name; - u16_opt data; - u16_opt displacement; - u8_opt w; - u8_opt d; - u8_opt s; - u8_opt mod; - u8_opt reg; - u8_opt rm; - u8_opt SR; - u8 is_data_addr; - u8 bytes_read; -} ParsedInstruction; - -typedef struct Instruction -{ - Operand src_opr; - Operand dst_opr; - i16 data; - char *operation; - u16 id; -} Instruction; - -enum InstructionIdentifier -{ - _PREFIX_2, - _PREFIX_3, - _PREFIX_6, - _NAME, - _D, - _W, - _S, - _MOD, - _REGISTER, - _ACC, - _RM, - _DISP_LO, - _DISP_HI, - _DATA_W0, - _DATA_W1, -} InstructionIdentifier; - -typedef struct ParsedInst -{ - u64 progress; - u8 something; -} ParsedInst; - -typedef ParsedInst (*inst_parser_f)(ParsedInst); - -ParsedInst pre_2(ParsedInst pi) {return pi;} -ParsedInst pre_3(ParsedInst pi) {return pi;} -ParsedInst pre_6(ParsedInst pi) {return pi;} -ParsedInst name(ParsedInst pi) {return pi;} -ParsedInst reg(ParsedInst pi) {return pi;} -ParsedInst w(ParsedInst pi) {return pi;} -ParsedInst d(ParsedInst pi) {return pi;} -ParsedInst s(ParsedInst pi) {return pi;} - -ParsedInst mod(ParsedInst pi) {return pi;} -ParsedInst inst(ParsedInst pi) {return pi;} -ParsedInst rm(ParsedInst pi) {return pi;} - -ParsedInst disp_lo(ParsedInst pi) {return pi;} -ParsedInst disp_hi(ParsedInst pi) {return pi;} - -ParsedInst data_w0(ParsedInst pi) {return pi;} -ParsedInst data_w1(ParsedInst pi) {return pi;} - -inst_parser_f inst_funcs[][6][4] = -{ - {{pre_2, name, d, w}, {mod, reg, rm}, {disp_lo}, {disp_hi}}, - {{pre_6, s, w}, {mod, name, rm}, {disp_lo}, {disp_hi}, {data_w0}, {data_w1}}, - {{pre_6, w}, {data_w0}, {data_w1}}, -}; - -enum InstructionIdentifier inst_ids[][6][4] = -{ - {{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}, - {{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}, - {{_PREFIX_6, _W}, {_DATA_W0}, {_DATA_W1}}, -}; - -typedef struct InstructionParser -{ - enum InstructionIdentifier inst_ids[6][4]; -} InstructionParser; - -// InstructionParser inst_formats[] = -// { -// {{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}}, -// {{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}}, -// }; - -InstFormat inst_formats[] = -{ - //////// - // MOV - //////// - // Register/memory to/from register - {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1, - .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true, - .has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} }, - // Immediate to register/memory - {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true, - .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, - // Immediate to register - {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, - .parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true}, - // Memory to accumulator | Accumulator to memory using the `d` bit - // even though the manual doesn't specify it - {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true, - .has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, - // Register/memory to segment register and inverse using the `d` bit - {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true, - .has_displacement=true, .has_mod=true, .has_rm=true}, - //////// - // ADD - //////// - // Reg/memory with register or either - {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true, - .mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true, - .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}}, - // Immediate to register/memory - {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, - .has_s=true, .has_operands=true, .has_displacement=true, - .has_data=true, .has_mod=true, .has_rm=true}, - {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, - .has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}}, -}; diff --git a/decoder8086.odin b/decoder8086.odin deleted file mode 100644 index 78ecdbe..0000000 --- a/decoder8086.odin +++ /dev/null @@ -1,611 +0,0 @@ -package decoder_8086 - -import "core:os" -import "core:fmt" -import "core:math" -import "core:strings" - -Register :: struct { - fullname: string, - bytename: string, - value: struct #raw_union { - using _: struct { - low, high: byte, - }, - full: u16, - }, - code: u8, -} - -WordSize :: enum { - None, - LastBit, - FourthBit, - Always8, - Always16, -} - -None :: struct {} - -Disp8 :: i8 -Disp16 :: i16 -Displacement :: union { - None, - Disp8, - Disp16 -} - -RegisterId :: distinct u8 -Immediate8 :: distinct i8 -Immediate16 :: distinct i16 -ImmediateU8 :: distinct u8 -MemoryAddr :: struct { - addr_id: u8, - displacement: Displacement, -} -DirectAddress :: distinct i16 -SegmentRegister :: distinct i8 -Jump :: distinct i8 -VariablePort :: struct {} -ShiftRotate :: distinct bool -Repeat :: string -Intersegment :: struct { - ip: i16, - cs: i16, -} -DirectWithinSegment :: distinct u16 - -Operand :: union { - None, - RegisterId, - Immediate8, - ImmediateU8, - Immediate16, - MemoryAddr, - DirectAddress, - SegmentRegister, - Jump, - VariablePort, - ShiftRotate, - Repeat, - DirectWithinSegment, - Intersegment, -} - -OperandInfo :: enum { - None, - Register, - SegmentRegister, - RegisterMemory, - Immediate, - ImmediateUnsigned, - Accumulator, - DirectAddress, - Jump, - VariablePort, - ShiftRotate, - Repeat, - DirectWithinSegment, - Intersegment, -} - -RegisterEncodingBits :: enum { - None, - FirstByteLast3, - SecondByteMiddle3, - SecondByteLast3, - FirstByteMiddle3, -} - -InstructionInfo :: struct { - mask: u8, - encoding: u8, - opname: OpName, - desc: string, - src: OperandInfo, - dst: OperandInfo, - word_size: WordSize, - reg_info: RegisterEncodingBits, - has_flip: bool, - has_sign_extension: bool, - check_second_encoding: bool, - consume_extra_bytes: int, - shift_rotate_flag: bool, -} - -RIGHT_ALIGN_AMOUNT := 35 - -registers := [8]Register { - {fullname = "ax", bytename = "al", code = 0b000}, - {fullname = "cx", bytename = "cl", code = 0b001}, - {fullname = "dx", bytename = "dl", code = 0b010}, - {fullname = "bx", bytename = "bl", code = 0b011}, - {fullname = "sp", bytename = "ah", code = 0b100}, - {fullname = "bp", bytename = "ch", code = 0b101}, - {fullname = "si", bytename = "dh", code = 0b110}, - {fullname = "di", bytename = "bh", code = 0b111}, -} - -segment_registers := [4]Register { - {fullname = "es", code = 0b000}, - {fullname = "cs", code = 0b001}, - {fullname = "ss", code = 0b010}, - {fullname = "ds", code = 0b011}, -} - -variable_port := registers[2] - -total_bytes_processed := 0 - -instruction_builder := strings.builder_make() - -get_i16 :: proc(data: []u8) -> i16 { - return (i16)(data[1]) << 8 | (i16)(data[0]) -} - -operand_is :: proc($T: typeid, opr: Operand) -> bool { - _, ok := opr.(T) - return ok -} - -calculate_effective_address :: proc(r_m: u8) -> string { - val: string - switch r_m { - case 0b000: - val = "bx + si" - case 0b001: - val = "bx + di" - case 0b010: - val = "bp + si" - case 0b011: - val = "bp + di" - case 0b100: - val = "si" - case 0b101: - val = "di" - case 0b110: - val = "bp" - case 0b111: - val = "bx" - } - return val -} - -get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string { - disp: string - switch value in memoryAddr.displacement { - case None: - disp = "" - case Disp8: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - case Disp16: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - } - seg_string: string - if segreg, ok := has_segment.?; ok { - seg_string = fmt.aprintf("%s:", segreg.fullname) - } - text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp) - return text -} - -parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) { - mod := (data[0] & 0b11000000) >> 6 - disp: Displacement = None{} - amount: int - switch mod { - case 1: - disp = (i8)(data[1]) - amount = 1 - case 2: - disp = get_i16(data[1:]) - amount = 2 - } - return disp, amount -} - -get_displacement_string :: proc(displacement: Displacement) -> string { - disp := "" - #partial switch value in displacement { - case i8: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - case i16: - if value != 0 { - disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) - } - } - return disp -} - -get_repeat_op :: proc(data: u8) -> Repeat { - bits := (data & 0b1110) >> 1 - w := (data & 0b1) == 1 ? "w" : "b" - rep: string - switch bits { - case 0b010: rep = "movs" - case 0b011: rep = "cmps" - case 0b101: rep = "stos" - case 0b110: rep = "lods" - case 0b111: rep = "scas" - } - return Repeat(fmt.aprintf("%s%s", rep, w)) -} - -try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { - for inst in instructions { - if inst.encoding == (b & inst.mask) { - return inst, true - } - } - return InstructionInfo{}, false -} - -get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) { - name: string - interseg: bool - if opname == .TBD2 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "inc" - case 0b001: name = "dec" - case 0b010: name = "call" - case 0b011: name = "call"; interseg = true - case 0b100: name = "jmp" - case 0b101: name = "jmp"; interseg = true - case 0b110: name = "push" - } - } else if opname == .TBD5 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "test" - case 0b001: name = "dec" - case 0b010: name = "not" - case 0b011: name = "neg" - case 0b100: name = "mul" - case 0b101: name = "imul" - case 0b110: name = "div" - case 0b111: name = "idiv" - } - } else if opname == .TBD6 { - switch data[1] & 0b00111000 >> 3 { - case 0b000: name = "rol" - case 0b001: name = "ror" - case 0b010: name = "rcl" - case 0b011: name = "rcr" - case 0b100: name = "shl" - case 0b101: name = "shr" - case 0b111: name = "sar" - } - } else { - bits: u8 - if opname == .TBD1 || opname == .TBD3 { - bits = data[0] & 0b00111000 >> 3 - } else { - bits = data[1] & 0b00111000 >> 3 - } - switch bits { - case 0b000: name = "add" - case 0b001: name = "or" - case 0b010: name = "adc" - case 0b011: name = "sbb" - case 0b100: name = "and" - case 0b101: name = "sub" - case 0b110: name = "xor" - case 0b111: name = "cmp" - } - } - return name, interseg -} - -parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand { - operand: Operand = None{} - switch opinfo { - case .None: - case .Register: - // rm: u8 = data[1] & 0b111 - // dst_opr = (RegisterId)(registers[rm].code) - reg: u8 - // Read the RegisterEncodingBits - switch inst.reg_info { - case .None: - // panic("Register is required but the encoded location is not provided") - case .FirstByteLast3: - reg = data[0] & 0b111 - case .FirstByteMiddle3: - reg = (data[0] >> 3) & 0b111 - case .SecondByteMiddle3: - reg = (data[1] >> 3) & 0b111 - case .SecondByteLast3: - reg = data[1] & 0b111 - } - operand = (RegisterId)(registers[reg].code) - case .SegmentRegister: - reg: u8 - switch inst.reg_info { - case .None: - // panic("Register is required but the encoded location is not provided") - case .FirstByteLast3: - reg = data[0] & 0b111 - case .FirstByteMiddle3: - reg = (data[0] >> 3) & 0b111 - case .SecondByteMiddle3: - reg = (data[1] >> 3) & 0b111 - case .SecondByteLast3: - reg = data[1] & 0b111 - } - operand = (SegmentRegister)(segment_registers[reg].code) - case .RegisterMemory: - mod := data[1] >> 6 - rm := data[1] & 0b111 - processed^ += 1 - op: Operand - if mod == 0 { - if rm == 0b110 { - op = (DirectAddress)(get_i16(data[2:])) - processed^ += 2 - } else { - op = MemoryAddr{ addr_id = rm , displacement = None{} } - } - } else if mod == 1 { - op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) } - processed^ += 1 - } else if mod == 2 { - op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) } - processed^ += 2 - } else if mod == 3 { - op = (RegisterId)(registers[rm].code) - } - operand = op - case .Immediate: - data_idx := processed^ - word_signed := word - if inst.has_sign_extension { - word_signed &&= data[0] & 0b0000_0010 == 0 - } - operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) - processed^ += word_signed ? 2 : 1 - case .ImmediateUnsigned: - operand = (ImmediateU8)(data[processed^]) - processed^ += 1 - case .Accumulator: - operand = (RegisterId)(registers[0].code) - case .DirectAddress: - operand = (DirectAddress)(get_i16(data[1:])) - processed^ += 2 - case .Jump: - processed^ += 1 - // NOTE: In order to mimic the label offset, you have to take the value you got and add two - operand = (Jump)((i8)(data[1]) + 2) - case .VariablePort: - operand = VariablePort{} - case .ShiftRotate: - v_flag := data[0] & 0b10 != 0 - operand = (ShiftRotate)(v_flag) - case .Repeat: - operand = get_repeat_op(data[1]) - processed^ += 1 - case .DirectWithinSegment: - value := (int)(get_i16(data[1:])) + total_bytes_processed + 3 - operand = (DirectWithinSegment)(value) - processed^ += 2 - case .Intersegment: - operand = Intersegment { - ip = get_i16(data[1:]), - cs = get_i16(data[3:]), - } - processed^ += 4 - } - return operand -} - -get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string { - string_val: string - switch val in operand { - case None: - string_val = "" - case RegisterId: - string_val = is_word ? registers[val].fullname : registers[val].bytename - case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment: - string_val = fmt.aprintf("%d", val) - case MemoryAddr: - string_val = get_memory_string(val, has_segment) - case DirectAddress: - seg_string: string - if segreg, ok := has_segment.?; ok { - seg_string = fmt.aprintf("%s:", segreg.fullname) - } - string_val = fmt.aprintf("%s[%d]", seg_string, val) - case SegmentRegister: - string_val = segment_registers[val].fullname - case Jump: - string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val) - case VariablePort: - string_val = variable_port.fullname - case ShiftRotate: - string_val = val ? registers[1].bytename : "1" - case Repeat: - string_val = (string)(val) - case Intersegment: - string_val = fmt.aprintf("%d:%d", val.cs, val.ip) - } - return string_val -} - -main :: proc() { - f,err := os.open(os.args[1]) - if err != os.ERROR_NONE { - fmt.eprintln("ERROR:", err) - os.exit(1) - } - defer os.close(f) - - data := make([]u8, 1024) - bytes_read, err2 := os.read(f, data) - if err2 != nil { - // ... - os.exit(1) - } - - if false { - os.exit(0) - } - - // asdf :u16 = 0b00000110_11011101 - // asdf2 :i16 = (i16)(asdf) - // fmt.printfln("%d", asdf2) - print_at_end := false - idx := 0 - line_count := 0 - has_lock: bool - has_segment: Maybe(Register) - last_opname: [3]byte - repeating_op_count := 0 - instruction_list := make([dynamic]string, 512) - fmt.println("bits 16\n") - for idx < bytes_read { - processed := 1 - curr_byte := data[idx] - - inst, ok := try_find_instruction(curr_byte) - if !ok { - txt := "unknown instruction" - if print_at_end { - line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - instruction_list[line_count] = line - line_count += 1 - } else { - fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - } - idx += 1 - continue - } - - // Here we check if the instruction affects the next instruction - if inst.opname == .LOCK { - has_lock = true - idx += 1 - continue - } else if inst.opname == .SEGMENT { - reg := (curr_byte & 0b11000) >> 3 - has_segment = segment_registers[reg] - idx += 1 - continue - } else if inst.opname == .AAM { - processed += 1 - } - - src_opr: Operand - dst_opr: Operand - - word: bool - flip: bool - indirect_intersegment: bool - op: Operand - - if inst.has_flip { - flip = curr_byte & 2 != 0 - } - - #partial switch inst.word_size { - case .LastBit: word = curr_byte & 1 == 1 - case .FourthBit: word = curr_byte & 0b0000_1000 != 0 - case .Always16: word = true - } - - opname: string - if inst.check_second_encoding { - opname,indirect_intersegment = get_opname(inst.opname, data[idx:]) - // NOTE: This is a special case because it matches the bit pattern of .TBD5, - // but the instruction itself is different - if opname == "test" && (curr_byte & 0xFF) == 0b11110110 { - inst = test_inst - } - } else { - opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) - } - - dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment) - src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment) - - src_is_imm := operand_is(Immediate8, src_opr) || operand_is(Immediate16, src_opr) - dst_is_bracketed := operand_is(MemoryAddr, dst_opr) || operand_is(DirectAddress, dst_opr) - src_is_bracketed := operand_is(MemoryAddr, src_opr) || operand_is(DirectAddress, src_opr) - shiftrot := inst.src == .ShiftRotate - size_string := "" - if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, dst_opr)) { - size_string = word ? "word " : "byte " - } - - if flip { - src_opr, dst_opr = dst_opr, src_opr - } - - dst_str := get_operand_string(dst_opr, word, has_segment) - src_str := get_operand_string(src_opr, word, has_segment) - full_inst: string - if dst_str == "" { - interseg_string: string - if indirect_intersegment { - interseg_string = " far" - } - full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str) - } else { - // NOTE: I don't know why this is the case, but only the move has the word/byte - // keyword next to the immediate, but other instructions have it on the memory address - if opname == "mov" { - full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str) - } else { - full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str) - } - } - - processed += inst.consume_extra_bytes - - lock_string: string - if has_lock { - lock_string = "lock " - } - fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") - if has_lock { - fmt.sbprintf(&instruction_builder, " lock") - } - if _,ok := has_segment.?; ok { - fmt.sbprintf(&instruction_builder, " segment") - } - for i in 0.. 0 { - fmt.println() - } - repeating_op_count = 0 - } else { - repeating_op_count += 1 - } - copy(last_opname[:], op2[0:3]) - fmt.println(op2) - - idx += processed - strings.builder_reset(&instruction_builder) - has_lock = false - has_segment = nil - total_bytes_processed = idx - } - if print_at_end { - for i in 0.. Operand { + operand: Operand = None{} + switch opinfo { + case .None: + case .Register: + reg: u8 + switch inst.reg_info { + case .None: + panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (RegisterId)(registers[reg].code) + case .SegmentRegister: + reg: u8 + switch inst.reg_info { + case .None: + panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (SegmentRegister)(segment_registers[reg].code) + case .RegisterMemory: + mod := data[1] >> 6 + rm := data[1] & 0b111 + processed^ += 1 + op: Operand + if mod == 0 { + if rm == 0b110 { + op = (DirectAddress)(get_i16(data[2:])) + processed^ += 2 + } else { + op = MemoryAddr{ addr_id = rm , displacement = None{} } + } + } else if mod == 1 { + op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) } + processed^ += 1 + } else if mod == 2 { + op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) } + processed^ += 2 + } else if mod == 3 { + op = (RegisterId)(registers[rm].code) + } + operand = op + case .Immediate: + data_idx := processed^ + word_signed := word + if inst.has_sign_extension { + word_signed &&= data[0] & 0b0000_0010 == 0 + } + operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) + processed^ += word_signed ? 2 : 1 + case .ImmediateUnsigned: + operand = (ImmediateU8)(data[processed^]) + processed^ += 1 + case .Accumulator: + operand = (RegisterId)(registers[0].code) + case .DirectAddress: + operand = (DirectAddress)(get_i16(data[1:])) + processed^ += 2 + case .Jump: + processed^ += 1 + // NOTE: In order to mimic the label offset, you have to take the value you got and add two + operand = (Jump)((i8)(data[1]) + 2) + case .VariablePort: + operand = VariablePort{} + case .ShiftRotate: + v_flag := data[0] & 0b10 != 0 + operand = (ShiftRotate)(v_flag) + case .Repeat: + operand = get_repeat_op(data[1]) + processed^ += 1 + case .DirectWithinSegment: + value := (int)(get_i16(data[1:])) + total_bytes_processed + 3 + operand = (DirectWithinSegment)(value) + processed^ += 2 + case .Intersegment: + operand = Intersegment { + ip = get_i16(data[1:]), + cs = get_i16(data[3:]), + } + processed^ += 4 + } + return operand +} + +decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read: int) { + idx := 0 + has_segment: Maybe(Register) + has_lock: bool + for idx < bytes_to_read { + instruction: Instruction + processed := 1 + curr_byte := data[idx] + + inst, ok := try_find_instruction(curr_byte) + if !ok { + instruction = { + opname = .UNKNOWN, + bytes_read = 1, + raw_data = data[idx:idx+1], + } + append(inst_list, instruction) + idx += 1 + continue + } + + // Here we check if the instruction affects the next instruction + if inst.opname == .LOCK { + has_lock = true + idx += 1 + continue + } else if inst.opname == .SEGMENT { + reg := (curr_byte & 0b11000) >> 3 + has_segment = segment_registers[reg] + idx += 1 + continue + } else if inst.opname == .AAM { + processed += 1 + } + + debug_str: string + // NOTE: This is a special case because it matches the bit pattern of .TBD5, + // but the instruction itself is different + if inst.opname == .TBD5 && (data[idx] & 0xFF) == 0b11110110 && (data[idx+1] & 0b00111000) == 0 { + inst = test_inst + } + + src_opr: Operand + dst_opr: Operand + + word: bool + flip: bool + indirect_intersegment: bool + op: Operand + + if inst.has_flip { + flip = curr_byte & 2 != 0 + } + + #partial switch inst.word_size { + case .LastBit: word = curr_byte & 1 == 1 + case .FourthBit: word = curr_byte & 0b0000_1000 != 0 + case .Always16: word = true + } + + dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment) + src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment) + + if flip { + src_opr, dst_opr = dst_opr, src_opr + } + + processed += inst.consume_extra_bytes + + instruction.opname = inst.opname + instruction.src = src_opr + instruction.dst = dst_opr + instruction.is_word = word + instruction.bytes_read = processed + instruction.raw_data = data[idx:idx+processed] + instruction.debug_msg = debug_str + instruction.info = inst + instruction.has_lock = has_lock + instruction.has_segment = has_segment + + // fmt.println(parsed_inst) + append(inst_list, instruction) + + idx += processed + + has_lock = false + has_segment = nil + total_bytes_processed = idx + } +} diff --git a/instructions.odin b/instructions.odin index f556adf..2dac6cf 100644 --- a/instructions.odin +++ b/instructions.odin @@ -1,6 +1,7 @@ -package decoder_8086 +package sim_8086 -OpName :: enum { +Op :: enum { + UNKNOWN, TBD1, TBD2, TBD3, @@ -100,8 +101,12 @@ OpName :: enum { // isn't that great; we return a string with the instruction name, but ideally we have all // the instructions accounted for, because eventually we will need the final parsed // instruction to contain all the information related to it +// test_inst := InstructionInfo { +// opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110, +// dst = .RegisterMemory, src = .Immediate, word_size = .LastBit +// } test_inst := InstructionInfo { - opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110, + opname = .TEST, desc = "", mask = 0b11111110, encoding = 0b11110110, dst = .RegisterMemory, src = .Immediate, word_size = .LastBit } diff --git a/lib.h b/lib.h deleted file mode 100644 index 40909ca..0000000 --- a/lib.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include -#include - -typedef uint8_t u8; -typedef uint16_t u16; -typedef int16_t i16; -typedef int32_t i32; -typedef uint32_t u32; -typedef uint64_t u64; -typedef float f32; -typedef double f64; -typedef uintptr_t uptr; -typedef char sbyte; -typedef ptrdiff_t size; -typedef size_t usize; - -enum OptionTag {NONE, SOME}; - -#define OPTION(type) \ - typedef struct \ - { \ - enum OptionTag tag; \ - union { \ - char none; \ - type value; \ - }; \ - } type##_opt; \ - \ - static inline type##_opt none_##type(void) \ - { \ - return (type##_opt){ .tag = NONE, .none = 0 }; \ - } \ - \ - static inline type##_opt some_##type(type value) \ - { \ - return (type##_opt){ .tag = SOME, .value = value }; \ - } \ - \ - static inline int get_some_##type(type##_opt opt, type* out_value) \ - { \ - if (opt.tag != SOME) return 0; \ - *out_value = opt.value; \ - return 1; \ - } - -#define IF_LET_SOME(type, var, opt) \ - type var; \ - if (get_some_##type(opt, &var)) - - -OPTION(u8) -OPTION(u16) diff --git a/printing.odin b/printing.odin new file mode 100644 index 0000000..667cd2b --- /dev/null +++ b/printing.odin @@ -0,0 +1,242 @@ +package sim_8086 + +import "core:fmt" +import "core:math" +import "core:strings" + +instruction_builder := strings.builder_make() + +calculate_effective_address :: proc(r_m: u8) -> string { + val: string + switch r_m { + case 0b000: + val = "bx + si" + case 0b001: + val = "bx + di" + case 0b010: + val = "bp + si" + case 0b011: + val = "bp + di" + case 0b100: + val = "si" + case 0b101: + val = "di" + case 0b110: + val = "bp" + case 0b111: + val = "bx" + } + return val +} + +get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string { + disp: string + switch value in memoryAddr.displacement { + case None: + disp = "" + case Disp8: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + case Disp16: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + } + seg_string: string + if segreg, ok := has_segment.?; ok { + seg_string = fmt.aprintf("%s:", segreg.fullname) + } + text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp) + return text +} + +get_displacement_string :: proc(displacement: Displacement) -> string { + disp := "" + #partial switch value in displacement { + case i8: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + case i16: + if value != 0 { + disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(value)) + } + } + return disp +} + +get_opname :: proc(inst: Instruction) -> (string, bool) { + name: string + interseg: bool + if inst.opname == .TBD2 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "inc" + case 0b001: name = "dec" + case 0b010: name = "call" + // TODO: We really have to fix this because we shouldn't be figuring out if this + // is an intersegment here + case 0b011: name = "call"; interseg = true + case 0b100: name = "jmp" + case 0b101: name = "jmp"; interseg = true + case 0b110: name = "push" + } + } else if inst.opname == .TBD5 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "test" + case 0b001: name = "dec" + case 0b010: name = "not" + case 0b011: name = "neg" + case 0b100: name = "mul" + case 0b101: name = "imul" + case 0b110: name = "div" + case 0b111: name = "idiv" + } + } else if inst.opname == .TBD6 { + switch inst.raw_data[1] & 0b00111000 >> 3 { + case 0b000: name = "rol" + case 0b001: name = "ror" + case 0b010: name = "rcl" + case 0b011: name = "rcr" + case 0b100: name = "shl" + case 0b101: name = "shr" + case 0b111: name = "sar" + } + } else { + bits: u8 + if inst.opname == .TBD1 || inst.opname == .TBD3 { + bits = inst.raw_data[0] & 0b00111000 >> 3 + } else { + bits = inst.raw_data[1] & 0b00111000 >> 3 + } + switch bits { + case 0b000: name = "add" + case 0b001: name = "or" + case 0b010: name = "adc" + case 0b011: name = "sbb" + case 0b100: name = "and" + case 0b101: name = "sub" + case 0b110: name = "xor" + case 0b111: name = "cmp" + } + } + return name, interseg +} + +get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string { + string_val: string + switch val in operand { + case None: + string_val = "" + case RegisterId: + string_val = is_word ? registers[val].fullname : registers[val].bytename + case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment: + string_val = fmt.aprintf("%d", val) + case MemoryAddr: + string_val = get_memory_string(val, has_segment) + case DirectAddress: + seg_string: string + if segreg, ok := has_segment.?; ok { + seg_string = fmt.aprintf("%s:", segreg.fullname) + } + string_val = fmt.aprintf("%s[%d]", seg_string, val) + case SegmentRegister: + string_val = segment_registers[val].fullname + case Jump: + string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val) + case VariablePort: + string_val = variable_port.fullname + case ShiftRotate: + string_val = val ? registers[1].bytename : "1" + case Repeat: + string_val = (string)(val) + case Intersegment: + string_val = fmt.aprintf("%d:%d", val.cs, val.ip) + } + return string_val +} + +get_unknown_inst_string :: proc(inst: Instruction) -> string { + print_at_end := false + txt := "unknown instruction" + line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", inst.raw_data[0]) + return line +} + +get_instruction_string :: proc(inst_info: InstructionInfo, instruction: Instruction) { + inst := instruction + src_is_imm := operand_is(Immediate8, inst.src) || operand_is(Immediate16, inst.src) + dst_is_bracketed := operand_is(MemoryAddr, inst.dst) || operand_is(DirectAddress, inst.dst) + src_is_bracketed := operand_is(MemoryAddr, inst.src) || operand_is(DirectAddress, inst.src) + shiftrot := operand_is(ShiftRotate, inst.src) + size_string := "" + if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, inst.dst)) { + size_string = inst.is_word ? "word " : "byte " + } + + if inst.has_lock { + fmt.sbprint(&instruction_builder, "lock ") + } + + dst_str := get_operand_string(inst.dst, inst.is_word, inst.has_segment) + src_str := get_operand_string(inst.src, inst.is_word, inst.has_segment) + opname: string + is_interseg: bool + if inst_info.check_second_encoding { + opname,is_interseg = get_opname(inst) + } else { + // TODO: Do the RTTI thing here with reflection + opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) + } + + if dst_str == "" { + interseg_string: string + if is_interseg { + interseg_string = " far" + } + fmt.sbprintf(&instruction_builder, "%s%s %s%s", opname, interseg_string, size_string, src_str) + } else { + // note: i don't know why this is the case, but only the move has the word/byte + // keyword next to the immediate, but other instructions have it on the memory address + if opname == "mov" { + fmt.sbprintf(&instruction_builder, "%s %s, %s%s", opname, dst_str, size_string, src_str) + } else { + fmt.sbprintf(&instruction_builder, "%s %s%s, %s", opname, size_string, dst_str, src_str) + } + } + + // Prepare padding and comment to add debug info + b_len := strings.builder_len(instruction_builder) + fmt.sbprintf(&instruction_builder, "%*[0]s", RIGHT_ALIGN_AMOUNT - b_len, ";;") + + if inst.has_lock { + fmt.sbprintf(&instruction_builder, " lock") + } + if _,ok := inst.has_segment.?; ok { + fmt.sbprintf(&instruction_builder, " segment") + } + for i in 0.. 0 { + fmt.println() + } + repeating_op_count = 0 + } else { + repeating_op_count += 1 + } + copy(last_opname[:], op2[0:3]) + fmt.println(op2) + } +} diff --git a/sim8086.odin b/sim8086.odin new file mode 100644 index 0000000..49bddfe --- /dev/null +++ b/sim8086.odin @@ -0,0 +1,98 @@ +package sim_8086 + +import "core:os" +import "core:fmt" +import "core:math" +import "core:strings" + +RIGHT_ALIGN_AMOUNT := 35 + +registers := [8]Register { + {fullname = "ax", bytename = "al", code = 0b000}, + {fullname = "cx", bytename = "cl", code = 0b001}, + {fullname = "dx", bytename = "dl", code = 0b010}, + {fullname = "bx", bytename = "bl", code = 0b011}, + {fullname = "sp", bytename = "ah", code = 0b100}, + {fullname = "bp", bytename = "ch", code = 0b101}, + {fullname = "si", bytename = "dh", code = 0b110}, + {fullname = "di", bytename = "bh", code = 0b111}, +} + +segment_registers := [4]Register { + {fullname = "es", code = 0b000}, + {fullname = "cs", code = 0b001}, + {fullname = "ss", code = 0b010}, + {fullname = "ds", code = 0b011}, +} + +variable_port := registers[2] + +total_bytes_processed := 0 + +get_i16 :: proc(data: []u8) -> i16 { + return (i16)(data[1]) << 8 | (i16)(data[0]) +} + +operand_is :: proc($T: typeid, opr: Operand) -> bool { + _, ok := opr.(T) + return ok +} + +get_repeat_op :: proc(data: u8) -> Repeat { + bits := (data & 0b1110) >> 1 + w := (data & 0b1) == 1 ? "w" : "b" + rep: string + switch bits { + case 0b010: rep = "movs" + case 0b011: rep = "cmps" + case 0b101: rep = "stos" + case 0b110: rep = "lods" + case 0b111: rep = "scas" + } + return Repeat(fmt.aprintf("%s%s", rep, w)) +} + +try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { + for inst in instructions { + if inst.encoding == (b & inst.mask) { + return inst, true + } + } + return InstructionInfo{}, false +} + +main :: proc() { + f,err := os.open(os.args[1]) + if err != os.ERROR_NONE { + fmt.eprintln("ERROR:", err) + os.exit(1) + } + defer os.close(f) + + data := make([]u8, 1024) + bytes_read, err2 := os.read(f, data) + if err2 != nil { + // ... + os.exit(1) + } + + if false { + os.exit(0) + } + + // asdf :u16 = 0b00000110_11011101 + // asdf2 :i16 = (i16)(asdf) + // fmt.printfln("%d", asdf2) + print_at_end := false + line_count := 0 + instruction_list := make([dynamic]string, 0, 512) + instructions_list := make([dynamic]Instruction, 0, 512) + + decode_data(&instructions_list, data[:], bytes_read) + // for inst in instructions_list { + // fmt.println(inst) + // } + if true { + print_instructions_stdout(instructions_list[:]) + } +} diff --git a/test_asm.sh b/test_asm.sh index 042bf78..25ba9df 100755 --- a/test_asm.sh +++ b/test_asm.sh @@ -6,9 +6,14 @@ NC='\033[0m' make asm_files > /dev/null +if [ ! "$(command -v ./sim8086)" ]; then + echo -e "\nError: 'sim8086' executable not found" + exit 1 +fi + for ASM_BIN in asm_files/*.bin; do - ./decoder8086 "$ASM_BIN" > output.asm 2> /dev/null + ./sim8086 "$ASM_BIN" > output.asm 2> /dev/null nasm output.asm -o output.bin 2> /dev/null ASM_FILE=${ASM_BIN%.*}.asm if [ ! -e output.bin ]; then diff --git a/types.odin b/types.odin new file mode 100644 index 0000000..b1f2f81 --- /dev/null +++ b/types.odin @@ -0,0 +1,123 @@ +package sim_8086 + +Register :: struct { + fullname: string, + bytename: string, + value: struct #raw_union { + using _: struct { + low, high: byte, + }, + full: u16, + }, + code: u8, +} + +WordSize :: enum { + None, + LastBit, + FourthBit, + Always8, + Always16, +} + +None :: struct {} + +Disp8 :: i8 +Disp16 :: i16 +Displacement :: union { + None, + Disp8, + Disp16 +} + +RegisterId :: distinct u8 +Immediate8 :: distinct i8 +Immediate16 :: distinct i16 +ImmediateU8 :: distinct u8 +MemoryAddr :: struct { + addr_id: u8, + displacement: Displacement, +} +DirectAddress :: distinct i16 +SegmentRegister :: distinct i8 +Jump :: distinct i8 +VariablePort :: struct {} +ShiftRotate :: distinct bool +Repeat :: string +Intersegment :: struct { + ip: i16, + cs: i16, +} +DirectWithinSegment :: distinct u16 + +Operand :: union { + None, + RegisterId, + Immediate8, + ImmediateU8, + Immediate16, + MemoryAddr, + DirectAddress, + SegmentRegister, + Jump, + VariablePort, + ShiftRotate, + Repeat, + DirectWithinSegment, + Intersegment, +} + +OperandInfo :: enum { + None, + Register, + SegmentRegister, + RegisterMemory, + Immediate, + ImmediateUnsigned, + Accumulator, + DirectAddress, + Jump, + VariablePort, + ShiftRotate, + Repeat, + DirectWithinSegment, + Intersegment, +} + +RegisterEncodingBits :: enum { + None, + FirstByteLast3, + SecondByteMiddle3, + SecondByteLast3, + FirstByteMiddle3, +} + +InstructionInfo :: struct { + mask: u8, + encoding: u8, + opname: Op, + desc: string, + src: OperandInfo, + dst: OperandInfo, + word_size: WordSize, + reg_info: RegisterEncodingBits, + has_flip: bool, + has_sign_extension: bool, + check_second_encoding: bool, + consume_extra_bytes: int, + shift_rotate_flag: bool, +} + +Instruction :: struct { + opname: Op, + src: Operand, + dst: Operand, + info: InstructionInfo, + is_word: bool, + indirect_intersegment: bool, + has_segment: Maybe(Register), + has_lock: bool, + bytes_read: int, + raw_data: []u8, + debug_msg: string, +}