Break code up into files, delete C code, organize things better

This commit is contained in:
Joseph Ferano 2025-03-15 17:28:51 +07:00
parent 6909f75b35
commit 32ddb518e9
12 changed files with 672 additions and 1322 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@
/8086_family_Users_Manual_1_.pdf
/decoder8086
/performance-aware
/sim8086

431
decode.c
View File

@ -1,431 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <string.h>
#include <assert.h>
#include "lib.h"
#include "decode.h"
/// Get Effective Address Calculation Registers
///
/// Maps the 3-bit R/M field to the text of the register combination used in
/// the effective address calculation (e.g. 0b000 -> "bx + si").
/// Any value outside 0..7 is treated as a fatal error: a message is written to
/// stderr and the process exits with status 1.
/// The returned pointer refers to a string literal and must not be modified.
char* get_eac_register(char rm)
{
    switch (rm)
    {
        case 0b000: return "bx + si";
        case 0b001: return "bx + di";
        case 0b010: return "bp + si";
        case 0b011: return "bp + di";
        case 0b100: return "si";
        case 0b101: return "di";
        case 0b110: return "bp";
        case 0b111: return "bx";
        default:
            // Not a failed library call, so errno is meaningless here; perror()
            // would append an unrelated errno description to the message.
            fprintf(stderr, "Invalid R/M value: %d\n", rm);
            exit(1);
    }
}
/// Select the printable name of a register: the 16-bit name when `wide` is 1,
/// the 8-bit name for any other value.
static char* reg_name(Register reg, char wide)
{
    if (wide == 1)
        return reg.fullname;
    return reg.bytename;
}
/// Extract the bit field selected by `mask` from `value` and move it down so
/// that the lowest set bit of the mask lands on bit 0.
/// At most 8 shifts are performed, so a zero mask terminates and yields 0.
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;
    for (int shifts = 0; shifts < 8 && (mask & 0x1) == 0; shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }
    return field;
}
/// Work-in-progress table-driven parser.
///
/// At present it only walks the `inst_funcs` table and prints the address of
/// every parser function up to the first NULL in each group of four, then
/// returns a zeroed ParsedInstruction (so `bytes_read` is 0).
/// The field-extraction code that used to follow the unconditional return was
/// unreachable and duplicated parse_instruction(); it has been removed.
ParsedInstruction parse_instruction_ids(u8* buf)
{
    // The input bytes are not consumed yet.
    (void)buf;
    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_funcs) / sizeof(inst_funcs[0]); i++)
        for (int j = 0; j < 6; j++)
            for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++)
            {
                printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]);
                // Apply inst_func_t
            }
    return (ParsedInstruction){0};
}
/// Extract the raw encoding fields of the instruction that starts at `buf`.
///
/// The first byte is compared against every entry of `inst_formats`; the first
/// entry whose encoding matches decides which fields (d, s, w, mod, reg, rm,
/// displacement, data) are read.
/// Returns a ParsedInstruction whose `bytes_read` is the number of bytes the
/// instruction occupies, or 0 when no format matches.
ParsedInstruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    InstFormat fmt = {0};
    bool matched_inst = false;
    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
    {
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = inst_formats[i];
            matched_inst = true;
            break;
        }
    }
    if (!matched_inst)
        return (ParsedInstruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();
    u8 is_data_addr = false;
    u16 bytes_read = 1;
    bytes_read += fmt.has_operands ? 1 : 0;

    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));

    if (fmt.parse_reg.tag == P_REG_MASK)
    {
        // The register field lives in the second byte when the format has one
        u8 reg = fmt.has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask));
    }
    else if (fmt.parse_reg.tag == P_REG_FIXED)
    {
        reg_opt = some_u8(fmt.parse_reg.fixed);
        is_data_addr = true;
    }

    // Number of displacement bytes that follow the mod/rm byte:
    // mod == 1 and mod == 2 carry one and two bytes, mod == 3 carries none,
    // and mod == 0 carries none except for rm == 110 (direct address), which
    // is followed by a 16-bit address.
    u8 disp_len = 0;
    if (fmt.has_displacement)
    {
        if (mod_opt.value == MODE_MEM_NO_DIS)
            disp_len = rm_opt.value == 0x6 ? 2 : 0;
        else
            disp_len = mod_opt.value % 3;
    }
    if (disp_len > 0)
    {
        u16 disp = (disp_len == 2)
            ? (i16)buf[3] << 8 | buf[2]
            : (sbyte)buf[2];
        displacement_opt = some_u16(disp);
        bytes_read += disp_len;
    }

    if (fmt.has_data)
    {
        // The immediate follows the opcode byte, the optional mod/rm byte and
        // every displacement byte. The direct-address case used to be left out
        // of this offset, so the immediate was read from the address bytes.
        u8 idx = 1 + (fmt.has_operands ? 1 : 0) + disp_len;
        u16 data;
        if (fmt.has_s && s_opt.value == 1)
        {
            // Sign-extended 8-bit immediate
            data = (sbyte)buf[idx];
            bytes_read += 1;
        }
        else
        {
            data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx];
            bytes_read += w_opt.value == 0 ? 1 : 2;
        }
        data_opt = some_u16(data);
    }

    return (ParsedInstruction) {
        .id = fmt.id,
        .name = fmt.name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .is_data_addr = is_data_addr,
        .bytes_read = bytes_read,
    };
}
/// Turn the raw fields of a ParsedInstruction into an Instruction with two
/// typed operands.
///
/// When a `mod` field is present the first operand is the register (or the
/// immediate when there is no register field) and the second operand is the
/// register/memory/direct-address selected by mod and rm. Without `mod`, the
/// first operand comes from the data field and the second from the register
/// field. The two operands are swapped when the `d` bit is present and set.
/// The first operand is returned as `src_opr`, the second as `dst_opr`.
Instruction decode_instruction(ParsedInstruction inst)
{
    // Both operands must be zero-initialised: `Operand opr1 , opr2 = {0};`
    // only initialised opr2, leaving opr1 indeterminate on paths that never
    // assign it.
    Operand opr1 = {0};
    Operand opr2 = {0};
    i16 payload = 0;
    IF_LET_SOME(u8, mod, inst.mod)
    {
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr1.tag = OPR_T_REGISTER;
            opr1.reg.value = registers[(size_t)reg];
            opr1.reg.wide = inst.w.value;
        }
        else
        {
            opr1.tag = OPR_T_IMMEDIATE;
            opr1.imm.value = inst.data.value;
            // TODO: This is dumb, we shouldn't do it this way
            if (inst.s.value == 1) opr1.imm.direct = 0;
            else opr1.imm.direct = inst.w.value + 1;
        }
        if (mod == MODE_RGSTR_MODE)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)inst.rm.value];
            opr2.reg.wide = inst.w.value;
        }
        else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6)
        {
            opr2.tag = OPR_T_DIRADDR;
            opr2.dir_addr.value = inst.displacement.value;
        }
        else
        {
            opr2.tag = OPR_T_MEMORY;
            opr2.mem.eac_name = get_eac_register(inst.rm.value);
            opr2.mem.mode = mod;
            opr2.mem.displacement = (i16)inst.displacement.value;
        }
    }
    else
    {
        IF_LET_SOME(u16, data, inst.data)
        {
            if (inst.is_data_addr)
            {
                opr1.tag = OPR_T_DIRADDR;
                opr1.dir_addr.value = (i16)data;
            }
            else
            {
                opr1.tag = OPR_T_IMMEDIATE;
                opr1.imm.value = (i16)data;
                opr1.imm.direct = 0;
            }
        }
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)reg];
            opr2.reg.wide = inst.w.value;
        }
    }
    if (inst.d.tag == SOME && inst.d.value == 1)
    {
        Operand temp = opr1;
        opr1 = opr2;
        opr2 = temp;
    }
    return (Instruction) {
        .id = inst.id,
        .data = payload,
        .operation = inst.name,
        .src_opr = opr1,
        .dst_opr = opr2,
    };
}
/// Write the assembly text of `oprnd` into `str_buf`.
///
/// Registers print as their name, memory operands as "[regs +/- disp]",
/// immediates as the value with an optional "byte "/"word " prefix, and direct
/// addresses as "[value]". The caller supplies a buffer large enough for the
/// text; nothing is written for an unrecognised tag.
void get_operand_string(char* str_buf, Operand oprnd)
{
    switch (oprnd.tag)
    {
        case OPR_T_REGISTER:
            strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide));
            break;

        case OPR_T_MEMORY:
        {
            char offset_text[16] = {'\0'};
            i16 offset = oprnd.mem.displacement;
            // A zero displacement is omitted entirely
            if (offset != 0)
                sprintf(offset_text, " %s %d", (offset > 0 ? "+" : "-"), abs(offset));
            sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, offset_text);
            break;
        }

        case OPR_T_IMMEDIATE:
        {
            // direct: 0 = no size prefix, 1 = byte, anything else = word
            char *prefix = "";
            if (oprnd.imm.direct == 1)
                prefix = "byte ";
            else if (oprnd.imm.direct > 0)
                prefix = "word ";
            sprintf(str_buf, "%s%d", prefix, oprnd.imm.value);
            break;
        }

        case OPR_T_DIRADDR:
            sprintf(str_buf, "[%d]", oprnd.dir_addr.value);
            break;

        default:
            break;
    }
}
/// Format a decoded instruction as "<operation> <destination>, <source>" into
/// `str_buf`, which the caller must size for the full line.
void get_instr_string(char* str_buf, Instruction inst)
{
    char source_text[32];
    char dest_text[32];

    get_operand_string(source_text, inst.src_opr);
    get_operand_string(dest_text, inst.dst_opr);

    // Destination is printed before the source
    sprintf(str_buf, "%s %s, %s", inst.operation, dest_text, source_text);
}
// NOTE(review): this declares 65536 char pointers rather than 65536 bytes, and
// nothing in the visible code references it — confirm the intended element type.
char *memory[65536];
// Keep this global for debugging purposes
// Running number of the instruction being printed; starts at 1 and is
// incremented by main() after each decoded instruction.
u16 inst_count = 1;
/// Entry point: reads the assembled binary named by argv[1] into memory and
/// prints one line of decoded assembly per instruction, followed by a comment
/// holding the running instruction number and the matched format id.
/// Bytes that match no known format are skipped one at a time and reported on
/// stderr.
int main(int argc, char** argv)
{
    if (argc < 2)
    {
        printf("Usage: Please provide assembled instructions as input\n");
        exit(0);
    }
    struct stat st;
    if (stat(argv[1], &st) == -1)
    {
        // perror() appends ": <errno text>\n" itself, so no newline in the prefix
        perror("Unable to get file size");
        return EXIT_FAILURE;
    }
    unsigned char* buffer = malloc(st.st_size);
    if (!buffer)
    {
        perror("Unable to allocate memory for binary file");
        return EXIT_FAILURE;
    }
    // The input is raw machine code: open in binary mode so no platform
    // performs newline translation or stops at a text EOF marker.
    FILE *f = fopen(argv[1], "rb");
    if (!f)
    {
        perror("fopen");
        free(buffer);
        return EXIT_FAILURE;
    }
    size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
    if (bytes_read != (size_t)st.st_size)
    {
        fprintf(stderr, "Read of binary file to memory incomplete.\n");
        free(buffer);
        fclose(f);
        return EXIT_FAILURE;
    }
    fclose(f);

    char *inst_str_buf = malloc(sizeof(char) * 256);
    if (!inst_str_buf)
    {
        perror("Unable to allocate memory for instruction text");
        free(buffer);
        return EXIT_FAILURE;
    }

    printf("; Decoded 8086 Assembly Instructions\n\n");
    printf("bits 16\n\n");
    u32 bytes_processed = 0;
    while (bytes_processed < bytes_read)
    {
        // The experimental parser is run only for its debug output
        ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed);
        (void)_;
        ParsedInstruction parsed = parse_instruction(buffer + bytes_processed);
        if (parsed.bytes_read > 0)
        {
            Instruction inst = decode_instruction(parsed);
            get_instr_string(inst_str_buf, inst);
            bytes_processed += parsed.bytes_read;
            // printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
            printf("%s", inst_str_buf);
            // Pad the instruction text to 32 columns before the trailing comment
            int len = strlen(inst_str_buf);
            for (int i = 0; i < 32 - len; i++)
                printf(" ");
            printf("; %d, %d", inst_count++, inst.id);
        }
        else
        {
            // Unknown opcode: skip a single byte and try again
            bytes_processed += 1;
            fprintf(stderr, "___Unrecognized Instruction___");
        }
        printf("\n");
    }
    free(inst_str_buf);
    free(buffer);
    return 0;
}

223
decode.h
View File

@ -1,223 +0,0 @@
#include "lib.h"
// Values of the 2-bit `mod` field of the second instruction byte.
// The names distinguish memory mode without displacement, memory mode with an
// 8-bit or 16-bit displacement, and register mode.
enum Mode
{
MODE_MEM_NO_DIS = 0b00,
MODE_MEM_DIS_08 = 0b01,
MODE_MEM_DIS_16 = 0b10,
MODE_RGSTR_MODE = 0b11,
};
// Description of one register.
typedef struct Register
{
// Name used for the 16-bit (wide) form, e.g. "ax"
char* fullname;
// Name used for the 8-bit form that shares the same code, e.g. "al"
char* bytename;
// Register contents, viewable either as two bytes or as one 16-bit word
union {
struct {
char low;
char high;
};
u16 full;
} value;
// 3-bit register code as it appears in the instruction encoding
u8 code;
} Register;
// Register table indexed by the 3-bit register code (each entry's `.code`
// equals its index). `fullname` is the name selected when the wide flag is 1,
// `bytename` the name selected otherwise (see reg_name in decode.c).
// NOTE(review): this is a non-static object definition inside a header with no
// include guard; it will clash if the header is included by more than one
// translation unit — confirm it is only included from decode.c.
Register registers[8] = {
{.code = 0b000, .fullname = "ax", .bytename = "al"},
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
{.code = 0b110, .fullname = "si", .bytename = "dh"},
{.code = 0b111, .fullname = "di", .bytename = "bh"},
};
// Discriminator for the Operand union below.
// OPR_T_MEMORY is the first enumerator, so a zero-initialised Operand carries that tag.
enum OperandType {OPR_T_MEMORY, OPR_T_REGISTER, OPR_T_IMMEDIATE, OPR_T_DIRADDR};
// A decoded operand; `tag` selects which union member is valid.
typedef struct Operand
{
enum OperandType tag;
union {
// OPR_T_MEMORY: effective-address register text, signed displacement and mod value
struct Mem {
char *eac_name;
i16 displacement;
u8 mode;
} mem;
// OPR_T_REGISTER: the register and whether its 16-bit name is used
struct Reg {
Register value;
bool wide;
} reg;
// OPR_T_IMMEDIATE: the value; `direct` selects the printed size prefix
// (0 = none, 1 = "byte ", other = "word ")
struct Imm {
i16 value;
u8 direct;
} imm;
// OPR_T_DIRADDR: a direct memory address, printed as "[value]"
struct DirAddr {
i16 value;
} dir_addr;
};
} Operand;
// How an instruction format obtains its register field:
// P_REG_NONE  - the format has no register field
// P_REG_MASK  - the field is extracted from an instruction byte using `mask`
// P_REG_FIXED - the register code is the constant `fixed`
enum ParseRegType { P_REG_NONE, P_REG_MASK, P_REG_FIXED };
typedef struct ParseReg
{
enum ParseRegType tag;
// Only the member matching `tag` is meaningful
union {
u8 none;
u8 mask;
u8 fixed;
};
} ParseReg;
// Static description of one instruction encoding, used by parse_instruction()
// to recognise the first byte and to decide which fields to extract.
typedef struct InstFormat
{
// Identifier copied into the parsed instruction (printed by main)
u16 id;
// Mnemonic, e.g. "mov"
char *name;
// Where the register field comes from, if any
ParseReg parse_reg;
// A first byte matches when (byte & ~mask_inst) == inst_enc
u8 inst_enc;
u8 mask_inst;
// Mask selecting the w bit inside the first byte
u8 mask_w;
// True when a second byte is counted as part of the instruction
bool has_operands;
bool has_displacement;
bool has_data;
bool has_d;
bool has_w;
bool has_mod;
bool has_rm;
bool has_s;
bool has_SR;
} InstFormat;
// Raw fields extracted from one instruction by parse_instruction().
// Each optional field is "none" when the matched format does not contain it.
typedef struct ParsedInstruction
{
u16 id;
char *name;
u16_opt data;
u16_opt displacement;
u8_opt w;
u8_opt d;
u8_opt s;
u8_opt mod;
u8_opt reg;
u8_opt rm;
u8_opt SR;
// Non-zero when the register came from a P_REG_FIXED format; decode_instruction()
// then treats the data field as a direct address
u8 is_data_addr;
// Length of the instruction in bytes; 0 means no format matched
u8 bytes_read;
} ParsedInstruction;
// A fully decoded instruction ready for printing.
typedef struct Instruction
{
Operand src_opr;
Operand dst_opr;
// decode_instruction() always stores 0 here
i16 data;
// Mnemonic text taken from the matched format
char *operation;
// Id of the matched format
u16 id;
} Instruction;
// Identifiers for the individual bit fields and bytes that make up an
// instruction encoding; used as the entries of the `inst_ids` layout table.
// The `typedef` makes `InstructionIdentifier` name the type. Without it, the
// identifier after the closing brace defined an unused global variable in
// every file that includes this header.
typedef enum InstructionIdentifier
{
    _PREFIX_2,
    _PREFIX_3,
    _PREFIX_6,
    _NAME,
    _D,
    _W,
    _S,
    _MOD,
    _REGISTER,
    _ACC,
    _RM,
    _DISP_LO,
    _DISP_HI,
    _DATA_W0,
    _DATA_W1,
} InstructionIdentifier;
// State threaded through the experimental inst_parser_f functions.
// NOTE(review): neither field is read or written by any visible code; the
// intended meaning of `progress` and `something` is not established here.
typedef struct ParsedInst
{
u64 progress;
u8 something;
} ParsedInst;
// Signature of one step of the experimental table-driven parser: takes the
// current parse state and returns the updated state.
typedef ParsedInst (*inst_parser_f)(ParsedInst);
// Placeholder steps, one per encoding field. Each currently returns its
// argument unchanged; they exist so that `inst_funcs` can be populated.
// NOTE(review): these are non-static function definitions in a header, and
// their short names (name, reg, w, d, s, mod, inst, rm) are shadowed by local
// variables in decode.c — confirm before including this header elsewhere.
ParsedInst pre_2(ParsedInst pi) {return pi;}
ParsedInst pre_3(ParsedInst pi) {return pi;}
ParsedInst pre_6(ParsedInst pi) {return pi;}
ParsedInst name(ParsedInst pi) {return pi;}
ParsedInst reg(ParsedInst pi) {return pi;}
ParsedInst w(ParsedInst pi) {return pi;}
ParsedInst d(ParsedInst pi) {return pi;}
ParsedInst s(ParsedInst pi) {return pi;}
ParsedInst mod(ParsedInst pi) {return pi;}
ParsedInst inst(ParsedInst pi) {return pi;}
ParsedInst rm(ParsedInst pi) {return pi;}
ParsedInst disp_lo(ParsedInst pi) {return pi;}
ParsedInst disp_hi(ParsedInst pi) {return pi;}
ParsedInst data_w0(ParsedInst pi) {return pi;}
ParsedInst data_w1(ParsedInst pi) {return pi;}
// Experimental encoding layouts: one row per instruction form, up to 6 groups
// per row, up to 4 field parsers per group. Unlisted slots are
// zero-initialised (NULL), which parse_instruction_ids() uses as the end
// marker of a group.
// NOTE(review): each group appears to describe one instruction byte — confirm.
inst_parser_f inst_funcs[][6][4] =
{
{{pre_2, name, d, w}, {mod, reg, rm}, {disp_lo}, {disp_hi}},
{{pre_6, s, w}, {mod, name, rm}, {disp_lo}, {disp_hi}, {data_w0}, {data_w1}},
{{pre_6, w}, {data_w0}, {data_w1}},
};
// The same three layouts expressed with InstructionIdentifier values instead
// of function pointers. Unlisted slots are zero-initialised, which is the same
// value as _PREFIX_2, so empty slots cannot be told apart from that enumerator.
enum InstructionIdentifier inst_ids[][6][4] =
{
{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}},
{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}},
{{_PREFIX_6, _W}, {_DATA_W0}, {_DATA_W1}},
};
// Wrapper around one 6x4 layout of InstructionIdentifier values.
// Not used by any visible code; the table that would have used it is
// commented out below.
typedef struct InstructionParser
{
enum InstructionIdentifier inst_ids[6][4];
} InstructionParser;
// InstructionParser inst_formats[] =
// {
// {{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}},
// {{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}},
// };
// Table of recognised instruction encodings. parse_instruction() scans it in
// order and uses the first entry for which
// (first_byte & ~mask_inst) == inst_enc, so earlier entries take priority.
InstFormat inst_formats[] =
{
////////
// MOV
////////
// Register/memory to/from register
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1,
.has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
.has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} },
// Immediate to register/memory
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
// Immediate to register
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
.parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true},
// Memory to accumulator | Accumulator to memory using the `d` bit
// even though the manual doesn't specify it
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true,
.has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
// Register/memory to segment register and inverse using the `d` bit
// NOTE(review): has_operands is not set here although has_mod/has_rm are, so
// parse_instruction() reads the second byte without counting it in bytes_read;
// has_SR is not consulted by the parsing code in decode.c — confirm intent.
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
.has_displacement=true, .has_mod=true, .has_rm=true},
////////
// ADD
////////
// Reg/memory with register or either
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true,
.mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true,
.has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}},
// Immediate to register/memory
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
.has_s=true, .has_operands=true, .has_displacement=true,
.has_data=true, .has_mod=true, .has_rm=true},
// Immediate with the fixed register code 0 (ax/al in `registers`)
// NOTE(review): P_REG_FIXED makes parse_instruction() set is_data_addr, so
// decode_instruction() presents the immediate as a direct address — confirm.
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1,
.has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
};

View File

@ -1,611 +0,0 @@
package decoder_8086
import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"
// Description of one register.
Register :: struct {
// Name of the 16-bit form, e.g. "ax"
fullname: string,
// Name of the 8-bit form sharing the same code, e.g. "al"
// (left empty for the entries of `segment_registers`)
bytename: string,
// Register contents, viewable either as low/high bytes or as one 16-bit word
value: struct #raw_union {
using _: struct {
low, high: byte,
},
full: u16,
},
// Register code as it appears in the instruction encoding
code: u8,
}
// How an instruction decides between byte and word operands.
// main() handles three of the values: LastBit tests bit 0 of the first byte,
// FourthBit tests bit 3 (mask 0b0000_1000), and Always16 forces word size.
// None and Always8 leave the word flag false.
WordSize :: enum {
None,
LastBit,
FourthBit,
Always8,
Always16,
}
// Empty marker type used as the "nothing here" variant of the unions below.
None :: struct {}
// Signed 8-bit and 16-bit displacement values (plain aliases, not distinct types).
Disp8 :: i8
Disp16 :: i16
// Displacement attached to a memory operand: absent, 8-bit or 16-bit.
Displacement :: union {
None,
Disp8,
Disp16
}
// Payload types for the variants of the Operand union.
// Index into `registers`
RegisterId :: distinct u8
// Immediate values: signed 8-bit, signed 16-bit and unsigned 8-bit
Immediate8 :: distinct i8
Immediate16 :: distinct i16
ImmediateU8 :: distinct u8
// Memory operand: `addr_id` is the r/m value passed to
// calculate_effective_address, plus an optional displacement
MemoryAddr :: struct {
addr_id: u8,
displacement: Displacement,
}
// 16-bit direct memory address, printed as "[value]"
DirectAddress :: distinct i16
// Index into `segment_registers`
SegmentRegister :: distinct i8
// Relative jump offset, printed as "$+n" / "$-n"
Jump :: distinct i8
// Marker for the variable-port operand (printed using `variable_port`)
VariablePort :: struct {}
// Shift/rotate count selector: true prints registers[1].bytename, false prints "1"
ShiftRotate :: distinct bool
// Mnemonic text of the string operation that follows a repeat prefix
Repeat :: string
// Far target made of an instruction pointer and a code segment value
Intersegment :: struct {
ip: i16,
cs: i16,
}
// Target address computed by parse_operand for the DirectWithinSegment operand kind
DirectWithinSegment :: distinct u16
// A decoded operand. Exactly one variant is held; `None` means the
// instruction has no operand in that position.
Operand :: union {
None,
RegisterId,
Immediate8,
ImmediateU8,
Immediate16,
MemoryAddr,
DirectAddress,
SegmentRegister,
Jump,
VariablePort,
ShiftRotate,
Repeat,
DirectWithinSegment,
Intersegment,
}
// Kind of operand an instruction expects in its source or destination
// position; parse_operand() switches on this to decide how to read the bytes.
OperandInfo :: enum {
None,
Register,
SegmentRegister,
RegisterMemory,
Immediate,
ImmediateUnsigned,
Accumulator,
DirectAddress,
Jump,
VariablePort,
ShiftRotate,
Repeat,
DirectWithinSegment,
Intersegment,
}
// Where the 3-bit register field is located, as read by parse_operand():
// FirstByteLast3    - bits 0..2 of the first byte
// FirstByteMiddle3  - bits 3..5 of the first byte
// SecondByteMiddle3 - bits 3..5 of the second byte
// SecondByteLast3   - bits 0..2 of the second byte
RegisterEncodingBits :: enum {
None,
FirstByteLast3,
SecondByteMiddle3,
SecondByteLast3,
FirstByteMiddle3,
}
// Static description of one instruction encoding.
InstructionInfo :: struct {
// A first byte matches when (byte & mask) == encoding
mask: u8,
encoding: u8,
opname: OpName,
desc: string,
// Operand kinds handed to parse_operand()
src: OperandInfo,
dst: OperandInfo,
word_size: WordSize,
reg_info: RegisterEncodingBits,
// When set, bit 1 of the first byte swaps source and destination
has_flip: bool,
// When set, bit 1 of the first byte can force an 8-bit immediate even for word operations
has_sign_extension: bool,
// When set, the mnemonic is resolved by get_opname() from further encoding bits
check_second_encoding: bool,
// Extra bytes added to the instruction length after the operands are parsed
consume_extra_bytes: int,
// NOTE(review): not read by any visible code
shift_rotate_flag: bool,
}
// Column budget used when padding an instruction before its ";;" comment
RIGHT_ALIGN_AMOUNT := 35
// General-purpose registers indexed by their 3-bit code (each `code` equals the index)
registers := [8]Register {
{fullname = "ax", bytename = "al", code = 0b000},
{fullname = "cx", bytename = "cl", code = 0b001},
{fullname = "dx", bytename = "dl", code = 0b010},
{fullname = "bx", bytename = "bl", code = 0b011},
{fullname = "sp", bytename = "ah", code = 0b100},
{fullname = "bp", bytename = "ch", code = 0b101},
{fullname = "si", bytename = "dh", code = 0b110},
{fullname = "di", bytename = "bh", code = 0b111},
}
// Segment registers indexed by their 2-bit code; only four entries exist
segment_registers := [4]Register {
{fullname = "es", code = 0b000},
{fullname = "cs", code = 0b001},
{fullname = "ss", code = 0b010},
{fullname = "ds", code = 0b011},
}
// Register printed for a VariablePort operand (a copy of the dx entry)
variable_port := registers[2]
// Byte offset of the instruction currently being decoded; updated by main()
// after each instruction and read by parse_operand() for DirectWithinSegment
total_bytes_processed := 0
// Scratch builder reused for the text of every printed instruction
instruction_builder := strings.builder_make()
// Assemble a signed 16-bit value from the first two bytes of `data`,
// low byte first (little-endian).
get_i16 :: proc(data: []u8) -> i16 {
	high_byte := (i16)(data[1])
	low_byte := (i16)(data[0])
	return (high_byte << 8) | low_byte
}
// Report whether `opr` currently holds the variant type `T`.
operand_is :: proc($T: typeid, opr: Operand) -> bool {
	if _, holds_variant := opr.(T); holds_variant {
		return true
	}
	return false
}
// Text of the register combination selected by the 3-bit r/m field.
// Values above 0b111 yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	// Indexed by the r/m value
	combos := [8]string{
		"bx + si",
		"bx + di",
		"bp + si",
		"bp + di",
		"si",
		"di",
		"bp",
		"bx",
	}
	if r_m > 0b111 {
		return ""
	}
	return combos[r_m]
}
// Format a memory operand as "[regs]", "[regs + n]" or "[regs - n]",
// prefixed with "seg:" when a segment override register is supplied.
// A zero displacement is omitted.
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
	disp: string
	switch value in memoryAddr.displacement {
	case None:
		disp = ""
	case Disp8:
		if value != 0 {
			// Widen before taking the magnitude: abs of the most negative i8
			// (-128) wraps back to -128 and would print as " - -128".
			disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(int(value)))
		}
	case Disp16:
		if value != 0 {
			// Same widening for -32768
			disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(int(value)))
		}
	}
	seg_string: string
	if segreg, ok := has_segment.?; ok {
		seg_string = fmt.aprintf("%s:", segreg.fullname)
	}
	text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp)
	return text
}
// Read the displacement that follows a mod/rm byte.
// `data[0]` is the mod/rm byte; its top two bits (mod) select the size:
// 1 -> one signed byte, 2 -> a 16-bit value, anything else -> no displacement.
// Returns the displacement and the number of bytes it occupies.
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	result: Displacement = None{}
	byte_count: int
	mode := data[0] >> 6
	if mode == 1 {
		result = (i8)(data[1])
		byte_count = 1
	} else if mode == 2 {
		result = get_i16(data[1:])
		byte_count = 2
	}
	return result, byte_count
}
// Format a displacement as " + n" or " - n".
// Returns the empty string when there is no displacement or it is zero.
get_displacement_string :: proc(displacement: Displacement) -> string {
	disp := ""
	#partial switch value in displacement {
	case i8:
		if value != 0 {
			// Widen before taking the magnitude: abs of the most negative i8
			// (-128) wraps back to -128 and would print as " - -128".
			disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(int(value)))
		}
	case i16:
		if value != 0 {
			// Same widening for -32768
			disp = fmt.aprintf(" %s %d", value > 0 ? "+" : "-", math.abs(int(value)))
		}
	}
	return disp
}
// Build the mnemonic of the string operation encoded in `data`:
// bits 1..3 select the operation and bit 0 selects the "w" or "b" suffix.
// Unrecognised selector values produce just the suffix.
get_repeat_op :: proc(data: u8) -> Repeat {
	selector := (data >> 1) & 0b111
	suffix := "b"
	if (data & 0b1) == 1 {
		suffix = "w"
	}
	mnemonic: string
	switch selector {
	case 0b010: mnemonic = "movs"
	case 0b011: mnemonic = "cmps"
	case 0b101: mnemonic = "stos"
	case 0b110: mnemonic = "lods"
	case 0b111: mnemonic = "scas"
	}
	return Repeat(fmt.aprintf("%s%s", mnemonic, suffix))
}
// Look up the first entry of `instructions` whose masked encoding matches
// the byte `b`. The boolean result is false when nothing matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for candidate in instructions {
		if (b & candidate.mask) != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return InstructionInfo{}, false
}
// Resolve the mnemonic of an instruction whose opcode byte is shared by
// several operations. The 3-bit selector is taken from bits 3..5 of the
// first byte for .TBD1/.TBD3 and from the second byte for everything else.
// The boolean result is true for the intersegment forms of call and jmp.
// Selector values without an entry yield an empty name.
get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) {
	mnemonic: string
	is_far: bool
	#partial switch opname {
	case .TBD2:
		selector := (data[1] & 0b00111000) >> 3
		switch selector {
		case 0b000: mnemonic = "inc"
		case 0b001: mnemonic = "dec"
		case 0b010: mnemonic = "call"
		case 0b011: mnemonic = "call"; is_far = true
		case 0b100: mnemonic = "jmp"
		case 0b101: mnemonic = "jmp"; is_far = true
		case 0b110: mnemonic = "push"
		}
	case .TBD5:
		selector := (data[1] & 0b00111000) >> 3
		switch selector {
		case 0b000: mnemonic = "test"
		case 0b001: mnemonic = "dec"
		case 0b010: mnemonic = "not"
		case 0b011: mnemonic = "neg"
		case 0b100: mnemonic = "mul"
		case 0b101: mnemonic = "imul"
		case 0b110: mnemonic = "div"
		case 0b111: mnemonic = "idiv"
		}
	case .TBD6:
		selector := (data[1] & 0b00111000) >> 3
		switch selector {
		case 0b000: mnemonic = "rol"
		case 0b001: mnemonic = "ror"
		case 0b010: mnemonic = "rcl"
		case 0b011: mnemonic = "rcr"
		case 0b100: mnemonic = "shl"
		case 0b101: mnemonic = "shr"
		case 0b111: mnemonic = "sar"
		}
	case:
		// Arithmetic group: the selector sits in the opcode byte itself for
		// .TBD1 and .TBD3, otherwise in the second byte
		selector: u8
		if opname == .TBD1 || opname == .TBD3 {
			selector = (data[0] & 0b00111000) >> 3
		} else {
			selector = (data[1] & 0b00111000) >> 3
		}
		switch selector {
		case 0b000: mnemonic = "add"
		case 0b001: mnemonic = "or"
		case 0b010: mnemonic = "adc"
		case 0b011: mnemonic = "sbb"
		case 0b100: mnemonic = "and"
		case 0b101: mnemonic = "sub"
		case 0b110: mnemonic = "xor"
		case 0b111: mnemonic = "cmp"
		}
	}
	return mnemonic, is_far
}
// Decode one operand of the instruction that starts at `data[0]`.
//
// `opinfo` selects the operand kind and `inst` supplies the encoding details.
// `processed` points at the number of instruction bytes consumed so far and is
// advanced by however many bytes this operand occupies; immediates are read
// at the offset `processed` holds on entry, so the destination operand must
// be parsed before the source. `word` selects 16-bit immediates.
// Returns `None{}` when the operand kind is .None.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg: u8
		// Read the RegisterEncodingBits
		switch inst.reg_info {
		case .None:
			// panic("Register is required but the encoded location is not provided")
		case .FirstByteLast3:
			reg = data[0] & 0b111
		case .FirstByteMiddle3:
			reg = (data[0] >> 3) & 0b111
		case .SecondByteMiddle3:
			reg = (data[1] >> 3) & 0b111
		case .SecondByteLast3:
			reg = data[1] & 0b111
		}
		operand = (RegisterId)(registers[reg].code)
	case .SegmentRegister:
		reg: u8
		switch inst.reg_info {
		case .None:
			// panic("Register is required but the encoded location is not provided")
		case .FirstByteLast3:
			reg = data[0] & 0b111
		case .FirstByteMiddle3:
			reg = (data[0] >> 3) & 0b111
		case .SecondByteMiddle3:
			reg = (data[1] >> 3) & 0b111
		case .SecondByteLast3:
			reg = data[1] & 0b111
		}
		// Only four segment registers exist, so keep the low two bits; a stray
		// third bit in malformed input would otherwise index past the table.
		reg &= 0b11
		operand = (SegmentRegister)(segment_registers[reg].code)
	case .RegisterMemory:
		mod := data[1] >> 6
		rm := data[1] & 0b111
		// The mod/rm byte itself
		processed^ += 1
		op: Operand
		if mod == 0 {
			if rm == 0b110 {
				// mod 00 with r/m 110 is a 16-bit direct address
				op = (DirectAddress)(get_i16(data[2:]))
				processed^ += 2
			} else {
				op = MemoryAddr{ addr_id = rm , displacement = None{} }
			}
		} else if mod == 1 {
			op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
			processed^ += 1
		} else if mod == 2 {
			op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
			processed^ += 2
		} else if mod == 3 {
			op = (RegisterId)(registers[rm].code)
		}
		operand = op
	case .Immediate:
		data_idx := processed^
		word_signed := word
		if inst.has_sign_extension {
			// With the s bit (bit 1) set, a word operation carries an 8-bit immediate
			word_signed &&= data[0] & 0b0000_0010 == 0
		}
		operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
		processed^ += word_signed ? 2 : 1
	case .ImmediateUnsigned:
		operand = (ImmediateU8)(data[processed^])
		processed^ += 1
	case .Accumulator:
		operand = (RegisterId)(registers[0].code)
	case .DirectAddress:
		operand = (DirectAddress)(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		operand = (Jump)((i8)(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		v_flag := data[0] & 0b10 != 0
		operand = (ShiftRotate)(v_flag)
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	case .DirectWithinSegment:
		// Offset is relative to the end of this 3-byte instruction
		value := (int)(get_i16(data[1:])) + total_bytes_processed + 3
		operand = (DirectWithinSegment)(value)
		processed^ += 2
	case .Intersegment:
		operand = Intersegment {
			ip = get_i16(data[1:]),
			cs = get_i16(data[3:]),
		}
		processed^ += 4
	}
	return operand
}
// Render an operand as assembly text.
// `is_word` selects the 16-bit or 8-bit register name; `has_segment`, when
// set, prefixes memory and direct-address operands with "seg:".
// An operand holding `None` (or nothing at all) renders as the empty string.
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
	switch val in operand {
	case None:
		return ""
	case RegisterId:
		if is_word {
			return registers[val].fullname
		}
		return registers[val].bytename
	case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
		return fmt.aprintf("%d", val)
	case MemoryAddr:
		return get_memory_string(val, has_segment)
	case DirectAddress:
		override_prefix: string
		if segreg, ok := has_segment.?; ok {
			override_prefix = fmt.aprintf("%s:", segreg.fullname)
		}
		return fmt.aprintf("%s[%d]", override_prefix, val)
	case SegmentRegister:
		return segment_registers[val].fullname
	case Jump:
		// Non-negative offsets get an explicit plus sign
		return fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val)
	case VariablePort:
		return variable_port.fullname
	case ShiftRotate:
		if val {
			return registers[1].bytename
		}
		return "1"
	case Repeat:
		return (string)(val)
	case Intersegment:
		return fmt.aprintf("%d:%d", val.cs, val.ip)
	}
	return ""
}
// Entry point: reads the assembled binary named by the first command-line
// argument and prints one line of decoded assembly per instruction, each
// followed by a ";;" comment showing any prefixes and the raw instruction bytes.
// A blank line is printed after a run of repeated mnemonics ends.
// NOTE(review): only the first 1024 bytes of the file are read.
main :: proc() {
	// os.args[0] is the program itself; the input file must be supplied explicitly
	if len(os.args) < 2 {
		fmt.eprintln("Usage: please provide assembled instructions as input")
		os.exit(1)
	}
	f,err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)
	data := make([]u8, 1024)
	bytes_read, err2 := os.read(f, data)
	if err2 != nil {
		fmt.eprintln("ERROR:", err2)
		os.exit(1)
	}
	print_at_end := false
	idx := 0
	line_count := 0
	has_lock: bool
	has_segment: Maybe(Register)
	last_opname: [3]byte
	repeating_op_count := 0
	instruction_list := make([dynamic]string, 512)
	fmt.println("bits 16\n")
	for idx < bytes_read {
		processed := 1
		curr_byte := data[idx]
		inst, ok := try_find_instruction(curr_byte)
		if !ok {
			txt := "unknown instruction"
			if print_at_end {
				line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
				instruction_list[line_count] = line
				line_count += 1
			} else {
				fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
			}
			idx += 1
			continue
		}
		// Here we check if the instruction affects the next instruction
		if inst.opname == .LOCK {
			has_lock = true
			idx += 1
			continue
		} else if inst.opname == .SEGMENT {
			reg := (curr_byte & 0b11000) >> 3
			has_segment = segment_registers[reg]
			idx += 1
			continue
		} else if inst.opname == .AAM {
			processed += 1
		}
		src_opr: Operand
		dst_opr: Operand
		word: bool
		flip: bool
		indirect_intersegment: bool
		if inst.has_flip {
			flip = curr_byte & 2 != 0
		}
		#partial switch inst.word_size {
		case .LastBit: word = curr_byte & 1 == 1
		case .FourthBit: word = curr_byte & 0b0000_1000 != 0
		case .Always16: word = true
		}
		opname: string
		if inst.check_second_encoding {
			opname,indirect_intersegment = get_opname(inst.opname, data[idx:])
			// NOTE: This is a special case because it matches the bit pattern of .TBD5,
			// but the instruction itself is different
			if opname == "test" && (curr_byte & 0xFF) == 0b11110110 {
				inst = test_inst
			}
		} else {
			opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
		}
		// The destination must be parsed first: an immediate source is read at
		// the offset left behind by the destination operand
		dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
		src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)
		src_is_imm := operand_is(Immediate8, src_opr) || operand_is(Immediate16, src_opr)
		dst_is_bracketed := operand_is(MemoryAddr, dst_opr) || operand_is(DirectAddress, dst_opr)
		src_is_bracketed := operand_is(MemoryAddr, src_opr) || operand_is(DirectAddress, src_opr)
		shiftrot := inst.src == .ShiftRotate
		// An explicit size keyword is needed whenever no register operand
		// fixes the operand size
		size_string := ""
		if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, dst_opr)) {
			size_string = word ? "word " : "byte "
		}
		if flip {
			src_opr, dst_opr = dst_opr, src_opr
		}
		dst_str := get_operand_string(dst_opr, word, has_segment)
		src_str := get_operand_string(src_opr, word, has_segment)
		full_inst: string
		if dst_str == "" {
			interseg_string: string
			if indirect_intersegment {
				interseg_string = " far"
			}
			full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str)
		} else {
			// NOTE: I don't know why this is the case, but only the move has the word/byte
			// keyword next to the immediate, but other instructions have it on the memory address
			if opname == "mov" {
				full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str)
			} else {
				full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str)
			}
		}
		processed += inst.consume_extra_bytes
		lock_string: string
		if has_lock {
			lock_string = "lock "
		}
		fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
		if has_lock {
			fmt.sbprintf(&instruction_builder, " lock")
		}
		if _,ok := has_segment.?; ok {
			fmt.sbprintf(&instruction_builder, " segment")
		}
		// Append the raw bytes of the instruction in binary
		for i in 0..<processed {
			fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
		}
		op2 := strings.to_string(instruction_builder)
		// Compare the first three characters with the previous line to detect
		// the end of a run of the same mnemonic
		if op2[0:3] != string(last_opname[:]) {
			if repeating_op_count > 0 {
				fmt.println()
			}
			repeating_op_count = 0
		} else {
			repeating_op_count += 1
		}
		copy(last_opname[:], op2[0:3])
		fmt.println(op2)
		idx += processed
		strings.builder_reset(&instruction_builder)
		// Prefixes apply to a single instruction only
		has_lock = false
		has_segment = nil
		total_bytes_processed = idx
	}
	if print_at_end {
		for i in 0..<line_count {
			opname := instruction_list[i]
			if !strings.has_prefix(opname, string(last_opname[:])) {
				fmt.println()
			}
			copy(last_opname[:], opname[0:3])
			fmt.println(instruction_list[i])
		}
	}
}

194
decoding.odin Normal file
View File

@ -0,0 +1,194 @@
package sim_8086
import "core:fmt"
import "core:math"
import "core:strings"
// Decode one operand of the instruction that starts at `data[0]`.
//
// `opinfo` selects the operand kind and `inst` supplies the encoding details.
// `processed` points at the number of instruction bytes consumed so far and is
// advanced by however many bytes this operand occupies; immediates are read
// at the offset `processed` holds on entry, so the destination operand must
// be parsed before the source. `word` selects 16-bit immediates.
// Returns `None{}` when the operand kind is .None. Panics when a register
// operand is requested but the instruction does not say where it is encoded.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg: u8
		switch inst.reg_info {
		case .None:
			panic("Register is required but the encoded location is not provided")
		case .FirstByteLast3:
			reg = data[0] & 0b111
		case .FirstByteMiddle3:
			reg = (data[0] >> 3) & 0b111
		case .SecondByteMiddle3:
			reg = (data[1] >> 3) & 0b111
		case .SecondByteLast3:
			reg = data[1] & 0b111
		}
		operand = (RegisterId)(registers[reg].code)
	case .SegmentRegister:
		reg: u8
		switch inst.reg_info {
		case .None:
			panic("Register is required but the encoded location is not provided")
		case .FirstByteLast3:
			reg = data[0] & 0b111
		case .FirstByteMiddle3:
			reg = (data[0] >> 3) & 0b111
		case .SecondByteMiddle3:
			reg = (data[1] >> 3) & 0b111
		case .SecondByteLast3:
			reg = data[1] & 0b111
		}
		// Only four segment registers exist, so keep the low two bits; a stray
		// third bit in malformed input would otherwise index past the table.
		reg &= 0b11
		operand = (SegmentRegister)(segment_registers[reg].code)
	case .RegisterMemory:
		mod := data[1] >> 6
		rm := data[1] & 0b111
		// The mod/rm byte itself
		processed^ += 1
		op: Operand
		if mod == 0 {
			if rm == 0b110 {
				// mod 00 with r/m 110 is a 16-bit direct address
				op = (DirectAddress)(get_i16(data[2:]))
				processed^ += 2
			} else {
				op = MemoryAddr{ addr_id = rm , displacement = None{} }
			}
		} else if mod == 1 {
			op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
			processed^ += 1
		} else if mod == 2 {
			op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
			processed^ += 2
		} else if mod == 3 {
			op = (RegisterId)(registers[rm].code)
		}
		operand = op
	case .Immediate:
		data_idx := processed^
		word_signed := word
		if inst.has_sign_extension {
			// With the s bit (bit 1) set, a word operation carries an 8-bit immediate
			word_signed &&= data[0] & 0b0000_0010 == 0
		}
		operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
		processed^ += word_signed ? 2 : 1
	case .ImmediateUnsigned:
		operand = (ImmediateU8)(data[processed^])
		processed^ += 1
	case .Accumulator:
		operand = (RegisterId)(registers[0].code)
	case .DirectAddress:
		operand = (DirectAddress)(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		operand = (Jump)((i8)(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		v_flag := data[0] & 0b10 != 0
		operand = (ShiftRotate)(v_flag)
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	case .DirectWithinSegment:
		// Offset is relative to the end of this 3-byte instruction
		value := (int)(get_i16(data[1:])) + total_bytes_processed + 3
		operand = (DirectWithinSegment)(value)
		processed^ += 2
	case .Intersegment:
		operand = Intersegment {
			ip = get_i16(data[1:]),
			cs = get_i16(data[3:]),
		}
		processed^ += 4
	}
	return operand
}
// Decodes the first `bytes_to_read` bytes of `data` into instructions and
// appends them to `inst_list`.
//
// Bytes that match no table entry are emitted as single-byte .UNKNOWN
// instructions. LOCK and SEGMENT prefix bytes are not emitted on their own;
// they are remembered and attached to the next decoded instruction.
// The global `total_bytes_processed` is kept equal to the offset of the
// instruction currently being decoded, because parse_operand uses it to turn
// relative near call/jmp displacements into absolute targets.
decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read: int) {
	idx := 0
	has_segment: Maybe(Register)
	has_lock: bool
	for idx < bytes_to_read {
		// Sync on every iteration, including the ones that `continue` below,
		// so targets after a prefix or an unknown byte are not skewed.
		total_bytes_processed = idx

		processed := 1
		curr_byte := data[idx]
		inst, ok := try_find_instruction(curr_byte)
		if !ok {
			append(inst_list, Instruction{
				opname = .UNKNOWN,
				bytes_read = 1,
				raw_data = data[idx:idx+1],
			})
			idx += 1
			continue
		}

		// Here we check if the instruction affects the next instruction
		if inst.opname == .LOCK {
			has_lock = true
			idx += 1
			continue
		} else if inst.opname == .SEGMENT {
			reg := (curr_byte & 0b11000) >> 3
			has_segment = segment_registers[reg]
			idx += 1
			continue
		} else if inst.opname == .AAM {
			// AAM carries a second opcode byte
			processed += 1
		}

		// NOTE: This is a special case because it matches the bit pattern of .TBD5,
		// but the instruction itself is different. Compare through test_inst's own
		// mask so that both the byte and the word opcode are recognised, and only
		// peek at the next byte when it exists.
		if inst.opname == .TBD5 &&
		   idx + 1 < len(data) &&
		   (curr_byte & test_inst.mask) == test_inst.encoding &&
		   (data[idx+1] & 0b00111000) == 0 {
			inst = test_inst
		}

		flip: bool
		if inst.has_flip {
			flip = (curr_byte & 2) != 0
		}

		word: bool
		#partial switch inst.word_size {
		case .LastBit: word = (curr_byte & 1) == 1
		case .FourthBit: word = (curr_byte & 0b0000_1000) != 0
		case .Always16: word = true
		}

		// Destination first: a source immediate is located by the byte count
		// the destination has already consumed.
		dst_opr := parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
		src_opr := parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)
		if flip {
			src_opr, dst_opr = dst_opr, src_opr
		}
		processed += inst.consume_extra_bytes

		instruction: Instruction
		instruction.opname = inst.opname
		instruction.src = src_opr
		instruction.dst = dst_opr
		instruction.is_word = word
		instruction.bytes_read = processed
		instruction.raw_data = data[idx:idx+processed]
		instruction.info = inst
		instruction.has_lock = has_lock
		instruction.has_segment = has_segment
		append(inst_list, instruction)

		idx += processed
		// Prefixes apply to exactly one instruction.
		has_lock = false
		has_segment = nil
	}
	total_bytes_processed = idx
}

View File

@ -1,6 +1,7 @@
package decoder_8086
package sim_8086
OpName :: enum {
Op :: enum {
UNKNOWN,
TBD1,
TBD2,
TBD3,
@ -100,8 +101,12 @@ OpName :: enum {
// isn't that great; we return a string with the instruction name, but ideally we have all
// the instructions accounted for, because eventually we will need the final parsed
// instruction to contain all the information related to it
// test_inst := InstructionInfo {
// opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110,
// dst = .RegisterMemory, src = .Immediate, word_size = .LastBit
// }
test_inst := InstructionInfo {
opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110,
opname = .TEST, desc = "", mask = 0b11111110, encoding = 0b11110110,
dst = .RegisterMemory, src = .Immediate, word_size = .LastBit
}

53
lib.h
View File

@ -1,53 +0,0 @@
#pragma once
#include <stdint.h>
#include <stddef.h>
/* Short aliases for the fixed-width and pointer-sized types from
 * <stdint.h> / <stddef.h>. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef int16_t i16;
typedef int32_t i32;
typedef uint32_t u32;
typedef uint64_t u64;
typedef float f32;
typedef double f64;
typedef uintptr_t uptr;
/* NOTE(review): plain char, whose signedness is implementation-defined. */
typedef char sbyte;
/* Signed size (ptrdiff_t) and unsigned size (size_t). */
typedef ptrdiff_t size;
typedef size_t usize;
/* Discriminator for the option types generated by OPTION(). */
enum OptionTag {NONE, SOME};
/*
 * OPTION(type) generates a tagged optional for `type`:
 *   - struct `type##_opt` holding a tag plus an anonymous union of a dummy
 *     `none` byte and the payload `value`;
 *   - none_<type>()         builds an empty option (tag NONE);
 *   - some_<type>(value)    builds an option holding `value` (tag SOME);
 *   - get_some_<type>(opt, out_value) returns 1 and writes the payload to
 *     *out_value when the tag is SOME, otherwise returns 0 and leaves
 *     *out_value untouched.
 * `type` must be a single identifier because it is token-pasted into names.
 */
#define OPTION(type) \
typedef struct \
{ \
enum OptionTag tag; \
union { \
char none; \
type value; \
}; \
} type##_opt; \
\
static inline type##_opt none_##type(void) \
{ \
return (type##_opt){ .tag = NONE, .none = 0 }; \
} \
\
static inline type##_opt some_##type(type value) \
{ \
return (type##_opt){ .tag = SOME, .value = value }; \
} \
\
static inline int get_some_##type(type##_opt opt, type* out_value) \
{ \
if (opt.tag != SOME) return 0; \
*out_value = opt.value; \
return 1; \
}
/*
 * IF_LET_SOME(type, var, opt) { ... } declares `var` and runs the following
 * statement/block only when `opt` holds a value, with `var` set to it.
 * The declaration is emitted into the enclosing scope (not the if's scope),
 * so `var` stays visible - and uninitialised when the option was empty -
 * after the block, and the macro cannot be used as the body of an
 * unbraced if/else.
 */
#define IF_LET_SOME(type, var, opt) \
type var; \
if (get_some_##type(opt, &var))
/* Instantiate the option types u8_opt and u16_opt. */
OPTION(u8)
OPTION(u16)

242
printing.odin Normal file
View File

@ -0,0 +1,242 @@
package sim_8086
import "core:fmt"
import "core:math"
import "core:strings"
// Shared string builder that get_instruction_string appends one instruction
// line into; print_instructions_stdout resets it before every instruction.
instruction_builder := strings.builder_make()
// Maps a 3-bit r/m field to the register expression used inside a memory
// operand's brackets. Values above 0b111 yield an empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	formulas := [8]string{
		"bx + si",
		"bx + di",
		"bp + si",
		"bp + di",
		"si",
		"di",
		"bp",
		"bx",
	}
	if int(r_m) >= len(formulas) {
		return ""
	}
	return formulas[r_m]
}
// Renders a memory operand as assembly text, e.g. "[bx + si + 4]" or
// "es:[bp - 2]".
//
// Parameters:
//   memoryAddr  - effective-address id plus optional displacement
//   has_segment - segment override; when present its name and ':' are prefixed
//
// A missing or zero displacement is omitted. The returned string is freshly
// allocated by fmt.aprintf.
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
	// Widen to int before taking the magnitude: abs() of the most negative
	// i8/i16 is not representable in the same type and would come back negative,
	// which used to print as " - -128".
	offset: int
	switch value in memoryAddr.displacement {
	case None:
	case Disp8:
		offset = int(value)
	case Disp16:
		offset = int(value)
	}

	disp: string
	if offset != 0 {
		disp = fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
	}

	seg_string: string
	if segreg, ok := has_segment.?; ok {
		seg_string = fmt.aprintf("%s:", segreg.fullname)
	}

	return fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp)
}
// Formats a displacement as " + N" or " - N" for use inside a memory operand.
// Returns "" when there is no displacement or it is zero; otherwise the
// string is freshly allocated by fmt.aprintf.
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen to int first so that abs() of the most negative i8/i16 cannot wrap
	// back to a negative number (which printed as " - -128").
	offset: int
	#partial switch value in displacement {
	case i8:
		offset = int(value)
	case i16:
		offset = int(value)
	}
	if offset == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
}
// Resolves the mnemonic of an instruction whose opcode byte alone does not
// identify it: a 3-bit sub-opcode (bits 5..3) selects the name.
//
// For .TBD1 and .TBD3 the sub-opcode is read from the first raw byte, for
// every other opname from the second. Returns the mnemonic ("" when the
// slot has no name) and whether the instruction is the intersegment ("far")
// form, which only applies to two of the .TBD2 entries.
get_opname :: proc(inst: Instruction) -> (string, bool) {
	// Tables are indexed by the sub-opcode; "" marks a slot with no mnemonic.
	tbd2_names := [8]string{"inc", "dec", "call", "call", "jmp", "jmp", "push", ""}
	tbd5_names := [8]string{"test", "dec", "not", "neg", "mul", "imul", "div", "idiv"}
	tbd6_names := [8]string{"rol", "ror", "rcl", "rcr", "shl", "shr", "", "sar"}
	arith_names := [8]string{"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}

	selector_byte: u8
	if inst.opname == .TBD1 || inst.opname == .TBD3 {
		selector_byte = inst.raw_data[0]
	} else {
		selector_byte = inst.raw_data[1]
	}
	sub_op := (selector_byte & 0b00111000) >> 3

	if inst.opname == .TBD2 {
		// TODO: We really have to fix this because we shouldn't be figuring out if this
		// is an intersegment here
		far := sub_op == 0b011 || sub_op == 0b101
		return tbd2_names[sub_op], far
	}
	if inst.opname == .TBD5 {
		return tbd5_names[sub_op], false
	}
	if inst.opname == .TBD6 {
		return tbd6_names[sub_op], false
	}
	return arith_names[sub_op], false
}
// Produces the assembly text for one decoded operand.
//
// Parameters:
//   operand     - the decoded operand
//   is_word     - selects the 16-bit register name over the 8-bit one
//   has_segment - segment override, prefixed to memory/direct-address operands
//
// Returns "" for a None (or unset) operand.
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
	switch payload in operand {
	case None:
		return ""
	case RegisterId:
		return is_word ? registers[payload].fullname : registers[payload].bytename
	case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
		return fmt.aprintf("%d", payload)
	case MemoryAddr:
		return get_memory_string(payload, has_segment)
	case DirectAddress:
		prefix: string
		if segreg, ok := has_segment.?; ok {
			prefix = fmt.aprintf("%s:", segreg.fullname)
		}
		return fmt.aprintf("%s[%d]", prefix, payload)
	case SegmentRegister:
		return segment_registers[payload].fullname
	case Jump:
		// Offsets are written relative to the current position marker "$".
		return fmt.aprintf("$%s%d", payload >= 0 ? "+" : "", payload)
	case VariablePort:
		return variable_port.fullname
	case ShiftRotate:
		// true: count comes from registers[1]'s byte name; false: literal 1.
		return payload ? registers[1].bytename : "1"
	case Repeat:
		return (string)(payload)
	case Intersegment:
		return fmt.aprintf("%d:%d", payload.cs, payload.ip)
	}
	return ""
}
// Builds the placeholder line for a byte that matched no instruction: the
// text "unknown instruction", the ";;" marker padded out to
// RIGHT_ALIGN_AMOUNT, and the raw byte in binary.
get_unknown_inst_string :: proc(inst: Instruction) -> string {
	label := "unknown instruction"
	pad_width := RIGHT_ALIGN_AMOUNT - len(label)
	return fmt.aprintf("%s %*[1]s %8b", label, pad_width, ";;", inst.raw_data[0])
}
// Appends the assembly text for `instruction` to the global
// `instruction_builder`, followed by a ";;" debug comment that lists the
// lock/segment prefixes and every raw byte in binary. Nothing is returned;
// the caller reads the builder afterwards.
//
// inst_info is the table entry for the instruction; only its
// check_second_encoding flag is consulted here.
get_instruction_string :: proc(inst_info: InstructionInfo, instruction: Instruction) {
inst := instruction
src_is_imm := operand_is(Immediate8, inst.src) || operand_is(Immediate16, inst.src)
dst_is_bracketed := operand_is(MemoryAddr, inst.dst) || operand_is(DirectAddress, inst.dst)
src_is_bracketed := operand_is(MemoryAddr, inst.src) || operand_is(DirectAddress, inst.src)
shiftrot := operand_is(ShiftRotate, inst.src)
// An explicit "word"/"byte" keyword is only emitted when no register operand
// would otherwise reveal the operand size.
size_string := ""
if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, inst.dst)) {
size_string = inst.is_word ? "word " : "byte "
}
if inst.has_lock {
fmt.sbprint(&instruction_builder, "lock ")
}
dst_str := get_operand_string(inst.dst, inst.is_word, inst.has_segment)
src_str := get_operand_string(inst.src, inst.is_word, inst.has_segment)
opname: string
is_interseg: bool
if inst_info.check_second_encoding {
// The mnemonic depends on a sub-opcode field, so it has to be looked up.
opname,is_interseg = get_opname(inst)
} else {
// TODO: Do the RTTI thing here with reflection
opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
}
if dst_str == "" {
// Single-operand form: only the source is printed.
interseg_string: string
if is_interseg {
interseg_string = " far"
}
fmt.sbprintf(&instruction_builder, "%s%s %s%s", opname, interseg_string, size_string, src_str)
} else {
// note: i don't know why this is the case, but only the move has the word/byte
// keyword next to the immediate, but other instructions have it on the memory address
if opname == "mov" {
fmt.sbprintf(&instruction_builder, "%s %s, %s%s", opname, dst_str, size_string, src_str)
} else {
fmt.sbprintf(&instruction_builder, "%s %s%s, %s", opname, size_string, dst_str, src_str)
}
}
// Prepare padding and comment to add debug info
b_len := strings.builder_len(instruction_builder)
fmt.sbprintf(&instruction_builder, "%*[0]s", RIGHT_ALIGN_AMOUNT - b_len, ";;")
if inst.has_lock {
fmt.sbprintf(&instruction_builder, " lock")
}
if _,ok := inst.has_segment.?; ok {
fmt.sbprintf(&instruction_builder, " segment")
}
// Dump each consumed byte as zero-padded 8-digit binary.
for i in 0..<inst.bytes_read {
fmt.sbprintf(&instruction_builder, " %08b", inst.raw_data[i])
}
}
// Prints a "bits 16" header followed by one line per instruction on stdout.
// A blank line is inserted when a run of two or more lines sharing the same
// first three characters ends, which visually groups repeated mnemonics.
print_instructions_stdout :: proc(instructions: []Instruction) {
	prev_prefix: [3]byte
	run_length := 0
	fmt.println("bits 16\n")
	for inst in instructions {
		strings.builder_reset(&instruction_builder)
		get_instruction_string(inst.info, inst)
		line := strings.to_string(instruction_builder)
		prefix := line[0:3]
		if prefix == string(prev_prefix[:]) {
			run_length += 1
		} else {
			if run_length > 0 {
				fmt.println()
			}
			run_length = 0
		}
		// Remember this line's prefix for the comparison on the next line.
		copy(prev_prefix[:], prefix)
		fmt.println(line)
	}
}

98
sim8086.odin Normal file
View File

@ -0,0 +1,98 @@
package sim_8086
import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"
// Width passed to the padded ";;" marker when instruction lines are printed.
RIGHT_ALIGN_AMOUNT := 35
// General-purpose registers, indexed by their 3-bit encoding. `fullname` is
// printed for word-sized operands and `bytename` for byte-sized ones.
registers := [8]Register {
{fullname = "ax", bytename = "al", code = 0b000},
{fullname = "cx", bytename = "cl", code = 0b001},
{fullname = "dx", bytename = "dl", code = 0b010},
{fullname = "bx", bytename = "bl", code = 0b011},
{fullname = "sp", bytename = "ah", code = 0b100},
{fullname = "bp", bytename = "ch", code = 0b101},
{fullname = "si", bytename = "dh", code = 0b110},
{fullname = "di", bytename = "bh", code = 0b111},
}
// Segment registers, indexed by their 2-bit encoding; they have no byte name.
segment_registers := [4]Register {
{fullname = "es", code = 0b000},
{fullname = "cs", code = 0b001},
{fullname = "ss", code = 0b010},
{fullname = "ds", code = 0b011},
}
// Register printed for VariablePort operands (a copy of the dx entry).
variable_port := registers[2]
// Byte offset updated by decode_data after each decoded instruction;
// parse_operand reads it to compute DirectWithinSegment targets.
total_bytes_processed := 0
// Reads a little-endian signed 16-bit value from the first two bytes of
// `data` (data[0] is the low byte, data[1] the high byte).
get_i16 :: proc(data: []u8) -> i16 {
	hi := u16(data[1])
	lo := u16(data[0])
	return i16((hi << 8) | lo)
}
// Reports whether the operand union currently holds a value of type T.
operand_is :: proc($T: typeid, opr: Operand) -> bool {
	if _, matches := opr.(T); matches {
		return true
	}
	return false
}
// Names the string instruction encoded in `data`: bits 3..1 pick the
// mnemonic and bit 0 picks the "w" (word) or "b" (byte) suffix. Bit patterns
// without a mnemonic yield just the suffix. The result is freshly allocated.
get_repeat_op :: proc(data: u8) -> Repeat {
	// Indexed by bits 3..1; "" marks patterns with no mnemonic.
	mnemonics := [8]string{"", "", "movs", "cmps", "", "stos", "lods", "scas"}
	base := mnemonics[(data & 0b1110) >> 1]
	suffix := "b"
	if (data & 0b1) == 1 {
		suffix = "w"
	}
	return Repeat(fmt.aprintf("%s%s", base, suffix))
}
// Looks up the first entry of the `instructions` table whose encoding equals
// the opcode byte `b` under that entry's mask. The second result is false
// (with a zero-valued InstructionInfo) when nothing matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for candidate in instructions {
		if (b & candidate.mask) != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return InstructionInfo{}, false
}
// Entry point: reads the binary named by the first command-line argument,
// decodes it and prints the disassembly to stdout. Exits with status 1 and a
// message on stderr when the argument is missing or the file cannot be
// opened or read.
main :: proc() {
	if len(os.args) < 2 {
		fmt.eprintln("Usage: sim8086 <binary-file>")
		os.exit(1)
	}

	f, err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)

	// NOTE: only the first 1024 bytes of the file are read and decoded.
	data := make([]u8, 1024)
	bytes_read, err2 := os.read(f, data)
	if err2 != nil {
		fmt.eprintln("ERROR:", err2)
		os.exit(1)
	}

	instructions_list := make([dynamic]Instruction, 0, 512)
	decode_data(&instructions_list, data[:], bytes_read)
	print_instructions_stdout(instructions_list[:])
}

View File

@ -6,9 +6,14 @@ NC='\033[0m'
make asm_files > /dev/null
if [ ! "$(command -v ./sim8086)" ]; then
echo -e "\nError: 'sim8086' executable not found"
exit 1
fi
for ASM_BIN in asm_files/*.bin;
do
./decoder8086 "$ASM_BIN" > output.asm 2> /dev/null
./sim8086 "$ASM_BIN" > output.asm 2> /dev/null
nasm output.asm -o output.bin 2> /dev/null
ASM_FILE=${ASM_BIN%.*}.asm
if [ ! -e output.bin ]; then

123
types.odin Normal file
View File

@ -0,0 +1,123 @@
package sim_8086
// Description of one CPU register.
Register :: struct {
// Name used for 16-bit access, e.g. "ax".
fullname: string,
// Name used for 8-bit access, e.g. "al"; left empty for segment registers.
bytename: string,
// Register contents: `full` overlays the `low`/`high` byte pair.
value: struct #raw_union {
using _: struct {
low, high: byte,
},
full: u16,
},
// Bit pattern that selects this register in an instruction encoding.
code: u8,
}
// How decode_data determines whether an instruction is word-sized.
WordSize :: enum {
// No rule given; decode_data leaves the instruction byte-sized.
None,
// Bit 0 of the opcode byte is the word flag.
LastBit,
// Bit 3 (0b0000_1000) of the opcode byte is the word flag.
FourthBit,
// Not handled explicitly by decode_data, so the instruction stays byte-sized.
Always8,
// The instruction is always word-sized.
Always16,
}
// Empty marker type meaning "nothing here"; used as a variant of both
// Displacement and Operand.
None :: struct {}
// Signed 8-bit and 16-bit displacement values.
Disp8 :: i8
Disp16 :: i16
// Optional displacement attached to a memory operand.
Displacement :: union {
None,
Disp8,
Disp16
}
// Payload types for the variants of Operand. Most are `distinct` so that
// values with the same underlying integer type remain separate variants.

// Index into the `registers` table.
RegisterId :: distinct u8
// Signed 8-bit, signed 16-bit and unsigned 8-bit immediates.
Immediate8 :: distinct i8
Immediate16 :: distinct i16
ImmediateU8 :: distinct u8
// Memory operand: `addr_id` is the r/m field that selects the address
// expression; `displacement` is the optional offset added to it.
MemoryAddr :: struct {
addr_id: u8,
displacement: Displacement,
}
// 16-bit address encoded directly in the instruction.
DirectAddress :: distinct i16
// Index into the `segment_registers` table.
SegmentRegister :: distinct i8
// Short jump offset, printed relative to "$".
Jump :: distinct i8
// Marker operand; printed using the `variable_port` register.
VariablePort :: struct {}
// Shift/rotate count selector: true prints registers[1]'s byte name, false prints "1".
ShiftRotate :: distinct bool
// Name of the string instruction that follows a repeat prefix.
Repeat :: string
// Far target made of an instruction-pointer and a code-segment value.
Intersegment :: struct {
ip: i16,
cs: i16,
}
// Near call/jmp target as computed by parse_operand.
DirectWithinSegment :: distinct u16
// A decoded operand. parse_operand produces these from an OperandInfo and
// get_operand_string turns them back into assembly text.
Operand :: union {
None,
RegisterId,
Immediate8,
ImmediateU8,
Immediate16,
MemoryAddr,
DirectAddress,
SegmentRegister,
Jump,
VariablePort,
ShiftRotate,
Repeat,
DirectWithinSegment,
Intersegment,
}
// Kind of operand an instruction table entry expects in its src/dst slot;
// it tells parse_operand how to decode the bytes.
OperandInfo :: enum {
// No operand in this slot.
None,
// General register, located via the entry's reg_info.
Register,
// Segment register, located via the entry's reg_info.
SegmentRegister,
// Register or memory operand described by the mod and r/m fields of the second byte.
RegisterMemory,
// Signed immediate, 8 or 16 bits depending on word size / sign extension.
Immediate,
// Unsigned 8-bit immediate.
ImmediateUnsigned,
// Always registers[0].
Accumulator,
// 16-bit address stored right after the opcode byte.
DirectAddress,
// 8-bit relative jump offset in the second byte.
Jump,
// Marker operand with no encoded bytes.
VariablePort,
// Count selector taken from bit 1 of the opcode byte.
ShiftRotate,
// String instruction named by the byte after the prefix.
Repeat,
// 16-bit relative target stored after the opcode byte.
DirectWithinSegment,
// 16-bit ip followed by 16-bit cs stored after the opcode byte.
Intersegment,
}
// Where the 3-bit register field of an instruction is encoded.
RegisterEncodingBits :: enum {
// Not encoded; parse_operand panics if a register operand is requested.
None,
// Bits 2..0 of the first byte.
FirstByteLast3,
// Bits 5..3 of the second byte.
SecondByteMiddle3,
// Bits 2..0 of the second byte.
SecondByteLast3,
// Bits 5..3 of the first byte.
FirstByteMiddle3,
}
// Static description of one instruction encoding (one row of the decode table).
InstructionInfo :: struct {
// An opcode byte b matches this entry when (b & mask) == encoding.
mask: u8,
encoding: u8,
opname: Op,
desc: string,
// Operand kinds to decode for the source and destination slots.
src: OperandInfo,
dst: OperandInfo,
// How the word/byte size is determined.
word_size: WordSize,
// Where the register field lives, for Register/SegmentRegister operands.
reg_info: RegisterEncodingBits,
// When set, bit 1 of the opcode byte swaps source and destination.
has_flip: bool,
// When set, bit 1 of the opcode byte forces a one-byte immediate.
has_sign_extension: bool,
// When set, the printed mnemonic is resolved by get_opname from a sub-opcode.
check_second_encoding: bool,
// Extra bytes added to the instruction length after operand decoding.
consume_extra_bytes: int,
// NOTE(review): not referenced by the decoding/printing code visible here.
shift_rotate_flag: bool,
}
// One decoded instruction as produced by decode_data.
Instruction :: struct {
opname: Op,
src: Operand,
dst: Operand,
// Table entry the instruction was decoded with.
info: InstructionInfo,
// True when the instruction operates on 16-bit data.
is_word: bool,
// NOTE(review): never set by decode_data in the code visible here.
indirect_intersegment: bool,
// Segment override prefix that preceded the instruction, if any.
has_segment: Maybe(Register),
// True when a LOCK prefix preceded the instruction.
has_lock: bool,
// Bytes consumed by the instruction itself (prefix bytes are not counted).
bytes_read: int,
// Slice of the input buffer covering those bytes.
raw_data: []u8,
debug_msg: string,
}