Break code up into files, delete C code, organize things better
This commit is contained in:
parent
6909f75b35
commit
32ddb518e9
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,3 +4,4 @@
|
||||
/8086_family_Users_Manual_1_.pdf
|
||||
/decoder8086
|
||||
/performance-aware
|
||||
/sim8086
|
||||
|
431
decode.c
431
decode.c
@ -1,431 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "lib.h"
|
||||
#include "decode.h"
|
||||
|
||||
/// Get Effective Address Calculation Registers
///
/// Maps a 3-bit R/M field value (0..7) to the register expression printed
/// inside a memory operand, e.g. 0b000 -> "bx + si".
/// Any other value is reported on stderr and the process exits with status 1.
/// The returned pointer refers to a string literal and must not be modified
/// or freed.
char* get_eac_register(char rm)
{
    static char* const eac_names[8] = {
        "bx + si", "bx + di", "bp + si", "bp + di",
        "si",      "di",      "bp",      "bx",
    };

    // Widen first: `char` may be signed or unsigned depending on the platform.
    int code = rm;
    if (code < 0 || code > 7)
    {
        // perror() would append the text for an unrelated errno value here,
        // so the diagnostic is written explicitly.
        fprintf(stderr, "Invalid R/M value: %d\n", code);
        exit(1);
    }
    return eac_names[code];
}
|
||||
|
||||
/// Pick the printable name of `reg`: the 16-bit name when `wide` is exactly 1,
/// the 8-bit name for every other value.
static char* reg_name(Register reg, char wide)
{
    if (wide == 1)
    {
        return reg.fullname;
    }
    return reg.bytename;
}
|
||||
|
||||
/// Extract the bits of `value` selected by `mask` and shift them right until
/// the lowest set bit of `mask` sits at bit 0.
/// A zero mask yields 0 (the loop stops after 8 shifts).
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;
    for (int shifts = 0; shifts < 8 && (mask & 0x1) == 0; shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }
    return field;
}
|
||||
|
||||
/// Experimental table-driven parser (work in progress).
///
/// Currently it only walks `inst_funcs` and prints the address of every
/// non-NULL parser function to stdout, one per line, then returns a zeroed
/// ParsedInstruction. `buf` is not read yet.
///
/// The previous body carried a full copy of parse_instruction() after an
/// unconditional `return`; that code could never run and has been removed.
ParsedInstruction parse_instruction_ids(u8* buf)
{
    (void)buf; // not consumed until the per-field parsers are implemented

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_funcs) / sizeof(inst_funcs[0]); i++)
        for (int j = 0; j < 6; j++)
            for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++)
            {
                printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]);
                // Apply inst_func_t
            }

    return (ParsedInstruction){0};
}
|
||||
/// Extract the raw bit fields of the instruction that starts at `buf`.
///
/// The first byte is matched against every entry of `inst_formats`; the first
/// entry whose encoding matches decides which fields (d, s, w, mod, reg, rm,
/// displacement, data) are read. Fields the format does not have stay "none".
///
/// Returns a ParsedInstruction whose `bytes_read` is the encoded length, or 0
/// when no format matches. The function reads up to `buf[5]`; the caller must
/// make sure those bytes are addressable.
ParsedInstruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    InstFormat fmt = {0};
    bool matched_inst = false;
    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
    {
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = inst_formats[i];
            matched_inst = true;
            break;
        }
    }
    if (!matched_inst)
        return (ParsedInstruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();
    u8 is_data_addr = false;

    u16 bytes_read = 1;
    bytes_read += fmt.has_operands ? 1 : 0;

    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
    if (fmt.parse_reg.tag == P_REG_MASK)
    {
        u8 reg = fmt.has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask));
    }
    else if (fmt.parse_reg.tag == P_REG_FIXED)
    {
        reg_opt = some_u8(fmt.parse_reg.fixed);
        is_data_addr = true;
    }

    // Number of displacement bytes that follow the mod/rm byte.
    // mod == 1 and mod == 2 displace one and two bytes respectively, mod == 3
    // wraps to 0 since it doesn't displace. mod == 0 only displaces (two
    // bytes) in the direct-address form, rm == 0b110.
    u8 disp_size = 0;
    if (fmt.has_displacement)
    {
        u8 mod_disp = mod_opt.value % 3;
        if (mod_disp > 0)
            disp_size = mod_disp;
        else if (mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6)
            disp_size = 2;
    }

    if (fmt.has_data)
    {
        // The immediate sits after the opcode, the optional mod/rm byte and
        // the displacement. Using disp_size here (instead of mod % 3) also
        // skips the two address bytes of the direct-address form, which were
        // previously mistaken for the immediate.
        u8 idx = 1;
        if (fmt.has_operands) idx += 1;
        idx += disp_size;

        u16 data;
        if (fmt.has_s && s_opt.value == 1)
        {
            // Sign-extended 8-bit immediate.
            data = (sbyte)buf[idx];
            bytes_read += 1;
        }
        else
        {
            data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx];
            bytes_read += w_opt.value == 0 ? 1 : 2;
        }
        data_opt = some_u16(data);
    }

    if (disp_size > 0)
    {
        // One byte is sign-extended; two bytes are little-endian.
        u16 disp = disp_size == 1
            ? (sbyte)buf[2]
            : (i16)buf[3] << 8 | buf[2];
        displacement_opt = some_u16(disp);
        bytes_read += disp_size;
    }

    return (ParsedInstruction) {
        .id = fmt.id,
        .name = fmt.name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .is_data_addr = is_data_addr,
        .bytes_read = bytes_read,
    };
}
|
||||
|
||||
/// Turn the raw fields of a ParsedInstruction into an Instruction with two
/// typed operands.
///
/// With a mod field present: operand 1 is the `reg` register (or the immediate
/// when there is no reg field) and operand 2 is the r/m operand (register,
/// direct address, or effective-address memory reference).
/// Without a mod field: operand 1 is the data (direct address or immediate)
/// and operand 2 is the `reg` register, when present.
/// The two operands are swapped when the `d` bit is present and set.
/// Operand 1 is returned as `src_opr`, operand 2 as `dst_opr`.
Instruction decode_instruction(ParsedInstruction inst)
{
    // Both operands start zeroed. The old declaration `Operand opr1 , opr2 = {0};`
    // initialized only opr2, so opr1 was returned indeterminate whenever no
    // branch below assigned it.
    Operand opr1 = {0};
    Operand opr2 = {0};
    i16 payload = 0;

    IF_LET_SOME(u8, mod, inst.mod)
    {
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr1.tag = OPR_T_REGISTER;
            opr1.reg.value = registers[(size_t)reg];
            opr1.reg.wide = inst.w.value;
        }
        else
        {
            opr1.tag = OPR_T_IMMEDIATE;
            opr1.imm.value = inst.data.value;
            // TODO: This is dumb, we shouldn't do it this way
            // `direct` selects the size prefix when printing:
            // 0 = none, 1 = "byte ", 2 = "word ".
            if (inst.s.value == 1) opr1.imm.direct = 0;
            else opr1.imm.direct = inst.w.value + 1;
        }
        if (mod == MODE_RGSTR_MODE)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)inst.rm.value];
            opr2.reg.wide = inst.w.value;
        }
        else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6)
        {
            opr2.tag = OPR_T_DIRADDR;
            opr2.dir_addr.value = inst.displacement.value;
        }
        else
        {
            opr2.tag = OPR_T_MEMORY;
            opr2.mem.eac_name = get_eac_register(inst.rm.value);
            opr2.mem.mode = mod;
            opr2.mem.displacement = (i16)inst.displacement.value;
        }
    }
    else
    {
        IF_LET_SOME(u16, data, inst.data)
        {
            if (inst.is_data_addr)
            {
                opr1.tag = OPR_T_DIRADDR;
                opr1.dir_addr.value = (i16)data;
            }
            else
            {
                opr1.tag = OPR_T_IMMEDIATE;
                opr1.imm.value = (i16)data;
                opr1.imm.direct = 0;
            }
        }
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)reg];
            opr2.reg.wide = inst.w.value;
        }
    }
    if (inst.d.tag == SOME && inst.d.value == 1)
    {
        Operand temp = opr1;
        opr1 = opr2;
        opr2 = temp;
    }
    return (Instruction) {
        .id = inst.id,
        .data = payload,
        .operation = inst.name,
        .src_opr = opr1,
        .dst_opr = opr2,
    };
}
|
||||
|
||||
/// Write the assembly text of `oprnd` into `str_buf`.
///
/// Register -> its name; memory -> "[base + N]" / "[base - N]" (no offset when
/// the displacement is 0); immediate -> the value, prefixed by "byte " or
/// "word " when `imm.direct` is non-zero; direct address -> "[N]".
/// Nothing is written for any other tag. `str_buf` must be large enough for
/// the result; no bounds checking is performed.
void get_operand_string(char* str_buf, Operand oprnd)
{
    switch (oprnd.tag)
    {
    case OPR_T_REGISTER:
        strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide));
        break;

    case OPR_T_MEMORY:
    {
        char offset_text[16] = {'\0'};
        i16 offset = oprnd.mem.displacement;
        if (offset != 0)
        {
            char *sign = "-";
            if (offset > 0) sign = "+";
            sprintf(offset_text, " %s %d", sign, abs(offset));
        }
        sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, offset_text);
        break;
    }

    case OPR_T_IMMEDIATE:
    {
        char *width_prefix = "";
        if (oprnd.imm.direct == 1) width_prefix = "byte ";
        else if (oprnd.imm.direct > 1) width_prefix = "word ";
        sprintf(str_buf, "%s%d", width_prefix, oprnd.imm.value);
        break;
    }

    case OPR_T_DIRADDR:
        sprintf(str_buf, "[%d]", oprnd.dir_addr.value);
        break;

    default:
        break;
    }
}
|
||||
|
||||
/// Format `inst` as "<operation> <dst>, <src>" into `str_buf`.
/// Each operand is rendered into a 32-byte scratch buffer first; `str_buf`
/// must be large enough for the combined text.
void get_instr_string(char* str_buf, Instruction inst)
{
    char source_text[32];
    char dest_text[32];

    get_operand_string(source_text, inst.src_opr);
    get_operand_string(dest_text, inst.dst_opr);

    sprintf(str_buf, "%s %s, %s", inst.operation, dest_text, source_text);
}
|
||||
|
||||
char *memory[65536];
|
||||
// Keep this global for debugging purposes
|
||||
u16 inst_count = 1;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc < 2)
|
||||
{
|
||||
printf("Usage: Please provide assembled instructions as input\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct stat st;
|
||||
if (stat(argv[1], &st) == -1)
|
||||
{
|
||||
perror("Unable to get file size\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
unsigned char* buffer = malloc(st.st_size);
|
||||
if (!buffer)
|
||||
{
|
||||
perror("Unable to allocate memory for binary file");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
FILE *f = fopen(argv[1], "r");
|
||||
if (!f)
|
||||
{
|
||||
perror("fopen\n");
|
||||
free(buffer);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
|
||||
if (bytes_read != (size_t)st.st_size)
|
||||
{
|
||||
fprintf(stderr, "Read of binary file to memory incomplete.\n");
|
||||
free(buffer);
|
||||
fclose(f);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
printf("; Decoded 8086 Assembly Instructions\n\n");
|
||||
printf("bits 16\n\n");
|
||||
|
||||
char *inst_str_buf = malloc(sizeof(char) * 256);
|
||||
u32 bytes_processed = 0;
|
||||
while (bytes_processed < bytes_read)
|
||||
{
|
||||
ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed);
|
||||
(void)_;
|
||||
ParsedInstruction parsed = parse_instruction(buffer + bytes_processed);
|
||||
|
||||
if (parsed.bytes_read > 0)
|
||||
{
|
||||
Instruction inst = decode_instruction(parsed);
|
||||
get_instr_string(inst_str_buf, inst);
|
||||
bytes_processed += parsed.bytes_read;
|
||||
// printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
|
||||
printf("%s", inst_str_buf);
|
||||
int len = strlen(inst_str_buf);
|
||||
for (int i = 0; i < 32 - len; i++)
|
||||
printf(" ");
|
||||
printf("; %d, %d", inst_count++, inst.id);
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes_processed += 1;
|
||||
fprintf(stderr, "___Unrecognized Instruction___");
|
||||
}
|
||||
// char inst = buffer[0];
|
||||
// if (mov_inst(f, buffer, inst)) goto handled;
|
||||
// if (add_inst(f, buffer, inst)) goto handled;
|
||||
// handled:
|
||||
printf("\n");
|
||||
}
|
||||
free(inst_str_buf);
|
||||
free(buffer);
|
||||
|
||||
return 0;
|
||||
}
|
223
decode.h
223
decode.h
@ -1,223 +0,0 @@
|
||||
#include "lib.h"
|
||||
|
||||
/* Values of the 2-bit `mod` field taken from bits 7..6 of the second
 * instruction byte. */
enum Mode
{
    MODE_MEM_NO_DIS = 0, /* memory operand, no displacement */
    MODE_MEM_DIS_08 = 1, /* memory operand, 8-bit displacement */
    MODE_MEM_DIS_16 = 2, /* memory operand, 16-bit displacement */
    MODE_RGSTR_MODE = 3, /* register operand */
};
|
||||
|
||||
typedef struct Register
|
||||
{
|
||||
char* fullname;
|
||||
char* bytename;
|
||||
union {
|
||||
struct {
|
||||
char low;
|
||||
char high;
|
||||
};
|
||||
u16 full;
|
||||
} value;
|
||||
u8 code;
|
||||
} Register;
|
||||
|
||||
Register registers[8] = {
|
||||
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
||||
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
||||
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
|
||||
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
|
||||
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
|
||||
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
|
||||
{.code = 0b110, .fullname = "si", .bytename = "dh"},
|
||||
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
||||
};
|
||||
|
||||
/* Discriminator for the union inside Operand. */
enum OperandType
{
    OPR_T_MEMORY = 0,    /* effective-address memory reference */
    OPR_T_REGISTER = 1,  /* general register */
    OPR_T_IMMEDIATE = 2, /* immediate value */
    OPR_T_DIRADDR = 3,   /* direct address */
};
|
||||
|
||||
typedef struct Operand
|
||||
{
|
||||
enum OperandType tag;
|
||||
union {
|
||||
struct Mem {
|
||||
char *eac_name;
|
||||
i16 displacement;
|
||||
u8 mode;
|
||||
} mem;
|
||||
struct Reg {
|
||||
Register value;
|
||||
bool wide;
|
||||
} reg;
|
||||
struct Imm {
|
||||
i16 value;
|
||||
u8 direct;
|
||||
} imm;
|
||||
struct DirAddr {
|
||||
i16 value;
|
||||
} dir_addr;
|
||||
};
|
||||
} Operand;
|
||||
|
||||
/* How an instruction format obtains its register field. */
enum ParseRegType
{
    P_REG_NONE = 0,  /* no register field */
    P_REG_MASK = 1,  /* extracted from an instruction byte with a bit mask */
    P_REG_FIXED = 2, /* register code is fixed by the format */
};
|
||||
typedef struct ParseReg
|
||||
{
|
||||
enum ParseRegType tag;
|
||||
union {
|
||||
u8 none;
|
||||
u8 mask;
|
||||
u8 fixed;
|
||||
};
|
||||
} ParseReg;
|
||||
|
||||
typedef struct InstFormat
|
||||
{
|
||||
u16 id;
|
||||
char *name;
|
||||
ParseReg parse_reg;
|
||||
u8 inst_enc;
|
||||
u8 mask_inst;
|
||||
u8 mask_w;
|
||||
bool has_operands;
|
||||
bool has_displacement;
|
||||
bool has_data;
|
||||
bool has_d;
|
||||
bool has_w;
|
||||
bool has_mod;
|
||||
bool has_rm;
|
||||
bool has_s;
|
||||
bool has_SR;
|
||||
} InstFormat;
|
||||
|
||||
typedef struct ParsedInstruction
|
||||
{
|
||||
u16 id;
|
||||
char *name;
|
||||
u16_opt data;
|
||||
u16_opt displacement;
|
||||
u8_opt w;
|
||||
u8_opt d;
|
||||
u8_opt s;
|
||||
u8_opt mod;
|
||||
u8_opt reg;
|
||||
u8_opt rm;
|
||||
u8_opt SR;
|
||||
u8 is_data_addr;
|
||||
u8 bytes_read;
|
||||
} ParsedInstruction;
|
||||
|
||||
typedef struct Instruction
|
||||
{
|
||||
Operand src_opr;
|
||||
Operand dst_opr;
|
||||
i16 data;
|
||||
char *operation;
|
||||
u16 id;
|
||||
} Instruction;
|
||||
|
||||
/* Field kinds used by the experimental table-driven parser (see `inst_ids`).
 *
 * The declaration previously lacked `typedef`, so the trailing
 * `InstructionIdentifier` defined a global variable in this header instead of
 * a type name. `enum InstructionIdentifier` keeps working as before.
 *
 * NOTE(review): identifiers that start with an underscore followed by an
 * uppercase letter are reserved for the implementation; renaming them needs a
 * coordinated change in every table that uses them. */
typedef enum InstructionIdentifier
{
    _PREFIX_2,
    _PREFIX_3,
    _PREFIX_6,
    _NAME,
    _D,
    _W,
    _S,
    _MOD,
    _REGISTER,
    _ACC,
    _RM,
    _DISP_LO,
    _DISP_HI,
    _DATA_W0,
    _DATA_W1,
} InstructionIdentifier;
|
||||
|
||||
typedef struct ParsedInst
|
||||
{
|
||||
u64 progress;
|
||||
u8 something;
|
||||
} ParsedInst;
|
||||
|
||||
typedef ParsedInst (*inst_parser_f)(ParsedInst);
|
||||
|
||||
ParsedInst pre_2(ParsedInst pi) {return pi;}
|
||||
ParsedInst pre_3(ParsedInst pi) {return pi;}
|
||||
ParsedInst pre_6(ParsedInst pi) {return pi;}
|
||||
ParsedInst name(ParsedInst pi) {return pi;}
|
||||
ParsedInst reg(ParsedInst pi) {return pi;}
|
||||
ParsedInst w(ParsedInst pi) {return pi;}
|
||||
ParsedInst d(ParsedInst pi) {return pi;}
|
||||
ParsedInst s(ParsedInst pi) {return pi;}
|
||||
|
||||
ParsedInst mod(ParsedInst pi) {return pi;}
|
||||
ParsedInst inst(ParsedInst pi) {return pi;}
|
||||
ParsedInst rm(ParsedInst pi) {return pi;}
|
||||
|
||||
ParsedInst disp_lo(ParsedInst pi) {return pi;}
|
||||
ParsedInst disp_hi(ParsedInst pi) {return pi;}
|
||||
|
||||
ParsedInst data_w0(ParsedInst pi) {return pi;}
|
||||
ParsedInst data_w1(ParsedInst pi) {return pi;}
|
||||
|
||||
inst_parser_f inst_funcs[][6][4] =
|
||||
{
|
||||
{{pre_2, name, d, w}, {mod, reg, rm}, {disp_lo}, {disp_hi}},
|
||||
{{pre_6, s, w}, {mod, name, rm}, {disp_lo}, {disp_hi}, {data_w0}, {data_w1}},
|
||||
{{pre_6, w}, {data_w0}, {data_w1}},
|
||||
};
|
||||
|
||||
enum InstructionIdentifier inst_ids[][6][4] =
|
||||
{
|
||||
{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}},
|
||||
{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}},
|
||||
{{_PREFIX_6, _W}, {_DATA_W0}, {_DATA_W1}},
|
||||
};
|
||||
|
||||
typedef struct InstructionParser
|
||||
{
|
||||
enum InstructionIdentifier inst_ids[6][4];
|
||||
} InstructionParser;
|
||||
|
||||
// InstructionParser inst_formats[] =
|
||||
// {
|
||||
// {{{_PREFIX_2, _NAME, _D, _W}, {_MOD, _REGISTER, _RM}, {_DISP_LO}, {_DISP_HI}}},
|
||||
// {{{_PREFIX_6, _S, _W}, {_MOD, _NAME, _RM}, {_DISP_LO}, {_DISP_HI}, {_DATA_W0}, {_DATA_W1}}},
|
||||
// };
|
||||
|
||||
InstFormat inst_formats[] =
|
||||
{
|
||||
////////
|
||||
// MOV
|
||||
////////
|
||||
// Register/memory to/from register
|
||||
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_w=0x1,
|
||||
.has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
|
||||
.has_mod=true, .has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000} },
|
||||
// Immediate to register/memory
|
||||
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
||||
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
||||
// Immediate to register
|
||||
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
||||
.parse_reg={.tag = P_REG_MASK, .mask=0b00000111}, .has_data=true, .has_w=true},
|
||||
// Memory to accumulator | Accumulator to memory using the `d` bit
|
||||
// even though the manual doesn't specify it
|
||||
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, .has_data=true,
|
||||
.has_w=true, .has_d=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
|
||||
// Register/memory to segment register and inverse using the `d` bit
|
||||
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
|
||||
.has_displacement=true, .has_mod=true, .has_rm=true},
|
||||
////////
|
||||
// ADD
|
||||
////////
|
||||
// Reg/memory with register or either
|
||||
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .has_displacement=true,
|
||||
.mask_w=0x1, .has_operands=true, .has_w=true, .has_d=true, .has_mod=true,
|
||||
.has_rm=true, .parse_reg={.tag = P_REG_MASK, .mask=0b00111000}},
|
||||
// Immediate to register/memory
|
||||
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
||||
.has_s=true, .has_operands=true, .has_displacement=true,
|
||||
.has_data=true, .has_mod=true, .has_rm=true},
|
||||
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1,
|
||||
.has_data=true, .has_w=true, .parse_reg={.tag = P_REG_FIXED, .fixed=0x0}},
|
||||
};
|
611
decoder8086.odin
611
decoder8086.odin
@ -1,611 +0,0 @@
|
||||
package decoder_8086
|
||||
|
||||
import "core:os"
|
||||
import "core:fmt"
|
||||
import "core:math"
|
||||
import "core:strings"
|
||||
|
||||
// One register table entry: printable 16-bit and 8-bit names, a value slot
// viewable as two bytes or one word, and the register's encoding.
Register :: struct {
	fullname: string,
	bytename: string,
	value:    struct #raw_union {
		using _: struct {
			low:  byte,
			high: byte,
		},
		full: u16,
	},
	code:     u8,
}
|
||||
|
||||
// How an instruction determines its operand width.
// NOTE(review): the member meanings are inferred from their names; the code
// that consumes this enum is outside this chunk.
WordSize :: enum {
	None      = 0,
	LastBit   = 1,
	FourthBit = 2,
	Always8   = 3,
	Always16  = 4,
}
|
||||
|
||||
// Empty marker type: "no value" variant of Displacement and Operand.
None :: struct {}

// Displacement of a memory operand: absent, 8-bit signed or 16-bit signed.
Disp8 :: i8
Disp16 :: i16
Displacement :: union {
	None,
	Disp8,
	Disp16,
}

// Payload types for the Operand union. Most are `distinct` so that each
// occupies its own union variant even when the underlying type is shared.
RegisterId :: distinct u8
Immediate8 :: distinct i8
Immediate16 :: distinct i16
ImmediateU8 :: distinct u8

// Memory reference: `addr_id` is the r/m code naming the base expression.
MemoryAddr :: struct {
	addr_id:      u8,
	displacement: Displacement,
}

DirectAddress :: distinct i16
SegmentRegister :: distinct i8
Jump :: distinct i8
VariablePort :: struct {}
ShiftRotate :: distinct bool
Repeat :: string

// Far target: instruction pointer and code segment values.
Intersegment :: struct {
	ip: i16,
	cs: i16,
}

DirectWithinSegment :: distinct u16
|
||||
|
||||
// A decoded operand; the active variant says what kind of operand it is.
// Variant order is kept as-is because it determines the union's tag values.
Operand :: union {
	None,
	RegisterId,
	Immediate8,
	ImmediateU8,
	Immediate16,
	MemoryAddr,
	DirectAddress,
	SegmentRegister,
	Jump,
	VariablePort,
	ShiftRotate,
	Repeat,
	DirectWithinSegment,
	Intersegment,
}
|
||||
|
||||
// Static description of what kind of operand an instruction expects in a
// slot; parse_operand switches on it to build the actual Operand.
OperandInfo :: enum {
	None                = 0,
	Register            = 1,
	SegmentRegister     = 2,
	RegisterMemory      = 3,
	Immediate           = 4,
	ImmediateUnsigned   = 5,
	Accumulator         = 6,
	DirectAddress       = 7,
	Jump                = 8,
	VariablePort        = 9,
	ShiftRotate         = 10,
	Repeat              = 11,
	DirectWithinSegment = 12,
	Intersegment        = 13,
}
|
||||
|
||||
// Where the register field sits: which instruction byte, and whether it is
// the low three bits or bits 5..3 of that byte.
RegisterEncodingBits :: enum {
	None              = 0,
	FirstByteLast3    = 1,
	SecondByteMiddle3 = 2,
	SecondByteLast3   = 3,
	FirstByteMiddle3  = 4,
}
|
||||
|
||||
// Static description of one instruction encoding. A first byte `b` matches
// when `b & mask == encoding`.
InstructionInfo :: struct {
	mask:                  u8,
	encoding:              u8,
	opname:                OpName,
	desc:                  string,
	src:                   OperandInfo,
	dst:                   OperandInfo,
	word_size:             WordSize,
	reg_info:              RegisterEncodingBits,
	has_flip:              bool,
	has_sign_extension:    bool,
	check_second_encoding: bool,
	consume_extra_bytes:   int,
	shift_rotate_flag:     bool,
}
|
||||
|
||||
// NOTE(review): presumably the column used to align trailing output; the
// consumer is outside this chunk.
RIGHT_ALIGN_AMOUNT: int = 35

// General registers, indexed by their 3-bit code.
registers := [8]Register {
	{code = 0b000, fullname = "ax", bytename = "al"},
	{code = 0b001, fullname = "cx", bytename = "cl"},
	{code = 0b010, fullname = "dx", bytename = "dl"},
	{code = 0b011, fullname = "bx", bytename = "bl"},
	{code = 0b100, fullname = "sp", bytename = "ah"},
	{code = 0b101, fullname = "bp", bytename = "ch"},
	{code = 0b110, fullname = "si", bytename = "dh"},
	{code = 0b111, fullname = "di", bytename = "bh"},
}

// Segment registers, indexed by their 2-bit code; they have no byte name.
segment_registers := [4]Register {
	{code = 0b000, fullname = "es"},
	{code = 0b001, fullname = "cs"},
	{code = 0b010, fullname = "ss"},
	{code = 0b011, fullname = "ds"},
}

// Register printed for a variable-port operand (dx).
variable_port := registers[2]

// Added to the decoded offset when computing a direct-within-segment target.
total_bytes_processed: int = 0

instruction_builder := strings.builder_make()
|
||||
|
||||
// Reads a little-endian signed 16-bit value from the first two bytes of `data`.
get_i16 :: proc(data: []u8) -> i16 {
	high := i16(data[1])
	low := i16(data[0])
	return (high << 8) | low
}
|
||||
|
||||
// Reports whether `opr` currently holds the variant type `T`.
operand_is :: proc($T: typeid, opr: Operand) -> bool {
	if _, holds := opr.(T); holds {
		return true
	}
	return false
}
|
||||
|
||||
// Maps a 3-bit r/m code to the register expression printed inside a memory
// operand. Codes above 0b111 yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	expressions := [8]string{
		"bx + si",
		"bx + di",
		"bp + si",
		"bp + di",
		"si",
		"di",
		"bp",
		"bx",
	}
	if int(r_m) >= len(expressions) {
		return ""
	}
	return expressions[r_m]
}
|
||||
|
||||
// Formats a memory operand as "[base]", "[base + N]" or "[base - N]", with a
// "seg:" prefix when a segment register is supplied. A zero or absent
// displacement prints no offset. The returned string is heap-allocated.
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
	offset_text: string
	switch amount in memoryAddr.displacement {
	case None:
		offset_text = ""
	case Disp8:
		if amount != 0 {
			sign := amount > 0 ? "+" : "-"
			offset_text = fmt.aprintf(" %s %d", sign, math.abs(amount))
		}
	case Disp16:
		if amount != 0 {
			sign := amount > 0 ? "+" : "-"
			offset_text = fmt.aprintf(" %s %d", sign, math.abs(amount))
		}
	}

	prefix: string
	if segment, present := has_segment.?; present {
		prefix = fmt.aprintf("%s:", segment.fullname)
	}

	base := calculate_effective_address(memoryAddr.addr_id)
	return fmt.aprintf("%s[%s%s]", prefix, base, offset_text)
}
|
||||
|
||||
// Reads the displacement selected by the mod field (top two bits of data[0]).
// mod 1 -> signed byte at data[1], 1 byte consumed; mod 2 -> little-endian
// i16 at data[1..2], 2 bytes consumed; any other mod -> None{}, 0 bytes.
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	displacement = None{}
	mode := data[0] >> 6
	if mode == 1 {
		displacement = (i8)(data[1])
		disp_amount = 1
	} else if mode == 2 {
		displacement = get_i16(data[1:])
		disp_amount = 2
	}
	return displacement, disp_amount
}
|
||||
|
||||
// Formats a displacement as " + N" or " - N". Returns "" when it is absent
// or zero. A non-empty result is heap-allocated.
get_displacement_string :: proc(displacement: Displacement) -> string {
	text := ""
	#partial switch amount in displacement {
	case i8:
		if amount != 0 {
			sign := amount > 0 ? "+" : "-"
			text = fmt.aprintf(" %s %d", sign, math.abs(amount))
		}
	case i16:
		if amount != 0 {
			sign := amount > 0 ? "+" : "-"
			text = fmt.aprintf(" %s %d", sign, math.abs(amount))
		}
	}
	return text
}
|
||||
|
||||
// Names the string instruction encoded in `data`: bits 3..1 select the
// mnemonic and bit 0 selects the "w"/"b" suffix. Unlisted bit patterns give
// an empty mnemonic, so only the suffix is returned. The result is
// heap-allocated.
get_repeat_op :: proc(data: u8) -> Repeat {
	mnemonic: string
	switch (data & 0b1110) >> 1 {
	case 0b010:
		mnemonic = "movs"
	case 0b011:
		mnemonic = "cmps"
	case 0b101:
		mnemonic = "stos"
	case 0b110:
		mnemonic = "lods"
	case 0b111:
		mnemonic = "scas"
	}

	suffix := "b"
	if (data & 0b1) == 1 {
		suffix = "w"
	}
	return Repeat(fmt.aprintf("%s%s", mnemonic, suffix))
}
|
||||
|
||||
// Finds the first entry of `instructions` whose encoding equals `b` under the
// entry's mask. Returns the entry and true, or a zero InstructionInfo and false.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for candidate in instructions {
		if (b & candidate.mask) != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return InstructionInfo{}, false
}
|
||||
|
||||
// Resolves the mnemonic of an opcode group whose operation is selected by a
// 3-bit field (bits 5..3). .TBD1 and .TBD3 read the field from data[0]; every
// other opname reads it from data[1]. The second result is true for the
// intersegment forms of call and jmp. Unlisted field values give "".
get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) {
	name: string
	interseg: bool

	#partial switch opname {
	case .TBD2:
		switch (data[1] & 0b00111000) >> 3 {
		case 0b000:
			name = "inc"
		case 0b001:
			name = "dec"
		case 0b010:
			name = "call"
		case 0b011:
			name = "call"
			interseg = true
		case 0b100:
			name = "jmp"
		case 0b101:
			name = "jmp"
			interseg = true
		case 0b110:
			name = "push"
		}
	case .TBD5:
		switch (data[1] & 0b00111000) >> 3 {
		case 0b000:
			name = "test"
		case 0b001:
			// NOTE(review): same mnemonic as the .TBD2 group; confirm against the manual.
			name = "dec"
		case 0b010:
			name = "not"
		case 0b011:
			name = "neg"
		case 0b100:
			name = "mul"
		case 0b101:
			name = "imul"
		case 0b110:
			name = "div"
		case 0b111:
			name = "idiv"
		}
	case .TBD6:
		switch (data[1] & 0b00111000) >> 3 {
		case 0b000:
			name = "rol"
		case 0b001:
			name = "ror"
		case 0b010:
			name = "rcl"
		case 0b011:
			name = "rcr"
		case 0b100:
			name = "shl"
		case 0b101:
			name = "shr"
		case 0b111:
			name = "sar"
		}
	case:
		// Arithmetic group: the selector lives in the first byte for .TBD1 and
		// .TBD3, in the second byte otherwise.
		selector: u8
		if opname == .TBD1 || opname == .TBD3 {
			selector = (data[0] & 0b00111000) >> 3
		} else {
			selector = (data[1] & 0b00111000) >> 3
		}
		switch selector {
		case 0b000:
			name = "add"
		case 0b001:
			name = "or"
		case 0b010:
			name = "adc"
		case 0b011:
			name = "sbb"
		case 0b100:
			name = "and"
		case 0b101:
			name = "sub"
		case 0b110:
			name = "xor"
		case 0b111:
			name = "cmp"
		}
	}
	return name, interseg
}
|
||||
|
||||
// Extracts the 3-bit register field described by `reg_info` from the first
// two instruction bytes. Returns 0 when no location is given.
@(private = "file")
read_register_field :: proc(reg_info: RegisterEncodingBits, data: []u8) -> u8 {
	switch reg_info {
	case .None:
		// A register is required but the encoded location is not provided;
		// this is deliberately tolerated and falls through to 0.
	case .FirstByteLast3:
		return data[0] & 0b111
	case .FirstByteMiddle3:
		return (data[0] >> 3) & 0b111
	case .SecondByteMiddle3:
		return (data[1] >> 3) & 0b111
	case .SecondByteLast3:
		return data[1] & 0b111
	}
	return 0
}

// Builds one operand of `inst` from the instruction bytes in `data`.
//
//   opinfo     - the kind of operand to decode.
//   data       - instruction bytes, starting at the opcode byte.
//   processed  - running count of consumed bytes; advanced by however many
//                bytes this operand uses. Immediates are read at this offset.
//   word       - true when the instruction operates on 16-bit data.
//   has_segreg - not used by this procedure.
//
// Returns None{} for .None.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg := read_register_field(inst.reg_info, data)
		operand = (RegisterId)(registers[reg].code)
	case .SegmentRegister:
		// The segment register field is two bits wide and `segment_registers`
		// has four entries; masking with 0b11 keeps the index in range.
		reg := read_register_field(inst.reg_info, data) & 0b11
		operand = (SegmentRegister)(segment_registers[reg].code)
	case .RegisterMemory:
		mod := data[1] >> 6
		rm := data[1] & 0b111
		processed^ += 1 // the mod/rm byte itself
		op: Operand
		if mod == 0 {
			if rm == 0b110 {
				// mod 00 with r/m 110 is a direct 16-bit address.
				op = (DirectAddress)(get_i16(data[2:]))
				processed^ += 2
			} else {
				op = MemoryAddr{ addr_id = rm , displacement = None{} }
			}
		} else if mod == 1 {
			op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
			processed^ += 1
		} else if mod == 2 {
			op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
			processed^ += 2
		} else if mod == 3 {
			op = (RegisterId)(registers[rm].code)
		}
		operand = op
	case .Immediate:
		data_idx := processed^
		word_signed := word
		if inst.has_sign_extension {
			// With the s bit (bit 1 of the opcode) set, only one byte is encoded.
			word_signed &&= (data[0] & 0b0000_0010) == 0
		}
		operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
		processed^ += word_signed ? 2 : 1
	case .ImmediateUnsigned:
		operand = (ImmediateU8)(data[processed^])
		processed^ += 1
	case .Accumulator:
		operand = (RegisterId)(registers[0].code)
	case .DirectAddress:
		operand = (DirectAddress)(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		operand = (Jump)((i8)(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		v_flag := data[0] & 0b10 != 0
		operand = (ShiftRotate)(v_flag)
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	case .DirectWithinSegment:
		// Target = 16-bit offset + bytes decoded so far + 3.
		value := (int)(get_i16(data[1:])) + total_bytes_processed + 3
		operand = (DirectWithinSegment)(value)
		processed^ += 2
	case .Intersegment:
		operand = Intersegment {
			ip = get_i16(data[1:]),
			cs = get_i16(data[3:]),
		}
		processed^ += 4
	}
	return operand
}
|
||||
|
||||
// Renders a decoded operand as assembly text.
//
// `is_word` selects the 16-bit (`fullname`) or 8-bit (`bytename`) register
// name for RegisterId operands. `has_segment` is forwarded to
// get_memory_string for memory operands and, when set, is written as a
// "seg:" prefix in front of direct addresses. A `None` operand (or an unset
// union) renders as the empty string.
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
    switch v in operand {
    case None:
        return ""
    case RegisterId:
        entry := registers[v]
        if is_word {
            return entry.fullname
        }
        return entry.bytename
    case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
        return fmt.aprintf("%d", v)
    case MemoryAddr:
        return get_memory_string(v, has_segment)
    case DirectAddress:
        prefix: string
        if seg, found := has_segment.?; found {
            prefix = fmt.aprintf("%s:", seg.fullname)
        }
        return fmt.aprintf("%s[%d]", prefix, v)
    case SegmentRegister:
        return segment_registers[v].fullname
    case Jump:
        // Non-negative offsets get an explicit "+"; negative ones already
        // carry their sign from %d.
        return fmt.aprintf("$%s%d", v >= 0 ? "+" : "", v)
    case VariablePort:
        return variable_port.fullname
    case ShiftRotate:
        // true: the count comes from registers[1]'s byte register; false: count is 1.
        return v ? registers[1].bytename : "1"
    case Repeat:
        return (string)(v)
    case Intersegment:
        return fmt.aprintf("%d:%d", v.cs, v.ip)
    }
    return ""
}
|
||||
|
||||
// Entry point: reads up to 1024 bytes from the file named by the first
// command-line argument, decodes them one instruction at a time and prints
// each instruction as assembly text followed by a ";;" comment that lists the
// prefixes seen and the raw bytes in binary.
//
// Exits with status 1 when no file argument is given or the file cannot be
// opened or read.
main :: proc() {
    // Guard the os.args[1] access below; without an argument it would be an
    // out-of-bounds index.
    if len(os.args) < 2 {
        fmt.eprintln("usage:", os.args[0], "<binary file>")
        os.exit(1)
    }

    f,err := os.open(os.args[1])
    if err != os.ERROR_NONE {
        fmt.eprintln("ERROR:", err)
        os.exit(1)
    }
    defer os.close(f)

    // NOTE: only the first 1024 bytes of the file are decoded.
    data := make([]u8, 1024)
    bytes_read, err2 := os.read(f, data)
    if err2 != nil {
        fmt.eprintln("ERROR:", err2)
        os.exit(1)
    }

    print_at_end := false
    idx := 0
    line_count := 0
    has_lock: bool
    has_segment: Maybe(Register)
    last_opname: [3]byte
    repeating_op_count := 0
    // Starts empty and grows with append, so more than 512 deferred lines
    // cannot index past the end of the list.
    instruction_list := make([dynamic]string, 0, 512)
    fmt.println("bits 16\n")
    for idx < bytes_read {
        processed := 1
        curr_byte := data[idx]

        inst, ok := try_find_instruction(curr_byte)
        if !ok {
            txt := "unknown instruction"
            if print_at_end {
                line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
                append(&instruction_list, line)
                line_count += 1
            } else {
                fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
            }
            idx += 1
            continue
        }

        // Here we check if the instruction affects the next instruction
        if inst.opname == .LOCK {
            has_lock = true
            idx += 1
            continue
        } else if inst.opname == .SEGMENT {
            reg := (curr_byte & 0b11000) >> 3
            has_segment = segment_registers[reg]
            idx += 1
            continue
        } else if inst.opname == .AAM {
            processed += 1
        }

        src_opr: Operand
        dst_opr: Operand

        word: bool
        flip: bool
        indirect_intersegment: bool

        if inst.has_flip {
            flip = curr_byte & 2 != 0
        }

        #partial switch inst.word_size {
        case .LastBit: word = curr_byte & 1 == 1
        case .FourthBit: word = curr_byte & 0b0000_1000 != 0
        case .Always16: word = true
        }

        opname: string
        if inst.check_second_encoding {
            opname,indirect_intersegment = get_opname(inst.opname, data[idx:])
            // NOTE: This is a special case because it matches the bit pattern of .TBD5,
            // but the instruction itself is different. TEST with an immediate is
            // encoded as 1111011w, so both the byte (0xF6) and the word (0xF7)
            // opcodes must switch to test_inst to pick up the immediate operand.
            if opname == "test" && (curr_byte & 0b1111_1110) == 0b1111_0110 {
                inst = test_inst
            }
        } else {
            opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
        }

        dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
        src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)

        // The size keyword is decided on the operands as encoded, i.e. before
        // the direction-bit swap below.
        src_is_imm := operand_is(Immediate8, src_opr) || operand_is(Immediate16, src_opr)
        dst_is_bracketed := operand_is(MemoryAddr, dst_opr) || operand_is(DirectAddress, dst_opr)
        src_is_bracketed := operand_is(MemoryAddr, src_opr) || operand_is(DirectAddress, src_opr)
        shiftrot := inst.src == .ShiftRotate
        size_string := ""
        if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, dst_opr)) {
            size_string = word ? "word " : "byte "
        }

        if flip {
            src_opr, dst_opr = dst_opr, src_opr
        }

        dst_str := get_operand_string(dst_opr, word, has_segment)
        src_str := get_operand_string(src_opr, word, has_segment)
        full_inst: string
        if dst_str == "" {
            interseg_string: string
            if indirect_intersegment {
                interseg_string = " far"
            }
            full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str)
        } else {
            // NOTE: I don't know why this is the case, but only the move has the word/byte
            // keyword next to the immediate, but other instructions have it on the memory address
            if opname == "mov" {
                full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str)
            } else {
                full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str)
            }
        }

        processed += inst.consume_extra_bytes

        lock_string: string
        if has_lock {
            lock_string = "lock "
        }
        fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
        if has_lock {
            fmt.sbprintf(&instruction_builder, " lock")
        }
        if _,ok := has_segment.?; ok {
            fmt.sbprintf(&instruction_builder, " segment")
        }
        for i in 0..<processed {
            fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
        }

        // A blank line is printed when the first three characters of the line
        // change after a run of at least two lines sharing them.
        op2 := strings.to_string(instruction_builder)
        if op2[0:3] != string(last_opname[:]) {
            if repeating_op_count > 0 {
                fmt.println()
            }
            repeating_op_count = 0
        } else {
            repeating_op_count += 1
        }
        copy(last_opname[:], op2[0:3])
        fmt.println(op2)

        idx += processed
        strings.builder_reset(&instruction_builder)
        // Prefixes apply to exactly one following instruction.
        has_lock = false
        has_segment = nil
        total_bytes_processed = idx
    }
    if print_at_end {
        for i in 0..<line_count {
            opname := instruction_list[i]
            if !strings.has_prefix(opname, string(last_opname[:])) {
                fmt.println()
            }
            copy(last_opname[:], opname[0:3])
            fmt.println(instruction_list[i])
        }
    }
}
|
194
decoding.odin
Normal file
194
decoding.odin
Normal file
@ -0,0 +1,194 @@
|
||||
package sim_8086
|
||||
|
||||
import "core:fmt"
|
||||
import "core:math"
|
||||
import "core:strings"
|
||||
|
||||
// Reads the 3-bit register field from the place `location` names: the low or
// middle three bits of the opcode byte (data[0]) or of the byte after it
// (data[1]). Panics when `location` is .None, because the caller asked for a
// register the instruction table does not say how to find.
@(private="file")
read_register_field :: proc(location: RegisterEncodingBits, data: []u8) -> u8 {
    field: u8
    switch location {
    case .None:
        panic("Register is required but the encoded location is not provided")
    case .FirstByteLast3:
        field = data[0] & 0b111
    case .FirstByteMiddle3:
        field = (data[0] >> 3) & 0b111
    case .SecondByteMiddle3:
        field = (data[1] >> 3) & 0b111
    case .SecondByteLast3:
        field = data[1] & 0b111
    }
    return field
}

// Decodes one operand of an instruction.
//
// Parameters:
//   inst       - table entry of the instruction being decoded.
//   opinfo     - which kind of operand to decode (inst.dst or inst.src).
//   data       - bytes of the instruction, starting at the opcode byte.
//   processed  - running count of bytes consumed by this instruction; it is
//                advanced by however many bytes this operand occupies and is
//                also used as the read position for immediates.
//   word       - true when the instruction operates on 16-bit data.
//   has_segreg - not read by this procedure.
//
// Returns the decoded operand, or `None{}` when `opinfo` is .None.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
    operand: Operand = None{}
    switch opinfo {
    case .None:
    case .Register:
        reg := read_register_field(inst.reg_info, data)
        operand = (RegisterId)(registers[reg].code)
    case .SegmentRegister:
        // segment_registers has four entries, so only the low two bits of the
        // field are meaningful; masking keeps a malformed byte from indexing
        // past the end of the table.
        reg := read_register_field(inst.reg_info, data) & 0b11
        operand = (SegmentRegister)(segment_registers[reg].code)
    case .RegisterMemory:
        mod := data[1] >> 6
        rm := data[1] & 0b111
        processed^ += 1 // the mod/rm byte itself
        switch mod {
        case 0b00:
            if rm == 0b110 {
                // mod 00 with rm 110 is a 16-bit direct address, not [bp].
                operand = (DirectAddress)(get_i16(data[2:]))
                processed^ += 2
            } else {
                operand = MemoryAddr{ addr_id = rm , displacement = None{} }
            }
        case 0b01:
            operand = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
            processed^ += 1
        case 0b10:
            operand = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
            processed^ += 2
        case 0b11:
            operand = (RegisterId)(registers[rm].code)
        }
    case .Immediate:
        // The immediate follows everything consumed so far.
        data_idx := processed^
        word_signed := word
        if inst.has_sign_extension {
            // With bit 1 of the opcode set, only one immediate byte is
            // present even for word operations.
            word_signed &&= data[0] & 0b0000_0010 == 0
        }
        operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
        processed^ += word_signed ? 2 : 1
    case .ImmediateUnsigned:
        operand = (ImmediateU8)(data[processed^])
        processed^ += 1
    case .Accumulator:
        operand = (RegisterId)(registers[0].code)
    case .DirectAddress:
        operand = (DirectAddress)(get_i16(data[1:]))
        processed^ += 2
    case .Jump:
        processed^ += 1
        // NOTE: In order to mimic the label offset, you have to take the value you got and add two
        // NOTE(review): the sum is computed in i8, so displacements of 126
        // and 127 wrap around; fixing that needs a wider Jump type.
        operand = (Jump)((i8)(data[1]) + 2)
    case .VariablePort:
        operand = VariablePort{}
    case .ShiftRotate:
        v_flag := data[0] & 0b10 != 0
        operand = (ShiftRotate)(v_flag)
    case .Repeat:
        operand = get_repeat_op(data[1])
        processed^ += 1
    case .DirectWithinSegment:
        // The 16-bit displacement is added to the offset of this instruction
        // plus 3 (opcode byte and the two displacement bytes).
        value := (int)(get_i16(data[1:])) + total_bytes_processed + 3
        operand = (DirectWithinSegment)(value)
        processed^ += 2
    case .Intersegment:
        operand = Intersegment {
            ip = get_i16(data[1:]),
            cs = get_i16(data[3:]),
        }
        processed^ += 4
    }
    return operand
}
|
||||
|
||||
// Decodes the first `bytes_to_read` bytes of `data` and appends one
// Instruction per decoded instruction to `inst_list`.
//
// Bytes that match no table entry are appended as single-byte .UNKNOWN
// instructions. LOCK and segment-override prefixes are not appended on their
// own; they are recorded on the next decoded instruction and then cleared.
// The global `total_bytes_processed` is updated to the offset of the next
// instruction after each one is decoded (parse_operand reads it).
decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read: int) {
    idx := 0
    has_segment: Maybe(Register)
    has_lock: bool
    for idx < bytes_to_read {
        instruction: Instruction
        processed := 1
        curr_byte := data[idx]

        inst, ok := try_find_instruction(curr_byte)
        if !ok {
            instruction = {
                opname = .UNKNOWN,
                bytes_read = 1,
                raw_data = data[idx:idx+1],
            }
            append(inst_list, instruction)
            idx += 1
            continue
        }

        // Here we check if the instruction affects the next instruction
        if inst.opname == .LOCK {
            has_lock = true
            idx += 1
            continue
        } else if inst.opname == .SEGMENT {
            reg := (curr_byte & 0b11000) >> 3
            has_segment = segment_registers[reg]
            idx += 1
            continue
        } else if inst.opname == .AAM {
            processed += 1
        }

        // NOTE: This is a special case because it matches the bit pattern of .TBD5,
        // but the instruction itself is different. TEST with an immediate is
        // encoded as 1111011w with a zero reg field, so both the byte (0xF6)
        // and the word (0xF7) opcodes must switch to test_inst; otherwise the
        // immediate is left behind and decoded as the next instruction.
        // The idx+1 guard keeps the look-ahead inside the buffer.
        if inst.opname == .TBD5 && (curr_byte & 0b1111_1110) == 0b1111_0110 && idx+1 < len(data) && (data[idx+1] & 0b00111000) == 0 {
            inst = test_inst
        }

        word: bool
        flip: bool

        if inst.has_flip {
            // Bit 1 of the opcode swaps source and destination.
            flip = curr_byte & 2 != 0
        }

        #partial switch inst.word_size {
        case .LastBit: word = curr_byte & 1 == 1
        case .FourthBit: word = curr_byte & 0b0000_1000 != 0
        case .Always16: word = true
        }

        // Destination first: each call advances `processed`, and immediates
        // are read at the position reached so far.
        dst_opr := parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
        src_opr := parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)

        if flip {
            src_opr, dst_opr = dst_opr, src_opr
        }

        processed += inst.consume_extra_bytes

        instruction.opname = inst.opname
        instruction.src = src_opr
        instruction.dst = dst_opr
        instruction.is_word = word
        instruction.bytes_read = processed
        instruction.raw_data = data[idx:idx+processed]
        instruction.info = inst
        instruction.has_lock = has_lock
        instruction.has_segment = has_segment

        append(inst_list, instruction)

        idx += processed

        // Prefixes apply to exactly one following instruction.
        has_lock = false
        has_segment = nil
        total_bytes_processed = idx
    }
}
|
@ -1,6 +1,7 @@
|
||||
package decoder_8086
|
||||
package sim_8086
|
||||
|
||||
OpName :: enum {
|
||||
Op :: enum {
|
||||
UNKNOWN,
|
||||
TBD1,
|
||||
TBD2,
|
||||
TBD3,
|
||||
@ -100,8 +101,12 @@ OpName :: enum {
|
||||
// isn't that great; we return a string with the instruction name, but ideally we have all
|
||||
// the instructions accounted for, because eventually we will need the final parsed
|
||||
// instruction to contain all the information related to it
|
||||
// test_inst := InstructionInfo {
|
||||
// opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110,
|
||||
// dst = .RegisterMemory, src = .Immediate, word_size = .LastBit
|
||||
// }
|
||||
test_inst := InstructionInfo {
|
||||
opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110,
|
||||
opname = .TEST, desc = "", mask = 0b11111110, encoding = 0b11110110,
|
||||
dst = .RegisterMemory, src = .Immediate, word_size = .LastBit
|
||||
}
|
||||
|
||||
|
53
lib.h
53
lib.h
@ -1,53 +0,0 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Short aliases for the fixed-width, floating-point and size types. */
typedef uint8_t   u8;
typedef uint16_t  u16;
typedef uint32_t  u32;
typedef uint64_t  u64;
typedef int16_t   i16;
typedef int32_t   i32;
typedef float     f32;
typedef double    f64;
typedef uintptr_t uptr;
typedef char      sbyte;
typedef ptrdiff_t size;
typedef size_t    usize;

/* Discriminant stored in every OPTION(type) value. */
enum OptionTag { NONE, SOME };

/*
 * OPTION(type) declares an optional wrapper `type_opt` together with three
 * helpers:
 *   none_type()            - an empty option
 *   some_type(value)       - an option holding `value`
 *   get_some_type(opt, p)  - stores the held value in *p and returns 1 when
 *                            `opt` holds a value; returns 0 and leaves *p
 *                            untouched otherwise
 */
#define OPTION(type)                                                    \
    typedef struct                                                      \
    {                                                                   \
        enum OptionTag tag;                                             \
        union {                                                         \
            char none;                                                  \
            type value;                                                 \
        };                                                              \
    } type##_opt;                                                       \
                                                                        \
    static inline type##_opt none_##type(void)                          \
    {                                                                   \
        type##_opt empty = { .tag = NONE, .none = 0 };                  \
        return empty;                                                   \
    }                                                                   \
                                                                        \
    static inline type##_opt some_##type(type value)                    \
    {                                                                   \
        type##_opt filled = { .tag = SOME, .value = value };            \
        return filled;                                                  \
    }                                                                   \
                                                                        \
    static inline int get_some_##type(type##_opt opt, type *out_value)  \
    {                                                                   \
        if (opt.tag == SOME) {                                          \
            *out_value = opt.value;                                     \
            return 1;                                                   \
        }                                                               \
        return 0;                                                       \
    }

/*
 * IF_LET_SOME(type, var, opt) declares `var` in the enclosing scope and opens
 * an `if` whose body runs only when `opt` holds a value; inside the body
 * `var` is that value.
 */
#define IF_LET_SOME(type, var, opt) \
    type var;                       \
    if (get_some_##type(opt, &var))


OPTION(u8)
OPTION(u16)
|
242
printing.odin
Normal file
242
printing.odin
Normal file
@ -0,0 +1,242 @@
|
||||
package sim_8086
|
||||
|
||||
import "core:fmt"
|
||||
import "core:math"
|
||||
import "core:strings"
|
||||
|
||||
instruction_builder := strings.builder_make()
|
||||
|
||||
// Maps a 3-bit r/m field to the register expression used inside the brackets
// of a memory operand. Values above 0b111 yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
    expressions := [8]string{
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    }
    if int(r_m) >= len(expressions) {
        return ""
    }
    return expressions[r_m]
}
|
||||
|
||||
// Renders a register-based memory operand as "[regs]", "[regs + d]" or
// "[regs - d]", with a "seg:" prefix when `has_segment` is set. A zero
// displacement is omitted.
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
    // Widen the displacement to int before taking its magnitude: abs of the
    // most negative i8/i16 value does not fit in the same type, so -128 and
    // -32768 would otherwise be printed with a second minus sign.
    offset: int
    switch value in memoryAddr.displacement {
    case None:
    case Disp8:
        offset = int(value)
    case Disp16:
        offset = int(value)
    }

    disp: string
    if offset != 0 {
        disp = fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
    }

    seg_string: string
    if segreg, ok := has_segment.?; ok {
        seg_string = fmt.aprintf("%s:", segreg.fullname)
    }
    text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp)
    return text
}
|
||||
|
||||
// Formats a displacement as " + d" or " - d". Returns the empty string for a
// zero or absent displacement.
get_displacement_string :: proc(displacement: Displacement) -> string {
    // Widen to int before taking the magnitude: abs of the most negative
    // i8/i16 value does not fit in the same type, so -128 and -32768 would
    // otherwise be printed with a second minus sign.
    offset: int
    #partial switch value in displacement {
    case i8:
        offset = int(value)
    case i16:
        offset = int(value)
    }
    if offset == 0 {
        return ""
    }
    return fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
}
|
||||
|
||||
// Resolves the mnemonic of an instruction whose opcode byte is shared by a
// group of operations and is told apart by a 3-bit field.
//
// The field is bits 3-5 of the second raw byte, except for .TBD1 and .TBD3
// where it is taken from the first byte. Returns the mnemonic ("" for field
// values with no entry) and whether the instruction is the intersegment form
// of call/jmp.
get_opname :: proc(inst: Instruction) -> (string, bool) {
    tbd2_names  := [8]string{"inc", "dec", "call", "call", "jmp", "jmp", "push", ""}
    tbd5_names  := [8]string{"test", "dec", "not", "neg", "mul", "imul", "div", "idiv"}
    tbd6_names  := [8]string{"rol", "ror", "rcl", "rcr", "shl", "shr", "", "sar"}
    arith_names := [8]string{"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}

    #partial switch inst.opname {
    case .TBD2:
        field := (inst.raw_data[1] >> 3) & 0b111
        // TODO: We really have to fix this because we shouldn't be figuring out if this
        // is an intersegment here
        far := field == 0b011 || field == 0b101
        return tbd2_names[field], far
    case .TBD5:
        field := (inst.raw_data[1] >> 3) & 0b111
        return tbd5_names[field], false
    case .TBD6:
        field := (inst.raw_data[1] >> 3) & 0b111
        return tbd6_names[field], false
    case .TBD1, .TBD3:
        field := (inst.raw_data[0] >> 3) & 0b111
        return arith_names[field], false
    }
    field := (inst.raw_data[1] >> 3) & 0b111
    return arith_names[field], false
}
|
||||
|
||||
// Renders a decoded operand as assembly text.
//
// `is_word` selects the 16-bit (`fullname`) or 8-bit (`bytename`) register
// name for RegisterId operands. `has_segment` is forwarded to
// get_memory_string for memory operands and, when set, is written as a
// "seg:" prefix in front of direct addresses. A `None` operand (or an unset
// union) renders as the empty string.
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
    switch v in operand {
    case None:
        return ""
    case RegisterId:
        entry := registers[v]
        if is_word {
            return entry.fullname
        }
        return entry.bytename
    case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
        return fmt.aprintf("%d", v)
    case MemoryAddr:
        return get_memory_string(v, has_segment)
    case DirectAddress:
        prefix: string
        if seg, found := has_segment.?; found {
            prefix = fmt.aprintf("%s:", seg.fullname)
        }
        return fmt.aprintf("%s[%d]", prefix, v)
    case SegmentRegister:
        return segment_registers[v].fullname
    case Jump:
        // Non-negative offsets get an explicit "+"; negative ones already
        // carry their sign from %d.
        return fmt.aprintf("$%s%d", v >= 0 ? "+" : "", v)
    case VariablePort:
        return variable_port.fullname
    case ShiftRotate:
        // true: the count comes from registers[1]'s byte register; false: count is 1.
        return v ? registers[1].bytename : "1"
    case Repeat:
        return (string)(v)
    case Intersegment:
        return fmt.aprintf("%d:%d", v.cs, v.ip)
    }
    return ""
}
|
||||
|
||||
// Builds the output line for a byte that matched no instruction: the text
// "unknown instruction", the ";;" comment marker placed using
// RIGHT_ALIGN_AMOUNT, and the first raw byte of `inst` in binary.
get_unknown_inst_string :: proc(inst: Instruction) -> string {
    txt := "unknown instruction"
    return fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", inst.raw_data[0])
}
|
||||
|
||||
// Appends the textual form of `instruction` to the package-level
// `instruction_builder`: an optional "lock " prefix, the mnemonic and
// operands, then a ";;" comment that lists the prefixes seen and every raw
// byte in binary.
//
// `inst_info.check_second_encoding` decides whether the mnemonic comes from
// get_opname or from the lower-cased name of `instruction.opname`.
// The builder is not reset here; the caller does that.
get_instruction_string :: proc(inst_info: InstructionInfo, instruction: Instruction) {
    src := instruction.src
    dst := instruction.dst

    src_is_immediate := operand_is(Immediate8, src) || operand_is(Immediate16, src)
    src_is_memory := operand_is(MemoryAddr, src) || operand_is(DirectAddress, src)
    dst_is_memory := operand_is(MemoryAddr, dst) || operand_is(DirectAddress, dst)
    src_is_shift_count := operand_is(ShiftRotate, src)

    // An explicit size keyword is written when a memory destination is paired
    // with an immediate or shift count, or when a memory operand is the only
    // operand.
    needs_size := (dst_is_memory && (src_is_immediate || src_is_shift_count)) || (src_is_memory && operand_is(None, dst))
    size_keyword := ""
    if needs_size {
        size_keyword = instruction.is_word ? "word " : "byte "
    }

    if instruction.has_lock {
        fmt.sbprint(&instruction_builder, "lock ")
    }

    dst_text := get_operand_string(dst, instruction.is_word, instruction.has_segment)
    src_text := get_operand_string(src, instruction.is_word, instruction.has_segment)

    mnemonic: string
    far_target: bool
    if inst_info.check_second_encoding {
        mnemonic, far_target = get_opname(instruction)
    } else {
        // TODO: Do the RTTI thing here with reflection
        mnemonic = strings.to_lower(fmt.aprintf("%s", instruction.opname))
    }

    switch {
    case dst_text == "":
        far_keyword := far_target ? " far" : ""
        fmt.sbprintf(&instruction_builder, "%s%s %s%s", mnemonic, far_keyword, size_keyword, src_text)
    case mnemonic == "mov":
        // Only mov carries the word/byte keyword next to the immediate; the
        // other instructions carry it on the memory operand.
        fmt.sbprintf(&instruction_builder, "%s %s, %s%s", mnemonic, dst_text, size_keyword, src_text)
    case:
        fmt.sbprintf(&instruction_builder, "%s %s%s, %s", mnemonic, size_keyword, dst_text, src_text)
    }

    // Prepare padding and comment to add debug info
    written := strings.builder_len(instruction_builder)
    fmt.sbprintf(&instruction_builder, "%*[0]s", RIGHT_ALIGN_AMOUNT - written, ";;")

    if instruction.has_lock {
        fmt.sbprintf(&instruction_builder, " lock")
    }
    if _, has_override := instruction.has_segment.?; has_override {
        fmt.sbprintf(&instruction_builder, " segment")
    }
    for byte_index in 0..<instruction.bytes_read {
        fmt.sbprintf(&instruction_builder, " %08b", instruction.raw_data[byte_index])
    }
}
|
||||
|
||||
// Prints a "bits 16" header followed by one line per instruction on stdout.
//
// Lines are grouped by their first three characters: a blank line is printed
// when those characters change after a run of at least two lines sharing them.
print_instructions_stdout :: proc(instructions: []Instruction) {
    previous_prefix: [3]byte
    run_length := 0
    fmt.println("bits 16\n")
    for decoded in instructions {
        // The builder is shared, so clear what the previous line left in it.
        strings.builder_reset(&instruction_builder)
        get_instruction_string(decoded.info, decoded)
        line := strings.to_string(instruction_builder)

        prefix := line[0:3]
        if prefix == string(previous_prefix[:]) {
            run_length += 1
        } else {
            if run_length > 0 {
                fmt.println()
            }
            run_length = 0
        }
        copy(previous_prefix[:], prefix)
        fmt.println(line)
    }
}
|
98
sim8086.odin
Normal file
98
sim8086.odin
Normal file
@ -0,0 +1,98 @@
|
||||
package sim_8086
|
||||
|
||||
import "core:os"
|
||||
import "core:fmt"
|
||||
import "core:math"
|
||||
import "core:strings"
|
||||
|
||||
RIGHT_ALIGN_AMOUNT := 35
|
||||
|
||||
// General-purpose registers, indexed by their 3-bit encoding. Each entry
// pairs the 16-bit name (`fullname`) with the 8-bit name (`bytename`) that
// shares the same code.
registers := [8]Register {
    0b000 = {fullname = "ax", bytename = "al", code = 0b000},
    0b001 = {fullname = "cx", bytename = "cl", code = 0b001},
    0b010 = {fullname = "dx", bytename = "dl", code = 0b010},
    0b011 = {fullname = "bx", bytename = "bl", code = 0b011},
    0b100 = {fullname = "sp", bytename = "ah", code = 0b100},
    0b101 = {fullname = "bp", bytename = "ch", code = 0b101},
    0b110 = {fullname = "si", bytename = "dh", code = 0b110},
    0b111 = {fullname = "di", bytename = "bh", code = 0b111},
}

// Segment registers, indexed by their 2-bit encoding. They have no byte name.
segment_registers := [4]Register {
    0b00 = {fullname = "es", code = 0b000},
    0b01 = {fullname = "cs", code = 0b001},
    0b10 = {fullname = "ss", code = 0b010},
    0b11 = {fullname = "ds", code = 0b011},
}
|
||||
|
||||
variable_port := registers[2]
|
||||
|
||||
total_bytes_processed := 0
|
||||
|
||||
// Reads a little-endian signed 16-bit value from the first two bytes of
// `data` (data[0] is the low byte, data[1] the high byte).
get_i16 :: proc(data: []u8) -> i16 {
    low := u16(data[0])
    high := u16(data[1])
    return i16((high << 8) | low)
}
|
||||
|
||||
// Reports whether `opr` currently holds a value of variant type `T`.
operand_is :: proc($T: typeid, opr: Operand) -> bool {
    if _, holds := opr.(T); holds {
        return true
    }
    return false
}
|
||||
|
||||
// Builds the mnemonic of a string operation from its opcode byte: bits 1-3
// select the operation and bit 0 selects the "w" or "b" suffix. Bit patterns
// without an entry produce just the suffix.
get_repeat_op :: proc(data: u8) -> Repeat {
    mnemonics := [8]string{
        0b010 = "movs",
        0b011 = "cmps",
        0b101 = "stos",
        0b110 = "lods",
        0b111 = "scas",
    }
    selector := (data >> 1) & 0b111

    suffix := "b"
    if (data & 0b1) == 1 {
        suffix = "w"
    }
    return Repeat(fmt.aprintf("%s%s", mnemonics[selector], suffix))
}
|
||||
|
||||
// Looks up the first entry of the `instructions` table whose encoding equals
// `b` masked with that entry's mask. Returns the entry and true on a match,
// or a zero InstructionInfo and false when nothing matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
    for candidate in instructions {
        if (b & candidate.mask) != candidate.encoding {
            continue
        }
        return candidate, true
    }
    return InstructionInfo{}, false
}
|
||||
|
||||
// Entry point: reads up to 1024 bytes from the file named by the first
// command-line argument, decodes them into instructions and prints the
// resulting listing on stdout.
//
// Exits with status 1 when no file argument is given or the file cannot be
// opened or read.
main :: proc() {
    // Guard the os.args[1] access below; without an argument it would be an
    // out-of-bounds index.
    if len(os.args) < 2 {
        fmt.eprintln("usage:", os.args[0], "<binary file>")
        os.exit(1)
    }

    f,err := os.open(os.args[1])
    if err != os.ERROR_NONE {
        fmt.eprintln("ERROR:", err)
        os.exit(1)
    }
    defer os.close(f)

    // NOTE: only the first 1024 bytes of the file are decoded.
    data := make([]u8, 1024)
    bytes_read, err2 := os.read(f, data)
    if err2 != nil {
        fmt.eprintln("ERROR:", err2)
        os.exit(1)
    }

    instructions_list := make([dynamic]Instruction, 0, 512)

    decode_data(&instructions_list, data[:], bytes_read)
    print_instructions_stdout(instructions_list[:])
}
|
@ -6,9 +6,14 @@ NC='\033[0m'
|
||||
|
||||
make asm_files > /dev/null
|
||||
|
||||
if [ ! "$(command -v ./sim8086)" ]; then
|
||||
echo -e "\nError: 'sim8086' executable not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for ASM_BIN in asm_files/*.bin;
|
||||
do
|
||||
./decoder8086 "$ASM_BIN" > output.asm 2> /dev/null
|
||||
./sim8086 "$ASM_BIN" > output.asm 2> /dev/null
|
||||
nasm output.asm -o output.bin 2> /dev/null
|
||||
ASM_FILE=${ASM_BIN%.*}.asm
|
||||
if [ ! -e output.bin ]; then
|
||||
|
123
types.odin
Normal file
123
types.odin
Normal file
@ -0,0 +1,123 @@
|
||||
package sim_8086
|
||||
|
||||
// A CPU register as used by the decoder tables.
Register :: struct {
    // Name used for 16-bit access, e.g. "ax".
    fullname: string,
    // Name used for 8-bit access with the same code, e.g. "al".
    bytename: string,
    // Register contents: `low`/`high` overlay the two bytes of `full`.
    value: struct #raw_union {
        using _: struct {
            low, high: byte,
        },
        full: u16,
    },
    // Bit pattern that selects this register in an instruction encoding.
    code: u8,
}
|
||||
|
||||
// Tells the decoder where an instruction encodes its operand width.
WordSize :: enum {
    // No width information; the decoder leaves the operation as byte-sized.
    None,
    // Bit 0 of the opcode byte is set for word operations.
    LastBit,
    // Bit 3 of the opcode byte is set for word operations.
    FourthBit,
    // The decoder has no special case for this value, so it behaves like None.
    Always8,
    // The operation is always word-sized.
    Always16,
}
|
||||
|
||||
// Marker for "no value"; used as a variant of both Displacement and Operand.
None :: struct {}

// Signed 8-bit and 16-bit displacements of a memory operand.
Disp8 :: i8
Disp16 :: i16
Displacement :: union {
    None,
    Disp8,
    Disp16
}

// Index into the `registers` table.
RegisterId :: distinct u8
// Immediate operands: signed byte, signed word and unsigned byte.
Immediate8 :: distinct i8
Immediate16 :: distinct i16
ImmediateU8 :: distinct u8
// Register-based memory operand: `addr_id` is the 3-bit r/m field that
// selects the address expression, plus an optional displacement.
MemoryAddr :: struct {
    addr_id: u8,
    displacement: Displacement,
}
// 16-bit address encoded directly in the instruction.
DirectAddress :: distinct i16
// Index into the `segment_registers` table.
SegmentRegister :: distinct i8
// Short jump offset; the decoder stores the encoded displacement plus two.
Jump :: distinct i8
// Operand printed as the name of the `variable_port` register.
VariablePort :: struct {}
// Shift/rotate count selector: true when bit 1 of the opcode is set.
ShiftRotate :: distinct bool
// Mnemonic of the string operation that follows a repeat prefix.
Repeat :: string
// Far target: instruction pointer and code segment values.
Intersegment :: struct {
    ip: i16,
    cs: i16,
}
// Near target computed by the decoder from a 16-bit displacement.
DirectWithinSegment :: distinct u16
|
||||
|
||||
// A decoded operand. Exactly one variant is held at a time; `None` means the
// instruction has no operand in that position.
Operand :: union {
    None,
    RegisterId,
    Immediate8,
    ImmediateU8,
    Immediate16,
    MemoryAddr,
    DirectAddress,
    SegmentRegister,
    Jump,
    VariablePort,
    ShiftRotate,
    Repeat,
    DirectWithinSegment,
    Intersegment,
}
|
||||
|
||||
// Kind of operand an instruction table entry expects in its source or
// destination position; parse_operand switches on this value to decide how
// to decode the operand.
OperandInfo :: enum {
    None,
    Register,
    SegmentRegister,
    RegisterMemory,
    Immediate,
    ImmediateUnsigned,
    Accumulator,
    DirectAddress,
    Jump,
    VariablePort,
    ShiftRotate,
    Repeat,
    DirectWithinSegment,
    Intersegment,
}
|
||||
|
||||
// Location of the 3-bit register field within an instruction.
RegisterEncodingBits :: enum {
    // No register field; asking for a register operand panics.
    None,
    // Bits 0-2 of the first (opcode) byte.
    FirstByteLast3,
    // Bits 3-5 of the second byte.
    SecondByteMiddle3,
    // Bits 0-2 of the second byte.
    SecondByteLast3,
    // Bits 3-5 of the first (opcode) byte.
    FirstByteMiddle3,
}
|
||||
|
||||
// One entry of the instruction table: how to recognise an opcode byte and how
// to decode the operands that follow it.
InstructionInfo :: struct {
    // An opcode byte matches when (byte & mask) == encoding.
    mask: u8,
    encoding: u8,
    opname: Op,
    // NOTE(review): free-form description; not read by the visible code.
    desc: string,
    // Operand kinds for the source and destination positions.
    src: OperandInfo,
    dst: OperandInfo,
    // Where the operand width is encoded.
    word_size: WordSize,
    // Where the register field is encoded.
    reg_info: RegisterEncodingBits,
    // When set, bit 1 of the opcode swaps source and destination.
    has_flip: bool,
    // When set, bit 1 of the opcode makes a word immediate occupy one byte.
    has_sign_extension: bool,
    // When set, the mnemonic is resolved from a field in the raw bytes.
    check_second_encoding: bool,
    // Extra bytes added to the instruction length after operand decoding.
    consume_extra_bytes: int,
    // NOTE(review): not read by the visible decoder or printer; purpose unconfirmed.
    shift_rotate_flag: bool,
}
|
||||
|
||||
// A fully decoded instruction as produced by decode_data.
Instruction :: struct {
    opname: Op,
    // Operands after any direction-bit swap has been applied.
    src: Operand,
    dst: Operand,
    // Table entry the instruction was decoded with.
    info: InstructionInfo,
    // True for 16-bit operations.
    is_word: bool,
    // NOTE(review): decode_data never sets this field; confirm whether it is still needed.
    indirect_intersegment: bool,
    // Segment override prefix that preceded the instruction, if any.
    has_segment: Maybe(Register),
    // True when a LOCK prefix preceded the instruction.
    has_lock: bool,
    // Number of bytes the instruction occupies (prefix bytes not included).
    bytes_read: int,
    // Slice of the input buffer covering those bytes.
    raw_data: []u8,
    debug_msg: string,
}
|
Loading…
x
Reference in New Issue
Block a user