// 8086 instruction decoder: reads an assembled binary and prints the decoded assembly.
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

#include "lib.h"

// Opcode byte patterns for instructions the decoder knows by name.
// NOTE(review): nothing in the visible part of this file references this enum;
// the format table carries its own copy of each pattern.
enum InstructionType
{
    INST_MOV_REG_REG = 0b10001000, // mov, register/memory to/from register
};

// Values of the two-bit `mod` field (top two bits of the byte that follows
// the opcode). The numeric value of the two displacement modes equals the
// number of displacement bytes that follow.
enum Mode
{
    MODE_MEM_NO_DIS = 0b00, // memory operand, no displacement
    MODE_MEM_DIS_08 = 0b01, // memory operand, 8-bit displacement
    MODE_MEM_DIS_16 = 0b10, // memory operand, 16-bit displacement
    MODE_RGSTR_MODE = 0b11, // register operand, no displacement
};

typedef struct Register
|
|
{
|
|
char code;
|
|
char* fullname;
|
|
char* bytename;
|
|
union {
|
|
struct {
|
|
char low;
|
|
char high;
|
|
};
|
|
u16 full;
|
|
} value;
|
|
} Register;
|
|
|
|
Register registers[8] = {
|
|
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
|
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
|
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
|
|
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
|
|
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
|
|
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
|
|
{.code = 0b110, .fullname = "si", .bytename = "dh"},
|
|
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
|
};
|
|
|
|
typedef struct Instruction
|
|
{
|
|
u16 id;
|
|
char *name;
|
|
u16_opt data;
|
|
u16_opt displacement;
|
|
u8_opt w;
|
|
u8_opt d;
|
|
u8_opt s;
|
|
u8_opt mod;
|
|
u8_opt reg;
|
|
u8_opt rm;
|
|
u8_opt SR;
|
|
u8 bytes_read;
|
|
} Instruction;
|
|
|
|
// Table with one slot per 16-bit address (65536 entries).
// NOTE(review): each slot is a `char *`, not a byte — confirm that is
// intended; nothing in the visible part of this file reads or writes it.
char *memory[65536];

/// Get Effective Address Calculation Registers
///
/// Maps the 3-bit r/m field to the register expression used to form the
/// effective address (for example 0b000 -> "bx + si").
///
/// rm: r/m field value, expected to be in the range 0..7.
/// Returns a pointer to a string literal; the caller must not modify or free it.
/// Any value outside 0..7 is reported on stderr and ends the process with
/// exit status 1.
char* get_eac_registers(char rm)
{
    static char *const eac_names[8] = {
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    };

    int code = rm;
    if (code < 0 || code > 7)
    {
        // Not perror(): no library call failed here, so errno carries no
        // information and its text would only confuse the message.
        fprintf(stderr, "Invalid R/M value\n");
        exit(1);
    }
    return eac_names[code];
}

// Picks the printable name of `reg`: the word-sized name when `wide` is
// exactly 1, the byte-sized name for every other value.
static char* reg_name(Register reg, char wide)
{
    if (wide == 1)
    {
        return reg.fullname;
    }
    return reg.bytename;
}

static i16 get_data(unsigned char* buf, char wide)
|
|
{
|
|
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
|
|
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
|
|
}
|
|
|
|
// Extracts the bit field selected by `mask` from `value` and shifts it down
// so that the field's lowest bit ends up in bit 0. A zero mask yields 0.
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;
    // Shift right once for every trailing zero bit of the mask (at most 8 times).
    for (int shifts = 0; shifts < 8 && (mask & 0x1) == 0; shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }
    return field;
}

typedef struct InstFormat
|
|
{
|
|
u16 id;
|
|
char *name;
|
|
u8 inst_enc;
|
|
u8 mask_inst;
|
|
u8 mask_w;
|
|
u8 mask_reg;
|
|
bool has_operands;
|
|
bool has_displacement;
|
|
bool has_data;
|
|
bool has_d;
|
|
bool has_w;
|
|
bool has_reg;
|
|
bool has_mod;
|
|
bool has_rm;
|
|
bool has_s;
|
|
bool has_SR;
|
|
} InstFormat;
|
|
|
|
InstFormat inst_formats[] =
|
|
{
|
|
////////
|
|
// MOV
|
|
////////
|
|
// Register/memory to/from register
|
|
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
|
|
.mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
|
|
.has_reg=true, .has_mod=true, .has_rm=true},
|
|
// Immediate to register/memory
|
|
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
|
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
|
// Immediate to register
|
|
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
|
.mask_reg=0b00000111, .has_data=true, .has_w=true},
|
|
// Memory to accumulator | Accumulator to memory using the `d` bit
|
|
// even though the manual doesn't specify it
|
|
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
|
.has_data=true, .has_w=true, .has_d=true},
|
|
// Register/memory to segment register and inverse using the `d` bit
|
|
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
|
|
.has_displacement=true, .has_mod=true, .has_rm=true},
|
|
////////
|
|
// ADD
|
|
////////
|
|
// Reg/memory with register or either
|
|
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1,
|
|
.has_operands=true, .has_displacement=true, .has_w=true,
|
|
.has_d=true, .has_reg=true, .has_mod=true, .has_rm=true},
|
|
// Immediate to register/memory
|
|
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
|
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
|
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true},
|
|
};
|
|
|
|
// Parses the instruction that starts at `buf`.
//
// buf: pointer to the first byte of the instruction. The function may look
//      at up to six bytes starting at `buf`; the caller must make sure they
//      are readable, because no length is passed in.
// Returns the decoded fields together with the encoded length in
// `bytes_read`. When no entry of `inst_formats` matches the first byte, the
// result has `bytes_read == 0`.
Instruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    const InstFormat *fmt = NULL;
    const size_t format_count = sizeof(inst_formats) / sizeof(inst_formats[0]);
    // TODO: This might be a good time to learn how to make a hashtable in C
    for (size_t i = 0; i < format_count; i++)
    {
        // Clear the flag/register bits, then compare what is left with the opcode pattern.
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = &inst_formats[i];
            break;
        }
    }
    if (fmt == NULL)
        return (Instruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();

    if (fmt->has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt->has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt->has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt->has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt->has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt->mask_w));
    if (fmt->has_reg)
    {
        // The reg field is in the second byte when there is one, otherwise in the opcode byte.
        u8 reg = fmt->has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt->mask_reg));
    }

    // Number of displacement bytes that follow the mod/rm byte.
    u8 disp_len = 0;
    if (fmt->has_displacement && fmt->has_mod)
    {
        // mod == 00 with r/m == 110 is direct addressing: no base register,
        // but a 16-bit address follows in place of a displacement.
        bool is_direct_addr = fmt->has_rm
            && mod_opt.value == MODE_MEM_NO_DIS
            && rm_opt.value == 0b110;
        // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
        // respectively but mod == 3 wraps to 0 since it doesn't displace
        disp_len = is_direct_addr ? 2 : mod_opt.value % 3;
    }

    if (disp_len > 0)
    {
        // mod is read from buf[1], so the displacement always starts at buf[2].
        u16 disp = disp_len == 2
            ? (i16)buf[3] << 8 | buf[2]
            : (sbyte)buf[2];
        displacement_opt = some_u16(disp);
    }

    if (fmt->has_data)
    {
        // The immediate comes last: after the opcode, the optional second
        // byte and any displacement bytes.
        u8 idx = 1;
        if (fmt->has_operands) idx += 1;
        idx += disp_len;
        // Low byte first; a single byte goes through sbyte to be sign-extended.
        u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx];
        data_opt = some_u16(data);
    }

    u8 bytes_read = 1;
    bytes_read += fmt->has_operands ? 1 : 0;
    bytes_read += disp_len;
    if (fmt->has_data) bytes_read += w_opt.value == 0 ? 1 : 2;

    return (Instruction) {
        .id = fmt->id,
        .name = fmt->name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .bytes_read = bytes_read,
    };
}

// Writes the textual form of `inst` into `str_buf`.
//
// str_buf: destination buffer; the caller must make it large enough for the
//          mnemonic, two register names and the numeric id (no size is passed).
// inst:    a parsed instruction.
//
// Register-to-register forms are printed as "<name> <dst>, <src> ;<id>".
// Every other form is printed as "<name> ;<id>" for now.
void decode_instruction(char* str_buf, Instruction inst)
{
    // Write the fallback text first so str_buf always holds a valid string,
    // including for memory-mode instructions that have no printer yet.
    sprintf(str_buf, "%s ;%d", inst.name, inst.id);

    IF_LET_SOME(u8, mod, inst.mod)
    {
        if (mod == MODE_RGSTR_MODE)
        {
            // NOTE(review): reg/d/w are read without checking that the format
            // provides them — verify for formats that carry mod but no reg field.
            Register reg = registers[(size_t)inst.reg.value];
            Register rm = registers[(size_t)inst.rm.value];
            // d == 0: the reg field is the source; d == 1: it is the destination.
            Register src_reg = inst.d.value == 0 ? reg : rm;
            Register dst_reg = inst.d.value == 0 ? rm : reg;
            char *src_name = reg_name(src_reg, inst.w.value);
            char *dst_name = reg_name(dst_reg, inst.w.value);
            sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id);
        }
    }
}

bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
|
{
|
|
size_t bytes_read;
|
|
// Register/memory to/from register
|
|
if ((inst & ~0x3) == (char)0b10001000)
|
|
{
|
|
// TODO: We should add some form of error handling here
|
|
bytes_read = fread(buf, sizeof(char), 1, f);
|
|
char next_byte = buf[0];
|
|
char w = inst & 0b00000001;
|
|
char d = (inst & 0b00000010) >> 1;
|
|
char mod = (next_byte & 0b11000000) >> 6;
|
|
char reg = (next_byte & 0b00111000) >> 3;
|
|
char rm = (next_byte & 0b00000111);
|
|
if (mod == MODE_RGSTR_MODE)
|
|
{
|
|
Register src_reg = d == 0 ? registers[(size_t)reg] : registers[(size_t)rm];
|
|
Register dst_reg = d == 0 ? registers[(size_t)rm] : registers[(size_t)reg];
|
|
printf("mov %s, %s ;0", reg_name(dst_reg, w), reg_name(src_reg, w));
|
|
}
|
|
else
|
|
{
|
|
bool is_direct_addr = mod == 0 && rm == 0b110;
|
|
int bytes_to_read = is_direct_addr ? 2 : mod % 3;
|
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
|
|
char disp_buf[16] = {'\0'};
|
|
if (bytes_to_read > 0)
|
|
{
|
|
i16 disp = get_data(buf, bytes_to_read - 1);
|
|
if (is_direct_addr) sprintf(disp_buf, "%d", abs(disp));
|
|
else sprintf(disp_buf, " %s %d", disp >= 0 ? "+" : "-", abs(disp));
|
|
}
|
|
Register rgstr = registers[(size_t)reg];
|
|
if (d) printf("mov %s, [%s%s] ;1", reg_name(rgstr, w), eac_name, disp_buf);
|
|
else printf("mov [%s%s], %s ;2", eac_name, disp_buf, reg_name(rgstr, w));
|
|
}
|
|
}
|
|
// Immediate to register/memory
|
|
else if ((inst & ~0x1) == (char)0b11000110)
|
|
{
|
|
bytes_read = fread(buf, sizeof(char), 1, f);
|
|
char w = inst & 0b00000001;
|
|
char mod = (buf[0] & 0b11000000) >> 6;
|
|
char rm = (buf[0] & 0b00000111);
|
|
int bytes_to_read = 1;
|
|
bytes_to_read += w == 0 ? 0 : 1;
|
|
// Same trick from earlier, see comment
|
|
bytes_to_read += mod % 3;
|
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
char *eac_name = get_eac_registers(rm);
|
|
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
|
char *word_str = w == 0 ? "byte" : "word";
|
|
char disp_str[16] = {'\0'};
|
|
if (mod % 3 > 1) sprintf(disp_str, " + %d", get_data(buf, (mod % 3) - 1));
|
|
printf("mov [%s%s], %s %d ;3", eac_name, disp_str, word_str, data);
|
|
}
|
|
// Immediate to register
|
|
else if ((inst & ~0xF) == (char)0b10110000)
|
|
{
|
|
char w = (inst & 0b00001000) >> 3;
|
|
Register reg = registers[(size_t)inst & 0b00000111];
|
|
char bytes_to_read = w == 1 ? 2 : 1;
|
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
printf("mov %s, %d ;4", reg_name(reg, w), get_data(buf, w));
|
|
}
|
|
// Memory/accumulator to accumulator/memory
|
|
else if ((inst & ~0x3) == (char)0b10100000)
|
|
{
|
|
// This instruction uses AX/AL register exclusively
|
|
Register ax_al = registers[0];
|
|
char w = (inst & 0b00000001);
|
|
// The manual doesn't refer to this as `d` but it acts similarly in that this bit
|
|
// swaps the accumulator's src/dst position
|
|
char d = (inst & 0b00000010) >> 1;
|
|
char bytes_to_read = w == 1 ? 2 : 1;
|
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
if (d) printf("mov [%d], %s ;5", get_data(buf, w), reg_name(ax_al, w));
|
|
else printf("mov %s, [%d] ;6", reg_name(ax_al, w), get_data(buf, w));
|
|
}
|
|
// Register/memory to segment register or segment register to register/memory
|
|
else if ((inst & ~0x3) == (char)0b10001100)
|
|
{
|
|
// Manual doesn't refer to this as `d` but swaps like in the previous instruction
|
|
char d = (inst & 0b00000010) >> 1;
|
|
(void)d;
|
|
printf("mov regmem to segreg");
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
return bytes_read > 0;
|
|
}
|
|
|
|
bool add_inst(FILE* f, unsigned char* buf, char inst)
|
|
{
|
|
size_t bytes_read;
|
|
if ((inst & ~0x3) == (char)0b00000000)
|
|
{
|
|
bytes_read = fread(buf, sizeof(char), 1, f);
|
|
char next_byte = buf[0];
|
|
char w = inst & 0b00000001;
|
|
char d = (inst & 0b00000010) >> 1;
|
|
char mod = (next_byte & 0b11000000) >> 6;
|
|
char reg = (next_byte & 0b00111000) >> 3;
|
|
char rm = (next_byte & 0b00000111);
|
|
// Same trick from earlier, see comment
|
|
int bytes_to_read = mod % 3;
|
|
if (bytes_to_read > 0) bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
Register rgstr = registers[(size_t)reg];
|
|
(void)rm;
|
|
if (mod == MODE_RGSTR_MODE)
|
|
{
|
|
if (d) printf("add %s, [%d] ;7", reg_name(rgstr, w), get_data(buf, w));
|
|
else printf("add [%d], %s ;8", get_data(buf, w), reg_name(rgstr, w));
|
|
}
|
|
else if (mod == MODE_MEM_NO_DIS)
|
|
{
|
|
if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_registers(rm));
|
|
else printf("add [%s], %s ;10", get_eac_registers(rm), reg_name(rgstr, w));
|
|
}
|
|
else
|
|
{
|
|
if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_registers(rm));
|
|
else printf("add [%s], %s ;12", get_eac_registers(rm), reg_name(rgstr, w));
|
|
}
|
|
}
|
|
else if ((inst & ~0x3) == (char)0b10000000)
|
|
{
|
|
bytes_read = fread(buf, sizeof(char), 1, f);
|
|
char w = inst & 0b00000001;
|
|
char mod = (buf[0] & 0b11000000) >> 6;
|
|
char rm = (buf[0] & 0b00000111);
|
|
int bytes_to_read = 1;
|
|
bytes_to_read += w == 0 ? 1 : 2;
|
|
// Same trick from earlier, see comment
|
|
bytes_to_read += mod % 3;
|
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
|
char *eac_name = get_eac_registers(rm);
|
|
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
|
char *word_str = w == 0 ? "byte" : "word";
|
|
char disp_str[16] = {'\0'};
|
|
if (mod % 3 > 1) sprintf(disp_str, " + %d", get_data(buf, (mod % 3) - 1));
|
|
printf("add [%s%s], %s %d ;13", eac_name, disp_str, word_str, data);
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
return bytes_read > 0;
|
|
}
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
if (argc < 2)
|
|
{
|
|
printf("Usage: Please provide assembled instructions as input\n");
|
|
exit(0);
|
|
}
|
|
|
|
struct stat st;
|
|
if (stat(argv[1], &st) == -1)
|
|
{
|
|
perror("Unable to get file size\n");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
unsigned char* buffer = malloc(st.st_size);
|
|
if (!buffer)
|
|
{
|
|
perror("Unable to allocate memory for binary file");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
FILE *f = fopen(argv[1], "r");
|
|
if (!f)
|
|
{
|
|
perror("fopen\n");
|
|
free(buffer);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
|
|
if (bytes_read != (size_t)st.st_size)
|
|
{
|
|
fprintf(stderr, "Read of binary file to memory incomplete.\n");
|
|
free(buffer);
|
|
fclose(f);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
printf("; Decoded 8086 Assembly Instructions\n\n");
|
|
printf("bits 16\n\n");
|
|
|
|
char *inst_str_buf = malloc(sizeof(char) * 256);
|
|
u32 bytes_processed = 0;
|
|
while (bytes_processed < bytes_read)
|
|
{
|
|
Instruction inst = parse_instruction(buffer + bytes_processed);
|
|
// char inst = buffer[0];
|
|
// if (mov_inst(f, buffer, inst)) goto handled;
|
|
// if (add_inst(f, buffer, inst)) goto handled;
|
|
|
|
if (inst.bytes_read > 0)
|
|
{
|
|
decode_instruction(inst_str_buf, inst);
|
|
printf("%s", inst_str_buf);
|
|
bytes_processed += inst.bytes_read;
|
|
}
|
|
else
|
|
{
|
|
bytes_processed += 1;
|
|
fprintf(stderr, "___Unrecognized Instruction___");
|
|
}
|
|
// handled:
|
|
printf("\n");
|
|
}
|
|
free(inst_str_buf);
|
|
free(buffer);
|
|
}
|