237 lines
6.9 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include "lib.h"
/// Element count of an array, computed at compile time.
/// Only meaningful for a real array object: if `arr` is a pointer (e.g. an
/// array function parameter) the result is sizeof(pointer) / sizeof(element).
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/// Opcode bit patterns for the instructions this decoder knows about.
enum Instruction
{
    /// Same bit pattern main() compares against (after masking off the two
    /// low bits) for "register/memory to/from register" moves.
    INST_MOV_REG_REG = 0x88, // 0b10001000
};
/// Values of the two-bit `mod` field taken from the top bits of the byte that
/// follows the opcode.
enum Mode
{
    MODE_MEM_NO_DIS = 0, // 0b00: memory operand, no displacement bytes
    MODE_MEM_DIS_08 = 1, // 0b01: memory operand, one displacement byte
    MODE_MEM_DIS_16 = 2, // 0b10: memory operand, two displacement bytes
    MODE_RGSTR_MODE = 3, // 0b11: operand is a register
};
/// One entry of the register table: a register code together with the names
/// to print for it and storage for a 16-bit value.
typedef struct Register
{
// Register code; the table below uses the values 0b000..0b111.
char code;
// Name returned by reg_name() when the `wide` flag is 1 (e.g. "ax").
char* fullname;
// Name returned by reg_name() when the `wide` flag is not 1 (e.g. "al").
char* bytename;
// 16-bit value that can also be accessed as two separate chars.
// NOTE(review): which of low/high aliases the least-significant byte of
// `full` depends on host byte order — confirm before relying on it.
// Not read or written anywhere in the visible code.
union
{
struct
{
char low;
char high;
};
u16 full;
} value;
} Register;
Register registers[8] = {
{.code = 0b000, .fullname = "ax", .bytename = "al"},
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
{.code = 0b010, .fullname = "dx", .bytename = "dl"},
{.code = 0b011, .fullname = "bx", .bytename = "bl"},
{.code = 0b100, .fullname = "sp", .bytename = "ah"},
{.code = 0b101, .fullname = "bp", .bytename = "ch"},
{.code = 0b110, .fullname = "si", .bytename = "dh"},
{.code = 0b111, .fullname = "di", .bytename = "bh"},
};
// NOTE(review): this declares 65536 char *pointers*, not 65536 bytes. If it is
// meant to model byte-addressable memory, `char memory[65536]` may have been
// intended — confirm. Not referenced anywhere in the visible code.
char* memory[65536];
// void inst_mov_rgmm_reg()
/// Get Effective Address Calculation Registers
///
/// Maps a three-bit r/m value (0..7) to the register expression printed
/// inside a memory operand, e.g. 0b000 -> "bx + si".
///
/// The mapping depends on `rm` alone: 0b110 always yields "bp". A caller that
/// needs to treat some mod/rm combination differently must do so itself.
///
/// Returns a pointer to a string literal; the caller must not modify or free
/// it. Any value outside 0..7 prints a diagnostic to stderr and terminates
/// the process with exit status 1.
char* get_eac_registers(char rm)
{
    static char* const eac_names[] = {
        "bx + si", // 0b000
        "bx + di", // 0b001
        "bp + si", // 0b010
        "bp + di", // 0b011
        "si",      // 0b100
        "di",      // 0b101
        "bp",      // 0b110
        "bx",      // 0b111
    };

    // Plain char may be signed; going through unsigned char turns negative
    // inputs into large indices so a single comparison rejects them too.
    unsigned char index = (unsigned char)rm;
    if (index >= sizeof(eac_names) / sizeof(eac_names[0]))
    {
        // This is not an errno-style failure, so perror() would append an
        // unrelated system error string to the message.
        fprintf(stderr, "Invalid R/M value: %d\n", (int)rm);
        exit(1);
    }
    return eac_names[index];
}
/// Choose which name of `reg` to print.
/// Returns `reg.fullname` when `wide` is exactly 1, otherwise `reg.bytename`.
static inline char* reg_name(Register reg, char wide)
{
    if (wide == 1)
    {
        return reg.fullname;
    }
    return reg.bytename;
}
static inline i16 get_data(unsigned char* buf, char wide)
{
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
}
/// Read exactly `count` bytes from `f` into `buf`.
/// Returns 1 when every byte was read, 0 on a short read (EOF or I/O error).
static int read_bytes(FILE *f, unsigned char *buf, size_t count)
{
    return count == 0 || fread(buf, sizeof(unsigned char), count, f) == count;
}

/// Number of displacement bytes that follow a mod/rm byte.
static size_t displacement_size(unsigned char mod, unsigned char rm)
{
    switch (mod)
    {
    case 0b00:
        // mod 00 with r/m 110 is the direct-address form: a 16-bit address
        // follows the mod/rm byte instead of a base register being used.
        return rm == 0b110 ? 2 : 0;
    case 0b01:
        return 1;
    case 0b10:
        return 2;
    default:
        return 0; // register mode carries no displacement
    }
}

/// Format the bracketed memory operand selected by `mod`/`rm` into `out`.
/// `disp_bytes` must hold displacement_size(mod, rm) bytes (low byte first).
/// Must not be called for register mode (mod == 0b11).
static void format_memory_operand(char *out, size_t out_size, unsigned char mod,
                                  unsigned char rm, unsigned char *disp_bytes)
{
    if (mod == 0b00 && rm == 0b110)
    {
        // Direct address: an unsigned 16-bit offset, no base register.
        unsigned address = (unsigned)disp_bytes[0] | ((unsigned)disp_bytes[1] << 8);
        snprintf(out, out_size, "[%u]", address);
        return;
    }

    char *eac_name = get_eac_registers((char)rm);
    if (mod == 0b00)
    {
        snprintf(out, out_size, "[%s]", eac_name);
        return;
    }

    // mod 01 carries a sign-extended 8-bit displacement, mod 10 a 16-bit one.
    // Working in int keeps the magnitude of -32768 representable.
    int disp = get_data(disp_bytes, mod == 0b10);
    snprintf(out, out_size, "[%s %c %d]", eac_name,
             disp < 0 ? '-' : '+', disp < 0 ? -disp : disp);
}

/// Entry point: disassemble the 8086 `mov` encodings found in the file named
/// by argv[1] and print them as assembly text on stdout.
///
/// Returns EXIT_SUCCESS when the whole input was consumed, EXIT_FAILURE when
/// no input file was given, the file could not be opened, a read error
/// occurred, or the input ended in the middle of an instruction.
int main(int argc, char** argv)
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: Please provide assembled instructions as input\n");
        return EXIT_FAILURE;
    }

    // The input is raw machine code: binary mode avoids newline / EOF-marker
    // translation on platforms that distinguish text streams.
    FILE *f = fopen(argv[1], "rb");
    if (!f)
    {
        perror("fopen");
        return EXIT_FAILURE;
    }

    printf("; Decoded 8086 Assembly Instructions\n\n");
    printf("bits 16\n\n");

    unsigned char buf[8];
    char operand[32];
    int truncated = 0;

    while (read_bytes(f, buf, 1))
    {
        // Raw bytes are kept unsigned so masks and shifts never see a
        // sign-extended value.
        unsigned char inst = buf[0];

        // Register/memory to/from register: 100010dw
        if ((inst & 0b11111100) == 0b10001000)
        {
            if (!read_bytes(f, buf, 1))
            {
                truncated = 1;
                break;
            }
            unsigned char modrm = buf[0];
            unsigned char w = inst & 0b00000001;
            unsigned char d = (inst & 0b00000010) >> 1;
            unsigned char mod = (modrm & 0b11000000) >> 6;
            unsigned char reg = (modrm & 0b00111000) >> 3;
            unsigned char rm = (modrm & 0b00000111);

            if (mod == MODE_RGSTR_MODE)
            {
                // d == 0: the reg field is the source; d == 1: it is the destination.
                Register src_reg = d == 0 ? registers[reg] : registers[rm];
                Register dst_reg = d == 0 ? registers[rm] : registers[reg];
                printf("mov %s, %s ;0", reg_name(dst_reg, w), reg_name(src_reg, w));
            }
            else
            {
                if (!read_bytes(f, buf, displacement_size(mod, rm)))
                {
                    truncated = 1;
                    break;
                }
                format_memory_operand(operand, sizeof operand, mod, rm, buf);
                if (d)
                {
                    printf("mov %s, %s ;1", reg_name(registers[reg], w), operand);
                }
                else
                {
                    printf("mov %s, %s ;2", operand, reg_name(registers[reg], w));
                }
            }
        }
        // Immediate to register/memory: 1100011w
        else if ((inst & 0b11111110) == 0b11000110)
        {
            if (!read_bytes(f, buf, 1))
            {
                truncated = 1;
                break;
            }
            unsigned char modrm = buf[0];
            unsigned char w = inst & 0b00000001;
            unsigned char mod = (modrm & 0b11000000) >> 6;
            unsigned char rm = (modrm & 0b00000111);

            // Layout after the mod/rm byte: displacement bytes (if any)
            // first, then the one- or two-byte immediate.
            size_t disp_len = displacement_size(mod, rm);
            size_t data_len = w == 0 ? 1 : 2;
            if (!read_bytes(f, buf, disp_len + data_len))
            {
                truncated = 1;
                break;
            }
            i16 data = get_data(buf + disp_len, w);

            if (mod == MODE_RGSTR_MODE)
            {
                // In register mode r/m names a register, not a memory operand.
                printf("mov %s, %d ;w %d mod %d rm %d",
                       reg_name(registers[rm], w), data, w, mod, rm);
            }
            else
            {
                format_memory_operand(operand, sizeof operand, mod, rm, buf);
                printf("mov %s, %s %d ;w %d mod %d rm %d",
                       operand, w == 0 ? "byte" : "word", data, w, mod, rm);
            }
        }
        // Immediate to register: 1011wreg
        else if ((inst & 0b11110000) == 0b10110000)
        {
            unsigned char w = (inst & 0b00001000) >> 3;
            unsigned char reg = inst & 0b00000111;
            if (!read_bytes(f, buf, w == 1 ? 2 : 1))
            {
                truncated = 1;
                break;
            }
            printf("mov %s, %hd ; Immediate to register",
                   reg_name(registers[reg], w), get_data(buf, w));
        }
        // TODO: the four forms below are recognised but not decoded. Their
        // operand bytes are not consumed, so whatever follows them in the
        // stream is interpreted as new opcodes.
        // Memory to accumulator
        else if ((inst & 0b11111110) == 0b10100000)
        {
            printf("mov mem to acc");
        }
        // Accumulator to memory
        else if ((inst & 0b11111110) == 0b10100010)
        {
            printf("mov acc to mem");
        }
        // Register/memory to segment register
        else if (inst == 0b10001110)
        {
            printf("mov regmem to segreg");
        }
        // Segment register to register/memory
        else if (inst == 0b10001100)
        {
            printf("mov segreg to regmem");
        }
        else
        {
            fprintf(stderr, "Unrecognized Instruction");
        }
        printf("\n");
    }

    int status = EXIT_SUCCESS;
    if (ferror(f))
    {
        fprintf(stderr, "Error while reading input\n");
        status = EXIT_FAILURE;
    }
    else if (truncated)
    {
        fprintf(stderr, "Input ended in the middle of an instruction\n");
        status = EXIT_FAILURE;
    }

    fclose(f);
    return status;
}