432 lines
12 KiB
C
432 lines
12 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <sys/stat.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include "lib.h"
|
|
#include "decode.h"
|
|
|
|
/// Get Effective Address Calculation Registers
///
/// Maps a 3-bit R/M field to the text of the base/index register expression
/// used inside a memory operand, e.g. 0b000 -> "bx + si", 0b111 -> "bx".
///
/// @param rm  R/M field value; only 0..7 are valid.
/// @return    Pointer to a string literal (never NULL). The caller must not
///            modify or free it.
///
/// Any value outside 0..7 prints a diagnostic to stderr and terminates the
/// process with exit status 1.
char* get_eac_register(char rm)
{
    static char* const eac_names[8] = {
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    };

    // Go through unsigned char so that a negative `rm` (plain char may be
    // signed) turns into a large index and is rejected by the bounds check.
    const unsigned char idx = (unsigned char)rm;
    if (idx >= sizeof(eac_names) / sizeof(eac_names[0]))
    {
        // Deliberately not perror(): no library call failed here, so errno is
        // unrelated and perror would append a misleading system error text.
        fprintf(stderr, "Invalid R/M value\n");
        exit(1);
    }
    return eac_names[idx];
}
|
|
|
|
/// Pick the textual name of `reg` for the requested operand width.
///
/// @param reg   Register descriptor; its `fullname` and `bytename` members are read.
/// @param wide  Exactly 1 selects `fullname`; every other value selects `bytename`.
/// @return      The selected member, returned as-is.
static char* reg_name(Register reg, char wide)
{
    if (wide != 1)
    {
        return reg.bytename;
    }
    return reg.fullname;
}
|
|
|
|
/// Extract the bit-field of `value` selected by `mask` and right-align it.
///
/// The masked value is shifted right until the lowest set bit of `mask`
/// reaches bit 0. A zero mask yields 0 (the loop is capped at eight shifts).
///
/// @param value  Byte to extract the field from.
/// @param mask   Bit mask selecting the field.
/// @return       The field, shifted down to start at bit 0.
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;
    for (int shifts = 0; shifts < 8 && !(mask & 0x1); shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }
    return field;
}
|
|
|
|
/// Experimental walk over the `inst_funcs` table; it does not decode anything yet.
///
/// For every row/column of `inst_funcs`, prints the address of each entry up
/// to (not including) the first NULL entry of that column, one address per
/// line on stdout, then returns a zero-initialised ParsedInstruction.
///
/// @param buf  Start of the instruction bytes. Currently not read.
/// @return     Always `(ParsedInstruction){0}`.
///
/// NOTE(review): the dump is written to stdout, the same stream main() uses
/// for the disassembly listing, so the two are interleaved — confirm this is
/// only temporary debugging output.
ParsedInstruction parse_instruction_ids(u8* buf)
{
    // The byte stream is not consulted yet.
    (void)buf;

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_funcs) / (sizeof(inst_parser_f)*6*4); i++)
    {
        for (int j = 0; j < 6; j++)
        {
            for (int k = 0; k < 4 && inst_funcs[i][j][k] != NULL; k++)
            {
                printf("%p\n", (void*)(uptr)inst_funcs[i][j][k]);
                // Apply inst_func_t
            }
        }
    }
    return (ParsedInstruction){0};
}
|
|
/// Match the opcode byte at `buf[0]` against `inst_formats` and extract the raw
/// fields (d, s, w, mod, reg, r/m, displacement, immediate data) that the
/// matched format declares. Fields the format does not declare stay "none".
///
/// @param buf  Start of the instruction bytes. Depending on the matched format
///             the function reads up to `buf[5]`. It receives no length, so the
///             caller must guarantee those bytes are readable.
/// @return     The extracted fields plus `bytes_read`, the number of bytes the
///             instruction occupies. `bytes_read == 0` means no format matched.
ParsedInstruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    InstFormat fmt = {0};
    bool matched_inst = false;

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
    {
        // mask_inst marks the variable bits of the opcode byte; the remaining
        // bits must equal the format's fixed encoding.
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = inst_formats[i];
            matched_inst = true;
            break;
        }
    }
    if (!matched_inst)
        return (ParsedInstruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();
    u8 is_data_addr = false;

    // Opcode byte, plus the mod/reg/rm byte when the format has one.
    u16 bytes_read = 1;
    bytes_read += fmt.has_operands ? 1 : 0;

    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));

    if (fmt.parse_reg.tag == P_REG_MASK)
    {
        // The reg field lives in the second byte when there is one, otherwise
        // in the opcode byte itself.
        u8 reg = fmt.has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt.parse_reg.mask));
    }
    else if (fmt.parse_reg.tag == P_REG_FIXED)
    {
        reg_opt = some_u8(fmt.parse_reg.fixed);
        is_data_addr = true;
    }

    // Work out how many displacement bytes follow the mod/reg/rm byte. This
    // is computed once so that the immediate-data offset below and the
    // displacement decoding agree with each other.
    //
    // This is a trick because mod == 1 and mod == 2 will displace one and two
    // bytes respectively but mod == 3 wraps to 0 since it doesn't displace.
    // The one exception is mod == 0 with r/m == 0b110: a direct address, which
    // carries a 16-bit displacement.
    bool is_direct_addr = false;
    u8 disp_len = 0;
    if (fmt.has_displacement)
    {
        if (mod_opt.value % 3 > 0)
        {
            disp_len = mod_opt.value % 3;
        }
        else if (mod_opt.value == MODE_MEM_NO_DIS && rm_opt.value == 0x6)
        {
            is_direct_addr = true;
            disp_len = 2;
        }
    }

    if (fmt.has_data)
    {
        // Immediate data comes after the opcode, the optional mod/reg/rm byte
        // and any displacement bytes (including a direct address).
        u8 idx = 1;
        if (fmt.has_operands) idx += 1;
        idx += disp_len;

        u16 data;
        if (fmt.has_s && s_opt.value == 1)
        {
            // s == 1: a single byte that is sign-extended to 16 bits.
            data = (sbyte)buf[idx];
            bytes_read += 1;
        }
        else
        {
            data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx];
            bytes_read += w_opt.value == 0 ? 1 : 2;
        }
        data_opt = some_u16(data);
    }

    if (disp_len > 0)
    {
        // Direct addresses and 16-bit displacement mode are little-endian
        // words; the 8-bit displacement mode is one sign-extended byte.
        u16 disp = (is_direct_addr || mod_opt.value == MODE_MEM_DIS_16)
            ? (i16)buf[3] << 8 | buf[2]
            : (sbyte)buf[2];
        displacement_opt = some_u16(disp);
        bytes_read += disp_len;
    }

    return (ParsedInstruction) {
        .id = fmt.id,
        .name = fmt.name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .is_data_addr = is_data_addr,
        .bytes_read = bytes_read,
    };
}
|
|
|
|
/// Turn the raw fields of a ParsedInstruction into a source/destination
/// operand pair.
///
/// When a `mod` field is present:
///   - operand 1 is the register indexed by `reg`, or, when there is no `reg`
///     field, the immediate taken from `inst.data`;
///   - operand 2 comes from `r/m`: a register in register mode, a direct
///     address when mod is MODE_MEM_NO_DIS and r/m is 0b110, otherwise a memory
///     operand built from the effective-address registers and displacement.
/// When there is no `mod` field:
///   - operand 1 is built from `inst.data` if present (a direct address when
///     `is_data_addr` is set, otherwise an immediate);
///   - operand 2 is the register indexed by `reg`, if present.
/// If the `d` bit is present and equal to 1 the two operands are swapped.
///
/// NOTE(review): IF_LET_SOME is a project macro; presumably it runs its block
/// only when the option holds a value and binds that value to the given name —
/// confirm against its definition.
///
/// @param inst  Fields produced by the parser.
/// @return      Instruction with operand 1 as `src_opr`, operand 2 as `dst_opr`,
///              `id`/`operation` copied from `inst`, and `data` set to 0.
Instruction decode_instruction(ParsedInstruction inst)
{
    // Both operands start zeroed: on some paths (e.g. no mod field and no
    // data) an operand is never assigned and is still returned to the caller.
    Operand opr1 = {0};
    Operand opr2 = {0};
    i16 payload = 0;

    IF_LET_SOME(u8, mod, inst.mod)
    {
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr1.tag = OPR_T_REGISTER;
            opr1.reg.value = registers[(size_t)reg];
            opr1.reg.wide = inst.w.value;
        }
        else
        {
            opr1.tag = OPR_T_IMMEDIATE;
            opr1.imm.value = inst.data.value;
            // TODO: This is dumb, we shouldn't do it this way
            // `direct` selects the size prefix when printing: 0 = none,
            // 1 = "byte", 2 = "word".
            if (inst.s.value == 1) opr1.imm.direct = 0;
            else opr1.imm.direct = inst.w.value + 1;
        }

        if (mod == MODE_RGSTR_MODE)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)inst.rm.value];
            opr2.reg.wide = inst.w.value;
        }
        else if (mod == MODE_MEM_NO_DIS && inst.rm.value == 0x6)
        {
            opr2.tag = OPR_T_DIRADDR;
            opr2.dir_addr.value = inst.displacement.value;
        }
        else
        {
            opr2.tag = OPR_T_MEMORY;
            opr2.mem.eac_name = get_eac_register(inst.rm.value);
            opr2.mem.mode = mod;
            opr2.mem.displacement = (i16)inst.displacement.value;
        }
    }
    else
    {
        IF_LET_SOME(u16, data, inst.data)
        {
            if (inst.is_data_addr)
            {
                opr1.tag = OPR_T_DIRADDR;
                opr1.dir_addr.value = (i16)data;
            }
            else
            {
                opr1.tag = OPR_T_IMMEDIATE;
                opr1.imm.value = (i16)data;
                opr1.imm.direct = 0;
            }
        }
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)reg];
            opr2.reg.wide = inst.w.value;
        }
    }

    // d == 1 reverses the direction, so source and destination trade places.
    if (inst.d.tag == SOME && inst.d.value == 1)
    {
        Operand temp = opr1;
        opr1 = opr2;
        opr2 = temp;
    }

    return (Instruction) {
        .id = inst.id,
        .data = payload,
        .operation = inst.name,
        .src_opr = opr1,
        .dst_opr = opr2,
    };
}
|
|
|
|
/// Format one operand as assembly text.
///
///   register   -> its wide or byte name, e.g. the register's `fullname`
///   memory     -> "[<eac>]" or "[<eac> + N]" / "[<eac> - N]" for a non-zero
///                 displacement
///   immediate  -> the value, prefixed with "byte " or "word " when
///                 `imm.direct` is 1 or another positive value respectively
///   direct addr-> "[N]"
/// An operand whose tag matches none of these produces an empty string.
///
/// @param str_buf  Destination buffer. Its size is not known here and is not
///                 checked; the caller must provide enough room.
/// @param oprnd    Operand to format.
void get_operand_string(char* str_buf, Operand oprnd)
{
    // Start from an empty string so that an operand whose tag matches none of
    // the branches below still leaves a terminated string for the caller
    // instead of whatever the buffer happened to contain.
    str_buf[0] = '\0';

    if (oprnd.tag == OPR_T_REGISTER)
    {
        strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide));
    }
    else if (oprnd.tag == OPR_T_MEMORY)
    {
        char disp_str[16] = {'\0'};
        i16 disp = oprnd.mem.displacement;
        // A zero displacement is omitted entirely; otherwise print the sign
        // as a separate token followed by the magnitude.
        if (disp != 0)
            sprintf(disp_str, " %s %d", (disp > 0 ? "+" : "-"), abs(disp));
        sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, disp_str);
    }
    else if (oprnd.tag == OPR_T_IMMEDIATE)
    {
        char *size = "";
        if (oprnd.imm.direct > 0)
            size = oprnd.imm.direct == 1 ? "byte " : "word ";
        sprintf(str_buf, "%s%d", size, oprnd.imm.value);
    }
    else if (oprnd.tag == OPR_T_DIRADDR)
    {
        sprintf(str_buf, "[%d]", oprnd.dir_addr.value);
    }
}
|
|
|
|
/// Format a decoded instruction as "<operation> <destination>, <source>".
///
/// @param str_buf  Destination buffer. Its size is not known here and is not
///                 checked; the caller must provide enough room.
/// @param inst     Instruction whose `operation`, `dst_opr` and `src_opr` are
///                 printed.
void get_instr_string(char* str_buf, Instruction inst)
{
    // Zero-initialised so that an operand the formatter leaves untouched is
    // printed as an empty string rather than read as uninitialised memory.
    char src_str[32] = {'\0'};
    char dst_str[32] = {'\0'};

    get_operand_string(src_str, inst.src_opr);
    get_operand_string(dst_str, inst.dst_opr);

    // Destination first, then source.
    sprintf(str_buf, "%s %s, %s", inst.operation, dst_str, src_str);
}
|
|
|
|
// NOTE(review): this declares 65536 `char *` pointers, not 65536 bytes, and
// nothing in the visible part of this file reads or writes it. If it is meant
// to model a 64 KiB address space the element type is probably wrong —
// confirm before relying on it.
char *memory[65536];
|
|
// Keep this global for debugging purposes
|
|
// Ordinal of the next decoded instruction, starting at 1; main() prints it
// next to each instruction and post-increments it.
u16 inst_count = 1;
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
if (argc < 2)
|
|
{
|
|
printf("Usage: Please provide assembled instructions as input\n");
|
|
exit(0);
|
|
}
|
|
|
|
struct stat st;
|
|
if (stat(argv[1], &st) == -1)
|
|
{
|
|
perror("Unable to get file size\n");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
unsigned char* buffer = malloc(st.st_size);
|
|
if (!buffer)
|
|
{
|
|
perror("Unable to allocate memory for binary file");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
FILE *f = fopen(argv[1], "r");
|
|
if (!f)
|
|
{
|
|
perror("fopen\n");
|
|
free(buffer);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
|
|
if (bytes_read != (size_t)st.st_size)
|
|
{
|
|
fprintf(stderr, "Read of binary file to memory incomplete.\n");
|
|
free(buffer);
|
|
fclose(f);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
printf("; Decoded 8086 Assembly Instructions\n\n");
|
|
printf("bits 16\n\n");
|
|
|
|
char *inst_str_buf = malloc(sizeof(char) * 256);
|
|
u32 bytes_processed = 0;
|
|
while (bytes_processed < bytes_read)
|
|
{
|
|
ParsedInstruction _ = parse_instruction_ids(buffer + bytes_processed);
|
|
(void)_;
|
|
ParsedInstruction parsed = parse_instruction(buffer + bytes_processed);
|
|
|
|
if (parsed.bytes_read > 0)
|
|
{
|
|
Instruction inst = decode_instruction(parsed);
|
|
get_instr_string(inst_str_buf, inst);
|
|
bytes_processed += parsed.bytes_read;
|
|
// printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
|
|
printf("%s", inst_str_buf);
|
|
int len = strlen(inst_str_buf);
|
|
for (int i = 0; i < 32 - len; i++)
|
|
printf(" ");
|
|
printf("; %d, %d", inst_count++, inst.id);
|
|
}
|
|
else
|
|
{
|
|
bytes_processed += 1;
|
|
fprintf(stderr, "___Unrecognized Instruction___");
|
|
}
|
|
// char inst = buffer[0];
|
|
// if (mov_inst(f, buffer, inst)) goto handled;
|
|
// if (add_inst(f, buffer, inst)) goto handled;
|
|
// handled:
|
|
printf("\n");
|
|
}
|
|
free(inst_str_buf);
|
|
free(buffer);
|
|
|
|
return 0;
|
|
}
|