Compare commits

...

2 Commits

2 changed files with 244 additions and 53 deletions

290
decode.c
View File

@ -1,10 +1,9 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "lib.h" #include "lib.h"
#include <sys/stat.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) enum InstructionType
enum Instruction
{ {
INST_MOV_REG_REG = 0b10001000, INST_MOV_REG_REG = 0b10001000,
// INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000,
@ -24,15 +23,11 @@ typedef struct Register
char code; char code;
char* fullname; char* fullname;
char* bytename; char* bytename;
union {
union struct {
{
struct
{
char low; char low;
char high; char high;
}; };
u16 full; u16 full;
} value; } value;
} Register; } Register;
@ -48,9 +43,23 @@ Register registers[8] = {
{.code = 0b111, .fullname = "di", .bytename = "bh"}, {.code = 0b111, .fullname = "di", .bytename = "bh"},
}; };
char* memory[65536]; typedef struct Instruction
{
u16 id;
char *name;
u16_opt data;
u16_opt displacement;
u8_opt w;
u8_opt d;
u8_opt s;
u8_opt mod;
u8_opt reg;
u8_opt rm;
u8_opt SR;
u8 bytes_read;
} Instruction;
// void inst_mov_rgmm_reg() char *memory[65536];
/// Get Effective Address Calculation Registers /// Get Effective Address Calculation Registers
char* get_eac_registers(char rm) char* get_eac_registers(char rm)
@ -58,48 +67,192 @@ char* get_eac_registers(char rm)
char* reg_name; char* reg_name;
switch (rm) switch (rm)
{ {
case 0b000: case 0b000: reg_name = "bx + si"; break;
reg_name = "bx + si"; case 0b001: reg_name = "bx + di"; break;
break; case 0b010: reg_name = "bp + si"; break;
case 0b001: case 0b011: reg_name = "bp + di"; break;
reg_name = "bx + di"; case 0b100: reg_name = "si"; break;
break; case 0b101: reg_name = "di"; break;
case 0b010: case 0b110: reg_name = "bp"; break;
reg_name = "bp + si"; case 0b111: reg_name = "bx"; break;
break; default: perror("Invalid R/M value"); exit(1);
case 0b011:
reg_name = "bp + di";
break;
case 0b100:
reg_name = "si";
break;
case 0b101:
reg_name = "di";
break;
case 0b110:
reg_name = "bp";
break;
case 0b111:
reg_name = "bx";
break;
default:
perror("Invalid R/M value");
exit(1);
} }
return reg_name; return reg_name;
} }
static inline char* reg_name(Register reg, char wide) static char* reg_name(Register reg, char wide)
{ {
return wide == 1 ? reg.fullname : reg.bytename; return wide == 1 ? reg.fullname : reg.bytename;
} }
static inline i16 get_data(unsigned char* buf, char wide) static i16 get_data(unsigned char* buf, char wide)
{ {
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0]; return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
} }
/// Extract the bit field of `value` selected by `mask`, right-aligned.
///
/// The bits outside `mask` are cleared, then the result is shifted right once
/// for every trailing zero bit of `mask`, so the lowest selected bit ends up
/// at bit 0. A mask of 0 yields 0.
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;

    // One step per trailing zero of the mask; the step cap also terminates
    // the loop when the mask has no set bit at all.
    for (int shifts = 0; shifts < 8 && !(mask & 0x1); shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }

    return field;
}
/// Describes one instruction encoding: how to recognise it from the first
/// byte and which fields parse_instruction should extract for it.
typedef struct InstFormat
{
// Identifier copied into Instruction.id for a matching instruction
u16 id;
// Mnemonic copied into Instruction.name
char *name;
// Value the first byte must equal once the mask_inst bits are cleared
u8 inst_enc;
// Bits of the first byte that are ignored when matching against inst_enc
u8 mask_inst;
// Bits of the first byte that hold the w field (used when has_w is set)
u8 mask_w;
// Bits that hold the reg field; applied to the second byte when
// has_operands is set, otherwise to the first byte (used when has_reg is set)
u8 mask_reg;
// A second byte follows the first one and is counted in bytes_read
bool has_operands;
// `mod % 3` displacement bytes are counted for this format
bool has_displacement;
// Immediate data follows: one byte when w == 0, two bytes otherwise
bool has_data;
// Extract d from bit 1 of the first byte
bool has_d;
// Extract w from the first byte using mask_w
bool has_w;
// Extract reg using mask_reg
bool has_reg;
// Extract mod from bits 7-6 of the second byte
bool has_mod;
// Extract r/m from bits 2-0 of the second byte
bool has_rm;
// Extract s from bit 1 of the first byte
bool has_s;
// NOTE(review): set by the segment-register format but not read by
// parse_instruction in the code visible here
bool has_SR;
} InstFormat;
/// Table of recognised instruction encodings.
///
/// parse_instruction scans the entries in order and uses the first one whose
/// `inst_enc` equals the first instruction byte with the `mask_inst` bits
/// cleared, so `mask_inst` must cover exactly the variable bits of the opcode.
InstFormat inst_formats[] =
{
    ////////
    // MOV
    ////////
    // Register/memory to/from register
    {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
     .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
     .has_reg=true, .has_mod=true, .has_rm=true},
    // Immediate to register/memory
    {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
     .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
    // Immediate to register
    // w and reg both live in the opcode byte, so has_reg must be set for
    // mask_reg to be applied (to the first byte, since has_operands is unset).
    {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
     .mask_reg=0b00000111, .has_data=true, .has_w=true, .has_reg=true},
    // Memory to accumulator | Accumulator to memory using the `d` bit
    // even though the manual doesn't specify it
    // NOTE(review): the manual lists addr-lo and addr-hi for this encoding
    // regardless of w, while has_data sizes the trailing bytes from w - confirm.
    {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
     .has_data=true, .has_w=true, .has_d=true},
    // Register/memory to segment register and inverse using the `d` bit
    // Only bit 1 (d) varies here: masking bit 0 as well would also match
    // 0x8D and 0x8F, which are different instructions. The mod/rm byte is a
    // second byte, hence has_operands.
    {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x2, .has_SR=true, .has_d=true,
     .has_operands=true, .has_displacement=true, .has_mod=true, .has_rm=true},

    ////////
    // ADD
    ////////
    // Reg/memory with register or either
    {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1,
     .has_operands=true, .has_displacement=true, .has_w=true,
     .has_d=true, .has_reg=true, .has_mod=true, .has_rm=true},
    // Immediate to register/memory
    // NOTE(review): the manual encodes this as 100000sw; has_s is not set,
    // so the s bit is not extracted and the data size follows w only - confirm.
    {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
     .has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
    // Immediate to accumulator
    {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true},
};
/// Decode the single instruction that starts at `buf[0]`.
///
/// The first byte is matched against `inst_formats` (first match wins) and the
/// fields flagged by the matching format are extracted into the result.
///
/// @param buf  Start of the encoded instruction. No length is checked here:
///             the caller must make sure every byte of the instruction
///             (opcode, mod/rm, displacement, data) is readable.
/// @return     The decoded instruction with `bytes_read` set to its encoded
///             length, or an Instruction with `bytes_read == 0` when no
///             format matches the first byte.
Instruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    InstFormat fmt;
    bool matched_inst = false;

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
    {
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = inst_formats[i];
            matched_inst = true;
            break;
        }
    }

    if (!matched_inst)
        return (Instruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();

    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
    if (fmt.has_reg)
    {
        u8 reg = fmt.has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt.mask_reg));
    }

    // mod and r/m are always taken from buf[1], so a format that asks for
    // either one occupies a second byte even when has_operands is not set.
    u8 header_len = (fmt.has_operands || fmt.has_mod || fmt.has_rm) ? 2 : 1;

    // Displacement length: mod == 1 and mod == 2 displace one and two bytes
    // respectively, mod == 3 wraps to 0 since it doesn't displace. The one
    // exception is direct addressing (mod == 0, r/m == 110), which carries a
    // 16-bit address.
    u8 disp_len = 0;
    if (fmt.has_displacement && fmt.has_mod)
    {
        bool is_direct_addr = fmt.has_rm && mod_opt.value == 0 && rm_opt.value == 0b110;
        disp_len = is_direct_addr ? 2 : mod_opt.value % 3;
    }

    if (disp_len > 0)
    {
        // The displacement starts right after the mod/rm byte. The sbyte cast
        // sign-extends an 8-bit displacement to 16 bits.
        u16 disp = disp_len == 2
            ? (i16)buf[3] << 8 | buf[2]
            : (sbyte)buf[2];
        displacement_opt = some_u16(disp);
    }

    u8 data_len = 0;
    if (fmt.has_data)
    {
        // Immediate data follows the opcode, the mod/rm byte and the displacement
        u8 idx = header_len + disp_len;
        data_len = (fmt.has_w && w_opt.value != 0) ? 2 : 1;
        u16 data = data_len == 2
            ? (i16)buf[idx + 1] << 8 | buf[idx]
            : (sbyte)buf[idx];
        data_opt = some_u16(data);
    }

    return (Instruction) {
        .id = fmt.id,
        .name = fmt.name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .bytes_read = header_len + disp_len + data_len,
    };
}
/// Format a decoded instruction as assembly text into `str_buf`.
///
/// Register-to-register instructions are written as `name dst, src ;id`; every
/// other instruction is written as `name ;id`. The trailing `;id` is the
/// matching format id, emitted as an assembly comment.
///
/// @param str_buf  Destination buffer. It must be large enough for the
///                 formatted text; no bound is checked here.
/// @param inst     Instruction to format.
void decode_instruction(char* str_buf, Instruction inst)
{
    IF_LET_SOME(u8, mod, inst.mod)
    {
        if (mod == MODE_RGSTR_MODE)
        {
            // NOTE(review): reg, d and w are read without checking that the
            // matching format actually extracted them - confirm for formats
            // that carry mod/rm but no reg field.
            Register reg = registers[(size_t)inst.reg.value];
            Register rm = registers[(size_t)inst.rm.value];
            // d == 0 means reg is the source, otherwise reg is the destination
            Register src_reg = inst.d.value == 0 ? reg : rm;
            Register dst_reg = inst.d.value == 0 ? rm : reg;
            char *src_name = reg_name(src_reg, inst.w.value);
            char *dst_name = reg_name(dst_reg, inst.w.value);

            sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id);
        }
        else
        {
            // Memory operands are not formatted yet; still write the buffer so
            // the caller never prints stale or uninitialised contents.
            sprintf(str_buf, "%s ;%d", inst.name, inst.id);
        }
    }
    else
    {
        sprintf(str_buf, "%s ;%d", inst.name, inst.id);
    }
}
bool mov_inst(FILE* f, unsigned char* buf, char inst) bool mov_inst(FILE* f, unsigned char* buf, char inst)
{ {
size_t bytes_read; size_t bytes_read;
@ -123,8 +276,6 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
else else
{ {
bool is_direct_addr = mod == 0 && rm == 0b110; bool is_direct_addr = mod == 0 && rm == 0b110;
// This is a trick because mod == 1 and mod == 2 will displace one and two bytes
// respectively but mod == 3 wraps to 0 since it doesn't displace
int bytes_to_read = is_direct_addr ? 2 : mod % 3; int bytes_to_read = is_direct_addr ? 2 : mod % 3;
bytes_read = fread(buf, sizeof(char), bytes_to_read, f); bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
char* eac_name = is_direct_addr ? "" : get_eac_registers(rm); char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
@ -262,27 +413,66 @@ int main(int argc, char** argv)
printf("Usage: Please provide assembled instructions as input\n"); printf("Usage: Please provide assembled instructions as input\n");
exit(0); exit(0);
} }
unsigned char buf[256];
struct stat st;
if (stat(argv[1], &st) == -1)
{
perror("Unable to get file size\n");
return EXIT_FAILURE;
}
unsigned char* buffer = malloc(st.st_size);
if (!buffer)
{
perror("Unable to allocate memory for binary file");
return EXIT_FAILURE;
}
FILE *f = fopen(argv[1], "r"); FILE *f = fopen(argv[1], "r");
if (!f) if (!f)
{ {
perror("fopen\n"); perror("fopen\n");
free(buffer);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
size_t bytes_read; size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
if (bytes_read != (size_t)st.st_size)
{
fprintf(stderr, "Read of binary file to memory incomplete.\n");
free(buffer);
fclose(f);
return EXIT_FAILURE;
}
fclose(f);
printf("; Decoded 8086 Assembly Instructions\n\n"); printf("; Decoded 8086 Assembly Instructions\n\n");
printf("bits 16\n\n"); printf("bits 16\n\n");
while ((bytes_read = fread(buf, sizeof(char), 1, f)) > 0) char *inst_str_buf = malloc(sizeof(char) * 256);
u32 bytes_processed = 0;
while (bytes_processed < bytes_read)
{ {
char inst = buf[0]; Instruction inst = parse_instruction(buffer + bytes_processed);
if (mov_inst(f, buf, inst)) goto handled; // char inst = buffer[0];
if (add_inst(f, buf, inst)) goto handled; // if (mov_inst(f, buffer, inst)) goto handled;
// if (add_inst(f, buffer, inst)) goto handled;
fprintf(stderr, "___Unrecognized Instruction___"); if (inst.bytes_read > 0)
handled: {
decode_instruction(inst_str_buf, inst);
printf("%s", inst_str_buf);
bytes_processed += inst.bytes_read;
}
else
{
bytes_processed += 1;
fprintf(stderr, "___Unrecognized Instruction___");
}
// handled:
printf("\n"); printf("\n");
} }
free(inst_str_buf);
free(buffer);
} }

7
lib.h
View File

@ -23,7 +23,7 @@ enum OptionTag {NONE, SOME};
enum OptionTag tag; \ enum OptionTag tag; \
union { \ union { \
char none; \ char none; \
type some; \ type value; \
}; \ }; \
} type##_opt; \ } type##_opt; \
\ \
@ -34,13 +34,13 @@ enum OptionTag {NONE, SOME};
\ \
static inline type##_opt some_##type(type value) \ static inline type##_opt some_##type(type value) \
{ \ { \
return (type##_opt){ .tag = SOME, .some = value }; \ return (type##_opt){ .tag = SOME, .value = value }; \
} \ } \
\ \
static inline int get_some_##type(type##_opt opt, type* out_value) \ static inline int get_some_##type(type##_opt opt, type* out_value) \
{ \ { \
if (opt.tag != SOME) return 0; \ if (opt.tag != SOME) return 0; \
*out_value = opt.some; \ *out_value = opt.value; \
return 1; \ return 1; \
} }
@ -50,3 +50,4 @@ enum OptionTag {NONE, SOME};
OPTION(u8) OPTION(u8)
OPTION(u16)