WIP Refactor: Instruction parser now works and basic decoding in place
This commit is contained in:
parent
4754a8cd4c
commit
8497316768
129
decode.c
129
decode.c
@ -3,8 +3,6 @@
|
|||||||
#include "lib.h"
|
#include "lib.h"
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
|
||||||
|
|
||||||
enum InstructionType
|
enum InstructionType
|
||||||
{
|
{
|
||||||
INST_MOV_REG_REG = 0b10001000,
|
INST_MOV_REG_REG = 0b10001000,
|
||||||
@ -92,9 +90,19 @@ static i16 get_data(unsigned char* buf, char wide)
|
|||||||
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
|
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
|
||||||
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
|
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
|
||||||
}
|
}
|
||||||
#define IS_INST(value, m, inst) ((value & ~m) == (char)inst)
|
|
||||||
|
|
||||||
typedef struct InstMask {char *name; u8 mask; u8 code;} InstMask;
|
// Extracts the bits of `value` selected by `mask` and right-aligns them, so the
// lowest set bit of `mask` ends up at bit 0 of the result.
//
// The shift count is capped at 8, so a `mask` of 0 terminates and yields 0.
static u8 mask_and_shift(u8 value, u8 mask)
{
    u8 field = value & mask;

    // Drop one trailing zero bit of the mask per iteration, moving the field
    // down with it.
    for (int shifts = 0; shifts < 8 && (mask & 0x1) == 0; shifts++)
    {
        field >>= 1;
        mask >>= 1;
    }

    return field;
}
|
||||||
|
|
||||||
typedef struct InstFormat
|
typedef struct InstFormat
|
||||||
{
|
{
|
||||||
@ -103,7 +111,7 @@ typedef struct InstFormat
|
|||||||
u8 inst_enc;
|
u8 inst_enc;
|
||||||
u8 mask_inst;
|
u8 mask_inst;
|
||||||
u8 mask_w;
|
u8 mask_w;
|
||||||
u64 mask_reg;
|
u8 mask_reg;
|
||||||
bool has_operands;
|
bool has_operands;
|
||||||
bool has_displacement;
|
bool has_displacement;
|
||||||
bool has_data;
|
bool has_data;
|
||||||
@ -122,14 +130,15 @@ InstFormat inst_formats[] =
|
|||||||
// MOV
|
// MOV
|
||||||
////////
|
////////
|
||||||
// Register/memory to/from register
|
// Register/memory to/from register
|
||||||
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true,
|
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
|
||||||
.has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true},
|
.mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
|
||||||
|
.has_reg=true, .has_mod=true, .has_rm=true},
|
||||||
// Immediate to register/memory
|
// Immediate to register/memory
|
||||||
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
||||||
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
||||||
// Immediate to register
|
// Immediate to register
|
||||||
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
||||||
.mask_reg=0x0b00000111, .has_data=true, .has_w=true},
|
.mask_reg=0b00000111, .has_data=true, .has_w=true},
|
||||||
// Memory to accumulator | Accumulator to memory using the `d` bit
|
// Memory to accumulator | Accumulator to memory using the `d` bit
|
||||||
// even though the manual doesn't specify it
|
// even though the manual doesn't specify it
|
||||||
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
||||||
@ -153,35 +162,97 @@ InstFormat inst_formats[] =
|
|||||||
// Parses a single 8086 instruction that starts at buf[0].
//
// The opcode byte is matched against `inst_formats`; the first entry whose
// encoding equals the opcode with the format's variable bits masked off wins.
// The fields the format declares (d, s, w, reg, mod, rm, displacement, data)
// are then extracted and returned as options; fields the format does not
// declare stay NONE.
//
// Returns an Instruction whose `bytes_read` is the total encoded length, or an
// Instruction with `bytes_read == 0` when no format matches.
//
// NOTE: no buffer length is passed in, so this function cannot bounds-check.
// It may read up to 6 bytes (opcode, mod/rm, 2 displacement, 2 data) starting
// at `buf`; the caller must guarantee they are readable.
Instruction parse_instruction(u8* buf)
{
    u8 inst = buf[0];
    const InstFormat *fmt = NULL;

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
    {
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            fmt = &inst_formats[i];
            break;
        }
    }
    if (fmt == NULL)
        return (Instruction){.bytes_read = 0};

    u8_opt d_opt = none_u8();
    u8_opt s_opt = none_u8();
    u8_opt w_opt = none_u8();
    u8_opt reg_opt = none_u8();
    u8_opt mod_opt = none_u8();
    u8_opt rm_opt = none_u8();
    u16_opt data_opt = none_u16();
    u16_opt displacement_opt = none_u16();

    if (fmt->has_d) d_opt = some_u8((inst & 0x2) >> 1);
    if (fmt->has_s) s_opt = some_u8((inst & 0x2) >> 1);
    if (fmt->has_rm) rm_opt = some_u8(buf[1] & 0x7);
    if (fmt->has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
    if (fmt->has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt->mask_w));
    if (fmt->has_reg)
    {
        // The reg field lives in the mod/rm byte when there is one, otherwise
        // in the opcode byte itself.
        u8 reg = fmt->has_operands ? buf[1] : buf[0];
        reg_opt = some_u8(mask_and_shift(reg, fmt->mask_reg));
    }

    // Byte layout after the opcode: [mod/rm] [displacement] [data].
    u8 operand_len = fmt->has_operands ? 1 : 0;

    u8 disp_len = 0;
    if (fmt->has_displacement && mod_opt.tag == SOME)
    {
        // mod == 0 with rm == 0b110 is the direct-address form, which carries
        // a 16-bit address even though mod says "no displacement".
        bool is_direct_addr = mod_opt.value == 0
            && rm_opt.tag == SOME && rm_opt.value == 0b110;
        // This is a trick because mod == 1 and mod == 2 will displace one and
        // two bytes respectively but mod == 3 wraps to 0 since it doesn't
        // displace.
        disp_len = is_direct_addr ? 2 : mod_opt.value % 3;
    }

    if (disp_len > 0)
    {
        u8 disp_idx = 1 + operand_len;
        // 16-bit values are little-endian; 8-bit displacements are
        // sign-extended to 16 bits.
        u16 disp = disp_len == 2
            ? (u16)(buf[disp_idx + 1] << 8 | buf[disp_idx])
            : (u16)(sbyte)buf[disp_idx];
        displacement_opt = some_u16(disp);
    }

    u8 data_len = 0;
    if (fmt->has_data)
    {
        bool wide = w_opt.tag == SOME && w_opt.value != 0;
        u8 data_idx = 1 + operand_len + disp_len;
        // The immediate follows the displacement; the low byte is at data_idx
        // (not at the opcode byte).
        u16 data = wide
            ? (u16)(buf[data_idx + 1] << 8 | buf[data_idx])
            : (u16)(sbyte)buf[data_idx];
        data_opt = some_u16(data);
        data_len = wide ? 2 : 1;
    }

    u16 bytes_read = 1 + operand_len + disp_len + data_len;

    return (Instruction) {
        .id = fmt->id,
        .name = fmt->name,
        .data = data_opt,
        .displacement = displacement_opt,
        .w = w_opt,
        .d = d_opt,
        .s = s_opt,
        .mod = mod_opt,
        .reg = reg_opt,
        .rm = rm_opt,
        .bytes_read = bytes_read,
    };
}
|
||||||
|
|
||||||
|
// Formats a parsed instruction as assembly text into `str_buf`.
//
// Register-to-register instructions (mod == MODE_RGSTR_MODE with reg, rm, d
// and w all present) are written as "<name> <dst>, <src> ;<id>". Every other
// instruction is written as "<name> ;<id>", so `str_buf` always holds a valid
// string on return.
//
// NOTE: no buffer size is passed in, so the write is unbounded; the caller must
// supply a buffer large enough for the formatted line.
void decode_instruction(char* str_buf, Instruction inst)
{
    // Only index `registers` when every field needed for the operands was
    // actually decoded; a NONE option carries no meaningful value.
    bool is_reg_to_reg = inst.mod.tag == SOME && inst.mod.value == MODE_RGSTR_MODE
        && inst.reg.tag == SOME && inst.rm.tag == SOME
        && inst.d.tag == SOME && inst.w.tag == SOME;

    if (!is_reg_to_reg)
    {
        // Operand forms not decoded yet still get a line, instead of leaving
        // the caller's buffer untouched.
        sprintf(str_buf, "%s ;%d", inst.name, inst.id);
        return;
    }

    Register reg = registers[(size_t)inst.reg.value];
    Register rm = registers[(size_t)inst.rm.value];

    // d == 0: the reg field is the source and rm the destination; otherwise
    // the roles are swapped.
    Register src_reg = inst.d.value == 0 ? reg : rm;
    Register dst_reg = inst.d.value == 0 ? rm : reg;

    char *src_name = reg_name(src_reg, inst.w.value);
    char *dst_name = reg_name(dst_reg, inst.w.value);

    sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id);
}
|
||||||
|
|
||||||
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
||||||
{
|
{
|
||||||
size_t bytes_read;
|
size_t bytes_read;
|
||||||
@ -205,8 +276,6 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
bool is_direct_addr = mod == 0 && rm == 0b110;
|
bool is_direct_addr = mod == 0 && rm == 0b110;
|
||||||
// This is a trick because mod == 1 and mod == 2 will displace one and two bytes
|
|
||||||
// respectively but mod == 3 wraps to 0 since it doesn't displace
|
|
||||||
int bytes_to_read = is_direct_addr ? 2 : mod % 3;
|
int bytes_to_read = is_direct_addr ? 2 : mod % 3;
|
||||||
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
||||||
char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
|
char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
|
||||||
@ -381,21 +450,29 @@ int main(int argc, char** argv)
|
|||||||
printf("; Decoded 8086 Assembly Instructions\n\n");
|
printf("; Decoded 8086 Assembly Instructions\n\n");
|
||||||
printf("bits 16\n\n");
|
printf("bits 16\n\n");
|
||||||
|
|
||||||
|
char *inst_str_buf = malloc(sizeof(char) * 256);
|
||||||
u32 bytes_processed = 0;
|
u32 bytes_processed = 0;
|
||||||
while (bytes_processed < bytes_read)
|
while (bytes_processed < bytes_read)
|
||||||
{
|
{
|
||||||
Instruction inst = parse_instruction(buffer + bytes_processed);
|
Instruction inst = parse_instruction(buffer + bytes_processed);
|
||||||
bytes_processed += inst.bytes_read;
|
|
||||||
// char inst = buffer[0];
|
// char inst = buffer[0];
|
||||||
// if (mov_inst(f, buffer, inst)) goto handled;
|
// if (mov_inst(f, buffer, inst)) goto handled;
|
||||||
// if (add_inst(f, buffer, inst)) goto handled;
|
// if (add_inst(f, buffer, inst)) goto handled;
|
||||||
|
|
||||||
if (inst.name != NULL)
|
if (inst.bytes_read > 0)
|
||||||
printf("%s ;%d", inst.name, inst.id);
|
{
|
||||||
|
decode_instruction(inst_str_buf, inst);
|
||||||
|
printf("%s", inst_str_buf);
|
||||||
|
bytes_processed += inst.bytes_read;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
bytes_processed += 1;
|
||||||
fprintf(stderr, "___Unrecognized Instruction___");
|
fprintf(stderr, "___Unrecognized Instruction___");
|
||||||
|
}
|
||||||
// handled:
|
// handled:
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
free(inst_str_buf);
|
||||||
free(buffer);
|
free(buffer);
|
||||||
}
|
}
|
||||||
|
6
lib.h
6
lib.h
@ -23,7 +23,7 @@ enum OptionTag {NONE, SOME};
|
|||||||
enum OptionTag tag; \
|
enum OptionTag tag; \
|
||||||
union { \
|
union { \
|
||||||
char none; \
|
char none; \
|
||||||
type some; \
|
type value; \
|
||||||
}; \
|
}; \
|
||||||
} type##_opt; \
|
} type##_opt; \
|
||||||
\
|
\
|
||||||
@ -34,13 +34,13 @@ enum OptionTag {NONE, SOME};
|
|||||||
\
|
\
|
||||||
static inline type##_opt some_##type(type value) \
|
static inline type##_opt some_##type(type value) \
|
||||||
{ \
|
{ \
|
||||||
return (type##_opt){ .tag = SOME, .some = value }; \
|
return (type##_opt){ .tag = SOME, .value = value }; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
static inline int get_some_##type(type##_opt opt, type* out_value) \
|
static inline int get_some_##type(type##_opt opt, type* out_value) \
|
||||||
{ \
|
{ \
|
||||||
if (opt.tag != SOME) return 0; \
|
if (opt.tag != SOME) return 0; \
|
||||||
*out_value = opt.some; \
|
*out_value = opt.value; \
|
||||||
return 1; \
|
return 1; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user