WIP Refactor: Instruction parsing, decoding and printing split
This commit is contained in:
parent
8497316768
commit
ca0742de3c
184
decode.c
184
decode.c
@ -2,6 +2,8 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "lib.h"
|
#include "lib.h"
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
enum InstructionType
|
enum InstructionType
|
||||||
{
|
{
|
||||||
@ -20,7 +22,6 @@ enum Mode
|
|||||||
|
|
||||||
typedef struct Register
|
typedef struct Register
|
||||||
{
|
{
|
||||||
char code;
|
|
||||||
char* fullname;
|
char* fullname;
|
||||||
char* bytename;
|
char* bytename;
|
||||||
union {
|
union {
|
||||||
@ -30,8 +31,31 @@ typedef struct Register
|
|||||||
};
|
};
|
||||||
u16 full;
|
u16 full;
|
||||||
} value;
|
} value;
|
||||||
|
u8 code;
|
||||||
} Register;
|
} Register;
|
||||||
|
|
||||||
|
/// Discriminator stored in an Operand's `tag` field; it identifies which
/// payload (memory reference, register, or immediate) the operand holds.
enum OperandType
{
    OPR_T_MEMORY,    // operand is a memory reference
    OPR_T_REGISTER,  // operand is a register
    OPR_T_IMMEDIATE  // operand is an immediate value
};
|
||||||
|
|
||||||
|
typedef struct Operand
|
||||||
|
{
|
||||||
|
enum OperandType tag;
|
||||||
|
union {
|
||||||
|
struct Mem {
|
||||||
|
char *eac_name;
|
||||||
|
i16 displacement;
|
||||||
|
u8 mode;
|
||||||
|
} mem;
|
||||||
|
struct Reg {
|
||||||
|
Register value;
|
||||||
|
bool wide;
|
||||||
|
} reg;
|
||||||
|
struct Imm {
|
||||||
|
i16 value;
|
||||||
|
u8 direct;
|
||||||
|
} imm;
|
||||||
|
};
|
||||||
|
} Operand;
|
||||||
|
|
||||||
Register registers[8] = {
|
Register registers[8] = {
|
||||||
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
{.code = 0b000, .fullname = "ax", .bytename = "al"},
|
||||||
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
{.code = 0b001, .fullname = "cx", .bytename = "cl"},
|
||||||
@ -43,7 +67,7 @@ Register registers[8] = {
|
|||||||
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct Instruction
|
typedef struct ParsedInstruction
|
||||||
{
|
{
|
||||||
u16 id;
|
u16 id;
|
||||||
char *name;
|
char *name;
|
||||||
@ -57,12 +81,21 @@ typedef struct Instruction
|
|||||||
u8_opt rm;
|
u8_opt rm;
|
||||||
u8_opt SR;
|
u8_opt SR;
|
||||||
u8 bytes_read;
|
u8 bytes_read;
|
||||||
|
} ParsedInstruction;
|
||||||
|
|
||||||
|
typedef struct Instruction
|
||||||
|
{
|
||||||
|
Operand src_opr;
|
||||||
|
Operand dst_opr;
|
||||||
|
i16 data;
|
||||||
|
char *operation;
|
||||||
|
u16 id;
|
||||||
} Instruction;
|
} Instruction;
|
||||||
|
|
||||||
char *memory[65536];
|
char *memory[65536];
|
||||||
|
|
||||||
/// Get Effective Address Calculation Registers
|
/// Get Effective Address Calculation Registers
|
||||||
char* get_eac_registers(char rm)
|
char* get_eac_register(char rm)
|
||||||
{
|
{
|
||||||
char* reg_name;
|
char* reg_name;
|
||||||
switch (rm)
|
switch (rm)
|
||||||
@ -138,7 +171,7 @@ InstFormat inst_formats[] =
|
|||||||
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
||||||
// Immediate to register
|
// Immediate to register
|
||||||
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
||||||
.mask_reg=0b00000111, .has_data=true, .has_w=true},
|
.mask_reg=0b00000111, .has_reg=true, .has_data=true, .has_w=true},
|
||||||
// Memory to accumulator | Accumulator to memory using the `d` bit
|
// Memory to accumulator | Accumulator to memory using the `d` bit
|
||||||
// even though the manual doesn't specify it
|
// even though the manual doesn't specify it
|
||||||
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
||||||
@ -156,10 +189,11 @@ InstFormat inst_formats[] =
|
|||||||
// Immediate to register/memory
|
// Immediate to register/memory
|
||||||
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
||||||
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
||||||
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true},
|
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1,
|
||||||
|
.has_data=true, .has_w=true},
|
||||||
};
|
};
|
||||||
|
|
||||||
Instruction parse_instruction(u8* buf)
|
ParsedInstruction parse_instruction(u8* buf)
|
||||||
{
|
{
|
||||||
u8 inst = buf[0];
|
u8 inst = buf[0];
|
||||||
InstFormat fmt;
|
InstFormat fmt;
|
||||||
@ -175,7 +209,7 @@ Instruction parse_instruction(u8* buf)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!matched_inst)
|
if (!matched_inst)
|
||||||
return (Instruction){.bytes_read = 0};
|
return (ParsedInstruction){.bytes_read = 0};
|
||||||
u8_opt d_opt = none_u8();
|
u8_opt d_opt = none_u8();
|
||||||
u8_opt s_opt = none_u8();
|
u8_opt s_opt = none_u8();
|
||||||
u8_opt w_opt = none_u8();
|
u8_opt w_opt = none_u8();
|
||||||
@ -185,6 +219,9 @@ Instruction parse_instruction(u8* buf)
|
|||||||
u16_opt data_opt = none_u16();
|
u16_opt data_opt = none_u16();
|
||||||
u16_opt displacement_opt = none_u16();
|
u16_opt displacement_opt = none_u16();
|
||||||
|
|
||||||
|
u16 bytes_read = 1;
|
||||||
|
bytes_read += fmt.has_operands ? 1 : 0;
|
||||||
|
|
||||||
if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
|
if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
|
||||||
if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
|
if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
|
||||||
if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
|
if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
|
||||||
@ -199,25 +236,23 @@ Instruction parse_instruction(u8* buf)
|
|||||||
{
|
{
|
||||||
u8 idx = 1;
|
u8 idx = 1;
|
||||||
if (fmt.has_operands) idx += 1;
|
if (fmt.has_operands) idx += 1;
|
||||||
|
// This is a trick because mod == 1 and mod == 2 will displace one and two bytes
|
||||||
|
// respectively but mod == 3 wraps to 0 since it doesn't displace
|
||||||
if (fmt.has_displacement) idx += mod_opt.value % 3;
|
if (fmt.has_displacement) idx += mod_opt.value % 3;
|
||||||
u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[0] : (sbyte)buf[idx];
|
u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[idx] : (sbyte)buf[idx];
|
||||||
data_opt = some_u16(data);
|
data_opt = some_u16(data);
|
||||||
|
bytes_read += w_opt.value == 0 ? 1 : 2;
|
||||||
}
|
}
|
||||||
if (fmt.has_displacement && mod_opt.value % 3 > 0)
|
if (fmt.has_displacement && mod_opt.value % 3 > 0)
|
||||||
{
|
{
|
||||||
u16 disp = mod_opt.value == MODE_MEM_DIS_16
|
u16 disp = mod_opt.value == MODE_MEM_DIS_16
|
||||||
? (i16)buf[3] << 8 | buf[2]
|
? (i16)buf[3] << 8 | buf[2]
|
||||||
: (sbyte)buf[3];
|
: (sbyte)buf[2];
|
||||||
displacement_opt = some_u16(disp);
|
displacement_opt = some_u16(disp);
|
||||||
|
bytes_read += mod_opt.value % 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 bytes_read = 1;
|
return (ParsedInstruction) {
|
||||||
bytes_read += fmt.has_operands ? 1 : 0;
|
|
||||||
// This is a trick because mod == 1 and mod == 2 will displace one and two bytes
|
|
||||||
// respectively but mod == 3 wraps to 0 since it doesn't displace
|
|
||||||
if (fmt.has_displacement) bytes_read += mod_opt.value % 3;
|
|
||||||
if (fmt.has_data) bytes_read += w_opt.value == 0 ? 1 : 2;
|
|
||||||
return (Instruction) {
|
|
||||||
.id = fmt.id,
|
.id = fmt.id,
|
||||||
.name = fmt.name,
|
.name = fmt.name,
|
||||||
.data = data_opt,
|
.data = data_opt,
|
||||||
@ -232,25 +267,92 @@ Instruction parse_instruction(u8* buf)
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void decode_instruction(char* str_buf, Instruction inst)
|
/// Turn the raw bit fields of a ParsedInstruction into an Instruction with
/// resolved source and destination operands.
///
/// - When `mod` is present: the first operand is the register selected by
///   `reg`; the second comes from `rm` and is either another register
///   (register mode) or a memory operand, with a displacement for the 8- and
///   16-bit displacement modes.
/// - When `mod` is absent: `data`, if present, becomes an immediate first
///   operand and `reg`, if present, becomes the register second operand.
/// - A set `d` bit swaps the two operands before they are stored as
///   source (first) and destination (second).
///
/// NOTE(review): direct addressing (mod == 0, rm == 0b110) and formats that
/// carry `mod` but no `reg` field get no special handling here; confirm
/// whether that is still pending in this WIP.
Instruction decode_instruction(ParsedInstruction inst)
{
    // Both operands must start zeroed. The previous declaration
    // `Operand opr1 , opr2 = {0};` initialised only opr2, so an instruction
    // with neither `mod` nor `data` returned an indeterminate opr1.
    Operand opr1 = {0};
    Operand opr2 = {0};
    i16 payload = 0;

    IF_LET_SOME(u16, data, inst.data) payload = data;
    IF_LET_SOME(u8, mod, inst.mod)
    {
        opr1.tag = OPR_T_REGISTER;
        opr1.reg.value = registers[(size_t)inst.reg.value];
        opr1.reg.wide = inst.w.value;
        if (mod == MODE_RGSTR_MODE)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)inst.rm.value];
            opr2.reg.wide = inst.w.value;
        }
        else
        {
            opr2.tag = OPR_T_MEMORY;
            opr2.mem.eac_name = get_eac_register(inst.rm.value);
            opr2.mem.mode = mod;
            // Only the displacement modes carry a displacement; otherwise it
            // stays 0 from the zero-initialisation above.
            if (mod == MODE_MEM_DIS_08 || mod == MODE_MEM_DIS_16)
                opr2.mem.displacement = (i16)inst.displacement.value;
        }
    }
    else
    {
        IF_LET_SOME(u16, data, inst.data)
        {
            opr1.tag = OPR_T_IMMEDIATE;
            opr1.imm.value = (i16)data;
            // TODO: Have to fix this
            opr1.imm.direct = 0;
        }
        IF_LET_SOME(u8, reg, inst.reg)
        {
            opr2.tag = OPR_T_REGISTER;
            opr2.reg.value = registers[(size_t)reg];
            opr2.reg.wide = inst.w.value;
        }
    }

    // d == 1 means the `reg` operand is the destination, so swap the pair.
    if (inst.d.tag == SOME && inst.d.value == 1)
    {
        Operand temp = opr1;
        opr1 = opr2;
        opr2 = temp;
    }

    return (Instruction) {
        .id = inst.id,
        .data = payload,
        .operation = inst.name,
        .src_opr = opr1,
        .dst_opr = opr2,
    };
}
|
||||||
|
|
||||||
|
/// Write the textual form of `oprnd` into `str_buf`.
///
/// Registers print as their name, memory operands as `[eac]` or
/// `[eac + N]` / `[eac - N]`, and immediates as the value with an optional
/// "byte " / "word " prefix. `str_buf` is written without bounds checking,
/// so the caller must supply a buffer large enough for the result.
void get_operand_string(char* str_buf, Operand oprnd)
{
    switch (oprnd.tag)
    {
    case OPR_T_REGISTER:
    {
        strcpy(str_buf, reg_name(oprnd.reg.value, oprnd.reg.wide));
        break;
    }
    case OPR_T_MEMORY:
    {
        i16 offset = oprnd.mem.displacement;
        char offset_text[16] = {'\0'};
        // A zero displacement leaves the suffix empty, giving plain "[eac]".
        if (offset != 0)
        {
            char *sign = offset > 0 ? "+" : "-";
            sprintf(offset_text, " %s %d", sign, abs(offset));
        }
        sprintf(str_buf, "[%s%s]", oprnd.mem.eac_name, offset_text);
        break;
    }
    case OPR_T_IMMEDIATE:
    {
        // direct: 0 -> no prefix, 1 -> "byte ", anything else -> "word "
        char *prefix = oprnd.imm.direct == 0 ? ""
                     : oprnd.imm.direct == 1 ? "byte "
                                             : "word ";
        sprintf(str_buf, "%s%d", prefix, oprnd.imm.value);
        break;
    }
    default:
        // Unknown tag: leave str_buf untouched.
        break;
    }
}
|
||||||
|
|
||||||
|
/// Format a decoded instruction as "<operation> <dst>, <src> ; Inst id-><id>"
/// into `str_buf`. The caller must supply a buffer large enough for the
/// result; no bounds checking is done here.
void get_instr_string(char* str_buf, Instruction inst)
{
    char dst_text[32];
    char src_text[32];

    // Render each operand into its own scratch buffer first.
    get_operand_string(dst_text, inst.dst_opr);
    get_operand_string(src_text, inst.src_opr);

    sprintf(str_buf, "%s %s, %s ; Inst id->%d",
            inst.operation, dst_text, src_text, inst.id);
}
|
||||||
|
|
||||||
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
||||||
@ -278,7 +380,7 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
bool is_direct_addr = mod == 0 && rm == 0b110;
|
bool is_direct_addr = mod == 0 && rm == 0b110;
|
||||||
int bytes_to_read = is_direct_addr ? 2 : mod % 3;
|
int bytes_to_read = is_direct_addr ? 2 : mod % 3;
|
||||||
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
||||||
char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
|
char* eac_name = is_direct_addr ? "" : get_eac_register(rm);
|
||||||
char disp_buf[16] = {'\0'};
|
char disp_buf[16] = {'\0'};
|
||||||
if (bytes_to_read > 0)
|
if (bytes_to_read > 0)
|
||||||
{
|
{
|
||||||
@ -303,7 +405,7 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
// Same trick from earlier, see comment
|
// Same trick from earlier, see comment
|
||||||
bytes_to_read += mod % 3;
|
bytes_to_read += mod % 3;
|
||||||
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
||||||
char *eac_name = get_eac_registers(rm);
|
char *eac_name = get_eac_register(rm);
|
||||||
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
||||||
char *word_str = w == 0 ? "byte" : "word";
|
char *word_str = w == 0 ? "byte" : "word";
|
||||||
char disp_str[16] = {'\0'};
|
char disp_str[16] = {'\0'};
|
||||||
@ -372,13 +474,13 @@ bool add_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
}
|
}
|
||||||
else if (mod == MODE_MEM_NO_DIS)
|
else if (mod == MODE_MEM_NO_DIS)
|
||||||
{
|
{
|
||||||
if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_registers(rm));
|
if (d) printf("add %s, [%s] ;9", reg_name(rgstr, w), get_eac_register(rm));
|
||||||
else printf("add [%s], %s ;10", get_eac_registers(rm), reg_name(rgstr, w));
|
else printf("add [%s], %s ;10", get_eac_register(rm), reg_name(rgstr, w));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_registers(rm));
|
if (d) printf("add %s, [%s] ;11", reg_name(rgstr, w), get_eac_register(rm));
|
||||||
else printf("add [%s], %s ;12", get_eac_registers(rm), reg_name(rgstr, w));
|
else printf("add [%s], %s ;12", get_eac_register(rm), reg_name(rgstr, w));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ((inst & ~0x3) == (char)0b10000000)
|
else if ((inst & ~0x3) == (char)0b10000000)
|
||||||
@ -392,7 +494,7 @@ bool add_inst(FILE* f, unsigned char* buf, char inst)
|
|||||||
// Same trick from earlier, see comment
|
// Same trick from earlier, see comment
|
||||||
bytes_to_read += mod % 3;
|
bytes_to_read += mod % 3;
|
||||||
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
|
||||||
char *eac_name = get_eac_registers(rm);
|
char *eac_name = get_eac_register(rm);
|
||||||
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
|
||||||
char *word_str = w == 0 ? "byte" : "word";
|
char *word_str = w == 0 ? "byte" : "word";
|
||||||
char disp_str[16] = {'\0'};
|
char disp_str[16] = {'\0'};
|
||||||
@ -454,25 +556,29 @@ int main(int argc, char** argv)
|
|||||||
u32 bytes_processed = 0;
|
u32 bytes_processed = 0;
|
||||||
while (bytes_processed < bytes_read)
|
while (bytes_processed < bytes_read)
|
||||||
{
|
{
|
||||||
Instruction inst = parse_instruction(buffer + bytes_processed);
|
ParsedInstruction parsed = parse_instruction(buffer + bytes_processed);
|
||||||
// char inst = buffer[0];
|
|
||||||
// if (mov_inst(f, buffer, inst)) goto handled;
|
|
||||||
// if (add_inst(f, buffer, inst)) goto handled;
|
|
||||||
|
|
||||||
if (inst.bytes_read > 0)
|
if (parsed.bytes_read > 0)
|
||||||
{
|
{
|
||||||
decode_instruction(inst_str_buf, inst);
|
Instruction inst = decode_instruction(parsed);
|
||||||
|
get_instr_string(inst_str_buf, inst);
|
||||||
|
bytes_processed += parsed.bytes_read;
|
||||||
|
// printf("%s %d/%ld", inst_str_buf, bytes_processed, bytes_read);
|
||||||
printf("%s", inst_str_buf);
|
printf("%s", inst_str_buf);
|
||||||
bytes_processed += inst.bytes_read;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bytes_processed += 1;
|
bytes_processed += 1;
|
||||||
fprintf(stderr, "___Unrecognized Instruction___");
|
fprintf(stderr, "___Unrecognized Instruction___");
|
||||||
}
|
}
|
||||||
|
// char inst = buffer[0];
|
||||||
|
// if (mov_inst(f, buffer, inst)) goto handled;
|
||||||
|
// if (add_inst(f, buffer, inst)) goto handled;
|
||||||
// handled:
|
// handled:
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
free(inst_str_buf);
|
free(inst_str_buf);
|
||||||
free(buffer);
|
free(buffer);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user