WIP Refactor: Instruction parser using a struct
This commit is contained in:
parent
20d1aed742
commit
4754a8cd4c
205
decode.c
205
decode.c
@ -1,10 +1,11 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "lib.h"
|
#include "lib.h"
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||||
|
|
||||||
enum Instruction
|
enum InstructionType
|
||||||
{
|
{
|
||||||
INST_MOV_REG_REG = 0b10001000,
|
INST_MOV_REG_REG = 0b10001000,
|
||||||
// INST_MOV_REG_REG = 0b10001000,
|
// INST_MOV_REG_REG = 0b10001000,
|
||||||
@ -24,15 +25,11 @@ typedef struct Register
|
|||||||
char code;
|
char code;
|
||||||
char* fullname;
|
char* fullname;
|
||||||
char* bytename;
|
char* bytename;
|
||||||
|
union {
|
||||||
union
|
struct {
|
||||||
{
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
char low;
|
char low;
|
||||||
char high;
|
char high;
|
||||||
};
|
};
|
||||||
|
|
||||||
u16 full;
|
u16 full;
|
||||||
} value;
|
} value;
|
||||||
} Register;
|
} Register;
|
||||||
@ -48,9 +45,23 @@ Register registers[8] = {
|
|||||||
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
{.code = 0b111, .fullname = "di", .bytename = "bh"},
|
||||||
};
|
};
|
||||||
|
|
||||||
char* memory[65536];
|
typedef struct Instruction
|
||||||
|
{
|
||||||
|
u16 id;
|
||||||
|
char *name;
|
||||||
|
u16_opt data;
|
||||||
|
u16_opt displacement;
|
||||||
|
u8_opt w;
|
||||||
|
u8_opt d;
|
||||||
|
u8_opt s;
|
||||||
|
u8_opt mod;
|
||||||
|
u8_opt reg;
|
||||||
|
u8_opt rm;
|
||||||
|
u8_opt SR;
|
||||||
|
u8 bytes_read;
|
||||||
|
} Instruction;
|
||||||
|
|
||||||
// void inst_mov_rgmm_reg()
|
char *memory[65536];
|
||||||
|
|
||||||
/// Get Effective Address Calculation Registers
|
/// Get Effective Address Calculation Registers
|
||||||
char* get_eac_registers(char rm)
|
char* get_eac_registers(char rm)
|
||||||
@ -58,47 +69,118 @@ char* get_eac_registers(char rm)
|
|||||||
char* reg_name;
|
char* reg_name;
|
||||||
switch (rm)
|
switch (rm)
|
||||||
{
|
{
|
||||||
case 0b000:
|
case 0b000: reg_name = "bx + si"; break;
|
||||||
reg_name = "bx + si";
|
case 0b001: reg_name = "bx + di"; break;
|
||||||
break;
|
case 0b010: reg_name = "bp + si"; break;
|
||||||
case 0b001:
|
case 0b011: reg_name = "bp + di"; break;
|
||||||
reg_name = "bx + di";
|
case 0b100: reg_name = "si"; break;
|
||||||
break;
|
case 0b101: reg_name = "di"; break;
|
||||||
case 0b010:
|
case 0b110: reg_name = "bp"; break;
|
||||||
reg_name = "bp + si";
|
case 0b111: reg_name = "bx"; break;
|
||||||
break;
|
default: perror("Invalid R/M value"); exit(1);
|
||||||
case 0b011:
|
|
||||||
reg_name = "bp + di";
|
|
||||||
break;
|
|
||||||
case 0b100:
|
|
||||||
reg_name = "si";
|
|
||||||
break;
|
|
||||||
case 0b101:
|
|
||||||
reg_name = "di";
|
|
||||||
break;
|
|
||||||
case 0b110:
|
|
||||||
reg_name = "bp";
|
|
||||||
break;
|
|
||||||
case 0b111:
|
|
||||||
reg_name = "bx";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
perror("Invalid R/M value");
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
return reg_name;
|
return reg_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline char* reg_name(Register reg, char wide)
|
static char* reg_name(Register reg, char wide)
|
||||||
{
|
{
|
||||||
return wide == 1 ? reg.fullname : reg.bytename;
|
return wide == 1 ? reg.fullname : reg.bytename;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline i16 get_data(unsigned char* buf, char wide)
|
static i16 get_data(unsigned char* buf, char wide)
|
||||||
{
|
{
|
||||||
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
|
// Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
|
||||||
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
|
return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
|
||||||
}
|
}
|
||||||
|
/// True when `value`, with the variable bits named by mask `m` cleared,
/// equals the opcode pattern `inst`.
/// Only the low byte of each side is compared, so the result does not depend
/// on whether the opcode byte is held in a signed or an unsigned type.
/// Every argument is parenthesized so compound expressions (e.g. `a | b` as
/// the mask) expand with the intended precedence.
#define IS_INST(value, m, inst) ((((value) & ~(m)) & 0xFF) == ((inst) & 0xFF))
|
||||||
|
|
||||||
|
/// A named opcode pattern: a mask byte paired with a code byte.
/// NOTE(review): presumably `mask` marks the variable opcode bits and `code`
/// is the fixed pattern, as in InstFormat -- no user of this type is visible
/// here, so confirm before relying on it.
typedef struct InstMask
{
    char *name;
    u8 mask;
    u8 code;
} InstMask;
|
||||||
|
|
||||||
|
typedef struct InstFormat
|
||||||
|
{
|
||||||
|
u16 id;
|
||||||
|
char *name;
|
||||||
|
u8 inst_enc;
|
||||||
|
u8 mask_inst;
|
||||||
|
u8 mask_w;
|
||||||
|
u64 mask_reg;
|
||||||
|
bool has_operands;
|
||||||
|
bool has_displacement;
|
||||||
|
bool has_data;
|
||||||
|
bool has_d;
|
||||||
|
bool has_w;
|
||||||
|
bool has_reg;
|
||||||
|
bool has_mod;
|
||||||
|
bool has_rm;
|
||||||
|
bool has_s;
|
||||||
|
bool has_SR;
|
||||||
|
} InstFormat;
|
||||||
|
|
||||||
|
InstFormat inst_formats[] =
|
||||||
|
{
|
||||||
|
////////
|
||||||
|
// MOV
|
||||||
|
////////
|
||||||
|
// Register/memory to/from register
|
||||||
|
{.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true,
|
||||||
|
.has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true},
|
||||||
|
// Immediate to register/memory
|
||||||
|
{.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
|
||||||
|
.has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
|
||||||
|
// Immediate to register
|
||||||
|
{.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
|
||||||
|
.mask_reg=0x0b00000111, .has_data=true, .has_w=true},
|
||||||
|
// Memory to accumulator | Accumulator to memory using the `d` bit
|
||||||
|
// even though the manual doesn't specify it
|
||||||
|
{.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
|
||||||
|
.has_data=true, .has_w=true, .has_d=true},
|
||||||
|
// Register/memory to segment register and inverse using the `d` bit
|
||||||
|
{.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true,
|
||||||
|
.has_displacement=true, .has_mod=true, .has_rm=true},
|
||||||
|
////////
|
||||||
|
// ADD
|
||||||
|
////////
|
||||||
|
// Reg/memory with register or either
|
||||||
|
{.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1,
|
||||||
|
.has_operands=true, .has_displacement=true, .has_w=true,
|
||||||
|
.has_d=true, .has_reg=true, .has_mod=true, .has_rm=true},
|
||||||
|
// Immediate to register/memory
|
||||||
|
{.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true,
|
||||||
|
.has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true},
|
||||||
|
{.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Identify the instruction whose opcode byte is at `buf[0]`.
///
/// The opcode is compared against each row of inst_formats in order; a row
/// matches when the opcode, with the row's `mask_inst` bits cleared, equals
/// the row's `inst_enc`.
///
/// On a match the result carries the row's id and name, every optional field
/// initialised through none_u16()/none_u8(), and bytes_read fixed at 2
/// (operand decoding is not implemented yet).
/// When nothing matches, the result has a NULL name and bytes_read == 0.
/// NOTE(review): a caller that advances its cursor by bytes_read without
/// checking for 0 will never move past an unrecognized opcode.
Instruction parse_instruction(u8* buf)
{
    u8 opcode = buf[0];
    const InstFormat *format = NULL;

    // TODO: This might be a good time to learn how to make a hashtable in C
    for (size_t i = 0; i < ARRAY_SIZE(inst_formats); i++)
    {
        if ((opcode & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
        {
            format = &inst_formats[i];
            break;
        }
    }

    if (format == NULL)
        return (Instruction){.bytes_read = 0};

    return (Instruction) {
        .id = format->id,
        .name = format->name,
        .data = none_u16(),
        .displacement = none_u16(),
        .w = none_u8(),
        .d = none_u8(),
        .s = none_u8(),
        .mod = none_u8(),
        .reg = none_u8(),
        .rm = none_u8(),
        // SR is an optional field like the others, so mark it absent too
        // instead of leaving it zero-initialised.
        .SR = none_u8(),
        .bytes_read = 2,
    };
}
|
||||||
|
|
||||||
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
bool mov_inst(FILE* f, unsigned char* buf, char inst)
|
||||||
{
|
{
|
||||||
@ -262,27 +344,58 @@ int main(int argc, char** argv)
|
|||||||
printf("Usage: Please provide assembled instructions as input\n");
|
printf("Usage: Please provide assembled instructions as input\n");
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
unsigned char buf[256];
|
|
||||||
|
struct stat st;
|
||||||
|
if (stat(argv[1], &st) == -1)
|
||||||
|
{
|
||||||
|
perror("Unable to get file size\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char* buffer = malloc(st.st_size);
|
||||||
|
if (!buffer)
|
||||||
|
{
|
||||||
|
perror("Unable to allocate memory for binary file");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
FILE *f = fopen(argv[1], "r");
|
FILE *f = fopen(argv[1], "r");
|
||||||
if (!f)
|
if (!f)
|
||||||
{
|
{
|
||||||
perror("fopen\n");
|
perror("fopen\n");
|
||||||
|
free(buffer);
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t bytes_read;
|
size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f);
|
||||||
|
if (bytes_read != (size_t)st.st_size)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Read of binary file to memory incomplete.\n");
|
||||||
|
free(buffer);
|
||||||
|
fclose(f);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(f);
|
||||||
|
|
||||||
printf("; Decoded 8086 Assembly Instructions\n\n");
|
printf("; Decoded 8086 Assembly Instructions\n\n");
|
||||||
printf("bits 16\n\n");
|
printf("bits 16\n\n");
|
||||||
|
|
||||||
while ((bytes_read = fread(buf, sizeof(char), 1, f)) > 0)
|
u32 bytes_processed = 0;
|
||||||
|
while (bytes_processed < bytes_read)
|
||||||
{
|
{
|
||||||
char inst = buf[0];
|
Instruction inst = parse_instruction(buffer+bytes_processed);
|
||||||
if (mov_inst(f, buf, inst)) goto handled;
|
bytes_processed += inst.bytes_read;
|
||||||
if (add_inst(f, buf, inst)) goto handled;
|
// char inst = buffer[0];
|
||||||
|
// if (mov_inst(f, buffer, inst)) goto handled;
|
||||||
|
// if (add_inst(f, buffer, inst)) goto handled;
|
||||||
|
|
||||||
fprintf(stderr, "___Unrecognized Instruction___");
|
if (inst.name != NULL)
|
||||||
handled:
|
printf("%s ;%d", inst.name, inst.id);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "___Unrecognized Instruction___");
|
||||||
|
// handled:
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
free(buffer);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user