diff --git a/decode.c b/decode.c index 0ffcf0b..73e3ad8 100644 --- a/decode.c +++ b/decode.c @@ -1,10 +1,11 @@ #include #include #include "lib.h" +#include #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -enum Instruction +enum InstructionType { INST_MOV_REG_REG = 0b10001000, // INST_MOV_REG_REG = 0b10001000, @@ -24,15 +25,11 @@ typedef struct Register char code; char* fullname; char* bytename; - - union - { - struct - { + union { + struct { char low; char high; }; - u16 full; } value; } Register; @@ -48,9 +45,23 @@ Register registers[8] = { {.code = 0b111, .fullname = "di", .bytename = "bh"}, }; -char* memory[65536]; +typedef struct Instruction +{ + u16 id; + char *name; + u16_opt data; + u16_opt displacement; + u8_opt w; + u8_opt d; + u8_opt s; + u8_opt mod; + u8_opt reg; + u8_opt rm; + u8_opt SR; + u8 bytes_read; +} Instruction; -// void inst_mov_rgmm_reg() +char *memory[65536]; /// Get Effective Address Calculation Registers char* get_eac_registers(char rm) @@ -58,47 +69,118 @@ char* get_eac_registers(char rm) char* reg_name; switch (rm) { - case 0b000: - reg_name = "bx + si"; - break; - case 0b001: - reg_name = "bx + di"; - break; - case 0b010: - reg_name = "bp + si"; - break; - case 0b011: - reg_name = "bp + di"; - break; - case 0b100: - reg_name = "si"; - break; - case 0b101: - reg_name = "di"; - break; - case 0b110: - reg_name = "bp"; - break; - case 0b111: - reg_name = "bx"; - break; - default: - perror("Invalid R/M value"); - exit(1); + case 0b000: reg_name = "bx + si"; break; + case 0b001: reg_name = "bx + di"; break; + case 0b010: reg_name = "bp + si"; break; + case 0b011: reg_name = "bp + di"; break; + case 0b100: reg_name = "si"; break; + case 0b101: reg_name = "di"; break; + case 0b110: reg_name = "bp"; break; + case 0b111: reg_name = "bx"; break; + default: perror("Invalid R/M value"); exit(1); } return reg_name; } -static inline char* reg_name(Register reg, char wide) +static char* reg_name(Register reg, char wide) { return wide == 1 ? reg.fullname : reg.bytename; } -static inline i16 get_data(unsigned char* buf, char wide) +static i16 get_data(unsigned char* buf, char wide) { // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0]; } +#define IS_INST(value, m, inst) ((value & ~m) == (char)inst) + +typedef struct InstMask {char *name; u8 mask; u8 code;} InstMask; + +typedef struct InstFormat +{ + u16 id; + char *name; + u8 inst_enc; + u8 mask_inst; + u8 mask_w; + u64 mask_reg; + bool has_operands; + bool has_displacement; + bool has_data; + bool has_d; + bool has_w; + bool has_reg; + bool has_mod; + bool has_rm; + bool has_s; + bool has_SR; +} InstFormat; + +InstFormat inst_formats[] = +{ + //////// + // MOV + //////// + // Register/memory to/from register + {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true, + .has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true}, + // Immediate to register/memory + {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true, + .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true}, + // Immediate to register + {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8, + .mask_reg=0x0b00000111, .has_data=true, .has_w=true}, + // Memory to accumulator | Accumulator to memory using the `d` bit + // even though the manual doesn't specify it + {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1, + .has_data=true, .has_w=true, .has_d=true}, + // Register/memory to segment register and inverse using the `d` bit + {.id=5, .name="mov", .inst_enc=0b10001100, .mask_inst=0x3, .has_SR=true, .has_d=true, + .has_displacement=true, .has_mod=true, .has_rm=true}, + //////// + // ADD + //////// + // Reg/memory with register or either + {.id=6, .name="add", .inst_enc=0b00000000, .mask_inst=0x3, .mask_w=0x1, + .has_operands=true, .has_displacement=true, .has_w=true, + .has_d=true, .has_reg=true, .has_mod=true, .has_rm=true}, + // Immediate to register/memory + {.id=7, .name="add", .inst_enc=0b10000000, .mask_inst=0x3, .mask_w=0x1, .has_w=true, + .has_operands=true, .has_displacement=true, .has_data=true, .has_mod=true, .has_rm=true}, + {.id=8, .name="add", .inst_enc=0b00000100, .mask_inst=0x1, .mask_w=0x1, .has_data=true, .has_w=true}, +}; + +Instruction parse_instruction(u8* buf) +{ + u8 inst = buf[0]; + InstFormat format; + bool matched_inst = false; + // TODO: This might be a good time to learn how to make a hashtable in C + for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++) + { + if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc) + { + format = inst_formats[i]; + matched_inst = true; + break; + } + } + if (!matched_inst) + return (Instruction){.bytes_read = 0}; + return (Instruction) { + .id = format.id, + .name = format.name, + .data = none_u16(), + .displacement = none_u16(), + .w = none_u8(), + .d = none_u8(), + .s = none_u8(), + .mod = none_u8(), + .reg = none_u8(), + .rm = none_u8(), + .bytes_read = 2, + }; +} bool mov_inst(FILE* f, unsigned char* buf, char inst) { @@ -262,27 +344,58 @@ int main(int argc, char** argv) printf("Usage: Please provide assembled instructions as input\n"); exit(0); } - unsigned char buf[256]; + + struct stat st; + if (stat(argv[1], &st) == -1) + { + perror("Unable to get file size\n"); + return EXIT_FAILURE; + } + + unsigned char* buffer = malloc(st.st_size); + if (!buffer) + { + perror("Unable to allocate memory for binary file"); + return EXIT_FAILURE; + } + FILE *f = fopen(argv[1], "r"); if (!f) { perror("fopen\n"); + free(buffer); return EXIT_FAILURE; } - size_t bytes_read; + size_t bytes_read = fread(buffer, sizeof(unsigned char), st.st_size, f); + if (bytes_read != (size_t)st.st_size) + { + fprintf(stderr, "Read of binary file to memory incomplete.\n"); + free(buffer); + fclose(f); + return EXIT_FAILURE; + } + + fclose(f); printf("; Decoded 8086 Assembly Instructions\n\n"); printf("bits 16\n\n"); - while ((bytes_read = fread(buf, sizeof(char), 1, f)) > 0) + u32 bytes_processed = 0; + while (bytes_processed < bytes_read) { - char inst = buf[0]; - if (mov_inst(f, buf, inst)) goto handled; - if (add_inst(f, buf, inst)) goto handled; + Instruction inst = parse_instruction(buffer+bytes_processed); + bytes_processed += inst.bytes_read; + // char inst = buffer[0]; + // if (mov_inst(f, buffer, inst)) goto handled; + // if (add_inst(f, buffer, inst)) goto handled; - fprintf(stderr, "___Unrecognized Instruction___"); - handled: + if (inst.name != NULL) + printf("%s ;%d", inst.name, inst.id); + else + fprintf(stderr, "___Unrecognized Instruction___"); + // handled: printf("\n"); } + free(buffer); } diff --git a/lib.h b/lib.h index 6a1b625..8323aea 100644 --- a/lib.h +++ b/lib.h @@ -50,3 +50,4 @@ enum OptionTag {NONE, SOME}; OPTION(u8) +OPTION(u16)