WIP Refactor: Instruction parser now works and basic decoding in place
This commit is contained in:
		
							parent
							
								
									4754a8cd4c
								
							
						
					
					
						commit
						8497316768
					
				
							
								
								
									
										131
									
								
								decode.c
									
									
									
									
									
								
							
							
						
						
									
										131
									
								
								decode.c
									
									
									
									
									
								
							@ -3,8 +3,6 @@
 | 
				
			|||||||
#include "lib.h"
 | 
					#include "lib.h"
 | 
				
			||||||
#include <sys/stat.h>
 | 
					#include <sys/stat.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
enum InstructionType
 | 
					enum InstructionType
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    INST_MOV_REG_REG = 0b10001000,
 | 
					    INST_MOV_REG_REG = 0b10001000,
 | 
				
			||||||
@ -92,9 +90,19 @@ static i16 get_data(unsigned char* buf, char wide)
 | 
				
			|||||||
    // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
 | 
					    // Cast buf[0] to sbyte if not the conversion to i16 won't detect signedness
 | 
				
			||||||
    return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
 | 
					    return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#define IS_INST(value, m, inst) ((value & ~m) == (char)inst)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct InstMask {char *name; u8 mask; u8 code;} InstMask;
 | 
					static u8 mask_and_shift(u8 value, u8 mask)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    value &= mask;
 | 
				
			||||||
 | 
					    int count = 0;
 | 
				
			||||||
 | 
					    while ((mask & 0x1) == 0 && count < 8)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        value >>= 1;
 | 
				
			||||||
 | 
					        mask >>= 1;
 | 
				
			||||||
 | 
					        count++;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return value;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct InstFormat
 | 
					typedef struct InstFormat
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@ -103,7 +111,7 @@ typedef struct InstFormat
 | 
				
			|||||||
    u8 inst_enc;
 | 
					    u8 inst_enc;
 | 
				
			||||||
    u8 mask_inst;
 | 
					    u8 mask_inst;
 | 
				
			||||||
    u8 mask_w;
 | 
					    u8 mask_w;
 | 
				
			||||||
    u64 mask_reg;
 | 
					    u8 mask_reg;
 | 
				
			||||||
    bool has_operands;
 | 
					    bool has_operands;
 | 
				
			||||||
    bool has_displacement;
 | 
					    bool has_displacement;
 | 
				
			||||||
    bool has_data;
 | 
					    bool has_data;
 | 
				
			||||||
@ -122,14 +130,15 @@ InstFormat inst_formats[] =
 | 
				
			|||||||
    // MOV
 | 
					    // MOV
 | 
				
			||||||
    ////////
 | 
					    ////////
 | 
				
			||||||
    // Register/memory to/from register
 | 
					    // Register/memory to/from register
 | 
				
			||||||
    {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .has_operands=true, .has_displacement=true,
 | 
					    {.id=1, .name="mov", .inst_enc=0b10001000, .mask_inst=0x3, .mask_reg=0b00111000,
 | 
				
			||||||
     .has_d=true, .has_w=true, .has_reg=true, .has_mod=true, .has_rm=true},
 | 
					     .mask_w=0x1, .has_operands=true, .has_displacement=true, .has_d=true, .has_w=true,
 | 
				
			||||||
 | 
					     .has_reg=true, .has_mod=true, .has_rm=true},
 | 
				
			||||||
    // Immediate to register/memory
 | 
					    // Immediate to register/memory
 | 
				
			||||||
    {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
 | 
					    {.id=2, .name="mov", .inst_enc=0b11000110, .mask_inst=0x1, .mask_w=0x1, .has_operands=true,
 | 
				
			||||||
     .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
 | 
					     .has_displacement=true, .has_data=true, .has_w=true, .has_mod=true, .has_rm=true},
 | 
				
			||||||
    // Immediate to register
 | 
					    // Immediate to register
 | 
				
			||||||
    {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
 | 
					    {.id=3, .name="mov", .inst_enc=0b10110000, .mask_inst=0xF, .mask_w=0x8,
 | 
				
			||||||
     .mask_reg=0x0b00000111, .has_data=true, .has_w=true},
 | 
					     .mask_reg=0b00000111, .has_data=true, .has_w=true},
 | 
				
			||||||
    // Memory to accumulator | Accumulator to memory using the `d` bit
 | 
					    // Memory to accumulator | Accumulator to memory using the `d` bit
 | 
				
			||||||
    // even though the manual doesn't specify it
 | 
					    // even though the manual doesn't specify it
 | 
				
			||||||
    {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
 | 
					    {.id=4, .name="mov", .inst_enc=0b10100000, .mask_inst=0x3, .mask_w=0x1,
 | 
				
			||||||
@ -153,35 +162,97 @@ InstFormat inst_formats[] =
 | 
				
			|||||||
Instruction parse_instruction(u8* buf)
 | 
					Instruction parse_instruction(u8* buf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    u8 inst = buf[0];
 | 
					    u8 inst = buf[0];
 | 
				
			||||||
    InstFormat format;
 | 
					    InstFormat fmt;
 | 
				
			||||||
    bool matched_inst = false;
 | 
					    bool matched_inst = false;
 | 
				
			||||||
    // TODO: This might be a good time to learn how to make a hashtable in C
 | 
					    // TODO: This might be a good time to learn how to make a hashtable in C
 | 
				
			||||||
    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
 | 
					    for (u16 i = 0; i < sizeof(inst_formats) / sizeof(InstFormat); i++)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
 | 
					        if ((inst & ~inst_formats[i].mask_inst) == inst_formats[i].inst_enc)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            format = inst_formats[i];
 | 
					            fmt = inst_formats[i];
 | 
				
			||||||
            matched_inst = true;
 | 
					            matched_inst = true;
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if (!matched_inst)
 | 
					    if (!matched_inst)
 | 
				
			||||||
        return (Instruction){.bytes_read = 0};
 | 
					        return (Instruction){.bytes_read = 0};
 | 
				
			||||||
 | 
					    u8_opt d_opt = none_u8();
 | 
				
			||||||
 | 
					    u8_opt s_opt = none_u8();
 | 
				
			||||||
 | 
					    u8_opt w_opt = none_u8();
 | 
				
			||||||
 | 
					    u8_opt reg_opt = none_u8();
 | 
				
			||||||
 | 
					    u8_opt mod_opt = none_u8();
 | 
				
			||||||
 | 
					    u8_opt rm_opt = none_u8();
 | 
				
			||||||
 | 
					    u16_opt data_opt = none_u16();
 | 
				
			||||||
 | 
					    u16_opt displacement_opt = none_u16();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (fmt.has_d) d_opt = some_u8((inst & 0x2) >> 1);
 | 
				
			||||||
 | 
					    if (fmt.has_s) s_opt = some_u8((inst & 0x2) >> 1);
 | 
				
			||||||
 | 
					    if (fmt.has_rm) rm_opt = some_u8(buf[1] & 0x7);
 | 
				
			||||||
 | 
					    if (fmt.has_mod) mod_opt = some_u8((buf[1] & 0b11000000) >> 6);
 | 
				
			||||||
 | 
					    if (fmt.has_w) w_opt = some_u8(mask_and_shift(buf[0], fmt.mask_w));
 | 
				
			||||||
 | 
					    if (fmt.has_reg)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        u8 reg = fmt.has_operands ? buf[1] : buf[0];
 | 
				
			||||||
 | 
					        reg_opt = some_u8(mask_and_shift(reg, fmt.mask_reg));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (fmt.has_data)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        u8 idx = 1;
 | 
				
			||||||
 | 
					        if (fmt.has_operands) idx += 1;
 | 
				
			||||||
 | 
					        if (fmt.has_displacement) idx += mod_opt.value % 3;
 | 
				
			||||||
 | 
					        u16 data = w_opt.value != 0 ? (i16)buf[idx+1] << 8 | buf[0] : (sbyte)buf[idx];
 | 
				
			||||||
 | 
					        data_opt = some_u16(data);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (fmt.has_displacement && mod_opt.value % 3 > 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        u16 disp = mod_opt.value == MODE_MEM_DIS_16
 | 
				
			||||||
 | 
					            ? (i16)buf[3] << 8 | buf[2]
 | 
				
			||||||
 | 
					            : (sbyte)buf[3];
 | 
				
			||||||
 | 
					        displacement_opt = some_u16(disp);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    u16 bytes_read = 1;
 | 
				
			||||||
 | 
					    bytes_read += fmt.has_operands ? 1 : 0;
 | 
				
			||||||
 | 
					        // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
 | 
				
			||||||
 | 
					        // respectively but mod == 3 wraps to 0 since it doesn't displace
 | 
				
			||||||
 | 
					    if (fmt.has_displacement) bytes_read += mod_opt.value % 3;
 | 
				
			||||||
 | 
					    if (fmt.has_data) bytes_read += w_opt.value == 0 ? 1 : 2;
 | 
				
			||||||
    return (Instruction) {
 | 
					    return (Instruction) {
 | 
				
			||||||
        .id = format.id,
 | 
					        .id = fmt.id,
 | 
				
			||||||
        .name = format.name,
 | 
					        .name = fmt.name,
 | 
				
			||||||
        .data = none_u16(),
 | 
					        .data = data_opt,
 | 
				
			||||||
        .displacement = none_u16(),
 | 
					        .displacement = displacement_opt,
 | 
				
			||||||
        .w = none_u8(),
 | 
					        .w = w_opt,
 | 
				
			||||||
        .d = none_u8(),
 | 
					        .d = d_opt,
 | 
				
			||||||
        .s = none_u8(),
 | 
					        .s = s_opt,
 | 
				
			||||||
        .mod = none_u8(),
 | 
					        .mod = mod_opt,
 | 
				
			||||||
        .reg = none_u8(),
 | 
					        .reg = reg_opt,
 | 
				
			||||||
        .rm = none_u8(),
 | 
					        .rm = rm_opt,
 | 
				
			||||||
        .bytes_read = 2,
 | 
					        .bytes_read = bytes_read,
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void decode_instruction(char* str_buf, Instruction inst)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    IF_LET_SOME(u8, mod, inst.mod)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        if (mod == MODE_RGSTR_MODE)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            Register reg = registers[(size_t)inst.reg.value];
 | 
				
			||||||
 | 
					            Register rm = registers[(size_t)inst.rm.value];
 | 
				
			||||||
 | 
					            Register src_reg = inst.d.value == 0 ? reg : rm;
 | 
				
			||||||
 | 
					            Register dst_reg = inst.d.value == 0 ? rm : reg;
 | 
				
			||||||
 | 
					            char *src_name = reg_name(src_reg, inst.w.value);
 | 
				
			||||||
 | 
					            char *dst_name = reg_name(dst_reg, inst.w.value);
 | 
				
			||||||
 | 
					            sprintf(str_buf, "%s %s, %s ;%d", inst.name, dst_name, src_name, inst.id);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        sprintf(str_buf, "%s ;%d", inst.name, inst.id);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool mov_inst(FILE* f, unsigned char* buf, char inst)
 | 
					bool mov_inst(FILE* f, unsigned char* buf, char inst)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    size_t bytes_read;
 | 
					    size_t bytes_read;
 | 
				
			||||||
@ -205,8 +276,6 @@ bool mov_inst(FILE* f, unsigned char* buf, char inst)
 | 
				
			|||||||
        else
 | 
					        else
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            bool is_direct_addr = mod == 0 && rm == 0b110;
 | 
					            bool is_direct_addr = mod == 0 && rm == 0b110;
 | 
				
			||||||
            // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
 | 
					 | 
				
			||||||
            // respectively but mod == 3 wraps to 0 since it doesn't displace
 | 
					 | 
				
			||||||
            int bytes_to_read = is_direct_addr ? 2 : mod % 3;
 | 
					            int bytes_to_read = is_direct_addr ? 2 : mod % 3;
 | 
				
			||||||
            bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
 | 
					            bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
 | 
				
			||||||
            char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
 | 
					            char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
 | 
				
			||||||
@ -381,21 +450,29 @@ int main(int argc, char** argv)
 | 
				
			|||||||
    printf("; Decoded 8086 Assembly Instructions\n\n");
 | 
					    printf("; Decoded 8086 Assembly Instructions\n\n");
 | 
				
			||||||
    printf("bits 16\n\n");
 | 
					    printf("bits 16\n\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    char *inst_str_buf = malloc(sizeof(char) * 256);
 | 
				
			||||||
    u32 bytes_processed = 0;
 | 
					    u32 bytes_processed = 0;
 | 
				
			||||||
    while (bytes_processed < bytes_read)
 | 
					    while (bytes_processed < bytes_read)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        Instruction inst = parse_instruction(buffer+bytes_processed);
 | 
					        Instruction inst = parse_instruction(buffer + bytes_processed);
 | 
				
			||||||
        bytes_processed += inst.bytes_read;
 | 
					 | 
				
			||||||
        // char inst = buffer[0];
 | 
					        // char inst = buffer[0];
 | 
				
			||||||
        // if (mov_inst(f, buffer, inst)) goto handled;
 | 
					        // if (mov_inst(f, buffer, inst)) goto handled;
 | 
				
			||||||
        // if (add_inst(f, buffer, inst)) goto handled;
 | 
					        // if (add_inst(f, buffer, inst)) goto handled;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (inst.name != NULL)
 | 
					        if (inst.bytes_read > 0)
 | 
				
			||||||
            printf("%s ;%d", inst.name, inst.id);
 | 
					        {
 | 
				
			||||||
 | 
					            decode_instruction(inst_str_buf, inst);
 | 
				
			||||||
 | 
					            printf("%s", inst_str_buf);
 | 
				
			||||||
 | 
					            bytes_processed += inst.bytes_read;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        else
 | 
					        else
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            bytes_processed += 1;
 | 
				
			||||||
            fprintf(stderr, "___Unrecognized Instruction___");
 | 
					            fprintf(stderr, "___Unrecognized Instruction___");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    // handled:
 | 
					    // handled:
 | 
				
			||||||
        printf("\n");
 | 
					        printf("\n");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    free(inst_str_buf);
 | 
				
			||||||
    free(buffer);
 | 
					    free(buffer);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										6
									
								
								lib.h
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								lib.h
									
									
									
									
									
								
							@ -23,7 +23,7 @@ enum OptionTag {NONE, SOME};
 | 
				
			|||||||
        enum OptionTag tag;                                             \
 | 
					        enum OptionTag tag;                                             \
 | 
				
			||||||
        union {                                                         \
 | 
					        union {                                                         \
 | 
				
			||||||
            char none;                                                  \
 | 
					            char none;                                                  \
 | 
				
			||||||
            type some;                                                  \
 | 
					            type value;                                                  \
 | 
				
			||||||
        };                                                              \
 | 
					        };                                                              \
 | 
				
			||||||
    } type##_opt;                                                       \
 | 
					    } type##_opt;                                                       \
 | 
				
			||||||
                                                                        \
 | 
					                                                                        \
 | 
				
			||||||
@ -34,13 +34,13 @@ enum OptionTag {NONE, SOME};
 | 
				
			|||||||
                                                                        \
 | 
					                                                                        \
 | 
				
			||||||
    static inline type##_opt some_##type(type value)                    \
 | 
					    static inline type##_opt some_##type(type value)                    \
 | 
				
			||||||
    {                                                                   \
 | 
					    {                                                                   \
 | 
				
			||||||
        return (type##_opt){ .tag = SOME, .some = value };              \
 | 
					        return (type##_opt){ .tag = SOME, .value = value };              \
 | 
				
			||||||
    }                                                                   \
 | 
					    }                                                                   \
 | 
				
			||||||
                                                                        \
 | 
					                                                                        \
 | 
				
			||||||
    static inline int get_some_##type(type##_opt opt, type* out_value)  \
 | 
					    static inline int get_some_##type(type##_opt opt, type* out_value)  \
 | 
				
			||||||
    {                                                                   \
 | 
					    {                                                                   \
 | 
				
			||||||
        if (opt.tag != SOME) return 0;                                  \
 | 
					        if (opt.tag != SOME) return 0;                                  \
 | 
				
			||||||
        *out_value = opt.some;                                          \
 | 
					        *out_value = opt.value;                                          \
 | 
				
			||||||
        return 1;                                                       \
 | 
					        return 1;                                                       \
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user