diff --git a/.gitignore b/.gitignore
index 57683ad..ff23a1f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /decode
 /.idea/
 /asm_files/*.bin
+/8086_family_Users_Manual_1_.pdf
diff --git a/asm_files/01-03-41.asm b/asm_files/01-03-41.asm
new file mode 100644
index 0000000..25a3407
--- /dev/null
+++ b/asm_files/01-03-41.asm
@@ -0,0 +1,109 @@
+; ========================================================================
+; LISTING 41
+; ========================================================================
+
+bits 16
+
+add bx, [bx + si]
+add bx, [bp]
+add si, 2
+add bp, 2
+add cx, 8
+add bx, [bp + 0]
+add cx, [bx + 2]
+add bh, [bp + si + 4]
+add di, [bp + di + 6]
+add [bx + si], bx
+add [bp], bx
+add [bp + 0], bx
+add [bx + 2], cx
+add [bp + si + 4], bh
+add [bp + di + 6], di
+add byte [bx], 34
+add word [bp + si + 1000], 29
+add ax, [bp]
+add al, [bx + si]
+add ax, bx
+add al, ah
+add ax, 1000
+add al, -30
+add al, 9
+
+sub bx, [bx + si]
+sub bx, [bp]
+sub si, 2
+sub bp, 2
+sub cx, 8
+sub bx, [bp + 0]
+sub cx, [bx + 2]
+sub bh, [bp + si + 4]
+sub di, [bp + di + 6]
+sub [bx+si], bx
+sub [bp], bx
+sub [bp + 0], bx
+sub [bx + 2], cx
+sub [bp + si + 4], bh
+sub [bp + di + 6], di
+sub byte [bx], 34
+sub word [bx + di], 29
+sub ax, [bp]
+sub al, [bx + si]
+sub ax, bx
+sub al, ah
+sub ax, 1000
+sub al, -30
+sub al, 9
+
+cmp bx, [bx + si]
+cmp bx, [bp]
+cmp si, 2
+cmp bp, 2
+cmp cx, 8
+cmp bx, [bp + 0]
+cmp cx, [bx + 2]
+cmp bh, [bp + si + 4]
+cmp di, [bp + di + 6]
+cmp [bx + si], bx
+cmp [bp], bx
+cmp [bp + 0], bx
+cmp [bx + 2], cx
+cmp [bp + si + 4], bh
+cmp [bp + di + 6], di
+cmp byte [bx], 34
+cmp word [4834], 29
+cmp ax, [bp]
+cmp al, [bx + si]
+cmp ax, bx
+cmp al, ah
+cmp ax, 1000
+cmp al, -30
+cmp al, 9
+
+test_label0:
+jnz test_label1
+jnz test_label0
+test_label1:
+jnz test_label0
+jnz test_label1
+
+label:
+je label
+jl label
+jle label
+jb label
+jbe label
+jp label
+jo label
+js label
+jne label
+jnl label
+jg label
+jnb label
+ja label
+jnp label
+jno label
+jns label
+loop label
+loopz label
+loopnz label
+jcxz label
diff --git a/decode.c b/decode.c
index 2111f9b..0ffcf0b 100644
--- a/decode.c
+++ b/decode.c
@@ -100,6 +100,187 @@ static inline i16 get_data(unsigned char* buf, char wide)
     return wide == 1 ? (i16)buf[1] << 8 | buf[0] : (sbyte)buf[0];
 }
 
+// Reads exactly `count` bytes from f into buf. Returns true only when every requested byte
+// arrived, so a truncated instruction stream is never decoded from stale buffer contents.
+static bool read_exact(FILE* f, unsigned char* buf, size_t count)
+{
+    return fread(buf, sizeof(unsigned char), count, f) == count;
+}
+
+// Consumes the displacement bytes implied by mod/rm (if any) and formats the r/m operand into
+// out: a register name in register mode, "[address]" for a direct address, otherwise "[eac]"
+// or "[eac +/- displacement]". Returns false when the displacement could not be read.
+static bool read_rm_operand(FILE* f, unsigned char* buf, char mod, char rm, char w,
+                            char* out, size_t out_size)
+{
+    if (mod == MODE_RGSTR_MODE)
+    {
+        snprintf(out, out_size, "%s", reg_name(registers[(size_t)rm], w));
+        return true;
+    }
+    // mod == 0 with rm == 110 names no base registers, only a 16-bit direct address
+    bool is_direct_addr = mod == 0 && rm == 0b110;
+    // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
+    // respectively but mod == 3 wraps to 0 since it doesn't displace
+    size_t disp_len = is_direct_addr ? 2 : (size_t)(mod % 3);
+    if (disp_len == 0)
+    {
+        snprintf(out, out_size, "[%s]", get_eac_registers(rm));
+        return true;
+    }
+    if (!read_exact(f, buf, disp_len)) return false;
+    i16 disp = get_data(buf, disp_len == 2);
+    // A direct address is unsigned while a displacement is a signed offset from the base
+    if (is_direct_addr) snprintf(out, out_size, "[%d]", (unsigned short)disp);
+    else snprintf(out, out_size, "[%s %s %d]", get_eac_registers(rm), disp >= 0 ? "+" : "-", abs(disp));
+    return true;
+}
+
+// Decodes one 8086 mov instruction whose first byte is inst, reading the remaining bytes
+// from f through buf and printing the assembly text to stdout (without a newline).
+// Returns true when inst is a mov opcode and the whole instruction was read; false when
+// inst is not a mov or the stream ended early (nothing is printed in that case).
+bool mov_inst(FILE* f, unsigned char* buf, char inst)
+{
+    char rm_text[32];
+    // Register/memory to/from register
+    if ((inst & ~0x3) == (char)0b10001000)
+    {
+        if (!read_exact(f, buf, 1)) return false;
+        char next_byte = buf[0];
+        char w = inst & 0b00000001;
+        char d = (inst & 0b00000010) >> 1;
+        char mod = (next_byte & 0b11000000) >> 6;
+        char reg = (next_byte & 0b00111000) >> 3;
+        char rm = (next_byte & 0b00000111);
+        if (!read_rm_operand(f, buf, mod, rm, w, rm_text, sizeof(rm_text))) return false;
+        const char* reg_text = reg_name(registers[(size_t)reg], w);
+        // The trailing ;N comment records which decode path produced the line
+        int path = mod == MODE_RGSTR_MODE ? 0 : (d ? 1 : 2);
+        // d == 1 means the reg field names the destination
+        if (d) printf("mov %s, %s ;%d", reg_text, rm_text, path);
+        else printf("mov %s, %s ;%d", rm_text, reg_text, path);
+        return true;
+    }
+    // Immediate to register/memory
+    if ((inst & ~0x1) == (char)0b11000110)
+    {
+        if (!read_exact(f, buf, 1)) return false;
+        char w = inst & 0b00000001;
+        char mod = (buf[0] & 0b11000000) >> 6;
+        char rm = (buf[0] & 0b00000111);
+        // Any displacement precedes the immediate, so the operand has to be decoded first
+        if (!read_rm_operand(f, buf, mod, rm, w, rm_text, sizeof(rm_text))) return false;
+        if (!read_exact(f, buf, w == 1 ? 2 : 1)) return false;
+        printf("mov %s, %s %d ;3", rm_text, w == 0 ? "byte" : "word", get_data(buf, w));
+        return true;
+    }
+    // Immediate to register
+    if ((inst & ~0xF) == (char)0b10110000)
+    {
+        char w = (inst & 0b00001000) >> 3;
+        Register reg = registers[(size_t)inst & 0b00000111];
+        if (!read_exact(f, buf, w == 1 ? 2 : 1)) return false;
+        printf("mov %s, %d ;4", reg_name(reg, w), get_data(buf, w));
+        return true;
+    }
+    // Memory/accumulator to accumulator/memory
+    if ((inst & ~0x3) == (char)0b10100000)
+    {
+        // This instruction uses AX/AL register exclusively
+        Register ax_al = registers[0];
+        char w = (inst & 0b00000001);
+        // The manual doesn't refer to this as `d` but it acts similarly in that this bit
+        // swaps the accumulator's src/dst position
+        char d = (inst & 0b00000010) >> 1;
+        // The operand is always a 16-bit address (addr-lo, addr-hi); w only picks AL or AX
+        if (!read_exact(f, buf, 2)) return false;
+        int addr = (unsigned short)get_data(buf, 1);
+        if (d) printf("mov [%d], %s ;5", addr, reg_name(ax_al, w));
+        else printf("mov %s, [%d] ;6", reg_name(ax_al, w), addr);
+        return true;
+    }
+    // Register/memory to segment register or segment register to register/memory.
+    // Only bit 1 varies between the two forms; 0x8D and 0x8F are different instructions.
+    if ((inst & ~0x2) == (char)0b10001100)
+    {
+        static const char* const seg_names[4] = {"es", "cs", "ss", "ds"};
+        if (!read_exact(f, buf, 1)) return false;
+        // Manual doesn't refer to this as `d` but swaps like in the previous instruction
+        char d = (inst & 0b00000010) >> 1;
+        char mod = (buf[0] & 0b11000000) >> 6;
+        char sr = (buf[0] & 0b00011000) >> 3;
+        char rm = (buf[0] & 0b00000111);
+        // Segment registers are 16 bits wide, so the other operand is always a word
+        if (!read_rm_operand(f, buf, mod, rm, 1, rm_text, sizeof(rm_text))) return false;
+        if (d) printf("mov %s, %s", seg_names[(size_t)sr], rm_text);
+        else printf("mov %s, %s", rm_text, seg_names[(size_t)sr]);
+        return true;
+    }
+    return false;
+}
+
+// Decodes one 8086 add instruction whose first byte is inst, reading the remaining bytes
+// from f through buf and printing the assembly text to stdout (without a newline).
+// The immediate-to-register/memory opcode is shared with or/adc/sbb/and/sub/xor/cmp, so
+// that form prints whichever mnemonic the second byte selects.
+// Returns true when inst was recognized and the whole instruction was read; false when
+// inst is not handled here or the stream ended early (nothing is printed in that case).
+bool add_inst(FILE* f, unsigned char* buf, char inst)
+{
+    char rm_text[32];
+    // Register/memory with register to either
+    if ((inst & ~0x3) == (char)0b00000000)
+    {
+        if (!read_exact(f, buf, 1)) return false;
+        char next_byte = buf[0];
+        char w = inst & 0b00000001;
+        char d = (inst & 0b00000010) >> 1;
+        char mod = (next_byte & 0b11000000) >> 6;
+        char reg = (next_byte & 0b00111000) >> 3;
+        char rm = (next_byte & 0b00000111);
+        if (!read_rm_operand(f, buf, mod, rm, w, rm_text, sizeof(rm_text))) return false;
+        const char* reg_text = reg_name(registers[(size_t)reg], w);
+        // The ;N tag identifies the decode path: 7/8 register mode, 9/10 mod 0, 11/12 displaced
+        int path = (mod == MODE_RGSTR_MODE ? 7 : mod == MODE_MEM_NO_DIS ? 9 : 11) + (d ? 0 : 1);
+        if (d) printf("add %s, %s ;%d", reg_text, rm_text, path);
+        else printf("add %s, %s ;%d", rm_text, reg_text, path);
+        return true;
+    }
+    // Immediate to register/memory
+    if ((inst & ~0x3) == (char)0b10000000)
+    {
+        // The reg field of the second byte selects the operation for this opcode
+        static const char* const mnemonics[8] = {"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"};
+        if (!read_exact(f, buf, 1)) return false;
+        char w = inst & 0b00000001;
+        char s = (inst & 0b00000010) >> 1;
+        char mod = (buf[0] & 0b11000000) >> 6;
+        char op = (buf[0] & 0b00111000) >> 3;
+        char rm = (buf[0] & 0b00000111);
+        // Any displacement precedes the immediate, so the operand has to be decoded first
+        if (!read_rm_operand(f, buf, mod, rm, w, rm_text, sizeof(rm_text))) return false;
+        // Two immediate bytes are only present when s == 0 and w == 1; otherwise a single
+        // byte is stored and sign-extended to the operand width
+        bool wide_data = w == 1 && s == 0;
+        if (!read_exact(f, buf, wide_data ? 2 : 1)) return false;
+        i16 data = get_data(buf, wide_data);
+        // A register operand already implies its size; memory needs the byte/word keyword
+        if (mod == MODE_RGSTR_MODE) printf("%s %s, %d ;13", mnemonics[(size_t)op], rm_text, data);
+        else printf("%s %s, %s %d ;13", mnemonics[(size_t)op], rm_text, w == 0 ? "byte" : "word", data);
+        return true;
+    }
+    // Immediate to accumulator
+    if ((inst & ~0x1) == (char)0b00000100)
+    {
+        char w = inst & 0b00000001;
+        if (!read_exact(f, buf, w == 1 ? 2 : 1)) return false;
+        printf("add %s, %d ;14", reg_name(registers[0], w), get_data(buf, w));
+        return true;
+    }
+    return false;
+}
+
 int main(int argc, char** argv)
 {
     if (argc < 2)
@@ -123,99 +304,11 @@ int main(int argc, char** argv)
     while ((bytes_read = fread(buf, sizeof(char), 1, f)) > 0)
     {
         char inst = buf[0];
-        // Instruction instruction = 0;
 
-        // Register/memory to/from register
-        if ((inst & ~0x3) == (char)0b10001000)
-        {
-            // TODO: We should add some form of error handling here
-            bytes_read = fread(buf, sizeof(char), 1, f);
-            char next_byte = buf[0];
-            char w = inst & 0b00000001;
-            char d = (inst & 0b00000010) >> 1;
-            char mod = (next_byte & 0b11000000) >> 6;
-            char reg = (next_byte & 0b00111000) >> 3;
-            char rm = (next_byte & 0b00000111);
-            size_t reg_idx = reg;
-            size_t rm_idx = rm;
-            if (mod == MODE_RGSTR_MODE)
-            {
-                Register src_reg = d == 0 ? registers[reg_idx] : registers[rm_idx];
-                Register dst_reg = d == 0 ? registers[rm_idx] : registers[reg_idx];
-                printf("mov %s, %s ;0", reg_name(dst_reg, w), reg_name(src_reg, w));
-            }
-            else
-            {
-                bool is_direct_addr = mod == 0 && rm == 0b110;
-                // This is a trick because mod == 1 and mod == 2 will displace one and two bytes
-                // respectively but mod == 3 wraps to 0 since it doesn't displace
-                int bytes_to_read = is_direct_addr ? 2 : mod % 3;
-                bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
-                char* eac_name = is_direct_addr ? "" : get_eac_registers(rm);
-                char disp_buf[16] = {'\0'};
-                if (bytes_to_read > 0)
-                {
-                    i16 disp = get_data(buf, bytes_to_read - 1);
-                    if (is_direct_addr) sprintf(disp_buf, "%d", abs(disp));
-                    else sprintf(disp_buf, " %s %d", disp >= 0 ? "+" : "-", abs(disp));
-                }
-                Register rgstr = registers[reg_idx];
-                if (d) printf("mov %s, [%s%s] ;1", reg_name(rgstr, w), eac_name, disp_buf);
-                else printf("mov [%s%s], %s ;2", eac_name, disp_buf, reg_name(rgstr, w));
-            }
-        }
-        // Immediate to register/memory
-        else if ((inst & ~0x1) == (char)0b11000110)
-        {
-            bytes_read = fread(buf, sizeof(char), 1, f);
-            char w = inst & 0b00000001;
-            char mod = (buf[0] & 0b11000000) >> 6;
-            char rm = (buf[0] & 0b00000111);
-            int bytes_to_read = 1;
-            bytes_to_read += w == 0 ? 0 : 1;
-            // Same trick from earlier, see comment
-            bytes_to_read += mod % 3;
-            bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
-            char *eac_name = get_eac_registers(rm);
-            i16 data = get_data(buf + (char)bytes_to_read - (w == 0 ? 1 : 2), w);
-            char *word_str = w == 0 ? "byte" : "word";
-            char disp_str[16] = {'\0'};
-            if (mod % 3 > 1) sprintf(disp_str, " + %d", get_data(buf, (mod % 3) - 1));
-            printf("mov [%s%s], %s %d ;3", eac_name, disp_str, word_str, data);
-        }
-        // Immediate to register
-        else if ((inst & ~0xF) == (char)0b10110000)
-        {
-            char w = (inst & 0b00001000) >> 3;
-            Register reg = registers[(size_t)inst & 0b00000111];
-            char bytes_to_read = w == 1 ? 2 : 1;
-            bytes_read = fread(buf, sizeof(char), bytes_to_read, f);
-            printf("mov %s, %hd ;4", reg_name(reg, w), get_data(buf, w));
-        }
-        // Memory to accumulator
-        else if ((inst & ~0x1) == (char)0b10100000)
-        {
-            printf("mov mem to acc");
-        }
-        // Accumulator to memory
-        else if ((inst & ~0x1) == (char)0b10100010)
-        {
-            printf("mov acc to mem");
-        }
-        // Register/memory to segment register
-        else if (inst == (char)0b10001110)
-        {
-            printf("mov regmem to segreg");
-        }
-        // Segment register to register/memory
-        else if (inst == (char)0b10001100)
-        {
-            printf("mov segreg to regmem");
-        }
-        else
-        {
-            fprintf(stderr, "Unrecognized Instruction");
-        }
+        if (mov_inst(f, buf, inst)) goto handled;
+        if (add_inst(f, buf, inst)) goto handled;
+        fprintf(stderr, "___Unrecognized Instruction___");
+        handled:
         printf("\n");
     }
 }
diff --git a/implicit_use_general_registers.png b/implicit_use_general_registers.png
new file mode 100644
index 0000000..61f152e
Binary files /dev/null and b/implicit_use_general_registers.png differ