diff --git a/decoder8086.odin b/decoder8086.odin index 6afae9a..e7ba68e 100644 --- a/decoder8086.odin +++ b/decoder8086.odin @@ -3,6 +3,7 @@ package decoder_8086 import "core:os" import "core:fmt" import "core:math" +import "core:strings" Register :: struct { fullname: string, @@ -17,11 +18,44 @@ Register :: struct { } OpName :: enum { + TBD, MOV, ADD, SUB, CMP, JMP, + JNZ, + JNGE, + JE, + JZ, + JL, + JLE, + JNG, + JB, + JNAE, + JP, + JPE, + JNA, + JBE, + JO, + JS, + JNE, + JNL, + JGE, + JNLE, + JG, + JNB, + JAE, + JNBE, + JA, + JNP, + JPO, + JNO, + JNS, + LOOP, + LOOPZ, + LOOPNZ, + JCXZ, } registers := [8]Register { @@ -40,6 +74,12 @@ RegInfo :: struct { shift_offset: u8, } +OpCodeId :: enum { + None, + First, + Second, +} + LastBit :: struct{} FourthBit :: struct{} @@ -52,51 +92,93 @@ WordSize :: union { InstructionInfo :: struct { mask: u8, encoding: u8, - name: string, + opname: OpName, desc: string, - has_mod_rm: bool, + opcode_id: OpCodeId, word_size: WordSize, reg_info: Maybe(RegInfo), has_data: bool, - has_displacement: bool, + has_address: bool, + has_accumulator: bool, has_segreg: bool, has_flip: bool, - has_bracketed_immediate: bool, has_explicit_size: bool, has_sign_extension: bool, + is_jump: bool, } reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 } reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 } instructions := [?]InstructionInfo { - { name = "mov", desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000, - has_mod_rm = true, reg_info = reg_second_middle, has_data = false, has_displacement = true, - word_size = LastBit{}, has_flip = true }, - { name = "mov", desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110, - has_mod_rm = true, reg_info = nil, has_data = true, has_displacement = true, - word_size = LastBit{}, has_bracketed_immediate = false, has_explicit_size = true }, - { name = "mov", desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000, - has_mod_rm = false, reg_info = reg_first_last, has_data = true, has_displacement = false, - word_size = FourthBit{} }, - { name = "mov", desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000, - has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true, - word_size = LastBit{}, has_bracketed_immediate = true }, - { name = "mov", desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010, - has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true, - word_size = LastBit{}, has_bracketed_immediate = true }, - { name = "mov", desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110, - has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true, - word_size = None{} }, - { name = "mov", desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100, - has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true, - word_size = None{} }, - { name = "add", desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b00000000, - has_mod_rm = true, reg_info = reg_second_middle, has_data = false, has_displacement = true, - word_size = LastBit{}, has_flip = true }, - { name = "add", desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b10000000, - has_mod_rm = true, reg_info = nil, has_data = true, has_displacement = true, + { opname = .MOV, desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000, + reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true }, + { opname = .MOV, desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110, + has_data = true, has_address = true, word_size = LastBit{}, has_explicit_size = true }, + { opname = .MOV, desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000, + reg_info = reg_first_last, has_data = true, word_size = FourthBit{} }, + { opname = .MOV, desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000, + has_flip = true, word_size = LastBit{}, has_accumulator = true }, + { opname = .MOV, desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010, + has_flip = true, word_size = LastBit{}, has_accumulator = true }, + { opname = .MOV, desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110, + has_segreg = true, has_address = true, word_size = None{} }, + { opname = .MOV, desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100, + has_segreg = true, has_address = true, word_size = None{} }, + { opname = .TBD, desc = "Reg/memory with register to either", mask = 0b11000100, encoding = 0b00000000, + opcode_id = .First, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true }, + { opname = .TBD, desc = "Immediate to register/memory", mask = 0b11111100, encoding = 0b10000000, + opcode_id = .Second, has_data = true, has_address = true, word_size = LastBit{}, has_sign_extension = true }, + { opname = .TBD, desc = "Immediate to accumulator", mask = 0b11111110, encoding = 0b00000100, + word_size = LastBit{}, has_data = true }, + + { opname = .JE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true}, + { opname = .JZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true}, + + { opname = .JL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true}, + { opname = .JNGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true}, + + { opname = .JLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true}, + { opname = .JNG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true}, + + { opname = .JB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true}, + { opname = .JNAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true}, + + { opname = .JBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true}, + { opname = .JNA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true}, + + { opname = .JP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true}, + { opname = .JPE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true}, + + { opname = .JO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110000, is_jump = true}, + { opname = .JS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111000, is_jump = true}, + + { opname = .JNE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true}, + { opname = .JNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true}, + + { opname = .JNL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true}, + { opname = .JGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true}, + + { opname = .JNLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true}, + { opname = .JG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true}, + + { opname = .JNB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true}, + { opname = .JAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true}, + + { opname = .JNBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true}, + { opname = .JA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true}, + + { opname = .JNP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true}, + { opname = .JPO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true}, + + { opname = .JNO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110001, is_jump = true}, + + { opname = .JNS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111001, is_jump = true}, + { opname = .LOOP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100010, is_jump = true}, + { opname = .LOOPZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100001, is_jump = true}, + { opname = .LOOPNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100000, is_jump = true}, + { opname = .JCXZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100011, is_jump = true}, } None :: struct {} @@ -136,11 +218,13 @@ MemoryAddr :: struct { addr_id: u8, displacement: Displacement } +Accumulator :: distinct i16 OperandType :: union { RegisterId, Immediate8, Immediate16, MemoryAddr, + Accumulator, } inst_map := make(map[u8]InstructionInfo) @@ -187,17 +271,19 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string { return text } -get_memory_type_string :: proc(mem_type: OperandType, is_word: bool, bracketed: bool) -> string { +get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string { string_val: string switch val in mem_type { case RegisterId: string_val = is_word ? registers[val].fullname : registers[val].bytename case Immediate8: - string_val = fmt.aprintf(bracketed ? "[%d]" : "%d", val) + string_val = fmt.aprintf("%d", val) case Immediate16: - string_val = fmt.aprintf(bracketed ? "[%d]" : "%d", val) + string_val = fmt.aprintf("%d", val) case MemoryAddr: string_val = get_memory_string(val) + case Accumulator: + string_val = fmt.aprintf("[%d]", val) } return string_val } @@ -237,20 +323,28 @@ get_displacement_string :: proc(displacement: Displacement) -> string { } try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { - mask: u8 = 0xFF - for j in 0..=4 { - encoding := b & mask - if inst, ok := inst_map[encoding]; ok { + for inst in instructions { + // fmt.print(inst.encoding, ",") + if inst.encoding == (b & inst.mask) { return inst, true } - mask <<= 1 } return InstructionInfo{}, false } +get_opname :: proc(b: u8) -> string { + name: string + switch b & 0b00111000 >> 3 { + case 0b000: name = "add" + case 0b101: name = "sub" + case 0b111: name = "cmp" + } + return name +} + main :: proc() { - f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-39.bin") - // f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-40.bin") + // f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-39.bin") + f,err := os.open(os.args[1]) if err != os.ERROR_NONE { os.exit(1) } @@ -270,13 +364,14 @@ main :: proc() { if false { os.exit(0) } - // asdf :u16 = 0b1111_0000_1001_0100 + // asdf :u16 = 0b00000011_11101000 // asdf2 :i16 = (i16)(asdf) // fmt.printfln("%d", asdf2) read_next := false src_dst := true fmt.println("bits 16\n") idx := 0 + added_label := false for idx < bytes_read { processed := 1 curr_byte := data[idx] @@ -294,7 +389,6 @@ main :: proc() { is_word: bool is_immediate := false flip_dst := false - bracket_operand := instruction.has_bracketed_immediate rm: u8 mod: u8 reg: u8 @@ -316,7 +410,7 @@ main :: proc() { data_idx := idx + 1 - if instruction.has_mod_rm { + if instruction.has_address { mod = data[idx+1] >> 6 rm = data[idx+1] & 0b00000111 @@ -325,8 +419,7 @@ main :: proc() { if mod == 0 { if rm == 0b110 { - lhs2 = (Immediate16)(get_i16(data[idx+2:])) - bracket_operand = true + lhs2 = (Accumulator)(get_i16(data[idx+2:])) processed += 2 } else { lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} } @@ -342,8 +435,15 @@ main :: proc() { lhs2 = (RegisterId)(registers[reg].code) } if instruction.has_data { + word_signed := is_word + if instruction.has_sign_extension { + word_signed = is_word && curr_byte & 0b0000_0010 == 0 + } + processed += word_signed ? 2 : 1 + rhs2 = (OperandType)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) + } else if instruction.has_accumulator { processed += is_word ? 2 : 1 - rhs2 = (OperandType)(is_word ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) + rhs2 = (OperandType)(is_word ? (Accumulator)(get_i16(data[data_idx:])) : (Accumulator)(data[data_idx])) } else { rhs2 = (RegisterId)(reg) } @@ -352,10 +452,32 @@ main :: proc() { lhs2, rhs2 = rhs2, lhs2 } - lhs := get_memory_type_string(lhs2, is_word, bracket_operand) - rhs := get_memory_type_string(rhs2, is_word, bracket_operand) + lhs := get_memory_type_string(lhs2, is_word) + rhs := get_memory_type_string(rhs2, is_word) size_string := instruction.has_explicit_size ? is_word ? "word " : "byte " : "" - full_inst := fmt.aprintf("%s %s, %s%s", instruction.name, lhs, size_string, rhs) + full_inst: string + opname: string + if instruction.opname == .TBD { + opid: u8 + if instruction.opcode_id == .First { + opid = curr_byte & 0b00_111_000 >> 3 + } else if instruction.opcode_id == .Second { + opid = data[idx+1] & 0b00_111_000 >> 3 + } + opname = strings.to_lower(fmt.aprintf("%s", get_opname(opid))) + } else { + opname = strings.to_lower(fmt.aprintf("%s", instruction.opname)) + } + if instruction.is_jump { + if !added_label { + fmt.println("\nlabel:") + added_label = true + } + full_inst = fmt.aprintf("%s %s", strings.to_lower(opname), "label") + processed += 1 + } else { + full_inst = fmt.aprintf("%s %s, %s%s", strings.to_lower(opname), lhs, size_string, rhs) + } fmt.printf("%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") for i in 0..