diff --git a/decoder8086.odin b/decoder8086.odin index faa15b0..adc7e9d 100644 --- a/decoder8086.odin +++ b/decoder8086.odin @@ -16,14 +16,7 @@ Register :: struct { code: u8, } -RegMemMode :: enum { - Memory00 = 0b00, - Memory08 = 0b01, - Memory16 = 0b10, - Register = 0b11, -}; - -OpCode :: enum { +OpName :: enum { MOV, ADD, SUB, @@ -42,35 +35,116 @@ registers := [8]Register { {fullname = "di", bytename = "bh", code = 0b111}, } -Instruction :: struct { +RegInfo :: struct { + in_first_byte: bool, + shift_offset: u8, +} + +LastBit :: struct{} +FourthBit :: struct{} + +WordSize :: union { + None, + LastBit, + FourthBit, +} + +InstructionInfo :: struct { mask: u8, encoding: u8, name: string, desc: string, + has_mod_rm: bool, + word_size: WordSize, + reg_info: Maybe(RegInfo), + has_data: bool, + has_displacement: bool, + has_segreg: bool, + has_flip: bool, + has_explicit_size: bool, + has_accumulator: bool, } -instructions := [?]Instruction { - { mask = 0b11111100, encoding = 0b10001000, name = "mov", desc = "Register/memory to/from register" }, - { mask = 0b11111110, encoding = 0b11000110, name = "mov", desc = "Immediate to register/memory" }, - { mask = 0b11110000, encoding = 0b10110000, name = "mov", desc = "Immediate to register" }, - { mask = 0b11111110, encoding = 0b10100000, name = "mov", desc = "Memory to accumulator" }, - { mask = 0b11111110, encoding = 0b10100010, name = "mov", desc = "Accumulator to memory" }, - { mask = 0b11111111, encoding = 0b10001110, name = "mov", desc = "Register/memory to segment register" }, - { mask = 0b11111111, encoding = 0b10001100, name = "mov", desc = "Segment register to register/memory" }, +reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 } +reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 } + +instructions := [?]InstructionInfo { + { name = "mov", desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000, + has_mod_rm = true, reg_info = reg_second_middle, has_data = false, has_displacement = true, + word_size = LastBit{}, has_flip = true }, + { name = "mov", desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110, + has_mod_rm = true, reg_info = nil, has_data = true, has_displacement = true, + word_size = LastBit{}, has_explicit_size = true }, + { name = "mov", desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000, + has_mod_rm = false, reg_info = reg_first_last, has_data = true, has_displacement = false, + word_size = FourthBit{} }, + { name = "mov", desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000, + has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true, + word_size = LastBit{}, has_accumulator = true }, + { name = "mov", desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010, + has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true, + word_size = LastBit{}, has_accumulator = true }, + { name = "mov", desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110, + has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true, + word_size = None{} }, + { name = "mov", desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100, + has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true, + word_size = None{} }, } -ParsedInstruction :: struct { - code: OpCode, - displacement: DisplacementMode, +None :: struct {} + +Disp8 :: i8 +Disp16 :: i16 +Displacement :: union { + None, + Disp8, + Disp16 } -inst_map := make(map[u8]Instruction) +Value8 :: i8 +Value16 :: i16 +Data :: union { + None, + Value8, + Value16 +} + +ModMemory :: struct {} +Mod8BitDisp :: i8 +Mod16BitDisp :: i16 +ModRegister :: struct {} + +ModMode :: union { + ModMemory, + Mod8BitDisp, + Mod16BitDisp, + ModRegister, +} + +RegisterId :: distinct u8 +Immediate8 :: distinct i8 +Immediate16 :: distinct i16 +MemoryAddr :: struct { + addr_id: u8, + displacement: Displacement +} +DirectAddress :: distinct i16 +Accumulator8 :: distinct i8 +Accumulator16 :: distinct i16 +OperandType :: union { + RegisterId, + Immediate8, + Immediate16, + MemoryAddr, + DirectAddress, + Accumulator8, + Accumulator16, +} + +inst_map := make(map[u8]InstructionInfo) RIGHT_ALIGN_AMOUNT := 30 -get_instruction :: proc(bytes: []u8) -> (Instruction, u8) { - return {}, 0 -} - calculate_effective_address :: proc(r_m: u8) -> string { val: string switch r_m { @@ -94,39 +168,6 @@ calculate_effective_address :: proc(r_m: u8) -> string { return val } -ModMemory :: struct {} -Mod8BitDisp :: i8 -Mod16BitDisp :: i16 -ModRegister :: struct {} - -DisplacementMode :: union { - ModMemory, - Mod8BitDisp, - Mod16BitDisp, - ModRegister, -} - -ModField :: struct { - displacement: DisplacementMode -} - -None :: struct {} -Disp8 :: i8 -Disp16 :: i16 -Displacement :: union { - None, - Disp8, - Disp16 -} - -RegisterId :: u8 -Immediate8 :: i8 -Immediate16 :: i16 -MemoryAddr :: struct { - addr_id: u8, - displacement: Displacement -} - get_memory_string :: proc(memoryAddr: MemoryAddr) -> string { disp: string switch value in memoryAddr.displacement { @@ -145,42 +186,46 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string { return text } -MemoryType :: union { - RegisterId, - MemoryAddr -} - -OperandType :: union { - RegisterId, - Immediate8, - Immediate16, - MemoryAddr +get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string { + switch val in mem_type { + case RegisterId: + return is_word ? registers[val].fullname : registers[val].bytename + case Immediate8: + return fmt.aprintf("%d", val) + case Immediate16: + return fmt.aprintf("%d", val) + case MemoryAddr: + return get_memory_string(val) + case DirectAddress: + return fmt.aprintf("[%d]", val) + case Accumulator8: + return fmt.aprintf("[%d]", val) + case Accumulator16: + return fmt.aprintf("[%d]", val) + } + return "" } get_i16 :: proc(data: []u8) -> i16 { return (i16)(data[1]) << 8 | (i16)(data[0]) } -parse_displacement :: proc(data: []u8) -> (displacement: DisplacementMode, disp_amount: int) { +parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) { mod := (data[0] & 0b11000000) >> 6 - disp: DisplacementMode + disp: Displacement = None{} amount: int switch mod { - case 0: - disp = ModMemory{} case 1: disp = (i8)(data[1]) amount = 1 case 2: disp = get_i16(data[1:]) amount = 2 - case 3: - disp = ModRegister{} } return disp, amount } -get_displacement_string :: proc(displacement: DisplacementMode) -> string { +get_displacement_string :: proc(displacement: Displacement) -> string { disp := "" #partial switch value in displacement { case i8: @@ -195,7 +240,7 @@ get_displacement_string :: proc(displacement: DisplacementMode) -> string { return disp } -try_find_instruction :: proc(b: u8) -> (Instruction, bool) { +try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { mask: u8 = 0xFF for j in 0..=4 { encoding := b & mask @@ -204,7 +249,7 @@ try_find_instruction :: proc(b: u8) -> (Instruction, bool) { } mask <<= 1 } - return Instruction{}, false + return InstructionInfo{}, false } main :: proc() { @@ -229,20 +274,20 @@ main :: proc() { if false { os.exit(0) } - + // asdf :u16 = 0b1111_0000_1001_0100 + // asdf2 :i16 = (i16)(asdf) + // fmt.printfln("%d", asdf2) read_next := false src_dst := true fmt.println("bits 16\n") idx := 0 for idx < bytes_read { - processed := 0 + processed := 1 curr_byte := data[idx] - inst_name: string - if instruction, ok := try_find_instruction(curr_byte); ok { - inst_name = instruction.name - } else { + instruction, ok := try_find_instruction(curr_byte) + if !ok { txt := "unknown instruction" fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) idx += 1 @@ -251,149 +296,78 @@ main :: proc() { lhs2: OperandType rhs2: OperandType - lhs: string - rhs: string is_word: bool is_immediate := false flip_dst := false + rm: u8 + mod: u8 + reg: u8 - if curr_byte & 0b11110000 == 0b10110000 { - is_word = curr_byte & 0b0000_1000 != 0 - reg := registers[curr_byte & 0b00000111] - lhs = is_word ? reg.fullname : reg.bytename - processed += is_word ? 1 : 0 - lhs2 := (RegisterId)(reg.code) - rhs2 := (OperandType)(is_word ? ((Immediate16)(get_i16(data[idx+1:]))) : ((Immediate8)(data[idx+1]))) - } else if curr_byte & 0b11111000 == 0b10001000 { - mod_reg_rm := data[idx + 1] - is_word = curr_byte & 1 == 1 + if instruction.has_flip { flip_dst = curr_byte & 2 != 0 - reg := (mod_reg_rm & 0b00111000) >> 3 - rm := mod_reg_rm & 0b00000111 - mod, disp_amount := parse_displacement(data[idx + 1:]) - switch disp_val in mod { - case ModMemory: - lhs2 = (RegisterId)(reg) - rhs2 = MemoryAddr{ addr_id = rm , displacement = None{} } - processed += 1 - case Mod8BitDisp: - lhs2 = (RegisterId)(reg) - rhs2 = MemoryAddr{ addr_id = rm , displacement = disp_val } - processed += 1 - case Mod16BitDisp: - lhs2 = (RegisterId)(reg) - rhs2 = MemoryAddr{ addr_id = rm , displacement = disp_val } - processed += 2 - case ModRegister: - lhs2 = (RegisterId)(rm) - rhs2 = (RegisterId)(reg) - processed += 1 + } + + switch val in instruction.word_size { + case LastBit: is_word = curr_byte & 1 == 1 + case FourthBit: is_word = curr_byte & 0b0000_1000 != 0 + case None: + } + + if reg_info, ok := instruction.reg_info.(RegInfo); ok { + b := reg_info.in_first_byte ? data[idx] : data[idx+1] + reg = (b >> reg_info.shift_offset) & 0b111 + } + + if instruction.has_mod_rm { + mod = data[idx+1] >> 6 + rm = data[idx+1] & 0b00000111 + + processed += 1 + ((int)(mod) % 3) + + if mod == 0 { + if rm == 0b110 { + lhs2 = (DirectAddress)(get_i16(data[idx+2:])) + processed += 2 + } else { + lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} } + } + } else if mod == 1 { + lhs2 = MemoryAddr{ addr_id = rm , displacement = (i8)(data[idx+2]) } + } else if mod == 2 { + lhs2 = MemoryAddr{ addr_id = rm , displacement = get_i16(data[idx+2:]) } + } else if mod == 3 { + lhs2 = (RegisterId)(registers[rm].code) } - dst_reg := registers[rm] + if instruction.has_explicit_size { + imm_idx := idx + 2 + ((int)(mod) % 3) + rhs2 = (OperandType)(is_word ? (Immediate16)(get_i16(data[imm_idx:])) : (Immediate8)(data[imm_idx])) + processed += is_word ? 2 : 1 + } else { + rhs2 = (RegisterId)(reg) + } + } else { + lhs2 = (RegisterId)(registers[reg].code) + if instruction.has_accumulator { + rhs2 = (OperandType)(is_word ? ((Accumulator16)(get_i16(data[idx+1:]))) : ((Accumulator8)(data[idx+1]))) + } else { + rhs2 = (OperandType)(is_word ? ((Immediate16)(get_i16(data[idx+1:]))) : ((Immediate8)(data[idx+1]))) + } + processed += is_word ? 2 : 1 } if flip_dst { lhs2, rhs2 = rhs2, lhs2 } - switch val in lhs2 { - case RegisterId: - lhs = fmt.aprintf("%s", is_word ? registers[val].fullname : registers[val].bytename) - case Immediate8: - lhs = fmt.aprintf("%d", val) - case Immediate16: - lhs = fmt.aprintf("%d", val) - case MemoryAddr: - lhs = get_memory_string(val) - } - switch val in rhs2 { - case RegisterId: - rhs = is_word ? registers[val].fullname : registers[val].bytename - case Immediate8: - rhs = fmt.aprintf("%d", val) - case Immediate16: - rhs = fmt.aprintf("%d", val) - case MemoryAddr: - rhs = get_memory_string(val) - } - full_inst := fmt.aprintf("%s %s, %s", inst_name, lhs, rhs) - processed += 1 - fmt.printf("%s %*[1]s a %08b", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;", curr_byte) - for i in 0..=processed { - fmt.printf(" %08b", data[processed + 1 + i]) + + lhs := get_memory_type_string(lhs2, is_word) + rhs := get_memory_type_string(rhs2, is_word) + size_string := instruction.has_explicit_size ? is_word ? "word " : "byte " : "" + full_inst := fmt.aprintf("%s %s, %s%s", instruction.name, lhs, size_string, rhs) + fmt.printf("%s %*[1]s a", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") + for i in 0..> 3 - rm := next_byte & 0b00000111 - dst_reg := registers[rm] - - displacement, disp_amount := parse_displacement(data[processed + 1:]) - - src_name, dst_name: string - // switch disp_val in displacement { - // case DisplaceMemoryMode: - // src_name = is_word ? registers[rm].fullname : registers[rm].bytename - // if is_imm_mode { - // if is_word { - // dst_name = fmt.aprintf("word %d", get_i16(data[processed+2:])) - // } else { - // dst_name = fmt.aprintf("byte %d", (i8)(data[processed+2])) - // } - // } - // disp_amount += is_word ? 2 : 1 - // case Displace8Bits: - // case Displace16Bits: - // case DisplaceRegisterMode: - // } - // if disp_val, ok := displacement.(DisplaceRegisterMode); ok { - // src_name = is_word ? registers[rm].fullname : registers[rm].bytename - // } else { - // src_name = fmt.aprintf("[%s%s]", calculate_effective_address(rm), get_displacement_string(displacement)) - // } - - if flip_src && !is_imm_mode { src_name, dst_name = dst_name, src_name } - - inst_string := fmt.aprintf("mov %s, %s", src_name, dst_name) - - fmt.printf("%s %*[1]s a %08b", inst_string, RIGHT_ALIGN_AMOUNT - len(inst_string), ";;", curr_byte) - for i in 0..=disp_amount { - fmt.printf(" %08b", data[processed + 1 + i]) - } - fmt.println() - processed += 1 + disp_amount - } else if curr_byte & 0b11110000 == 0b10110000 { - is_word := curr_byte & 0b0000_1000 != 0 - reg := curr_byte & 0b00000111 - dst_name: string - imm: i16 - if is_word { - dst_name = registers[reg].fullname - imm = (i16)(data[processed+2]) << 8 | (i16)(data[processed+1]) - processed += 2 - } else { - dst_name = registers[reg].bytename - imm = (i16)(data[processed+1]) - processed += 1 - } - inst_string := fmt.aprintf("mov %s, %d", dst_name, imm) - fmt.printfln("%s %*[1]s b %08b %08b", inst_string, RIGHT_ALIGN_AMOUNT - len(inst_string), ";; 2", curr_byte, data[processed + 1]) - - } else if curr_byte & 0b11111110 == 0b11000110 { - is_word := curr_byte & 1 != 0 - fmt.printfln("mov [%s], asdf ;; %08b %8b %8b", "", curr_byte, data[processed + 1], data[processed + 2]) - } else { - txt := "unknown instruction" - fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - } - processed += 1 } }