package decoder_8086

import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"

// One entry of a register table.
// `fullname` is the name printed for word-sized operands and `bytename` the
// name printed for byte-sized operands (see get_operand_string). `code` is the
// numeric code the entry is stored under. `value` is a #raw_union, so the
// `low`/`high` bytes and `full` share the same storage.
Register :: struct {
	fullname: string,
	bytename: string,
	value: struct #raw_union {
		using _: struct {
			low, high: byte,
		},
		full: u16,
	},
	code: u8,
}

// General-purpose register table; each entry's `code` equals its index.
// Entries 0b100..0b111 pair the word names sp/bp/si/di with the byte names
// ah/ch/dh/bh, so one code yields a different name depending on operand width.
registers := [8]Register {
	{fullname = "ax", bytename = "al", code = 0b000},
	{fullname = "cx", bytename = "cl", code = 0b001},
	{fullname = "dx", bytename = "dl", code = 0b010},
	{fullname = "bx", bytename = "bl", code = 0b011},
	{fullname = "sp", bytename = "ah", code = 0b100},
	{fullname = "bp", bytename = "ch", code = 0b101},
	{fullname = "si", bytename = "dh", code = 0b110},
	{fullname = "di", bytename = "bh", code = 0b111},
}

// Segment register table (4 entries); only `fullname` and `code` are set, so
// `bytename` stays empty.
segment_registers := [4]Register {
	{fullname = "es", code = 0b000},
	{fullname = "cs", code = 0b001},
	{fullname = "ss", code = 0b010},
	{fullname = "ds", code = 0b011},
}

// Copy of the registers[2] ("dx") entry; get_operand_string prints its
// `fullname` for a VariablePort operand.
variable_port := registers[2]

// Not referenced in the visible part of this file.
RegInfo :: struct {
	in_first_byte: bool,
	shift_offset: u8,
}

// Marker types for the WordSize union below. Neither they nor WordSize are
// referenced in the visible part of this file; InstructionInfo uses WordSize2.
LastBit :: struct{}
FourthBit :: struct{}
Force :: struct{}
WordSize :: union {
	None,
	LastBit,
	FourthBit,
	Force,
}

// How an instruction's operand width is determined. main treats LastBit as
// "bit 0 of the opcode byte", FourthBit as "bit 3 of the opcode byte" and
// Always16 as "always word"; every other value leaves the operand byte-sized.
WordSize2 :: enum {
	None,
	LastBit,
	FourthBit,
	Always8,
	Always16,
	Unsigned8,
}

// Empty marker used as the "nothing here" variant of the unions below.
None :: struct {}

// Optional signed displacement: absent, 8-bit, or 16-bit.
Disp8 :: i8
Disp16 :: i16
Displacement :: union { None, Disp8, Disp16 }

// Optional signed data value: absent, 8-bit, or 16-bit.
// Not referenced in the visible part of this file.
Value8 :: i8
Value16 :: i16
Data :: union { None, Value8, Value16 }

// Variants for a decoded "mod" field.
// Not referenced in the visible part of this file.
ModMemory :: struct {}
Mod8BitDisp :: i8
Mod16BitDisp :: i16
ModRegister :: struct {}
ModMode :: union {
	ModMemory,
	Mod8BitDisp,
	Mod16BitDisp,
	ModRegister,
}

// Payload types of a decoded operand (see Operand and get_operand_string).
// RegisterId indexes `registers`.
RegisterId :: distinct u8
Immediate8 :: distinct i8
Immediate16 :: distinct i16
// addr_id is the value handed to calculate_effective_address.
MemoryAddr :: struct { addr_id: u8, displacement: Displacement }
// Printed as "[n]".
DirectAddress :: distinct i16
// Indexes `segment_registers`.
SegmentRegister :: distinct i8
// Printed as "$" followed by the signed offset (explicit "+" when >= 0).
Jump :: distinct i8
// Printed as variable_port.fullname.
VariablePort :: struct {}
// Mnemonic text produced by get_repeat_op.
Repeat :: string

// A decoded operand; None means the instruction has no operand in that slot.
Operand :: union {
	None,
	RegisterId,
	Immediate8,
	Immediate16,
	MemoryAddr,
	DirectAddress,
	SegmentRegister,
	Jump,
	VariablePort,
	Repeat,
}

// Kind of operand parse_operand should decode for an instruction's source or
// destination slot.
OperandInfo :: enum {
	None,
	Register,
	SegmentRegister,
	RegisterMemory,
	Immediate,
	Accumulator,
	DirectAddress,
	Jump,
	VariablePort,
	ShiftRotate,
	Repeat,
}

// Location of the 3-bit register field inside the first two instruction bytes.
// "Last3" is bits 0..2 and "Middle3" is bits 3..5 of the named byte.
RegisterEncodingBits :: enum {
	None,
	FirstByteLast3,
	SecondByteMiddle3,
	SecondByteLast3,
	FirstByteMiddle3,
}

// Static description of one instruction encoding.
InstructionInfo :: struct {
	// An opcode byte `b` matches when `b & mask == encoding` (try_find_instruction).
	mask: u8,
	encoding: u8,
	opname: OpName,
	desc: string,
	// Operand kinds handed to parse_operand for the source / destination slot.
	src: OperandInfo,
	dst: OperandInfo,
	word_size: WordSize2,
	reg_info: RegisterEncodingBits,
	// When set, main swaps source and destination if bit 1 of the opcode byte is set.
	has_flip: bool,
	// When set, an immediate is only read as 16 bits if bit 1 of the opcode byte is clear.
	has_sign_extension: bool,
	// When set, main resolves the mnemonic through get_opname.
	check_second_encoding: bool,
	// Added by main to the number of bytes consumed by the instruction.
	consume_extra_bytes: int,
	shift_rotate_flag: bool,
}

// Width main subtracts the text length from when padding the trailing ";;" marker.
RIGHT_ALIGN_AMOUNT := 35

// Returns the register expression for an r/m value 0b000..0b111, or "" for
// any other value.
calculate_effective_address :: proc(r_m: u8) -> string {
	switch r_m {
	case 0b000: return "bx + si"
	case 0b001: return "bx + di"
	case 0b010: return "bp + si"
	case 0b011: return "bp + di"
	case 0b100: return "si"
	case 0b101: return "di"
	case 0b110: return "bp"
	case 0b111: return "bx"
	}
	return ""
}

// Formats a memory operand as "[<registers><displacement>]".
// The displacement part comes from get_displacement_string, so an absent or
// zero displacement adds nothing. The returned string is allocated by
// fmt.aprintf.
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
	disp := get_displacement_string(memoryAddr.displacement)
	return fmt.aprintf("[%s%s]", calculate_effective_address(memoryAddr.addr_id), disp)
}

// Returns the textual form of `operand`.
// `is_word` selects the word or byte register name for a RegisterId operand.
// A None operand, or a union holding no variant, yields "".
get_operand_string :: proc(operand: Operand, is_word: bool) -> string {
	switch val in operand {
	case None:
		return ""
	case RegisterId:
		return is_word ? registers[val].fullname : registers[val].bytename
	case Immediate8:
		return fmt.aprintf("%d", val)
	case Immediate16:
		return fmt.aprintf("%d", val)
	case MemoryAddr:
		return get_memory_string(val)
	case DirectAddress:
		return fmt.aprintf("[%d]", val)
	case SegmentRegister:
		return segment_registers[val].fullname
	case Jump:
		// Non-negative offsets get an explicit "+"; negative ones carry their own sign.
		return fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val)
	case VariablePort:
		return variable_port.fullname
	case Repeat:
		return (string)(val)
	}
	return ""
}

// Reads a little-endian signed 16-bit value from data[0] (low byte) and
// data[1] (high byte).
get_i16 :: proc(data: []u8) -> i16 {
	return ((i16)(data[1]) << 8) | (i16)(data[0])
}

// Decodes the displacement selected by the top two bits of data[0].
// 1 -> signed 8-bit displacement in data[1]; 2 -> signed 16-bit displacement
// in data[1..2]; anything else -> no displacement.
// Returns the displacement and the number of displacement bytes (0, 1 or 2).
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	mod := (data[0] & 0b11000000) >> 6
	disp: Displacement = None{}
	amount: int
	switch mod {
	case 1:
		disp = (i8)(data[1])
		amount = 1
	case 2:
		disp = get_i16(data[1:])
		amount = 2
	}
	return disp, amount
}

// Formats a displacement as " + n" or " - n"; absent and zero displacements
// yield "".
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen to int before taking the absolute value: abs of the minimum i8/i16
	// (-128 / -32768) wraps back to a negative number in the narrow type.
	amount: int
	#partial switch value in displacement {
	case Disp8:
		amount = int(value)
	case Disp16:
		amount = int(value)
	}
	if amount == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", amount > 0 ? "+" : "-", math.abs(amount))
}

// Builds a string-instruction mnemonic from `data`: bits 1..3 pick the base
// name, bit 0 picks the "w" (set) or "b" (clear) suffix.
// An unrecognised bit pattern yields just the suffix.
get_repeat_op :: proc(data: u8) -> Repeat {
	bits := (data & 0b1110) >> 1
	w := (data & 0b1) == 1 ? "w" : "b"
	rep: string
	switch bits {
	case 0b010: rep = "movs"
	case 0b011: rep = "cmps"
	case 0b101: rep = "stos"
	case 0b110: rep = "lods"
	case 0b111: rep = "scas"
	}
	return Repeat(fmt.aprintf("%s%s", rep, w))
}

// Returns the first entry of `instructions` whose masked encoding matches `b`,
// or (zero value, false) when none does.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for inst in instructions {
		if inst.encoding == (b & inst.mask) {
			return inst, true
		}
	}
	return InstructionInfo{}, false
}

// Resolves the mnemonic of an instruction group from the 3-bit field in
// bits 3..5 of an instruction byte. .TBD1 and .TBD3 read the field from
// data[0]; every other opname reads it from data[1].
// Field values without a table entry yield "".
get_opname :: proc(opname: OpName, data: []u8) -> string {
	// `&` and `>>` share one precedence level in Odin and associate left to
	// right; the parentheses below only make that grouping explicit.
	name: string
	if opname == .TBD2 {
		field := (data[1] & 0b00111000) >> 3
		switch field {
		case 0b000: name = "inc"
		case 0b001: name = "dec"
		case 0b010: name = "call"
		case 0b011: name = "call"
		case 0b100: name = "jmp"
		case 0b101: name = "jmp"
		case 0b110: name = "push"
		}
	} else if opname == .TBD5 {
		field := (data[1] & 0b00111000) >> 3
		switch field {
		case 0b000: name = "test"
		// NOTE(review): "dec" also appears at 0b001 in the .TBD2 table above and
		// does not fit the rest of this group; possibly a copy-paste slip.
		// Confirm against the opcode reference.
		case 0b001: name = "dec"
		case 0b010: name = "not"
		case 0b011: name = "neg"
		case 0b100: name = "mul"
		case 0b101: name = "imul"
		case 0b110: name = "div"
		case 0b111: name = "idiv"
		}
	} else if opname == .TBD6 {
		field := (data[1] & 0b00111000) >> 3
		switch field {
		case 0b000: name = "rol"
		case 0b001: name = "ror"
		case 0b010: name = "rcl"
		case 0b011: name = "rcr"
		case 0b100: name = "shl"
		case 0b101: name = "shr"
		case 0b111: name = "sar"
		}
	} else {
		bits: u8
		if opname == .TBD1 || opname == .TBD3 {
			bits = (data[0] & 0b00111000) >> 3
		} else {
			bits = (data[1] & 0b00111000) >> 3
		}
		switch bits {
		case 0b000: name = "add"
		case 0b001: name = "or"
		case 0b010: name = "adc"
		case 0b011: name = "sbb"
		case 0b100: name = "and"
		case 0b101: name = "sub"
		case 0b110: name = "xor"
		case 0b111: name = "cmp"
		}
	}
	return name
}

// Extracts the 3-bit register field from the location named by `info`.
// Returns 0 when no location is configured.
@(private="file")
read_reg_field :: proc(info: RegisterEncodingBits, data: []u8) -> u8 {
	switch info {
	case .None:
		// No encoded location provided: fall through to register code 0.
	case .FirstByteLast3:
		return data[0] & 0b111
	case .FirstByteMiddle3:
		return (data[0] >> 3) & 0b111
	case .SecondByteMiddle3:
		return (data[1] >> 3) & 0b111
	case .SecondByteLast3:
		return data[1] & 0b111
	}
	return 0
}

// Decodes one operand of kind `opinfo` from `data`, where data[0] is the
// opcode byte of the current instruction.
//
// Inputs:
// - inst:      encoding description (register field location, sign-extension flag)
// - opinfo:    which kind of operand to decode
// - data:      instruction bytes starting at the opcode byte
// - processed: running count of bytes consumed by the instruction; advanced
//              for every extra byte this operand uses and read as the offset
//              of an immediate
// - word:      true when the instruction operates on 16-bit operands
//
// Returns the decoded operand, or None{} for `.None`.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg := read_reg_field(inst.reg_info, data)
		operand = (RegisterId)(registers[reg].code)
	case .SegmentRegister:
		// segment_registers has only four entries, so keep the low two bits of
		// the field; an unmasked 3-bit value of 4..7 would index out of bounds.
		reg := read_reg_field(inst.reg_info, data) & 0b11
		operand = (SegmentRegister)(segment_registers[reg].code)
	case .RegisterMemory:
		mod := data[1] >> 6
		rm := data[1] & 0b111
		processed^ += 1 // the byte holding mod / r/m
		switch mod {
		case 0:
			if rm == 0b110 {
				// mod 0 with r/m 0b110: a 16-bit direct address follows.
				operand = (DirectAddress)(get_i16(data[2:]))
				processed^ += 2
			} else {
				operand = MemoryAddr{ addr_id = rm, displacement = None{} }
			}
		case 1:
			operand = MemoryAddr{ addr_id = rm, displacement = (i8)(data[2]) }
			processed^ += 1
		case 2:
			operand = MemoryAddr{ addr_id = rm, displacement = get_i16(data[2:]) }
			processed^ += 2
		case 3:
			operand = (RegisterId)(registers[rm].code)
		}
	case .Immediate:
		// The immediate starts right after everything consumed so far.
		data_idx := processed^
		word_signed := word
		if inst.has_sign_extension {
			// Bit 1 of the opcode byte set: only one immediate byte is present.
			word_signed &&= data[0] & 0b0000_0010 == 0
		}
		operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
		processed^ += word_signed ? 2 : 1
	case .Accumulator:
		operand = (RegisterId)(registers[0].code)
	case .DirectAddress:
		operand = (DirectAddress)(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		// NOTE(review): the addition is done in i8, so displacements 126 and 127 wrap negative.
		operand = (Jump)((i8)(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		// Bit 1 of the opcode byte picks registers[1] ("cx"/"cl") over the constant 1.
		v_flag := data[0] & 0b10 != 0
		operand = v_flag ? (RegisterId)(registers[1].code) : (Immediate8)(1)
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	}
	return operand
}

main :: proc() {
	f,err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)
	data := make([]u8, 1024)
	bytes_read, err2 := os.read(f, data)
	if err2 != nil {
		// ...
os.exit(1) } if false { os.exit(0) } // asdf :u16 = 0b00000011_11101000 // asdf2 :i16 = (i16)(asdf) // fmt.printfln("%d", asdf2) print_at_end := false read_next := false src_dst := true idx := 0 added_label := false line_count := 0 has_lock: bool has_segment: bool last_opname: [3]byte repeating_op_count := 0 instruction_builder := strings.builder_make() instruction_list := make([dynamic]string, 512) fmt.println("bits 16") for idx < bytes_read { processed := 1 curr_byte := data[idx] inst, ok := try_find_instruction(curr_byte) if !ok { txt := "unknown instruction" if print_at_end { line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) instruction_list[line_count] = line line_count += 1 } else { fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) } idx += 1 continue } // Here we check if the instruction affects the next instruction if inst.opname == .LOCK { has_lock = true idx += 1 continue } else if inst.opname == .SEGMENT { has_segment = true idx += 1 continue } has_segment = false src_opr: Operand dst_opr: Operand word: bool flip: bool op: Operand if inst.has_flip { flip = curr_byte & 2 != 0 } #partial switch inst.word_size { case .LastBit: word = curr_byte & 1 == 1 case .FourthBit: word = curr_byte & 0b0000_1000 != 0 case .Always16: word = true } opname: string // TODO: Figure out a way to do this in the string builder if inst.check_second_encoding { opname = strings.to_lower(fmt.aprintf("%s", get_opname(inst.opname, data[idx:]))) // NOTE: This is a special case because it matches the bit pattern of .TBD5, // but the instruction itself is different if opname == "not" { inst = not_inst } } else { opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) } dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word) src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word) // TODO: This is ugly as hell _,ok_1 := src_opr.(Immediate8) _,ok_2 := src_opr.(Immediate16) _,ok_3 := 
dst_opr.(MemoryAddr); _,ok_4 := dst_opr.(DirectAddress); shiftrot := inst.src == .ShiftRotate size_string := "" if ((ok_1 || ok_2) && (ok_3 || ok_4)) || ((ok_3 || ok_4) && shiftrot) { size_string = word ? "word " : "byte " } if flip { src_opr, dst_opr = dst_opr, src_opr } dst_str := get_operand_string(dst_opr, word) src_str := get_operand_string(src_opr, word) full_inst: string if dst_str == "" { _,ok_1 := src_opr.(MemoryAddr); _,ok_2 := src_opr.(DirectAddress); if (ok_1 || ok_2) && inst.word_size != .Always16 { size_string = word ? "word " : "byte " } full_inst = fmt.aprintf("%s %s%s", opname, size_string, src_str) } else { // NOTE: I don't know why this is the case, but only the move has the word/byte // keyword next to the immediate, but other instructions have it on the memory address if opname == "mov" { full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str) } else { full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str) } } processed += inst.consume_extra_bytes // fmt.sbprintf(&instruction_builder, "%s%s%s %*[2]s", lock_string, seg_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") fmt.sbprintf(&instruction_builder, "%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") for i in 0.. 1 { fmt.println() } repeating_op_count = 0 } else { repeating_op_count += 1 } copy(last_opname[:], op2[0:3]) fmt.println(op2) idx += processed strings.builder_reset(&instruction_builder) } if print_at_end { for i in 0..