package decoder_8086

import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"

// One general-purpose register slot, addressed by its 3-bit encoding.
// `fullname` is printed for word operands and `bytename` for byte operands
// (see get_memory_type_string). `value` overlays the two byte halves on the
// 16-bit register contents.
Register :: struct {
	fullname: string,
	bytename: string,
	value: struct #raw_union {
		using _: struct {
			low, high: byte,
		},
		full: u16,
	},
	code: u8,
}

// Mnemonics known to the decoder. `.TBD` marks table entries whose mnemonic
// is selected by three bits of the instruction itself (see get_opname).
OpName :: enum {
	TBD,
	MOV,
	ADD,
	SUB,
	CMP,
	JMP,
	JNZ,
	JNGE,
	JE,
	JZ,
	JL,
	JLE,
	JNG,
	JB,
	JNAE,
	JP,
	JPE,
	JNA,
	JBE,
	JO,
	JS,
	JNE,
	JNL,
	JGE,
	JNLE,
	JG,
	JNB,
	JAE,
	JNBE,
	JA,
	JNP,
	JPO,
	JNO,
	JNS,
	LOOP,
	LOOPZ,
	LOOPNZ,
	JCXZ,
}

// Register table indexed by the 3-bit register code.
registers := [8]Register {
	{fullname = "ax", bytename = "al", code = 0b000},
	{fullname = "cx", bytename = "cl", code = 0b001},
	{fullname = "dx", bytename = "dl", code = 0b010},
	{fullname = "bx", bytename = "bl", code = 0b011},
	{fullname = "sp", bytename = "ah", code = 0b100},
	{fullname = "bp", bytename = "ch", code = 0b101},
	{fullname = "si", bytename = "dh", code = 0b110},
	{fullname = "di", bytename = "bh", code = 0b111},
}

// Where the 3-bit register field lives: in the first or the second
// instruction byte, and how far it must be shifted right before masking.
RegInfo :: struct {
	in_first_byte: bool,
	shift_offset: u8,
}

// Which instruction byte carries the 3-bit operation selector for `.TBD`
// entries.
OpCodeId :: enum {
	None,
	First,
	Second,
}

LastBit :: struct{}
FourthBit :: struct{}

// Which bit of the first byte is the word/byte flag
// (LastBit = bit 0, FourthBit = bit 3), or None when there is no such flag.
WordSize :: union {
	None,
	LastBit,
	FourthBit,
}

// One row of the decoding table. A byte `b` matches the row when
// `b & mask == encoding`; the remaining fields describe which optional
// parts of the instruction are present.
InstructionInfo :: struct {
	mask: u8,
	encoding: u8,
	opname: OpName,
	desc: string,
	opcode_id: OpCodeId,
	word_size: WordSize,
	reg_info: Maybe(RegInfo),
	has_data: bool,
	has_address: bool,
	has_accumulator: bool,
	has_segreg: bool,
	has_flip: bool,
	has_sign_extension: bool,
	is_jump: bool,
}

// TODO: These two presets may be removable, since shift_offset rarely
// needs to be specified explicitly.
reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 }
reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 }

// Decoding table, scanned in order by try_find_instruction; the first
// matching row wins, so of two rows sharing an encoding (e.g. JE/JZ) only
// the first is ever returned.
instructions := [?]InstructionInfo {
	{ opname = .MOV, desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
	{ opname = .MOV, desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110, has_data = true, has_address = true, word_size = LastBit{}, },
	{ opname = .MOV, desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000, reg_info = reg_first_last, has_data = true, word_size = FourthBit{} },
	{ opname = .MOV, desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000, has_flip = true, word_size = LastBit{}, has_accumulator = true },
	{ opname = .MOV, desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010, has_flip = true, word_size = LastBit{}, has_accumulator = true },
	{ opname = .MOV, desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110, has_segreg = true, has_address = true, word_size = None{} },
	{ opname = .MOV, desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100, has_segreg = true, has_address = true, word_size = None{} },

	{ opname = .TBD, desc = "Reg/memory with register to either", mask = 0b11000100, encoding = 0b00000000, opcode_id = .First, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
	{ opname = .TBD, desc = "Immediate to register/memory", mask = 0b11111100, encoding = 0b10000000, opcode_id = .Second, has_data = true, has_address = true, word_size = LastBit{}, has_sign_extension = true },
	{ opname = .TBD, desc = "Immediate to accumulator", mask = 0b11000100, encoding = 0b00000100, word_size = LastBit{}, has_data = true },

	{ opname = .JE, desc = "Jump on equal/zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
	{ opname = .JZ, desc = "Jump on equal/zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
	{ opname = .JL, desc = "Jump on less/not greater or equal", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
	{ opname = .JNGE, desc = "Jump on less/not greater or equal", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
	{ opname = .JLE, desc = "Jump on less or equal/not greater", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
	{ opname = .JNG, desc = "Jump on less or equal/not greater", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
	{ opname = .JB, desc = "Jump on below/not above or equal", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
	{ opname = .JNAE, desc = "Jump on below/not above or equal", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
	{ opname = .JBE, desc = "Jump on below or equal/not above", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
	{ opname = .JNA, desc = "Jump on below or equal/not above", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
	{ opname = .JP, desc = "Jump on parity/parity even", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
	{ opname = .JPE, desc = "Jump on parity/parity even", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
	{ opname = .JO, desc = "Jump on overflow", mask = 0b11111111, encoding = 0b01110000, is_jump = true},
	{ opname = .JS, desc = "Jump on sign", mask = 0b11111111, encoding = 0b01111000, is_jump = true},
	{ opname = .JNE, desc = "Jump on not equal/not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
	{ opname = .JNZ, desc = "Jump on not equal/not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
	{ opname = .JNL, desc = "Jump on not less/greater or equal", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
	{ opname = .JGE, desc = "Jump on not less/greater or equal", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
	{ opname = .JNLE, desc = "Jump on not less or equal/greater", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
	{ opname = .JG, desc = "Jump on not less or equal/greater", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
	{ opname = .JNB, desc = "Jump on not below/above or equal", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
	{ opname = .JAE, desc = "Jump on not below/above or equal", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
	{ opname = .JNBE, desc = "Jump on not below or equal/above", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
	{ opname = .JA, desc = "Jump on not below or equal/above", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
	{ opname = .JNP, desc = "Jump on not parity/parity odd", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
	{ opname = .JPO, desc = "Jump on not parity/parity odd", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
	{ opname = .JNO, desc = "Jump on not overflow", mask = 0b11111111, encoding = 0b01110001, is_jump = true},
	{ opname = .JNS, desc = "Jump on not sign", mask = 0b11111111, encoding = 0b01111001, is_jump = true},
	{ opname = .LOOP, desc = "Loop CX times", mask = 0b11111111, encoding = 0b11100010, is_jump = true},
	{ opname = .LOOPZ, desc = "Loop while zero/equal", mask = 0b11111111, encoding = 0b11100001, is_jump = true},
	{ opname = .LOOPNZ, desc = "Loop while not zero/equal", mask = 0b11111111, encoding = 0b11100000, is_jump = true},
	{ opname = .JCXZ, desc = "Jump on CX zero", mask = 0b11111111, encoding = 0b11100011, is_jump = true},
}

// Empty marker variant shared by several unions below.
None :: struct {}

Disp8 :: i8
Disp16 :: i16

// Signed displacement attached to a memory operand, if any.
Displacement :: union {
	None,
	Disp8,
	Disp16
}

Value8 :: i8
Value16 :: i16
Data :: union {
	None,
	Value8,
	Value16
}

ModMemory :: struct {}
Mod8BitDisp :: i8
Mod16BitDisp :: i16
ModRegister :: struct {}
ModMode :: union {
	ModMemory,
	Mod8BitDisp,
	Mod16BitDisp,
	ModRegister,
}

RegisterId :: distinct u8
Immediate8 :: distinct i8
Immediate16 :: distinct i16
// Memory operand: `addr_id` is the 3-bit r/m code passed to
// calculate_effective_address.
MemoryAddr :: struct {
	addr_id: u8,
	displacement: Displacement
}
// 16-bit value that get_memory_type_string prints in brackets, i.e. as a
// direct memory address.
Accumulator :: distinct i16

// A decoded operand, rendered to text by get_memory_type_string.
OperandType :: union {
	RegisterId,
	Immediate8,
	Immediate16,
	MemoryAddr,
	Accumulator,
}

inst_map := make(map[u8]InstructionInfo)

// Column at which the trailing ";;" annotation of each output line starts.
RIGHT_ALIGN_AMOUNT := 35

// Returns the base/index register expression for a 3-bit r/m code.
// Values above 0b111 yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	val: string
	switch r_m {
	case 0b000:
		val = "bx + si"
	case 0b001:
		val = "bx + di"
	case 0b010:
		val = "bp + si"
	case 0b011:
		val = "bp + di"
	case 0b100:
		val = "si"
	case 0b101:
		val = "di"
	case 0b110:
		val = "bp"
	case 0b111:
		val = "bx"
	}
	return val
}

// Formats a memory operand as "[<effective address><displacement>]".
// The displacement part is produced by get_displacement_string, so a zero or
// absent displacement adds nothing. The returned string is heap-allocated.
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
	disp := get_displacement_string(memoryAddr.displacement)
	text := fmt.aprintf("[%s%s]", calculate_effective_address(memoryAddr.addr_id), disp)
	return text
}

// Renders any operand as assembly text.
// `is_word` selects the 16-bit or 8-bit register name for RegisterId
// operands; it is ignored by the other variants.
get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string {
	string_val: string
	switch val in mem_type {
	case RegisterId:
		string_val = is_word ? registers[val].fullname : registers[val].bytename
	case Immediate8:
		string_val = fmt.aprintf("%d", val)
	case Immediate16:
		string_val = fmt.aprintf("%d", val)
	case MemoryAddr:
		string_val = get_memory_string(val)
	case Accumulator:
		// A direct address is an unsigned 16-bit offset; reinterpret the
		// stored i16 so addresses >= 0x8000 are not printed as negative.
		string_val = fmt.aprintf("[%d]", u16(val))
	}
	return string_val
}

// Reads a little-endian 16-bit signed value from the first two bytes of
// `data`. `data` must hold at least two bytes.
get_i16 :: proc(data: []u8) -> i16 {
	return ((i16)(data[1]) << 8) | (i16)(data[0])
}

// Extracts the displacement that follows a mod/reg/rm byte.
// `data[0]` is the mod/reg/rm byte; the top two bits (mod) select an 8-bit
// (mod = 1) or 16-bit (mod = 2) displacement starting at `data[1]`.
// Returns the displacement (None{} for any other mod) and the number of
// displacement bytes consumed.
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	mod := (data[0] & 0b11000000) >> 6

	disp: Displacement = None{}
	amount: int
	switch mod {
	case 1:
		disp = (i8)(data[1])
		amount = 1
	case 2:
		disp = get_i16(data[1:])
		amount = 2
	}
	return disp, amount
}

// Formats a displacement as " + N" or " - N"; returns "" when the
// displacement is absent or zero.
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen before taking the absolute value: abs() on the most negative
	// i8/i16 overflows and would still be negative.
	amount: int
	#partial switch value in displacement {
	case i8:
		amount = int(value)
	case i16:
		amount = int(value)
	}
	if amount == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", amount > 0 ? "+" : "-", math.abs(amount))
}

// Looks up the first table row whose masked encoding matches `b`.
// The second result is false when no row matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for inst in instructions {
		// fmt.print(inst.encoding, ",")
		if inst.encoding == (b & inst.mask) {
			return inst, true
		}
	}
	return InstructionInfo{}, false
}

// Returns the mnemonic selected by bits 5..3 of `b`, using the 8086
// arithmetic/logic group encoding.
get_opname :: proc(b: u8) -> string {
	name: string
	// `&` and `>>` share a precedence level in Odin, so the parentheses only
	// make the existing mask-then-shift order explicit.
	switch (b & 0b00111000) >> 3 {
	case 0b000:
		name = "add"
	case 0b001:
		name = "or"
	case 0b010:
		name = "adc"
	case 0b011:
		name = "sbb"
	case 0b100:
		name = "and"
	case 0b101:
		name = "sub"
	case 0b110:
		name = "xor"
	case 0b111:
		name = "cmp"
	}
	return name
}

main :: proc() {
	// f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-39.bin")
	f,err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)

	data := make([]u8, 512)
	bytes_read, err2 := os.read(f, data)
	if err2 != nil {
		// ...
os.exit(1) } for inst in instructions { inst_map[inst.encoding] = inst } if false { os.exit(0) } // asdf :u16 = 0b00000011_11101000 // asdf2 :i16 = (i16)(asdf) // fmt.printfln("%d", asdf2) read_next := false src_dst := true idx := 0 added_label := false line_count := 0 instruction_builder := strings.builder_make() instruction_list := make([dynamic]string, 128) for idx < bytes_read { processed := 1 curr_byte := data[idx] instruction, ok := try_find_instruction(curr_byte) if !ok { txt := "unknown instruction" line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) instruction_list[line_count] = line line_count += 1 idx += 1 continue } lhs2: OperandType rhs2: OperandType is_word: bool is_immediate := false flip_dst := false has_memory_addr := false has_immediate := false rm: u8 mod: u8 reg: u8 if instruction.has_flip { flip_dst = curr_byte & 2 != 0 } switch val in instruction.word_size { case LastBit: is_word = curr_byte & 1 == 1 case FourthBit: is_word = curr_byte & 0b0000_1000 != 0 case None: } if reg_info, ok := instruction.reg_info.(RegInfo); ok { b := reg_info.in_first_byte ? 
data[idx] : data[idx+1] reg = (b >> reg_info.shift_offset) & 0b111 } data_idx := idx + 1 if instruction.has_address { mod = data[idx+1] >> 6 rm = data[idx+1] & 0b00000111 data_idx += 1 + ((int)(mod) % 3) processed += 1 + ((int)(mod) % 3) if mod == 0 { if rm == 0b110 { lhs2 = (Accumulator)(get_i16(data[idx+2:])) processed += 2 data_idx += 2 } else { lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} } } // NOTE: This also works when it's an Accumulator apparently has_memory_addr = true } else if mod == 1 { lhs2 = MemoryAddr{ addr_id = rm , displacement = (i8)(data[idx+2]) } has_memory_addr = true } else if mod == 2 { lhs2 = MemoryAddr{ addr_id = rm , displacement = get_i16(data[idx+2:]) } has_memory_addr = true } else if mod == 3 { lhs2 = (RegisterId)(registers[rm].code) } } else { lhs2 = (RegisterId)(registers[reg].code) } if instruction.has_data { word_signed := is_word if instruction.has_sign_extension { word_signed = is_word && curr_byte & 0b0000_0010 == 0 } processed += word_signed ? 2 : 1 rhs2 = (OperandType)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) has_immediate = true } else if instruction.has_accumulator { processed += is_word ? 2 : 1 rhs2 = (OperandType)(is_word ? (Accumulator)(get_i16(data[data_idx:])) : (Accumulator)(data[data_idx])) } else { rhs2 = (RegisterId)(reg) } if flip_dst { lhs2, rhs2 = rhs2, lhs2 } lhs := get_memory_type_string(lhs2, is_word) rhs := get_memory_type_string(rhs2, is_word) size_string := has_immediate && has_memory_addr ? is_word ? 
"word " : "byte " : "" full_inst: string opname: string if instruction.opname == .TBD { if instruction.opcode_id == .Second { opname = strings.to_lower(fmt.aprintf("%s", get_opname(data[idx+1]))) } else { opname = strings.to_lower(fmt.aprintf("%s", get_opname(curr_byte))) } } else { opname = strings.to_lower(fmt.aprintf("%s", instruction.opname)) } if instruction.is_jump { // NOTE: In order to mimic the label offset, you have to take the value you got and add two value := (i8)(data[idx+1]) + 2 full_inst = fmt.aprintf("%s $%s%d ; %d", strings.to_lower(opname), value >= 0 ? "+" : "", value, value - 2) processed += 1 } else { opname = strings.to_lower(opname) if opname == "mov" { full_inst = fmt.aprintf("%s %s, %s%s", opname, lhs, size_string, rhs) } else { full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, lhs, rhs) } } fmt.sbprintf(&instruction_builder, "%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") for i in 0..