diff --git a/.gitignore b/.gitignore index 1a0dc2f..46c75fa 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /asm_files/*.bin /8086_family_Users_Manual_1_.pdf /decoder8086 +/performance-aware diff --git a/decoder8086.odin b/decoder8086.odin index 132b1e1..6d1a0fc 100644 --- a/decoder8086.odin +++ b/decoder8086.odin @@ -17,57 +17,6 @@ Register :: struct { code: u8, } -OpName :: enum { - TBD, - MOV, - PUSH, - POP, - XCHG, - IN, - OUT, - XLAT, - LEA, - LDS, - LES, - ADD, - ADC, - SUB, - CMP, - JMP, - JNZ, - JNGE, - JE, - JZ, - JL, - JLE, - JNG, - JB, - JNAE, - JP, - JPE, - JNA, - JBE, - JO, - JS, - JNE, - JNL, - JGE, - JNLE, - JG, - JNB, - JAE, - JNBE, - JA, - JNP, - JPO, - JNO, - JNS, - LOOP, - LOOPZ, - LOOPNZ, - JCXZ, -} - registers := [8]Register { {fullname = "ax", bytename = "al", code = 0b000}, {fullname = "cx", bytename = "cl", code = 0b001}, @@ -86,17 +35,13 @@ segment_registers := [4]Register { {fullname = "ds", code = 0b011}, } +variable_port := registers[2] + RegInfo :: struct { in_first_byte: bool, shift_offset: u8, } -OpCodeId :: enum { - None, - First, - Second, -} - LastBit :: struct{} FourthBit :: struct{} Force :: struct{} @@ -108,6 +53,15 @@ WordSize :: union { Force, } +WordSize2 :: enum { + None, + LastBit, + FourthBit, + Always8, + Always16, + Unsigned8, +} + None :: struct {} Disp8 :: i8 @@ -145,15 +99,44 @@ MemoryAddr :: struct { addr_id: u8, displacement: Displacement } -Accumulator :: distinct i16 +DirectAddress :: distinct i16 SegmentRegister :: distinct i8 -OperandType :: union { +Jump :: distinct i8 +VariablePort :: struct {} +Repeat :: string +Operand :: union { + None, RegisterId, Immediate8, Immediate16, MemoryAddr, - Accumulator, + DirectAddress, SegmentRegister, + Jump, + VariablePort, + Repeat, +} + +OperandInfo :: enum { + None, + Register, + SegmentRegister, + RegisterMemory, + Immediate, + Accumulator, + DirectAddress, + Jump, + VariablePort, + ShiftRotate, + Repeat, +} + +RegisterEncodingBits :: enum { + None, + FirstByteLast3, + SecondByteMiddle3, + SecondByteLast3, + FirstByteMiddle3, } InstructionInfo :: struct { @@ -161,129 +144,17 @@ InstructionInfo :: struct { encoding: u8, opname: OpName, desc: string, - opcode_id: OpCodeId, - word_size: WordSize, - reg_info: Maybe(RegInfo), - has_data: bool, - has_address: bool, - uses_accumulator: bool, - has_segreg: bool, + src: OperandInfo, + dst: OperandInfo, + word_size: WordSize2, + reg_info: RegisterEncodingBits, has_flip: bool, has_sign_extension: bool, - is_jump: bool, - is_unary: bool, + check_second_encoding: bool, + consume_extra_bytes: int, + shift_rotate_flag: bool, } -// TODO: Maybe we can get rid of it since I don't have to specify the shift_offset, -// not like it changes a lot -reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 } -reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 } -reg_first_middle := RegInfo{ in_first_byte = true, shift_offset = 3 } - -instructions := [?]InstructionInfo { - { opname = .MOV, desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000, - reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true }, - { opname = .MOV, desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110, - has_data = true, has_address = true, word_size = LastBit{}, }, - { opname = .MOV, desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000, - reg_info = reg_first_last, has_data = true, word_size = FourthBit{} }, - { opname = .MOV, desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000, - has_flip = true, word_size = LastBit{}, uses_accumulator = true }, - { opname = .MOV, desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010, - has_flip = true, word_size = LastBit{}, uses_accumulator = true }, - { opname = .MOV, desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110, - has_segreg = true, has_address = true, word_size = None{} }, - { opname = .MOV, desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100, - has_segreg = true, has_address = true, word_size = None{} }, - { opname = .PUSH, desc = "", mask = 0b11111111, encoding = 0b11111111, - has_address = true, word_size = None{}, is_unary = true }, - { opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000, - reg_info = reg_first_last, word_size = Force{}, is_unary = true }, - { opname = .PUSH, desc = "", mask = 0b11100111, encoding = 0b00000110, - has_segreg = true, reg_info = reg_first_middle, word_size = Force{}, is_unary = true }, - { opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111, - has_address = true, word_size = None{}, is_unary = true }, - { opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000, - reg_info = reg_first_last, word_size = Force{}, is_unary = true }, - { opname = .POP, desc = "", mask = 0b11100111, encoding = 0b00000111, - has_segreg = true, reg_info = reg_first_middle, word_size = None{}, is_unary = true }, - { opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110, - reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true}, - { opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000, - reg_info = reg_first_last, uses_accumulator = true, word_size = Force{}, }, - { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11100100, - has_data = true, word_size = LastBit{}, }, - { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11101100, - word_size = LastBit{}, }, - { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11100110, - has_data = true, word_size = FourthBit{}, }, - { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11101110, - word_size = LastBit{}, }, - { opname = .XLAT, desc = "", mask = 0b11111111, encoding = 0b11010111, - }, - { opname = .LEA, desc = "", mask = 0b11111111, encoding = 0b10001101, - has_address = true }, - { opname = .LDS, desc = "", mask = 0b11111111, encoding = 0b11000101, - has_address = true }, - { opname = .LES, desc = "", mask = 0b11111111, encoding = 0b11000100, - has_address = true }, - { opname = .TBD, desc = "Reg/memory with register to either", mask = 0b11000100, encoding = 0b00000000, - opcode_id = .First, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true }, - { opname = .TBD, desc = "Immediate to register/memory", mask = 0b11111100, encoding = 0b10000000, - opcode_id = .Second, has_data = true, has_address = true, - word_size = LastBit{}, has_sign_extension = true }, - { opname = .TBD, desc = "Immediate to accumulator", mask = 0b11000100, encoding = 0b00000100, - word_size = LastBit{}, has_data = true }, - - { opname = .JE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true}, - { opname = .JZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true}, - - { opname = .JL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true}, - { opname = .JNGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true}, - - { opname = .JLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true}, - { opname = .JNG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true}, - - { opname = .JB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true}, - { opname = .JNAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true}, - - { opname = .JBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true}, - { opname = .JNA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true}, - - { opname = .JP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true}, - { opname = .JPE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true}, - - { opname = .JO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110000, is_jump = true}, - { opname = .JS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111000, is_jump = true}, - - { opname = .JNE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true}, - { opname = .JNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true}, - - { opname = .JNL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true}, - { opname = .JGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true}, - - { opname = .JNLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true}, - { opname = .JG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true}, - - { opname = .JNB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true}, - { opname = .JAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true}, - - { opname = .JNBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true}, - { opname = .JA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true}, - - { opname = .JNP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true}, - { opname = .JPO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true}, - - { opname = .JNO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110001, is_jump = true}, - - { opname = .JNS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111001, is_jump = true}, - { opname = .LOOP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100010, is_jump = true}, - { opname = .LOOPZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100001, is_jump = true}, - { opname = .LOOPNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100000, is_jump = true}, - { opname = .JCXZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100011, is_jump = true}, -} - -inst_map := make(map[u8]InstructionInfo) RIGHT_ALIGN_AMOUNT := 35 calculate_effective_address :: proc(r_m: u8) -> string { @@ -327,9 +198,11 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string { return text } -get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string { +get_operand_string :: proc(operand: Operand, is_word: bool) -> string { string_val: string - switch val in mem_type { + switch val in operand { + case None: + string_val = "" case RegisterId: string_val = is_word ? registers[val].fullname : registers[val].bytename case Immediate8: @@ -338,10 +211,16 @@ get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string { string_val = fmt.aprintf("%d", val) case MemoryAddr: string_val = get_memory_string(val) - case Accumulator: + case DirectAddress: string_val = fmt.aprintf("[%d]", val) case SegmentRegister: string_val = segment_registers[val].fullname + case Jump: + string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val) + case VariablePort: + string_val = variable_port.fullname + case Repeat: + string_val = (string)(val) } return string_val } @@ -380,9 +259,22 @@ get_displacement_string :: proc(displacement: Displacement) -> string { return disp } +get_repeat_op :: proc(data: u8) -> Repeat { + bits := (data & 0b1110) >> 1 + w := (data & 0b1) == 1 ? "w" : "b" + rep: string + switch bits { + case 0b010: rep = "movs" + case 0b011: rep = "cmps" + case 0b101: rep = "stos" + case 0b110: rep = "lods" + case 0b111: rep = "scas" + } + return Repeat(fmt.aprintf("%s%s", rep, w)) +} + try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { for inst in instructions { - // fmt.print(inst.encoding, ",") if inst.encoding == (b & inst.mask) { return inst, true } @@ -390,19 +282,149 @@ try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) { return InstructionInfo{}, false } -get_opname :: proc(b: u8) -> string { +get_opname :: proc(opname: OpName, data: []u8) -> string { name: string - switch b & 0b00111000 >> 3 { - case 0b000: name = "add" - case 0b010: name = "adc" - case 0b101: name = "sub" - case 0b111: name = "cmp" + if opname == .TBD2 { + switch data[1] & 0b00111000 >> 3 { + case 0b000: name = "inc" + case 0b001: name = "dec" + case 0b010: name = "call" + case 0b011: name = "call" + case 0b100: name = "jmp" + case 0b101: name = "jmp" + case 0b110: name = "push" + } + } else if opname == .TBD5 { + switch data[1] & 0b00111000 >> 3 { + case 0b000: name = "test" + case 0b001: name = "dec" + case 0b010: name = "not" + case 0b011: name = "neg" + case 0b100: name = "mul" + case 0b101: name = "imul" + case 0b110: name = "div" + case 0b111: name = "idiv" + } + } else if opname == .TBD6 { + switch data[1] & 0b00111000 >> 3 { + case 0b000: name = "rol" + case 0b001: name = "ror" + case 0b010: name = "rcl" + case 0b011: name = "rcr" + case 0b100: name = "shl" + case 0b101: name = "shr" + case 0b111: name = "sar" + } + } else { + bits: u8 + if opname == .TBD1 || opname == .TBD3 { + bits = data[0] & 0b00111000 >> 3 + } else { + bits = data[1] & 0b00111000 >> 3 + } + switch bits { + case 0b000: name = "add" + case 0b001: name = "or" + case 0b010: name = "adc" + case 0b011: name = "sbb" + case 0b100: name = "and" + case 0b101: name = "sub" + case 0b110: name = "xor" + case 0b111: name = "cmp" + } } return name } +parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool) -> Operand { + operand: Operand = None{} + switch opinfo { + case .None: + case .Register: + // rm: u8 = data[1] & 0b111 + // dst_opr = (RegisterId)(registers[rm].code) + reg: u8 + // Read the RegisterEncodingBits + switch inst.reg_info { + case .None: + // panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (RegisterId)(registers[reg].code) + case .SegmentRegister: + reg: u8 + switch inst.reg_info { + case .None: + // panic("Register is required but the encoded location is not provided") + case .FirstByteLast3: + reg = data[0] & 0b111 + case .FirstByteMiddle3: + reg = (data[0] >> 3) & 0b111 + case .SecondByteMiddle3: + reg = (data[1] >> 3) & 0b111 + case .SecondByteLast3: + reg = data[1] & 0b111 + } + operand = (SegmentRegister)(segment_registers[reg].code) + case .RegisterMemory: + mod := data[1] >> 6 + rm := data[1] & 0b111 + processed^ += 1 + op: Operand + if mod == 0 { + if rm == 0b110 { + op = (DirectAddress)(get_i16(data[2:])) + processed^ += 2 + } else { + op = MemoryAddr{ addr_id = rm , displacement = None{} } + } + } else if mod == 1 { + op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) } + processed^ += 1 + } else if mod == 2 { + op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) } + processed^ += 2 + } else if mod == 3 { + op = (RegisterId)(registers[rm].code) + } + operand = op + case .Immediate: + data_idx := processed^ + word_signed := word + if inst.has_sign_extension { + word_signed &&= data[0] & 0b0000_0010 == 0 + } + operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) + processed^ += word_signed ? 2 : 1 + case .Accumulator: + operand = (RegisterId)(registers[0].code) + case .DirectAddress: + operand = (DirectAddress)(get_i16(data[1:])) + processed^ += 2 + case .Jump: + processed^ += 1 + // NOTE: In order to mimic the label offset, you have to take the value you got and add two + operand = (Jump)((i8)(data[1]) + 2) + case .VariablePort: + operand = VariablePort{} + case .ShiftRotate: + v_flag := data[0] & 0b10 != 0 + operand = v_flag ? (RegisterId)(registers[1].code) : (Immediate8)(1) + case .Repeat: + operand = get_repeat_op(data[1]) + processed^ += 1 + } + return operand +} + main :: proc() { - // f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-39.bin") f,err := os.open(os.args[1]) if err != os.ERROR_NONE { fmt.eprintln("ERROR:", err) @@ -417,10 +439,6 @@ main :: proc() { os.exit(1) } - for inst in instructions { - inst_map[inst.encoding] = inst - } - if false { os.exit(0) } @@ -433,8 +451,10 @@ main :: proc() { idx := 0 added_label := false line_count := 0 - // last_opname: string + has_lock: bool + has_segment: bool last_opname: [3]byte + repeating_op_count := 0 instruction_builder := strings.builder_make() instruction_list := make([dynamic]string, 512) fmt.println("bits 16") @@ -442,150 +462,121 @@ main :: proc() { processed := 1 curr_byte := data[idx] - instruction, ok := try_find_instruction(curr_byte) + inst, ok := try_find_instruction(curr_byte) if !ok { txt := "unknown instruction" - line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) - instruction_list[line_count] = line - line_count += 1 + if print_at_end { + line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) + instruction_list[line_count] = line + line_count += 1 + } else { + fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) + } idx += 1 continue } - lhs2: OperandType - rhs2: OperandType - is_word: bool - is_immediate := false - flip_dst := false - has_memory_addr := false - has_immediate := false - rm: u8 - mod: u8 - reg: u8 + // Here we check if the instruction affects the next instruction + if inst.opname == .LOCK { + has_lock = true + idx += 1 + continue + } else if inst.opname == .SEGMENT { + has_segment = true + idx += 1 + continue + } + has_segment = false - if instruction.has_flip { - flip_dst = curr_byte & 2 != 0 + src_opr: Operand + dst_opr: Operand + + word: bool + flip: bool + op: Operand + + if inst.has_flip { + flip = curr_byte & 2 != 0 } - switch val in instruction.word_size { - case LastBit: is_word = curr_byte & 1 == 1 - case FourthBit: is_word = curr_byte & 0b0000_1000 != 0 - case Force: is_word = true - case None: + #partial switch inst.word_size { + case .LastBit: word = curr_byte & 1 == 1 + case .FourthBit: word = curr_byte & 0b0000_1000 != 0 + case .Always16: word = true } - if reg_info, ok := instruction.reg_info.(RegInfo); ok { - b := reg_info.in_first_byte ? data[idx] : data[idx+1] - reg = (b >> reg_info.shift_offset) & 0b111 - } - - - data_idx := idx + 1 - - if instruction.has_address { - mod = data[idx+1] >> 6 - rm = data[idx+1] & 0b00000111 - - data_idx += 1 + ((int)(mod) % 3) - processed += 1 + ((int)(mod) % 3) - - if mod == 0 { - if rm == 0b110 { - lhs2 = (Accumulator)(get_i16(data[idx+2:])) - processed += 2 - data_idx += 2 - } else { - lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} } - } - // NOTE: This also works when it's an Accumulator apparently - has_memory_addr = true - } else if mod == 1 { - lhs2 = MemoryAddr{ addr_id = rm , displacement = (i8)(data[idx+2]) } - has_memory_addr = true - } else if mod == 2 { - lhs2 = MemoryAddr{ addr_id = rm , displacement = get_i16(data[idx+2:]) } - has_memory_addr = true - } else if mod == 3 { - lhs2 = (RegisterId)(registers[rm].code) - } - } else if instruction.has_segreg { - lhs2 = (SegmentRegister)(segment_registers[reg].code) - } else if instruction.uses_accumulator { - lhs2 = (RegisterId)(registers[0].code) - } else { - lhs2 = (RegisterId)(registers[reg].code) - } - if instruction.has_data { - word_signed := is_word - if instruction.has_sign_extension { - word_signed = is_word && curr_byte & 0b0000_0010 == 0 - } - processed += word_signed ? 2 : 1 - rhs2 = (OperandType)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx])) - has_immediate = true - } else if instruction.uses_accumulator { - if _, ok := instruction.word_size.(LastBit); ok { - processed += is_word ? 2 : 1 - rhs2 = (OperandType)(is_word ? (Accumulator)(get_i16(data[data_idx:])) : (Accumulator)(data[data_idx])) - } else { - rhs2 = (RegisterId)(reg) - } - } else { - rhs2 = (RegisterId)(reg) - } - - if flip_dst { - lhs2, rhs2 = rhs2, lhs2 - } - - lhs := get_memory_type_string(lhs2, is_word) - rhs := get_memory_type_string(rhs2, is_word) - size_string := has_immediate && has_memory_addr ? is_word ? "word " : "byte " : "" - full_inst: string opname: string - if instruction.opname == .TBD { - if instruction.opcode_id == .Second { - opname = strings.to_lower(fmt.aprintf("%s", get_opname(data[idx+1]))) - } else { - opname = strings.to_lower(fmt.aprintf("%s", get_opname(curr_byte))) + // TODO: Figure out a way to do this in the string builder + if inst.check_second_encoding { + opname = strings.to_lower(fmt.aprintf("%s", get_opname(inst.opname, data[idx:]))) + // NOTE: This is a special case because it matches the bit pattern of .TBD5, + // but the instruction itself is different + if opname == "not" { + inst = not_inst } } else { - opname = strings.to_lower(fmt.aprintf("%s", instruction.opname)) + opname = strings.to_lower(fmt.aprintf("%s", inst.opname)) } - if instruction.is_jump { - // NOTE: In order to mimic the label offset, you have to take the value you got and add two - value := (i8)(data[idx+1]) + 2 - full_inst = fmt.aprintf("%s $%s%d ; %d", strings.to_lower(opname), value >= 0 ? "+" : "", value, value - 2) - processed += 1 - } else if instruction.is_unary { - if instruction.has_address { - size_string = "word " + + dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word) + src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word) + + // TODO: This is ugly as hell + _,ok_1 := src_opr.(Immediate8) + _,ok_2 := src_opr.(Immediate16) + _,ok_3 := dst_opr.(MemoryAddr); + _,ok_4 := dst_opr.(DirectAddress); + shiftrot := inst.src == .ShiftRotate + size_string := "" + if ((ok_1 || ok_2) && (ok_3 || ok_4)) || ((ok_3 || ok_4) && shiftrot) { + size_string = word ? "word " : "byte " + } + + if flip { + src_opr, dst_opr = dst_opr, src_opr + } + + dst_str := get_operand_string(dst_opr, word) + src_str := get_operand_string(src_opr, word) + full_inst: string + if dst_str == "" { + _,ok_1 := src_opr.(MemoryAddr); + _,ok_2 := src_opr.(DirectAddress); + if (ok_1 || ok_2) && inst.word_size != .Always16 { + size_string = word ? "word " : "byte " } - full_inst = fmt.aprintf("%s %s%s", opname, size_string, lhs) + full_inst = fmt.aprintf("%s %s%s", opname, size_string, src_str) } else { - opname = strings.to_lower(opname) + // NOTE: I don't know why this is the case, but only the move has the word/byte + // keyword next to the immediate, but other instructions have it on the memory address if opname == "mov" { - full_inst = fmt.aprintf("%s %s, %s%s", opname, lhs, size_string, rhs) + full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str) } else { - full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, lhs, rhs) + full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str) } } + + processed += inst.consume_extra_bytes + + // fmt.sbprintf(&instruction_builder, "%s%s%s %*[2]s", lock_string, seg_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") fmt.sbprintf(&instruction_builder, "%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;") for i in 0.. 1 { fmt.println() } - copy(last_opname[:], op[0:3]) - fmt.println(op) + repeating_op_count = 0 + } else { + repeating_op_count += 1 } + copy(last_opname[:], op2[0:3]) + fmt.println(op2) + idx += processed - line_count += 1 strings.builder_reset(&instruction_builder) } if print_at_end { diff --git a/instructions.odin b/instructions.odin new file mode 100644 index 0000000..762134b --- /dev/null +++ b/instructions.odin @@ -0,0 +1,277 @@ +package decoder_8086 + +OpName :: enum { + TBD1, + TBD2, + TBD3, + TBD4, + TBD5, + TBD6, + MOV, + PUSH, + POP, + XCHG, + IN, + OUT, + XLAT, + LEA, + LDS, + LES, + LAHF, + SAHF, + PUSHF, + POPF, + ADD, + ADC, + INC, + AAA, + DAA, + SUB, + SBB, + DEC, + NEG, + CMP, + AAS, + DAS, + AAM, + DIV, + IDIV, + AAD, + CBW, + CWD, + NOT, + TEST, + REP, + RET, + INT, + INT3, + INTO, + IRET, + CLC, + CMC, + STC, + CLD, + STD, + CLI, + STI, + HLT, + WAIT, + ESC, + LOCK, + SEGMENT, + JMP, + JNZ, + JNGE, + JE, + JZ, + JL, + JLE, + JNG, + JB, + JNAE, + JP, + JPE, + JNA, + JBE, + JO, + JS, + JNE, + JNL, + JGE, + JNLE, + JG, + JNB, + JAE, + JNBE, + JA, + JNP, + JPO, + JNO, + JNS, + LOOP, + LOOPZ, + LOOPNZ, + JCXZ, +} + +not_inst := InstructionInfo { + opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110, + src = .RegisterMemory, word_size = .LastBit +} + + +instructions := [?]InstructionInfo { + { opname = .TBD1, desc = "Immediate to accumulator", + mask = 0b11000110, encoding = 0b00000100, check_second_encoding = true, + dst = .Accumulator, src = .Immediate, + word_size = .LastBit, }, + { opname = .TBD2, desc = "", check_second_encoding = true, + mask = 0b11111110, encoding = 0b11111110, + src = .RegisterMemory, + word_size = .LastBit, }, + { opname = .TBD3, desc = "", check_second_encoding = true, + mask = 0b11000100, encoding = 0b00000000, + dst = .RegisterMemory, src = .Register, + word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true }, + { opname = .TBD4, desc = "", check_second_encoding = true, + mask = 0b11111100, encoding = 0b10000000, + dst = .RegisterMemory, src = .Immediate, + word_size = .LastBit, has_sign_extension = true }, + { opname = .TBD5, desc = "", check_second_encoding = true, + mask = 0b11111110, encoding = 0b11110110, + dst = .Immediate, src = .RegisterMemory, word_size = .LastBit, }, + { opname = .TBD6, desc = "", check_second_encoding = true, + mask = 0b11111100, encoding = 0b11010000, + dst = .RegisterMemory, src = .ShiftRotate, word_size = .LastBit, }, + { opname = .MOV, desc = "Register/memory to/from register", + mask = 0b11111100, encoding = 0b10001000, + dst = .RegisterMemory, src = .Register, + word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true }, + { opname = .MOV, desc = "Immediate to register/memory", + mask = 0b11111110, encoding = 0b11000110, + dst = .RegisterMemory, src = .Immediate, + word_size = .LastBit, }, + { opname = .MOV, desc = "Immediate to register", + mask = 0b11110000, encoding = 0b10110000, + dst = .Register, src = .Immediate, + word_size = .FourthBit, reg_info = .FirstByteLast3 }, + { opname = .MOV, desc = "Memory to accumulator", + mask = 0b11111110, encoding = 0b10100000, + dst = .Accumulator, src = .DirectAddress, + word_size = .LastBit, }, + { opname = .MOV, desc = "Accumulator to memory", + mask = 0b11111110, encoding = 0b10100010, + dst = .DirectAddress, src = .Accumulator, + word_size = .LastBit, }, + { opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000, + src = .Register, reg_info = .FirstByteLast3, + word_size = .Always16, }, + { opname = .PUSH, desc = "", mask = 0b11100111, encoding = 0b00000110, + src = .SegmentRegister, reg_info = .FirstByteMiddle3, + word_size = .Always16, }, + { opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111, + src = .RegisterMemory,}, + { opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000, + src = .Register, reg_info = .FirstByteLast3, + word_size = .Always16, }, + { opname = .POP, desc = "", mask = 0b11100111, encoding = 0b00000111, + src = .SegmentRegister, reg_info = .FirstByteMiddle3, + word_size = .Always16, }, + { opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110, + dst = .RegisterMemory, src = .Register, + reg_info = .SecondByteMiddle3, has_flip = true }, + { opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000, + dst = .Accumulator, src = .Register, + reg_info = .FirstByteLast3, has_flip = true, word_size = .Always16 }, + { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11100100, + dst = .Accumulator, src = .Immediate, + // TODO: Everything works just fine, but the problem here is that if you want it to + // show up as an unsigned int, then we have to change the types because the number + // 200, for instance, will show up as a negative, we would have to create an unsigned + // variant of the Immediate value. Maybe we can have the value and the sign as a struct + word_size = .Unsigned8, }, + { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11101100, + dst = .Accumulator, src = .VariablePort, + word_size = .LastBit, }, + { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11100110, + dst = .Immediate, src = .Accumulator, + word_size = .Unsigned8, }, + { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11101110, + dst = .VariablePort, src = .Accumulator, + word_size = .LastBit, }, + { opname = .XLAT, desc = "", mask = 0b11111111, encoding = 0b11010111,}, + { opname = .LEA, desc = "", mask = 0b11111111, encoding = 0b10001101, + dst = .Register, src = .RegisterMemory, + reg_info = .SecondByteMiddle3, word_size = .Always16 }, + { opname = .LDS, desc = "", mask = 0b11111111, encoding = 0b11000101, + dst = .Register, src = .RegisterMemory, + reg_info = .SecondByteMiddle3, word_size = .Always16 }, + { opname = .LES, desc = "", mask = 0b11111111, encoding = 0b11000100, + dst = .Register, src = .RegisterMemory, + reg_info = .SecondByteMiddle3, word_size = .Always16 }, + { opname = .LAHF, desc = "", mask = 0b11111111, encoding = 0b10011111,}, + { opname = .SAHF, desc = "", mask = 0b11111111, encoding = 0b10011110,}, + { opname = .PUSHF, desc = "", mask = 0b11111111, encoding = 0b10011100,}, + { opname = .POPF, desc = "", mask = 0b11111111, encoding = 0b10011101,}, + { opname = .INC, desc = "", mask = 0b11111000, encoding = 0b01000000, + src = .Register, reg_info = .FirstByteLast3, word_size = .Always16 }, + { opname = .AAA, desc = "", mask = 0b11111111, encoding = 0b00110111,}, + { opname = .DAA, desc = "", mask = 0b11111111, encoding = 0b00100111,}, + { opname = .DEC, desc = "", mask = 0b11111000, encoding = 0b01001000, + src = .Register, reg_info = .FirstByteLast3, word_size = .Always16 }, + { opname = .AAS, desc = "", mask = 0b11111111, encoding = 0b00111111,}, + { opname = .DAS, desc = "", mask = 0b11111111, encoding = 0b00101111,}, + { opname = .AAM, desc = "", mask = 0b11111111, encoding = 0b11010100,}, + { opname = .AAD, desc = "", mask = 0b11111111, encoding = 0b11010101, consume_extra_bytes = 1 }, + { opname = .CBW, desc = "", mask = 0b11111111, encoding = 0b10011000,}, + { opname = .CWD, desc = "", mask = 0b11111111, encoding = 0b10011001,}, + { opname = .TEST, desc = "", mask = 0b11111100, encoding = 0b10000100, + dst = .RegisterMemory, src = .Register, + word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true }, + { opname = .REP, desc = "", mask = 0b11111110, encoding = 0b11110010, src = .Repeat }, + { opname = .RET, desc = "", mask = 0b11111111, encoding = 0b11000011,}, + { opname = .RET, src = .Immediate, word_size = .Always16, + desc = "", mask = 0b11111111, encoding = 0b11000010,}, + { opname = .INT, src = .Immediate, desc = "", mask = 0b11111111, encoding = 0b11001101,}, + { opname = .INT3, desc = "", mask = 0b11111111, encoding = 0b11001100,}, + { opname = .INTO, desc = "", mask = 0b11111111, encoding = 0b11001110,}, + { opname = .IRET, desc = "", mask = 0b11111111, encoding = 0b11001111,}, + { opname = .CLC, desc = "", mask = 0b11111111, encoding = 0b11111000,}, + { opname = .CMC, desc = "", mask = 0b11111111, encoding = 0b11110101,}, + { opname = .STC, desc = "", mask = 0b11111111, encoding = 0b11111001,}, + { opname = .CLD, desc = "", mask = 0b11111111, encoding = 0b11111100,}, + { opname = .STD, desc = "", mask = 0b11111111, encoding = 0b11111101,}, + { opname = .CLI, desc = "", mask = 0b11111111, encoding = 0b11111010,}, + { opname = .STI, desc = "", mask = 0b11111111, encoding = 0b11111011,}, + { opname = .HLT, desc = "", mask = 0b11111111, encoding = 0b11110100,}, + { opname = .WAIT, desc = "", mask = 0b11111111, encoding = 0b10011011,}, + // { opname = .ESC, desc = "", mask = 0b11111111, encoding = 0b11111000, dst = }, + { opname = .LOCK, desc = "", mask = 0b11111111, encoding = 0b11110000,}, + { opname = .SEGMENT, desc = "", mask = 0b11100111, encoding = 0b00100110,}, + { opname = .JE, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on not zero", }, + { opname = .JZ, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JL, mask = 0b11111111, encoding = 0b01111100, src = .Jump, desc = "Jump on not zero", }, + { opname = .JNGE, mask = 0b11111111, encoding = 0b01111100, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JLE, mask = 0b11111111, encoding = 0b01111110, src = .Jump, desc = "Jump on not zero", }, + { opname = .JNG, mask = 0b11111111, encoding = 0b01111110, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JB, mask = 0b11111111, encoding = 0b01110010, src = .Jump, desc = "Jump on not zero", }, + { opname = .JNAE, mask = 0b11111111, encoding = 0b01110010, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JBE, mask = 0b11111111, encoding = 0b01110110, src = .Jump, desc = "Jump on not zero", }, + { opname = .JNA, mask = 0b11111111, encoding = 0b01110110, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JP, mask = 0b11111111, encoding = 0b01111010, src = .Jump, desc = "Jump on not zero", }, + { opname = .JPE, mask = 0b11111111, encoding = 0b01111010, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JO, mask = 0b11111111, encoding = 0b01110000, src = .Jump, desc = "Jump on not zero", }, + { opname = .JS, mask = 0b11111111, encoding = 0b01111000, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNE, mask = 0b11111111, encoding = 0b01110101, src = .Jump, desc = "Jump on not zero", }, + { opname = .JNZ, mask = 0b11111111, encoding = 0b01110101, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNL, mask = 0b11111111, encoding = 0b01111101, src = .Jump, desc = "Jump on not zero", }, + { opname = .JGE, mask = 0b11111111, encoding = 0b01111101, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNLE, mask = 0b11111111, encoding = 0b01111111, src = .Jump, desc = "Jump on not zero", }, + { opname = .JG, mask = 0b11111111, encoding = 0b01111111, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNB, mask = 0b11111111, encoding = 0b01110011, src = .Jump, desc = "Jump on not zero", }, + { opname = .JAE, mask = 0b11111111, encoding = 0b01110011, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNBE, mask = 0b11111111, encoding = 0b01110111, src = .Jump, desc = "Jump on not zero", }, + { opname = .JA, mask = 0b11111111, encoding = 0b01110111, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNP, mask = 0b11111111, encoding = 0b01111011, src = .Jump, desc = "Jump on not zero", }, + { opname = .JPO, mask = 0b11111111, encoding = 0b01111011, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNO, mask = 0b11111111, encoding = 0b01110001, src = .Jump, desc = "Jump on not zero", }, + + { opname = .JNS, mask = 0b11111111, encoding = 0b01111001, src = .Jump, desc = "Jump on not zero", }, + { opname = .LOOP, mask = 0b11111111, encoding = 0b11100010, src = .Jump, desc = "Jump on not zero", }, + { opname = .LOOPZ, mask = 0b11111111, encoding = 0b11100001, src = .Jump, desc = "Jump on not zero", }, + { opname = .LOOPNZ, mask = 0b11111111, encoding = 0b11100000, src = .Jump, desc = "Jump on not zero", }, + { opname = .JCXZ, mask = 0b11111111, encoding = 0b11100011, src = .Jump, desc = "Jump on not zero", }, +}