Convert TBD instruction op names to the actual enum variants, rather than strings

2025-03-19 21:09:42 +07:00 · 2025-03-19 21:09:42 +07:00 · 863ccfc583
commit 863ccfc583
parent fd8f696627
4 changed files with 76 additions and 10 deletions
--- a/decoding.odin
+++ b/decoding.odin
@ -4,6 +4,64 @@ import "core:fmt"
 import "core:math"
 import "core:strings"

+// Resolve a placeholder TBD* opname into the concrete Op chosen by bits 5..3 of an encoding
+// byte (any other opname passes through); the bool flags the intersegment CALL/JMP forms.
+get_op :: proc(inst: Instruction) -> (Op, bool) {
+    SELECTOR :: 0b00111000 // bits 5..3 of the byte that picks the operation
+    op: Op
+    interseg: bool
+    #partial switch inst.opname {
+    case .TBD2:
+        switch (inst.raw_data[1] & SELECTOR) >> 3 {
+        case 0b000: op = .INC
+        case 0b001: op = .DEC
+        case 0b010: op = .CALL
+        // TODO: We really have to fix this because we shouldn't be figuring out if this
+        // is an intersegment here
+        case 0b011: op = .CALL; interseg = true
+        case 0b100: op = .JMP
+        case 0b101: op = .JMP; interseg = true
+        case 0b110: op = .PUSH
+        }
+    case .TBD5:
+        // Selector 0b001 has no defined operation in this group (it is not DEC): op stays zero
+        switch (inst.raw_data[1] & SELECTOR) >> 3 {
+        case 0b000: op = .TEST
+        case 0b010: op = .NOT
+        case 0b011: op = .NEG
+        case 0b100: op = .MUL
+        case 0b101: op = .IMUL
+        case 0b110: op = .DIV
+        case 0b111: op = .IDIV
+        }
+    case .TBD6:
+        switch (inst.raw_data[1] & SELECTOR) >> 3 {
+        case 0b000: op = .ROL
+        case 0b001: op = .ROR
+        case 0b010: op = .RCL
+        case 0b011: op = .RCR
+        case 0b100: op = .SHL
+        case 0b101: op = .SHR
+        case 0b111: op = .SAR
+        }
+    case .TBD1, .TBD3, .TBD4:
+        // TBD4 reads the selector from raw_data[1]; TBD1 and TBD3 read it from raw_data[0]
+        index := 1 if inst.opname == .TBD4 else 0
+        switch (inst.raw_data[index] & SELECTOR) >> 3 {
+        case 0b000: op = .ADD
+        case 0b001: op = .OR
+        case 0b010: op = .ADC
+        case 0b011: op = .SBB
+        case 0b100: op = .AND
+        case 0b101: op = .SUB
+        case 0b110: op = .XOR
+        case 0b111: op = .CMP
+        }
+    case:
+        op = inst.opname
+    }
+    return op, interseg
+}
 parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
    operand: Operand = None{}
    switch opinfo {
@ -163,7 +221,6 @@ decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read:

        processed += inst.consume_extra_bytes

-        instruction.opname = inst.opname
        instruction.src = src_opr
        instruction.dst = dst_opr
        instruction.is_word = word
@ -173,6 +230,8 @@ decode_data :: proc(inst_list: ^[dynamic]Instruction, data: []u8, bytes_to_read:
        instruction.info = inst
        instruction.has_lock = has_lock
        instruction.has_segment = has_segment
+        instruction.opname = inst.opname
+        instruction.opname,instruction.indirect_intersegment = get_op(instruction)

        // fmt.println(parsed_inst)
        append(inst_list, instruction)
--- a/instructions.odin
+++ b/instructions.odin
@ -31,6 +31,8 @@ Op :: enum {
    SBB,
    DEC,
    NEG,
+    MUL,
+    IMUL,
    CMP,
    AAS,
    DAS,
@ -40,8 +42,18 @@ Op :: enum {
    AAD,
    CBW,
    CWD,
+    ROL,
+    ROR,
+    RCL,
+    RCR,
+    SHL,
+    SHR,
+    SAR,
    NOT,
    TEST,
+    OR,
+    AND,
+    XOR,
    REP,
    RET,
    RETF,
--- a/printing.odin
+++ b/printing.odin
@ -194,16 +194,12 @@ get_instruction_string :: proc(inst_info: InstructionInfo, instruction: Instruct
    src_str := get_operand_string(inst.src, inst.has_segment)
    opname: string
    is_interseg: bool
-    if inst_info.check_second_encoding {
-        opname,is_interseg = get_opname(inst)
-    } else {
-        // TODO: Do the RTTI thing here with reflection
-        opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
-    }
+    // TODO: Do the RTTI thing here with reflection
+    opname = strings.to_lower(fmt.aprintf("%s", inst.opname))

    if dst_str == "" {
        interseg_string: string
-        if is_interseg {
+        if instruction.indirect_intersegment {
            interseg_string = " far"
        }
        fmt.sbprintf(&instruction_builder, "%s%s %s%s", opname, interseg_string, size_string, src_str)
--- a/types.odin
+++ b/types.odin
@ -20,7 +20,6 @@ WordSize :: enum {
 }

 None :: struct {}
-
 Disp8 :: i8
 Disp16 :: i16
 Displacement :: union {
@ -118,7 +117,7 @@ Instruction :: struct {
    dst: Operand,
    info: InstructionInfo,
    is_word: bool,
-    // indirect_intersegment: bool,
+    indirect_intersegment: bool,
    // TODO: This is trickier than I thought, it's more than just the one instruction
    // that uses it
    has_segment: Maybe(Register),