performance-aware/decoder8086.odin

578 lines
19 KiB
Odin

package decoder_8086
import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"
// Register describes one entry of a register table: its textual names, its
// current value and its 3-bit encoding.
Register :: struct {
	// Name printed when the register is used as a 16-bit operand.
	fullname: string,
	// Name printed when the register is used as an 8-bit operand.
	bytename: string,
	// `full` shares its storage with the `low`/`high` byte pair (raw union),
	// so writing one view changes the other.
	// NOTE(review): `low` aliasing the least-significant byte assumes a
	// little-endian host.
	value:    struct #raw_union {
		using _: struct {
			low, high: byte,
		},
		full:    u16,
	},
	// Encoding of the register inside an instruction byte.
	code:     u8,
}
// OpName lists every mnemonic the decoder knows about. The declaration order
// determines the numeric values, so members must not be reordered.
OpName :: enum {
	// Mnemonic is not fixed by the table entry; it is resolved later from the
	// opcode bits (see get_opname).
	TBD,

	// Data transfer.
	MOV, PUSH, POP, XCHG,

	// Arithmetic.
	ADD, SUB, CMP,

	// Jumps (several members are aliases of the same encoding).
	JMP,
	JNZ, JNGE, JE, JZ, JL, JLE, JNG, JB, JNAE, JP, JPE, JNA, JBE, JO, JS,
	JNE, JNL, JGE, JNLE, JG, JNB, JAE, JNBE, JA, JNP, JPO, JNO, JNS,

	// Loops and CX test.
	LOOP, LOOPZ, LOOPNZ, JCXZ,
}
// General-purpose register table, indexed by the 3-bit register code.
// Each row carries the word-sized name and the byte-sized name that share the
// same code (e.g. code 0b100 is "sp" as a word and "ah" as a byte).
registers := [8]Register {
	{code = 0b000, fullname = "ax", bytename = "al"},
	{code = 0b001, fullname = "cx", bytename = "cl"},
	{code = 0b010, fullname = "dx", bytename = "dl"},
	{code = 0b011, fullname = "bx", bytename = "bl"},
	{code = 0b100, fullname = "sp", bytename = "ah"},
	{code = 0b101, fullname = "bp", bytename = "ch"},
	{code = 0b110, fullname = "si", bytename = "dh"},
	{code = 0b111, fullname = "di", bytename = "bh"},
}
// Segment register table, indexed by the 2-bit segment register code.
// Segment registers have no byte-sized name, so `bytename` is left empty.
segment_registers := [4]Register {
	{code = 0b000, fullname = "es"},
	{code = 0b001, fullname = "cs"},
	{code = 0b010, fullname = "ss"},
	{code = 0b011, fullname = "ds"},
}
// RegInfo tells the decoder where an instruction's register field lives.
RegInfo :: struct {
	// true: the field is in the opcode byte; false: it is in the byte that
	// follows the opcode.
	in_first_byte: bool,
	// Number of bits the byte is shifted right before the low three bits are
	// taken as the register code.
	shift_offset:  u8,
}
// OpCodeId selects which byte carries the bits that name a `.TBD` mnemonic.
// In main, `.Second` reads them from the byte after the opcode; every other
// value reads them from the opcode byte itself.
OpCodeId :: enum {
	None,
	First,
	Second,
}
// Marker types describing how an instruction encodes its operand width.

// The width flag is bit 0 of the opcode byte.
LastBit :: struct {}

// The width flag is bit 3 of the opcode byte.
FourthBit :: struct {}

// The instruction always operates on words.
Force :: struct {}

// WordSize is the set of ways the operand width can be determined. `None`
// leaves the width flag untouched (it stays false in main).
WordSize :: union {
	None,
	LastBit,
	FourthBit,
	Force,
}
// None is the shared "nothing here" variant used by several unions below.
None :: struct {}

// Signed displacement widths.
Disp8 :: i8
Disp16 :: i16

// Displacement is the optional signed offset attached to a memory operand.
Displacement :: union {
	None,
	Disp8,
	Disp16,
}
// Signed immediate widths.
Value8 :: i8
Value16 :: i16

// Data is an optional signed immediate value of either width.
Data :: union {
	None,
	Value8,
	Value16,
}
// Variants of the addressing mode.
// NOTE(review): the names suggest these mirror the four values of the MOD
// field; no code visible here constructs them — confirm before relying on it.
ModMemory :: struct {}
Mod8BitDisp :: i8
Mod16BitDisp :: i16
ModRegister :: struct {}

// ModMode groups the addressing-mode variants into one tagged value.
ModMode :: union {
	ModMemory,
	Mod8BitDisp,
	Mod16BitDisp,
	ModRegister,
}
// Index into `registers`.
RegisterId :: distinct u8

// Signed immediate operands.
Immediate8 :: distinct i8
Immediate16 :: distinct i16

// Memory operand: an effective-address selector (see
// calculate_effective_address) plus an optional displacement.
MemoryAddr :: struct {
	addr_id:      u8,
	displacement: Displacement,
}

// A 16-bit value that get_memory_type_string prints in brackets, i.e. as a
// direct memory address.
Accumulator :: distinct i16

// Index into `segment_registers`.
SegmentRegister :: distinct i8

// OperandType is any operand an instruction can take.
OperandType :: union {
	RegisterId,
	Immediate8,
	Immediate16,
	MemoryAddr,
	Accumulator,
	SegmentRegister,
}
// InstructionInfo is one row of the decode table. An opcode byte `b` matches
// a row when `b & mask == encoding`; the remaining fields tell main how to
// decode the rest of the instruction.
InstructionInfo :: struct {
	// --- matching ---
	mask:               u8,
	encoding:           u8,

	// --- naming ---
	opname:             OpName, // .TBD means the mnemonic comes from opcode bits
	desc:               string, // human-readable description
	opcode_id:          OpCodeId, // which byte names a .TBD mnemonic

	// --- operand layout ---
	word_size:          WordSize, // how the operand width is encoded
	reg_info:           Maybe(RegInfo), // where the register field is, if any
	has_data:           bool, // an immediate follows
	has_address:        bool, // a mod/rm byte follows the opcode
	uses_accumulator:   bool, // one operand is the accumulator
	has_segreg:         bool, // a segment register is involved
	has_flip:           bool, // bit 1 of the opcode swaps the operands
	has_sign_extension: bool, // bit 1 of the opcode selects an 8-bit immediate for a word operation
	is_jump:            bool, // the operand is a signed 8-bit jump offset
	is_unary:           bool, // only one operand is printed
}
// Shared register-field layouts referenced by the decode table.
// TODO: these may be removable, since the shift offset rarely needs to be
// spelled out per instruction.

// Register code in the low three bits of the opcode byte.
reg_first_last := RegInfo {
	in_first_byte = true,
	shift_offset  = 0,
}

// Register code in bits 3..5 of the byte after the opcode.
reg_second_middle := RegInfo {
	in_first_byte = false,
	shift_offset  = 3,
}

// Register code in bits 3..5 of the opcode byte.
reg_first_middle := RegInfo {
	in_first_byte = true,
	shift_offset  = 3,
}
// Decode table. try_find_instruction scans it top to bottom and returns the
// first row whose `encoding` equals the opcode byte masked with `mask`, so
// more specific rows must precede more general ones.
//
// Several conditional jumps are listed twice under alias mnemonics with the
// same encoding (e.g. JE/JZ); because of the first-match rule only the first
// row of each pair is ever returned by the table scan.
instructions := [?]InstructionInfo {
	// --- MOV ---
	{ opname = .MOV, desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000,
		reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
	{ opname = .MOV, desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110,
		has_data = true, has_address = true, word_size = LastBit{}, },
	{ opname = .MOV, desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000,
		reg_info = reg_first_last, has_data = true, word_size = FourthBit{} },
	{ opname = .MOV, desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000,
		has_flip = true, word_size = LastBit{}, uses_accumulator = true },
	{ opname = .MOV, desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010,
		has_flip = true, word_size = LastBit{}, uses_accumulator = true },
	{ opname = .MOV, desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110,
		has_segreg = true, has_address = true, word_size = None{} },
	{ opname = .MOV, desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100,
		has_segreg = true, has_address = true, word_size = None{} },

	// --- PUSH ---
	{ opname = .PUSH, desc = "", mask = 0b11111111, encoding = 0b11111111,
		has_address = true, word_size = None{}, is_unary = true },
	{ opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000,
		reg_info = reg_first_last, word_size = Force{}, is_unary = true },
	{ opname = .PUSH, desc = "", mask = 0b11100111, encoding = 0b00000110,
		has_segreg = true, reg_info = reg_first_middle, word_size = Force{}, is_unary = true },

	// --- POP ---
	{ opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111,
		has_address = true, word_size = None{}, is_unary = true },
	{ opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000,
		reg_info = reg_first_last, word_size = Force{}, is_unary = true },
	{ opname = .POP, desc = "", mask = 0b11100111, encoding = 0b00000111,
		has_segreg = true, reg_info = reg_first_middle, word_size = None{}, is_unary = true },

	// --- XCHG ---
	{ opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110,
		reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true},
	{ opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000,
		reg_info = reg_first_last, uses_accumulator = true, word_size = Force{}, },

	// --- Arithmetic: the mnemonic is resolved from opcode bits (see get_opname) ---
	{ opname = .TBD, desc = "Reg/memory with register to either", mask = 0b11000100, encoding = 0b00000000,
		opcode_id = .First, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
	{ opname = .TBD, desc = "Immediate to register/memory", mask = 0b11111100, encoding = 0b10000000,
		opcode_id = .Second, has_data = true, has_address = true,
		word_size = LastBit{}, has_sign_extension = true },
	{ opname = .TBD, desc = "Immediate to accumulator", mask = 0b11000100, encoding = 0b00000100,
		word_size = LastBit{}, has_data = true },

	// --- Conditional jumps (alias pairs share one encoding) ---
	{ opname = .JE, desc = "Jump on equal/zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
	{ opname = .JZ, desc = "Jump on equal/zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
	{ opname = .JL, desc = "Jump on less/not greater or equal", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
	{ opname = .JNGE, desc = "Jump on less/not greater or equal", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
	{ opname = .JLE, desc = "Jump on less or equal/not greater", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
	{ opname = .JNG, desc = "Jump on less or equal/not greater", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
	{ opname = .JB, desc = "Jump on below/not above or equal", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
	{ opname = .JNAE, desc = "Jump on below/not above or equal", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
	{ opname = .JBE, desc = "Jump on below or equal/not above", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
	{ opname = .JNA, desc = "Jump on below or equal/not above", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
	{ opname = .JP, desc = "Jump on parity/parity even", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
	{ opname = .JPE, desc = "Jump on parity/parity even", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
	{ opname = .JO, desc = "Jump on overflow", mask = 0b11111111, encoding = 0b01110000, is_jump = true},
	{ opname = .JS, desc = "Jump on sign", mask = 0b11111111, encoding = 0b01111000, is_jump = true},
	{ opname = .JNE, desc = "Jump on not equal/not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
	{ opname = .JNZ, desc = "Jump on not equal/not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
	{ opname = .JNL, desc = "Jump on not less/greater or equal", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
	{ opname = .JGE, desc = "Jump on not less/greater or equal", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
	{ opname = .JNLE, desc = "Jump on not less or equal/greater", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
	{ opname = .JG, desc = "Jump on not less or equal/greater", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
	{ opname = .JNB, desc = "Jump on not below/above or equal", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
	{ opname = .JAE, desc = "Jump on not below/above or equal", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
	{ opname = .JNBE, desc = "Jump on not below or equal/above", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
	{ opname = .JA, desc = "Jump on not below or equal/above", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
	{ opname = .JNP, desc = "Jump on not parity/parity odd", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
	{ opname = .JPO, desc = "Jump on not parity/parity odd", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
	{ opname = .JNO, desc = "Jump on not overflow", mask = 0b11111111, encoding = 0b01110001, is_jump = true},
	{ opname = .JNS, desc = "Jump on not sign", mask = 0b11111111, encoding = 0b01111001, is_jump = true},

	// --- Loops ---
	{ opname = .LOOP, desc = "Loop CX times", mask = 0b11111111, encoding = 0b11100010, is_jump = true},
	{ opname = .LOOPZ, desc = "Loop while zero/equal", mask = 0b11111111, encoding = 0b11100001, is_jump = true},
	{ opname = .LOOPNZ, desc = "Loop while not zero/equal", mask = 0b11111111, encoding = 0b11100000, is_jump = true},
	{ opname = .JCXZ, desc = "Jump on CX zero", mask = 0b11111111, encoding = 0b11100011, is_jump = true},
}
// Lookup from an `encoding` byte to its table row; filled in by main from
// `instructions`. Rows that share an encoding overwrite one another, so the
// last such row wins.
inst_map := make(map[u8]InstructionInfo)
// Passed as the field width when main pads the ";;" marker that precedes the
// raw-byte dump on each output line.
RIGHT_ALIGN_AMOUNT := 35
// Returns the base-register expression selected by a 3-bit R/M code
// (e.g. 0b000 -> "bx + si"). Codes above 0b111 yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	// Indexed by the R/M code.
	bases := [8]string {
		"bx + si",
		"bx + di",
		"bp + si",
		"bp + di",
		"si",
		"di",
		"bp",
		"bx",
	}
	if int(r_m) >= len(bases) {
		return ""
	}
	return bases[r_m]
}
// Formats a memory operand as "[base]", "[base + N]" or "[base - N]".
//
// memoryAddr.addr_id selects the base expression via
// calculate_effective_address; a missing or zero displacement is omitted.
// The returned string is allocated with the context allocator.
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
	// Widen to int before taking the absolute value: abs() on the narrow
	// type overflows for the minimum value (-128 / -32768) and would print
	// a second minus sign.
	offset: int
	switch value in memoryAddr.displacement {
	case None:
	case Disp8:
		offset = int(value)
	case Disp16:
		offset = int(value)
	}

	base := calculate_effective_address(memoryAddr.addr_id)
	if offset == 0 {
		return fmt.aprintf("[%s]", base)
	}
	return fmt.aprintf("[%s %s %d]", base, offset > 0 ? "+" : "-", math.abs(offset))
}
// Renders an operand as assembly text.
//
// mem_type: the operand to print.
// is_word:  for register operands, picks the 16-bit name instead of the
//           8-bit one; ignored by every other variant.
// Returns the empty string when the union holds no value.
get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string {
	switch operand in mem_type {
	case RegisterId:
		if is_word {
			return registers[operand].fullname
		}
		return registers[operand].bytename
	case Immediate8:
		return fmt.aprintf("%d", operand)
	case Immediate16:
		return fmt.aprintf("%d", operand)
	case MemoryAddr:
		return get_memory_string(operand)
	case Accumulator:
		// Printed in brackets, i.e. as a direct address.
		return fmt.aprintf("[%d]", operand)
	case SegmentRegister:
		return segment_registers[operand].fullname
	}
	return ""
}
// Reads a little-endian signed 16-bit value from the first two bytes of
// `data` (data[0] is the low byte, data[1] the high byte).
// `data` must hold at least two bytes.
get_i16 :: proc(data: []u8) -> i16 {
	high := i16(data[1])
	low := i16(data[0])
	return high << 8 | low
}
// Decodes the displacement that follows a mod/rm byte.
//
// data[0] is the mod/rm byte; its top two bits select the mode:
//   1 -> one signed byte at data[1]
//   2 -> a little-endian signed word at data[1..2]
//   anything else -> no displacement (None)
// Returns the displacement and the number of displacement bytes consumed.
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	displacement = None{}
	mode := data[0] >> 6
	if mode == 1 {
		displacement = i8(data[1])
		disp_amount = 1
	} else if mode == 2 {
		displacement = get_i16(data[1:])
		disp_amount = 2
	}
	return
}
// Formats a displacement as " + N" or " - N" (leading space included).
// Returns the empty string when the displacement is absent or zero; any
// other result is allocated with the context allocator.
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen to int before taking the absolute value: abs() on the narrow
	// type overflows for the minimum value (-128 / -32768) and would print
	// a second minus sign.
	offset: int
	#partial switch value in displacement {
	case i8:
		offset = int(value)
	case i16:
		offset = int(value)
	}
	if offset == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
}
// Looks up the decode-table row for opcode byte `b`.
// Returns the first row of `instructions` whose encoding equals `b` masked
// with that row's mask, plus true; or a zero row and false when none match.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for i in 0..<len(instructions) {
		candidate := instructions[i]
		if b & candidate.mask != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return InstructionInfo{}, false
}
// Returns the arithmetic/logic mnemonic selected by bits 3..5 of `b`.
//
// The decode table routes every 00xxx0xx / 00xxx10x / 100000xx opcode here,
// so all eight values of the 3-bit field need a name; previously only add,
// sub and cmp were covered and the rest produced an empty mnemonic.
get_opname :: proc(b: u8) -> string {
	// Indexed by the 3-bit operation field.
	names := [8]string{"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}
	return names[(b >> 3) & 0b111]
}
// Prints one output line, preceded by a blank line whenever its first three
// characters differ from those of the previously printed line (this groups
// runs of the same mnemonic). `last_opname` is updated in place.
@(private = "file")
print_grouped_line :: proc(line: string, last_opname: ^[3]byte) {
	if !strings.has_prefix(line, string(last_opname^[:])) {
		fmt.println()
	}
	copy(last_opname^[:], line[0:3])
	fmt.println(line)
}

// Entry point: reads up to 1024 bytes of machine code from the file named by
// the first command-line argument and prints one disassembled line per
// instruction, each followed by ";;" and the instruction's raw bytes in binary.
//
// Exits with status 1 when no file is given or the file cannot be opened or
// read. Bytes that match no row of the decode table are reported as
// "unknown instruction" and skipped one at a time.
main :: proc() {
	if len(os.args) < 2 {
		fmt.eprintln("usage:", os.args[0], "<binary file>")
		os.exit(1)
	}

	f, err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)

	// The decoder looks ahead of the current opcode (mod/rm, displacement,
	// immediate). The zeroed padding keeps a truncated final instruction from
	// indexing past the buffer.
	MAX_INPUT :: 1024
	LOOKAHEAD_PADDING :: 8
	data := make([]u8, MAX_INPUT + LOOKAHEAD_PADDING)
	bytes_read, err2 := os.read(f, data[:MAX_INPUT])
	if err2 != nil {
		fmt.eprintln("ERROR:", err2)
		os.exit(1)
	}

	for inst in instructions {
		inst_map[inst.encoding] = inst
	}

	// When true, lines are collected and printed after decoding finishes;
	// otherwise each line is printed as soon as it is decoded.
	print_at_end := false
	last_opname: [3]byte
	instruction_builder := strings.builder_make()
	instruction_list := make([dynamic]string)

	fmt.println("bits 16")

	idx := 0
	for idx < bytes_read {
		curr_byte := data[idx]
		instruction, ok := try_find_instruction(curr_byte)
		if !ok {
			txt := "unknown instruction"
			line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
			if print_at_end {
				append(&instruction_list, line)
			} else {
				print_grouped_line(line, &last_opname)
			}
			idx += 1
			continue
		}

		// Number of bytes this instruction occupies, opcode included.
		processed := 1
		lhs2: OperandType
		rhs2: OperandType
		is_word: bool
		flip_dst := false
		has_memory_addr := false
		has_immediate := false
		rm, mod, reg: u8

		if instruction.has_flip {
			flip_dst = curr_byte & 2 != 0
		}

		switch val in instruction.word_size {
		case LastBit: is_word = curr_byte & 1 == 1
		case FourthBit: is_word = curr_byte & 0b0000_1000 != 0
		case Force: is_word = true
		case None:
		}

		if reg_info, has_reg := instruction.reg_info.(RegInfo); has_reg {
			b := reg_info.in_first_byte ? data[idx] : data[idx+1]
			reg = (b >> reg_info.shift_offset) & 0b111
		}

		// A MOV to/from a segment register keeps the 2-bit segment register
		// code in bits 3..4 of the mod/rm byte and always moves a word.
		is_segreg_mov := instruction.has_segreg && instruction.has_address
		if is_segreg_mov {
			is_word = true
		}

		// --- first operand ---
		// data_idx tracks where the immediate (if any) starts.
		data_idx := idx + 1
		if instruction.has_address {
			mod = data[idx+1] >> 6
			rm = data[idx+1] & 0b00000111
			// mod 1 and 2 carry that many displacement bytes; 0 and 3 carry none.
			disp_len := int(mod) % 3
			data_idx += 1 + disp_len
			processed += 1 + disp_len

			switch mod {
			case 0:
				if rm == 0b110 {
					// Direct address: a 16-bit address follows the mod/rm byte.
					lhs2 = (Accumulator)(get_i16(data[idx+2:]))
					processed += 2
					data_idx += 2
				} else {
					lhs2 = MemoryAddr{ addr_id = rm, displacement = None{} }
				}
				has_memory_addr = true
			case 1:
				lhs2 = MemoryAddr{ addr_id = rm, displacement = (i8)(data[idx+2]) }
				has_memory_addr = true
			case 2:
				lhs2 = MemoryAddr{ addr_id = rm, displacement = get_i16(data[idx+2:]) }
				has_memory_addr = true
			case 3:
				lhs2 = (RegisterId)(registers[rm].code)
			}
		} else if instruction.has_segreg {
			lhs2 = (SegmentRegister)(segment_registers[reg].code)
		} else if instruction.uses_accumulator {
			lhs2 = (RegisterId)(registers[0].code)
		} else {
			lhs2 = (RegisterId)(registers[reg].code)
		}

		// --- second operand ---
		if instruction.has_data {
			wide_immediate := is_word
			if instruction.has_sign_extension {
				// With the sign-extension bit set, a word operation still
				// carries only an 8-bit immediate.
				wide_immediate = is_word && curr_byte & 0b0000_0010 == 0
			}
			if wide_immediate {
				rhs2 = (Immediate16)(get_i16(data[data_idx:]))
				processed += 2
			} else {
				rhs2 = (Immediate8)(data[data_idx])
				processed += 1
			}
			has_immediate = true
		} else if is_segreg_mov {
			seg := (data[idx+1] >> 3) & 0b11
			rhs2 = (SegmentRegister)(segment_registers[seg].code)
			// Bit 1 of the opcode set means the segment register is the destination.
			flip_dst = curr_byte & 2 != 0
		} else if instruction.uses_accumulator {
			if _, is_direct := instruction.word_size.(LastBit); is_direct {
				// Memory <-> accumulator MOV: the address is always 16 bits
				// wide; the width flag only selects al versus ax.
				rhs2 = (Accumulator)(get_i16(data[data_idx:]))
				processed += 2
			} else {
				rhs2 = (RegisterId)(reg)
			}
		} else {
			rhs2 = (RegisterId)(reg)
		}

		if flip_dst {
			lhs2, rhs2 = rhs2, lhs2
		}

		lhs := get_memory_type_string(lhs2, is_word)
		rhs := get_memory_type_string(rhs2, is_word)

		// An immediate stored to memory needs an explicit size keyword.
		size_string := ""
		if has_immediate && has_memory_addr {
			size_string = is_word ? "word " : "byte "
		}

		// --- mnemonic ---
		opname: string
		if instruction.opname == .TBD {
			op_byte := instruction.opcode_id == .Second ? data[idx+1] : curr_byte
			opname = get_opname(op_byte)
		} else {
			opname = strings.to_lower(fmt.aprintf("%s", instruction.opname))
		}

		// --- assemble the text ---
		full_inst: string
		if instruction.is_jump {
			// The offset is relative to the end of the two-byte jump, while
			// "$" is its start, so the printed target is offset + 2. The sum
			// is done in int so offsets of 126/127 do not wrap.
			offset := int((i8)(data[idx+1]))
			target := offset + 2
			full_inst = fmt.aprintf("%s $%s%d ; %d", opname, target >= 0 ? "+" : "", target, offset)
			processed += 1
		} else if instruction.is_unary {
			if instruction.has_address {
				size_string = "word "
			}
			full_inst = fmt.aprintf("%s %s%s", opname, size_string, lhs)
		} else if opname == "mov" {
			full_inst = fmt.aprintf("%s %s, %s%s", opname, lhs, size_string, rhs)
		} else {
			full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, lhs, rhs)
		}

		fmt.sbprintf(&instruction_builder, "%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
		for i in 0..<processed {
			fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
		}

		line := strings.to_string(instruction_builder)
		if print_at_end {
			// The builder is reused, so keep a copy of its contents.
			append(&instruction_list, strings.clone(line))
		} else {
			print_grouped_line(line, &last_opname)
		}

		idx += processed
		strings.builder_reset(&instruction_builder)
	}

	if print_at_end {
		for line in instruction_list {
			print_grouped_line(line, &last_opname)
		}
	}
}