// Source: performance-aware/decoder8086.odin
// (repository viewer metadata: 612 lines, 17 KiB, Odin)

package decoder_8086
import "core:os"
import "core:fmt"
import "core:math"
import "core:strings"
// Register is one row of a register lookup table (see `registers` and
// `segment_registers`).
Register :: struct {
	// Name printed when the operand is 16 bits wide.
	fullname: string,
	// Name printed when the operand is 8 bits wide (left empty in the
	// segment-register table).
	bytename: string,
	// Storage for a register value, viewable either as two separate bytes
	// (`low`, `high`) or as one 16-bit quantity (`full`).
	value: struct #raw_union {
		using _: struct {
			low:  byte,
			high: byte,
		},
		full: u16,
	},
	// Bit pattern that selects this row in an encoded instruction.
	code: u8,
}
// WordSize tells the decoder how to decide whether an instruction operates on
// 16-bit (word) or 8-bit (byte) data.
WordSize :: enum {
	None,      // no width information; the decoder leaves `word` false
	LastBit,   // bit 0 of the first byte set means word
	FourthBit, // bit 3 (0b0000_1000) of the first byte set means word
	Always8,   // always byte-sized (decoder leaves `word` false)
	Always16,  // always word-sized
}
// None is the empty placeholder variant shared by the `Displacement` and
// `Operand` unions.
None :: struct {}

// Signed 8-bit and 16-bit displacement payloads.
Disp8  :: i8
Disp16 :: i16

// Displacement is the optional signed offset attached to a memory operand.
Displacement :: union {
	None,
	Disp8,
	Disp16,
}
// Payload types carried by the `Operand` union. Most are `distinct` so the
// union can tell apart values that share the same underlying integer type.

// Index into the `registers` table.
RegisterId :: distinct u8

// Immediate values read from the instruction stream.
Immediate8  :: distinct i8
Immediate16 :: distinct i16
ImmediateU8 :: distinct u8

// Memory operand: `addr_id` is the 3-bit r/m field that selects the
// effective-address formula, `displacement` the optional offset added to it.
MemoryAddr :: struct {
	addr_id:      u8,
	displacement: Displacement,
}

// 16-bit address taken directly from the instruction bytes.
DirectAddress :: distinct i16

// Index into the `segment_registers` table.
SegmentRegister :: distinct i8

// Relative jump offset; the decoder stores it with 2 already added.
Jump :: distinct i8

// Marker operand, printed as the name of `variable_port`.
VariablePort :: struct {}

// Shift/rotate count selector: true prints `registers[1].bytename`,
// false prints the literal "1".
ShiftRotate :: distinct bool

// Mnemonic of the string operation following a repeat prefix
// (see `get_repeat_op`).
Repeat :: string

// Far pointer made of an instruction-pointer and a code-segment value.
Intersegment :: struct {
	ip: i16,
	cs: i16,
}

// Target computed from a 16-bit offset plus the current position in the
// byte stream (see the `.DirectWithinSegment` case of `parse_operand`).
DirectWithinSegment :: distinct u16
// Operand is a decoded instruction operand. `parse_operand` produces one and
// `get_operand_string` renders it as assembly text.
Operand :: union {
	None,
	RegisterId,
	Immediate8,
	ImmediateU8,
	Immediate16,
	MemoryAddr,
	DirectAddress,
	SegmentRegister,
	Jump,
	VariablePort,
	ShiftRotate,
	Repeat,
	DirectWithinSegment,
	Intersegment,
}
// OperandInfo describes, per instruction-table entry, what kind of operand
// sits in the source or destination slot. `parse_operand` switches on it to
// decide which bytes to read.
OperandInfo :: enum {
	None,                // slot unused
	Register,            // register chosen by `InstructionInfo.reg_info`
	SegmentRegister,     // segment register chosen by `InstructionInfo.reg_info`
	RegisterMemory,      // mod/r-m encoded register or memory operand
	Immediate,           // signed 8- or 16-bit immediate
	ImmediateUnsigned,   // unsigned 8-bit immediate
	Accumulator,         // always `registers[0]`
	DirectAddress,       // 16-bit address in bytes 1..2
	Jump,                // 8-bit relative jump offset in byte 1
	VariablePort,        // port held in `variable_port`
	ShiftRotate,         // shift/rotate count selected by bit 1 of byte 0
	Repeat,              // string operation named by byte 1
	DirectWithinSegment, // 16-bit relative target in bytes 1..2
	Intersegment,        // ip in bytes 1..2, cs in bytes 3..4
}
// RegisterEncodingBits names where the 3-bit register field is located inside
// the first two bytes of an instruction.
RegisterEncodingBits :: enum {
	None,              // no register field
	FirstByteLast3,    // byte 0, bits 2..0
	SecondByteMiddle3, // byte 1, bits 5..3
	SecondByteLast3,   // byte 1, bits 2..0
	FirstByteMiddle3,  // byte 0, bits 5..3
}
// InstructionInfo is one entry of the instruction table searched by
// `try_find_instruction`.
InstructionInfo :: struct {
	// An opcode byte `b` matches this entry when `b & mask == encoding`.
	mask:     u8,
	encoding: u8,

	// Mnemonic identifier and a free-form description.
	opname: OpName,
	desc:   string,

	// Operand kinds for the source and destination slots.
	src: OperandInfo,
	dst: OperandInfo,

	// How the operand width is determined, and where the register field lives.
	word_size: WordSize,
	reg_info:  RegisterEncodingBits,

	// When set, bit 1 of the first byte swaps source and destination.
	has_flip: bool,
	// When set, bit 1 of the first byte forces an 8-bit immediate even for
	// word-sized operations.
	has_sign_extension: bool,
	// When set, the mnemonic is resolved from further opcode bits via
	// `get_opname` instead of from `opname` directly.
	check_second_encoding: bool,
	// Number of additional bytes to skip after the operands are parsed.
	consume_extra_bytes: int,
	// NOTE(review): not read anywhere in the visible code; presumably marks
	// shift/rotate instructions — confirm against the instruction table.
	shift_rotate_flag: bool,
}
// Width used to right-align the ";;" marker that precedes the raw byte dump.
RIGHT_ALIGN_AMOUNT := 35

// General-purpose register names indexed by their 3-bit encoding. `fullname`
// is the name used for word operands, `bytename` the one for byte operands.
registers := [8]Register {
	{fullname = "ax", bytename = "al", code = 0b000},
	{fullname = "cx", bytename = "cl", code = 0b001},
	{fullname = "dx", bytename = "dl", code = 0b010},
	{fullname = "bx", bytename = "bl", code = 0b011},
	{fullname = "sp", bytename = "ah", code = 0b100},
	{fullname = "bp", bytename = "ch", code = 0b101},
	{fullname = "si", bytename = "dh", code = 0b110},
	{fullname = "di", bytename = "bh", code = 0b111},
}

// Segment register names indexed by their encoding.
segment_registers := [4]Register {
	{fullname = "es", code = 0b000},
	{fullname = "cs", code = 0b001},
	{fullname = "ss", code = 0b010},
	{fullname = "ds", code = 0b011},
}

// Register printed for `VariablePort` operands (a copy of the dx entry).
variable_port := registers[2]

// Offset of the instruction currently being decoded; `main` updates it after
// every instruction and `parse_operand` reads it to compute
// `DirectWithinSegment` targets.
total_bytes_processed := 0

// Scratch builder reused by `main` to assemble each output line.
instruction_builder := strings.builder_make()
// get_i16 assembles a 16-bit value from the first two bytes of `data`,
// little-endian: data[0] is the low byte and data[1] the high byte.
// `data` must hold at least two bytes.
get_i16 :: proc(data: []u8) -> i16 {
	high_byte := i16(data[1])
	low_byte := i16(data[0])
	return (high_byte << 8) | low_byte
}
// operand_is reports whether `opr` currently holds the variant type `T`.
operand_is :: proc($T: typeid, opr: Operand) -> bool {
	if _, holds_variant := opr.(T); holds_variant {
		return true
	}
	return false
}
// calculate_effective_address maps a 3-bit r/m field to the textual register
// expression used inside a memory operand's brackets. Values outside 0..7
// yield the empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	// Indexed by the r/m field.
	formulas := [8]string {
		"bx + si",
		"bx + di",
		"bp + si",
		"bp + di",
		"si",
		"di",
		"bp",
		"bx",
	}
	if int(r_m) >= len(formulas) {
		return ""
	}
	return formulas[r_m]
}
// get_memory_string renders a memory operand as assembly text, e.g.
// "[bx + si]", "[bp - 4]" or "es:[di + 12]".
//
// Parameters:
//   memoryAddr  - r/m selector plus optional displacement.
//   has_segment - segment-override register, if one applies; its name is
//                 emitted as a "seg:" prefix in front of the brackets.
//
// Returns a string allocated with the context allocator; the caller owns it.
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
	// Widen to int before taking the magnitude: abs() on the narrow type wraps
	// for the minimum value (-128 / -32768) and would print "- -128".
	offset: int
	switch value in memoryAddr.displacement {
	case None:
	case Disp8:
		offset = int(value)
	case Disp16:
		offset = int(value)
	}

	// Kept as two plain strings so that no intermediate allocation is needed.
	seg_name, seg_sep: string
	if segreg, ok := has_segment.?; ok {
		seg_name = segreg.fullname
		seg_sep = ":"
	}

	base := calculate_effective_address(memoryAddr.addr_id)

	// A zero displacement is omitted entirely.
	if offset == 0 {
		return fmt.aprintf("%s%s[%s]", seg_name, seg_sep, base)
	}
	sign := offset > 0 ? "+" : "-"
	return fmt.aprintf("%s%s[%s %s %d]", seg_name, seg_sep, base, sign, math.abs(offset))
}
// parse_displacement reads the two-bit mode field from the top of data[0] and
// returns the displacement that follows it together with the number of bytes
// the displacement occupies.
//
//   mode 1 -> signed 8-bit displacement taken from data[1], 1 byte
//   mode 2 -> signed 16-bit displacement taken from data[1..2], 2 bytes
//   other  -> None{}, 0 bytes
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	displacement = None{}
	mode := data[0] >> 6
	if mode == 1 {
		displacement = i8(data[1])
		disp_amount = 1
	} else if mode == 2 {
		displacement = get_i16(data[1:])
		disp_amount = 2
	}
	return
}
// get_displacement_string formats a displacement as " + N" or " - N".
// It returns the empty string when there is no displacement or when it is
// zero. A non-empty result is allocated with the context allocator.
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen to int before taking the magnitude: abs() on i8/i16 wraps for the
	// minimum value (-128 / -32768) and would print " - -128".
	offset: int
	#partial switch value in displacement {
	case i8:
		offset = int(value)
	case i16:
		offset = int(value)
	}
	if offset == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
}
// get_repeat_op builds the mnemonic of the string operation encoded in
// `data`: bits 3..1 choose the operation and bit 0 chooses the "w" (set) or
// "b" (clear) suffix. Unrecognised operation bits produce only the suffix.
// The result is allocated with the context allocator.
get_repeat_op :: proc(data: u8) -> Repeat {
	mnemonic := ""
	switch (data >> 1) & 0b111 {
	case 0b010:
		mnemonic = "movs"
	case 0b011:
		mnemonic = "cmps"
	case 0b101:
		mnemonic = "stos"
	case 0b110:
		mnemonic = "lods"
	case 0b111:
		mnemonic = "scas"
	}

	suffix := "b"
	if (data & 0b1) == 1 {
		suffix = "w"
	}
	return Repeat(fmt.aprintf("%s%s", mnemonic, suffix))
}
// try_find_instruction returns the first entry of `instructions` whose masked
// opcode pattern matches byte `b`. The boolean is false (and the entry is
// zero-valued) when nothing matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for candidate in instructions {
		if (b & candidate.mask) != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return InstructionInfo{}, false
}
// get_opname resolves the mnemonic for table entries that share one opcode
// pattern and are told apart by a 3-bit selector.
//
// The selector is bits 5..3 of data[0] for `.TBD1` and `.TBD3`, and bits 5..3
// of data[1] for every other `opname`. An empty string is returned for
// selector values that have no name in the relevant group.
//
// The second result is true only for the two `.TBD2` selectors (0b011 and
// 0b101) that the caller prints with a " far" qualifier.
get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) {
	// Each table is indexed by the 3-bit selector.
	tbd2_names := [8]string{"inc", "dec", "call", "call", "jmp", "jmp", "push", ""}
	// NOTE(review): index 1 ("dec") looks copied from the group above;
	// confirm it is a real encoding for this group.
	tbd5_names := [8]string{"test", "dec", "not", "neg", "mul", "imul", "div", "idiv"}
	tbd6_names := [8]string{"rol", "ror", "rcl", "rcr", "shl", "shr", "", "sar"}
	arith_names := [8]string{"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}

	#partial switch opname {
	case .TBD2:
		selector := (data[1] >> 3) & 0b111
		return tbd2_names[selector], selector == 0b011 || selector == 0b101
	case .TBD5:
		return tbd5_names[(data[1] >> 3) & 0b111], false
	case .TBD6:
		return tbd6_names[(data[1] >> 3) & 0b111], false
	case .TBD1, .TBD3:
		// These forms carry the selector in the opcode byte itself.
		return arith_names[(data[0] >> 3) & 0b111], false
	}
	return arith_names[(data[1] >> 3) & 0b111], false
}
// extract_register_bits returns the 3-bit register field found at `location`
// within the first two bytes of `data`. `.None` yields 0.
@(private="file")
extract_register_bits :: proc(location: RegisterEncodingBits, data: []u8) -> u8 {
	switch location {
	case .None:
		// No location supplied by the instruction table; fall back to 0.
	case .FirstByteLast3:
		return data[0] & 0b111
	case .FirstByteMiddle3:
		return (data[0] >> 3) & 0b111
	case .SecondByteMiddle3:
		return (data[1] >> 3) & 0b111
	case .SecondByteLast3:
		return data[1] & 0b111
	}
	return 0
}

// parse_operand decodes one operand of the instruction that starts at data[0].
//
// Parameters:
//   inst       - matched instruction-table entry.
//   opinfo     - kind of operand to decode (inst.src or inst.dst).
//   data       - byte stream beginning at the instruction's first byte.
//   processed  - running count of bytes consumed by the instruction so far;
//                advanced by however many bytes this operand uses. Immediates
//                are read at this offset, so operands that precede them must
//                be parsed first.
//   word       - true when the instruction operates on 16-bit data.
//   has_segreg - active segment override (not used while parsing).
//
// Returns the decoded operand, or None{} when `opinfo` is `.None`.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg := extract_register_bits(inst.reg_info, data)
		operand = RegisterId(registers[reg].code)
	case .SegmentRegister:
		// Only two bits select a segment register. Masking keeps a malformed
		// encoding with the third bit set from indexing past the four-entry
		// table and aborting the whole run.
		reg := extract_register_bits(inst.reg_info, data) & 0b11
		operand = SegmentRegister(segment_registers[reg].code)
	case .RegisterMemory:
		modrm := data[1]
		mod := modrm >> 6
		rm := modrm & 0b111
		processed^ += 1 // the mod/r-m byte itself
		switch mod {
		case 0b00:
			if rm == 0b110 {
				// Special case: no base register, 16-bit direct address.
				operand = DirectAddress(get_i16(data[2:]))
				processed^ += 2
			} else {
				operand = MemoryAddr{addr_id = rm, displacement = None{}}
			}
		case 0b01:
			operand = MemoryAddr{addr_id = rm, displacement = i8(data[2])}
			processed^ += 1
		case 0b10:
			operand = MemoryAddr{addr_id = rm, displacement = get_i16(data[2:])}
			processed^ += 2
		case 0b11:
			operand = RegisterId(registers[rm].code)
		}
	case .Immediate:
		data_idx := processed^
		// With sign extension enabled, bit 1 of the opcode byte forces an
		// 8-bit immediate even when the operation is word-sized.
		wide := word
		if inst.has_sign_extension {
			wide = wide && (data[0] & 0b0000_0010) == 0
		}
		if wide {
			operand = Immediate16(get_i16(data[data_idx:]))
			processed^ += 2
		} else {
			operand = Immediate8(data[data_idx])
			processed^ += 1
		}
	case .ImmediateUnsigned:
		operand = ImmediateU8(data[processed^])
		processed^ += 1
	case .Accumulator:
		operand = RegisterId(registers[0].code)
	case .DirectAddress:
		operand = DirectAddress(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		// NOTE(review): the addition is done in i8, so offsets 126 and 127
		// wrap around; fixing that needs a wider `Jump` type.
		operand = Jump(i8(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		v_flag := (data[0] & 0b10) != 0
		operand = ShiftRotate(v_flag)
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	case .DirectWithinSegment:
		// Relative offset + position of this instruction + 3.
		value := int(get_i16(data[1:])) + total_bytes_processed + 3
		operand = DirectWithinSegment(value)
		processed^ += 2
	case .Intersegment:
		operand = Intersegment {
			ip = get_i16(data[1:]),
			cs = get_i16(data[3:]),
		}
		processed^ += 4
	}
	return operand
}
// get_operand_string renders a decoded operand as assembly text.
//
// Parameters:
//   operand     - operand to print.
//   is_word     - selects the 16-bit (true) or 8-bit (false) register name.
//   has_segment - segment override; prefixes memory and direct-address
//                 operands with "seg:".
//
// Returns "" for None{} or an unset operand. Formatted results are allocated
// with the context allocator; register names are returned as-is from the
// lookup tables.
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
	switch val in operand {
	case None:
		return ""
	case RegisterId:
		return is_word ? registers[val].fullname : registers[val].bytename
	case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
		return fmt.aprintf("%d", val)
	case MemoryAddr:
		return get_memory_string(val, has_segment)
	case DirectAddress:
		// Addresses are unsigned 16-bit quantities; printing the signed
		// storage type directly would show e.g. 0xFFFF as "[-1]".
		address := u16(val)
		if segreg, ok := has_segment.?; ok {
			return fmt.aprintf("%s:[%d]", segreg.fullname, address)
		}
		return fmt.aprintf("[%d]", address)
	case SegmentRegister:
		return segment_registers[val].fullname
	case Jump:
		// Non-negative offsets get an explicit "+"; negatives already carry "-".
		return fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val)
	case VariablePort:
		return variable_port.fullname
	case ShiftRotate:
		// true: count comes from the byte register at index 1; false: count is 1.
		return val ? registers[1].bytename : "1"
	case Repeat:
		return string(val)
	case Intersegment:
		// Segment and offset are unsigned 16-bit values as well.
		return fmt.aprintf("%d:%d", u16(val.cs), u16(val.ip))
	}
	return ""
}
// main disassembles the binary named by the first command-line argument and
// prints one line per instruction to stdout: the assembly text, a ";;"
// marker aligned using RIGHT_ALIGN_AMOUNT, and the instruction's raw bytes
// in binary.
//
// Exits with status 1 when the argument is missing or the file cannot be
// opened, sized or read.
main :: proc() {
	// Zeroed bytes appended after the file contents so that operand look-ahead
	// on a truncated final instruction reads zeros instead of indexing out of
	// bounds.
	// NOTE(review): sized generously for the operand shapes visible here;
	// confirm against the largest `consume_extra_bytes` in the table.
	LOOKAHEAD_PADDING :: 16

	if len(os.args) < 2 {
		fmt.eprintln("ERROR: expected the path of an 8086 binary as the first argument")
		os.exit(1)
	}

	f, err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)

	// Size the buffer from the file so that inputs of any length are decoded
	// in full.
	size, size_err := os.file_size(f)
	if size_err != nil {
		fmt.eprintln("ERROR:", size_err)
		os.exit(1)
	}
	data := make([]u8, int(size) + LOOKAHEAD_PADDING)
	bytes_read, read_err := os.read(f, data[:int(size)])
	if read_err != nil {
		fmt.eprintln("ERROR:", read_err)
		os.exit(1)
	}

	// Debug toggle: when true, "unknown instruction" lines are collected and
	// printed after the loop instead of inline.
	print_at_end := false
	idx := 0
	// Prefix state carried over to the next real instruction.
	has_lock: bool
	has_segment: Maybe(Register)
	// First three characters of the previously printed line, used to insert a
	// blank line when a run of similar instructions ends.
	last_opname: [3]byte
	repeating_op_count := 0
	instruction_list := make([dynamic]string, 0, 512)

	fmt.println("bits 16\n")
	for idx < bytes_read {
		processed := 1
		curr_byte := data[idx]
		inst, ok := try_find_instruction(curr_byte)
		if !ok {
			txt := "unknown instruction"
			if print_at_end {
				line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
				append(&instruction_list, line)
			} else {
				fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
			}
			idx += 1
			continue
		}

		// Here we check if the instruction affects the next instruction
		if inst.opname == .LOCK {
			has_lock = true
			idx += 1
			continue
		} else if inst.opname == .SEGMENT {
			reg := (curr_byte & 0b11000) >> 3
			has_segment = segment_registers[reg]
			idx += 1
			continue
		} else if inst.opname == .AAM {
			// This instruction consumes one additional byte.
			processed += 1
		}

		src_opr: Operand
		dst_opr: Operand
		word: bool
		flip: bool
		indirect_intersegment: bool

		if inst.has_flip {
			flip = curr_byte & 2 != 0
		}
		#partial switch inst.word_size {
		case .LastBit:
			word = curr_byte & 1 == 1
		case .FourthBit:
			word = curr_byte & 0b0000_1000 != 0
		case .Always16:
			word = true
		}

		opname: string
		if inst.check_second_encoding {
			opname, indirect_intersegment = get_opname(inst.opname, data[idx:])
			// NOTE: This is a special case because it matches the bit pattern of .TBD5,
			// but the instruction itself is different
			if opname == "test" && curr_byte == 0b11110110 {
				inst = test_inst
			}
		} else {
			opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
		}

		// The destination is parsed first: it may consume the mod/r-m and
		// displacement bytes that precede an immediate source.
		dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
		src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)

		// An explicit "byte"/"word" keyword is needed whenever no register
		// operand fixes the operation width.
		src_is_imm := operand_is(Immediate8, src_opr) || operand_is(Immediate16, src_opr)
		dst_is_bracketed := operand_is(MemoryAddr, dst_opr) || operand_is(DirectAddress, dst_opr)
		src_is_bracketed := operand_is(MemoryAddr, src_opr) || operand_is(DirectAddress, src_opr)
		shiftrot := inst.src == .ShiftRotate
		size_string := ""
		if ((src_is_imm && dst_is_bracketed) || (dst_is_bracketed && shiftrot)) || (src_is_bracketed && operand_is(None, dst_opr)) {
			size_string = word ? "word " : "byte "
		}

		if flip {
			src_opr, dst_opr = dst_opr, src_opr
		}
		dst_str := get_operand_string(dst_opr, word, has_segment)
		src_str := get_operand_string(src_opr, word, has_segment)

		full_inst: string
		if dst_str == "" {
			interseg_string: string
			if indirect_intersegment {
				interseg_string = " far"
			}
			full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str)
		} else {
			// NOTE: I don't know why this is the case, but only the move has the word/byte
			// keyword next to the immediate, but other instructions have it on the memory address
			if opname == "mov" {
				full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str)
			} else {
				full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str)
			}
		}
		processed += inst.consume_extra_bytes

		lock_string: string
		if has_lock {
			lock_string = "lock "
		}
		fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
		if has_lock {
			fmt.sbprintf(&instruction_builder, " lock")
		}
		if _, has_override := has_segment.?; has_override {
			fmt.sbprintf(&instruction_builder, " segment")
		}
		for i in 0..<processed {
			fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
		}

		op2 := strings.to_string(instruction_builder)
		// Separate runs of similar instructions with a blank line.
		if op2[0:3] != string(last_opname[:]) {
			if repeating_op_count > 0 {
				fmt.println()
			}
			repeating_op_count = 0
		} else {
			repeating_op_count += 1
		}
		copy(last_opname[:], op2[0:3])
		fmt.println(op2)

		idx += processed
		strings.builder_reset(&instruction_builder)
		// Prefixes apply to a single instruction only.
		has_lock = false
		has_segment = nil
		total_bytes_processed = idx
	}

	if print_at_end {
		for line in instruction_list {
			if !strings.has_prefix(line, string(last_opname[:])) {
				fmt.println()
			}
			copy(last_opname[:], line[0:3])
			fmt.println(line)
		}
	}
}