603 lines
16 KiB
Odin
603 lines
16 KiB
Odin
package decoder_8086
|
|
|
|
import "core:os"
|
|
import "core:fmt"
|
|
import "core:math"
|
|
import "core:strings"
|
|
|
|
// Register describes one register table entry: the names it is printed with,
// a slot for its contents, and the numeric code that identifies it.
Register :: struct {
	// Name printed when the operand is word-sized.
	fullname: string,
	// Name printed when the operand is byte-sized.
	bytename: string,
	// Register contents. `low`/`high` and `full` share the same two bytes of storage.
	value:    struct #raw_union {
		using _: struct {
			low, high: byte,
		},
		full:    u16,
	},
	// Numeric register code; in the tables in this file it equals the entry's index.
	code:     u8,
}
|
|
|
|
// General-purpose register table, indexed by the 3-bit register code.
// Each entry pairs the word-sized name with the byte-sized name that shares
// the same code (for example code 0b100 is "sp" as a word and "ah" as a byte).
registers := [8]Register {
	{code = 0b000, fullname = "ax", bytename = "al"},
	{code = 0b001, fullname = "cx", bytename = "cl"},
	{code = 0b010, fullname = "dx", bytename = "dl"},
	{code = 0b011, fullname = "bx", bytename = "bl"},
	{code = 0b100, fullname = "sp", bytename = "ah"},
	{code = 0b101, fullname = "bp", bytename = "ch"},
	{code = 0b110, fullname = "si", bytename = "dh"},
	{code = 0b111, fullname = "di", bytename = "bh"},
}
|
|
|
|
// Segment register table, indexed by the segment register code.
// Segment registers have no byte-sized name, so `bytename` is left empty.
segment_registers := [4]Register {
	{code = 0b000, fullname = "es"},
	{code = 0b001, fullname = "cs"},
	{code = 0b010, fullname = "ss"},
	{code = 0b011, fullname = "ds"},
}
|
|
|
|
// Register whose name is printed for a VariablePort operand: a copy of the
// registers[2] entry ("dx").
variable_port := registers[2]
|
|
|
|
// RegInfo records where a register field sits inside an instruction.
// NOTE(review): nothing in the visible part of this file reads this type;
// confirm whether it is still needed.
RegInfo :: struct {
	in_first_byte: bool,
	shift_offset:  u8,
}
|
|
|
|
// Marker variants for WordSize. They carry no data.
LastBit   :: struct {}
FourthBit :: struct {}
Force     :: struct {}

// WordSize is a tagged-union form of the operand-size rule.
// NOTE(review): the decoder code visible here uses the WordSize2 enum instead;
// confirm whether this union is still referenced elsewhere.
WordSize :: union {
	None,
	LastBit,
	FourthBit,
	Force,
}
|
|
|
|
// WordSize2 selects how the decoder decides whether an instruction is word-sized.
WordSize2 :: enum {
	// No rule; the decoder leaves the instruction byte-sized.
	None,
	// Word-sized when bit 0 of the first byte is set.
	LastBit,
	// Word-sized when bit 3 of the first byte is set.
	FourthBit,
	// Not handled specially by the decode loop, so treated as byte-sized.
	Always8,
	// Always word-sized.
	Always16,
	// Not handled specially by the decode loop, so treated as byte-sized.
	Unsigned8,
}
|
|
|
|
// None is the empty variant shared by the WordSize, Displacement, Data and Operand unions.
None :: struct {}
|
|
|
|
// Signed displacement payloads: one byte or two bytes wide.
Disp8  :: i8
Disp16 :: i16

// Displacement is the optional signed offset attached to a memory operand.
Displacement :: union {
	None,
	Disp8,
	Disp16,
}
|
|
|
|
// Signed data payloads: one byte or two bytes wide.
Value8  :: i8
Value16 :: i16

// Data is an optional 8-bit or 16-bit signed value.
// NOTE(review): not referenced by the code visible in this part of the file.
Data :: union {
	None,
	Value8,
	Value16,
}
|
|
|
|
// Variants of ModMode. The two displacement variants carry the offset itself.
ModMemory    :: struct {}
Mod8BitDisp  :: i8
Mod16BitDisp :: i16
ModRegister  :: struct {}

// ModMode is a tagged-union view of the four addressing modes.
// NOTE(review): the operand parser visible here switches on the raw mod bits
// rather than on this union; confirm whether it is still used.
ModMode :: union {
	ModMemory,
	Mod8BitDisp,
	Mod16BitDisp,
	ModRegister,
}
|
|
|
|
// Index into `registers`.
RegisterId  :: distinct u8
// Immediate values, one byte or two bytes wide.
Immediate8  :: distinct i8
Immediate16 :: distinct i16

// MemoryAddr is a memory operand built from an effective-address formula.
MemoryAddr :: struct {
	// r/m code selecting the address formula (see calculate_effective_address).
	addr_id:      u8,
	// Optional signed offset added to the formula.
	displacement: Displacement,
	// Segment override to print in front of the operand, if one was seen.
	segment:      Maybe(Register),
}

// 16-bit address used directly, printed as "[value]".
DirectAddress   :: distinct i16
// Index into `segment_registers`.
SegmentRegister :: distinct i8
// Jump offset, printed relative to "$".
Jump            :: distinct i8
// Marker for an operand that is printed as the variable-port register.
VariablePort    :: struct {}
// Mnemonic text of the string instruction that follows a repeat prefix.
Repeat          :: string

// Operand is one decoded instruction operand.
Operand :: union {
	None,
	RegisterId,
	Immediate8,
	Immediate16,
	MemoryAddr,
	DirectAddress,
	SegmentRegister,
	Jump,
	VariablePort,
	Repeat,
}
|
|
|
|
// OperandInfo tells parse_operand what kind of operand an instruction slot holds.
OperandInfo :: enum {
	// The slot is unused.
	None,
	// A general-purpose register, located via the instruction's reg_info.
	Register,
	// A segment register, located via the instruction's reg_info.
	SegmentRegister,
	// A register or memory operand described by the second byte's mod and r/m fields.
	RegisterMemory,
	// An immediate value read from the instruction bytes.
	Immediate,
	// Always register 0 (ax/al).
	Accumulator,
	// A 16-bit address read from the two bytes after the first byte.
	DirectAddress,
	// A signed 8-bit jump offset read from the second byte.
	Jump,
	// The variable-port register.
	VariablePort,
	// Shift/rotate count: register 1 when bit 1 of the first byte is set, otherwise 1.
	ShiftRotate,
	// The string instruction named by the second byte.
	Repeat,
}
|
|
|
|
// RegisterEncodingBits names the byte and bit position of an instruction's
// 3-bit register field.
RegisterEncodingBits :: enum {
	// No register field is configured.
	None,
	// Bits 0-2 of the first byte.
	FirstByteLast3,
	// Bits 3-5 of the second byte.
	SecondByteMiddle3,
	// Bits 0-2 of the second byte.
	SecondByteLast3,
	// Bits 3-5 of the first byte.
	FirstByteMiddle3,
}
|
|
|
|
// InstructionInfo is one row of the instruction table: how to recognise an
// opcode byte and how to decode the operands that follow it.
InstructionInfo :: struct {
	// A byte matches this row when (byte & mask) == encoding.
	mask:                  u8,
	encoding:              u8,
	// Mnemonic, or a placeholder group that get_opname resolves further.
	opname:                OpName,
	// Free-form description; not read by the code visible in this part of the file.
	desc:                  string,
	// Kinds of the source and destination operands.
	src:                   OperandInfo,
	dst:                   OperandInfo,
	// Rule deciding whether the instruction is word-sized.
	word_size:             WordSize2,
	// Where the register field is encoded, for register operands.
	reg_info:              RegisterEncodingBits,
	// When set, bit 1 of the first byte swaps source and destination.
	has_flip:              bool,
	// When set, bit 1 of the first byte forces the immediate to be read as one byte.
	has_sign_extension:    bool,
	// When set, the mnemonic is looked up through get_opname.
	check_second_encoding: bool,
	// Extra byte count added to the instruction length after operand parsing.
	consume_extra_bytes:   int,
	// Not read by the code visible in this part of the file.
	shift_rotate_flag:     bool,
}
|
|
|
|
// Column budget for an instruction's text; the remainder after the text is
// used as the field width of the ";;" marker that precedes the byte dump.
RIGHT_ALIGN_AMOUNT := 35
|
|
|
|
// calculate_effective_address returns the register formula printed for an
// r/m code, e.g. 0b000 -> "bx + si". Codes above 0b111 yield an empty string.
calculate_effective_address :: proc(r_m: u8) -> string {
	// Indexed by the r/m code.
	formulas := [8]string{"bx + si", "bx + di", "bp + si", "bp + di", "si", "di", "bp", "bx"}
	if int(r_m) >= len(formulas) {
		return ""
	}
	return formulas[r_m]
}
|
|
|
|
// get_memory_string renders a memory operand as "[formula]", "[formula + n]"
// or "[formula - n]", prefixed with "seg:" when a segment override is present.
// A zero or absent displacement is omitted. The returned string is allocated
// with fmt.aprintf.
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
	// Widen to int before taking the magnitude: abs() of the smallest i8/i16
	// value overflows in its own type and would be printed as a negative number.
	offset: int
	switch value in memoryAddr.displacement {
	case None:
	case Disp8:
		offset = int(value)
	case Disp16:
		offset = int(value)
	}

	disp: string
	if offset != 0 {
		disp = fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
	}

	seg_string: string
	if segreg, ok := memoryAddr.segment.?; ok {
		seg_string = fmt.aprintf("%s:", segreg.fullname)
	}

	return fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp)
}
|
|
|
|
// get_i16 reads a little-endian signed 16-bit value from the first two bytes
// of `data`. `data` must hold at least two bytes.
get_i16 :: proc(data: []u8) -> i16 {
	lo := i16(data[0])
	hi := i16(data[1])
	return hi << 8 | lo
}
|
|
|
|
// parse_displacement reads the displacement selected by the top two bits (mod)
// of data[0]. mod 1 takes one signed byte from data[1]; mod 2 takes a signed
// little-endian word starting at data[1]; any other mod yields None.
// Returns the displacement and the number of bytes it occupied.
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
	displacement = None{}
	switch data[0] >> 6 {
	case 0b01:
		displacement = (i8)(data[1])
		disp_amount = 1
	case 0b10:
		displacement = get_i16(data[1:])
		disp_amount = 2
	}
	return
}
|
|
|
|
// get_displacement_string renders a displacement as " + n" or " - n".
// A zero or absent displacement yields an empty string. Non-empty results are
// allocated with fmt.aprintf.
get_displacement_string :: proc(displacement: Displacement) -> string {
	// Widen to int before taking the magnitude: abs() of the smallest i8/i16
	// value overflows in its own type and would be printed as a negative number.
	offset: int
	#partial switch value in displacement {
	case i8:
		offset = int(value)
	case i16:
		offset = int(value)
	}
	if offset == 0 {
		return ""
	}
	return fmt.aprintf(" %s %d", offset > 0 ? "+" : "-", math.abs(offset))
}
|
|
|
|
// get_repeat_op names the string instruction encoded in `data`: bits 1-3 pick
// the mnemonic and bit 0 picks the "w" (set) or "b" (clear) suffix.
// Unrecognised bit patterns produce just the suffix.
get_repeat_op :: proc(data: u8) -> Repeat {
	// Indexed by bits 1-3; unlisted slots stay empty.
	mnemonics := [8]string {
		0b010 = "movs",
		0b011 = "cmps",
		0b101 = "stos",
		0b110 = "lods",
		0b111 = "scas",
	}
	selector := (data >> 1) & 0b111

	suffix := "b"
	if data & 0b1 != 0 {
		suffix = "w"
	}
	return Repeat(fmt.aprintf("%s%s", mnemonics[selector], suffix))
}
|
|
|
|
// try_find_instruction returns the first entry of `instructions` whose masked
// encoding matches `b`. The boolean is false when no entry matches.
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
	for candidate in instructions {
		if (b & candidate.mask) != candidate.encoding {
			continue
		}
		return candidate, true
	}
	return {}, false
}
|
|
|
|
// get_opname resolves a placeholder opcode group to a concrete mnemonic using
// a 3-bit selector (bits 3-5). The selector comes from the second byte, except
// for .TBD1 and .TBD3 where it comes from the first byte. Slots with no
// mnemonic yield an empty string.
get_opname :: proc(opname: OpName, data: []u8) -> string {
	#partial switch opname {
	case .TBD2:
		group := [8]string{"inc", "dec", "call", "call", "jmp", "jmp", "push", ""}
		return group[(data[1] >> 3) & 0b111]
	case .TBD5:
		// NOTE(review): slot 0b001 yields "dec" here, which looks like it was
		// copied from the .TBD2 table; confirm against the 8086 encoding table.
		group := [8]string{"test", "dec", "not", "neg", "mul", "imul", "div", "idiv"}
		return group[(data[1] >> 3) & 0b111]
	case .TBD6:
		group := [8]string{"rol", "ror", "rcl", "rcr", "shl", "shr", "", "sar"}
		return group[(data[1] >> 3) & 0b111]
	}

	// Every other group shares the arithmetic/logic mnemonics.
	selector: u8
	if opname == .TBD1 || opname == .TBD3 {
		selector = (data[0] >> 3) & 0b111
	} else {
		selector = (data[1] >> 3) & 0b111
	}
	arithmetic := [8]string{"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}
	return arithmetic[selector]
}
|
|
|
|
// extract_register_field pulls the 3-bit register number out of the byte and
// bit position named by `inst.reg_info`. Returns 0 when no position is configured.
@(private = "file")
extract_register_field :: proc(inst: InstructionInfo, data: []u8) -> u8 {
	switch inst.reg_info {
	case .None:
		// panic("Register is required but the encoded location is not provided")
	case .FirstByteLast3:
		return data[0] & 0b111
	case .FirstByteMiddle3:
		return (data[0] >> 3) & 0b111
	case .SecondByteMiddle3:
		return (data[1] >> 3) & 0b111
	case .SecondByteLast3:
		return data[1] & 0b111
	}
	return 0
}

// parse_operand decodes one operand of `inst`.
//
// Inputs:
// - inst:       table entry of the instruction being decoded.
// - opinfo:     kind of operand to decode (inst.dst or inst.src).
// - data:       instruction bytes, starting at the opcode byte.
// - processed:  running count of bytes consumed by this instruction; it is
//               advanced by however many bytes the operand occupies.
// - word:       whether the instruction is word-sized.
// - has_segreg: pending segment override, stored on MemoryAddr operands.
//
// Returns the decoded operand, or None{} for an unused slot.
//
// NOTE(review): a segment override is not carried on DirectAddress operands,
// and a Jump offset of 126 or 127 wraps when 2 is added because Jump is an i8.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
	operand: Operand = None{}
	switch opinfo {
	case .None:
	case .Register:
		reg := extract_register_field(inst, data)
		operand = (RegisterId)(registers[reg].code)
	case .SegmentRegister:
		// The extracted field is 3 bits wide but only four segment registers
		// exist; keep the low two bits so malformed bytes cannot index out of range.
		reg := extract_register_field(inst, data) & 0b11
		operand = (SegmentRegister)(segment_registers[reg].code)
	case .RegisterMemory:
		mod := data[1] >> 6
		rm := data[1] & 0b111
		// The mod/rm byte itself.
		processed^ += 1
		switch mod {
		case 0b00:
			if rm == 0b110 {
				// mod 00 with r/m 110 is a 16-bit direct address.
				operand = (DirectAddress)(get_i16(data[2:]))
				processed^ += 2
			} else {
				operand = MemoryAddr{addr_id = rm, displacement = None{}, segment = has_segreg}
			}
		case 0b01:
			operand = MemoryAddr{addr_id = rm, displacement = (i8)(data[2]), segment = has_segreg}
			processed^ += 1
		case 0b10:
			operand = MemoryAddr{addr_id = rm, displacement = get_i16(data[2:]), segment = has_segreg}
			processed^ += 2
		case 0b11:
			operand = (RegisterId)(registers[rm].code)
		}
	case .Immediate:
		// The immediate starts right after the bytes consumed so far, so any
		// operand that precedes it in the encoding must be parsed first.
		data_idx := processed^
		is_wide := word
		if inst.has_sign_extension && data[0] & 0b0000_0010 != 0 {
			// Sign-extension bit set: the immediate is stored as a single byte.
			is_wide = false
		}
		if is_wide {
			operand = (Immediate16)(get_i16(data[data_idx:]))
			processed^ += 2
		} else {
			operand = (Immediate8)(data[data_idx])
			processed^ += 1
		}
	case .Accumulator:
		operand = (RegisterId)(registers[0].code)
	case .DirectAddress:
		operand = (DirectAddress)(get_i16(data[1:]))
		processed^ += 2
	case .Jump:
		processed^ += 1
		// NOTE: In order to mimic the label offset, you have to take the value you got and add two
		operand = (Jump)((i8)(data[1]) + 2)
	case .VariablePort:
		operand = VariablePort{}
	case .ShiftRotate:
		// Bit 1 of the first byte chooses between register 1 and the constant 1.
		if data[0] & 0b10 != 0 {
			operand = (RegisterId)(registers[1].code)
		} else {
			operand = (Immediate8)(1)
		}
	case .Repeat:
		operand = get_repeat_op(data[1])
		processed^ += 1
	}
	return operand
}
|
|
|
|
// get_operand_string renders an operand as assembly text. `is_word` selects
// between the word-sized and byte-sized register name; it has no effect on
// other operand kinds. An unused operand renders as an empty string.
get_operand_string :: proc(operand: Operand, is_word: bool) -> string {
	switch payload in operand {
	case None:
		return ""
	case RegisterId:
		if is_word {
			return registers[payload].fullname
		}
		return registers[payload].bytename
	case Immediate8:
		return fmt.aprintf("%d", payload)
	case Immediate16:
		return fmt.aprintf("%d", payload)
	case MemoryAddr:
		return get_memory_string(payload)
	case DirectAddress:
		return fmt.aprintf("[%d]", payload)
	case SegmentRegister:
		return segment_registers[payload].fullname
	case Jump:
		// Negative offsets already carry their own sign.
		sign := ""
		if payload >= 0 {
			sign = "+"
		}
		return fmt.aprintf("$%s%d", sign, payload)
	case VariablePort:
		return variable_port.fullname
	case Repeat:
		return (string)(payload)
	}
	return ""
}
|
|
|
|
// main reads the binary file named by the first command-line argument and
// prints its contents as 16-bit assembly, one instruction per line, each
// followed by ";;" and the instruction's bytes in binary.
//
// NOTE(review): only the first 1024 bytes of the file are read and decoded.
main :: proc() {
	if len(os.args) < 2 {
		fmt.eprintln("ERROR: expected the path of a binary file to decode")
		os.exit(1)
	}

	f, err := os.open(os.args[1])
	if err != os.ERROR_NONE {
		fmt.eprintln("ERROR:", err)
		os.exit(1)
	}
	defer os.close(f)

	data := make([]u8, 1024)
	bytes_read, err2 := os.read(f, data)
	if err2 != nil {
		fmt.eprintln("ERROR:", err2)
		os.exit(1)
	}

	// Debug switch: collect "unknown instruction" lines and print them after the loop.
	print_at_end := false
	idx := 0
	// Prefix state carried over to the next decoded instruction.
	has_lock: bool
	has_segment: Maybe(Register)
	// First three characters of the previously printed line; used to put a
	// blank line after a run of lines that start the same way.
	last_opname: [3]byte
	repeating_op_count := 0
	instruction_builder := strings.builder_make()
	instruction_list := make([dynamic]string, 0, 512)

	fmt.println("bits 16")
	for idx < bytes_read {
		processed := 1
		curr_byte := data[idx]

		inst, ok := try_find_instruction(curr_byte)
		if !ok {
			txt := "unknown instruction"
			if print_at_end {
				line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
				append(&instruction_list, line)
			} else {
				fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
			}
			idx += 1
			continue
		}

		// Here we check if the instruction affects the next instruction
		if inst.opname == .LOCK {
			has_lock = true
			idx += 1
			continue
		}
		if inst.opname == .SEGMENT {
			reg := (curr_byte & 0b11000) >> 3
			has_segment = segment_registers[reg]
			idx += 1
			continue
		}

		flip: bool
		if inst.has_flip {
			flip = curr_byte & 2 != 0
		}

		word: bool
		#partial switch inst.word_size {
		case .LastBit:
			word = curr_byte & 1 == 1
		case .FourthBit:
			word = curr_byte & 0b0000_1000 != 0
		case .Always16:
			word = true
		}

		opname: string
		// TODO: Figure out a way to do this in the string builder
		if inst.check_second_encoding {
			opname = strings.to_lower(fmt.aprintf("%s", get_opname(inst.opname, data[idx:])))
			// NOTE: This is a special case because it matches the bit pattern of .TBD5,
			// but the instruction itself is different
			if opname == "not" {
				inst = not_inst
			}
		} else {
			opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
		}

		// The destination is parsed first: an immediate source starts after
		// the bytes the destination consumed.
		dst_opr := parse_operand(inst, inst.dst, data[idx:], &processed, word, has_segment)
		src_opr := parse_operand(inst, inst.src, data[idx:], &processed, word, has_segment)

		// An explicit "word"/"byte" keyword is needed when the destination is
		// memory and the source does not imply a size.
		_, src_is_imm8 := src_opr.(Immediate8)
		_, src_is_imm16 := src_opr.(Immediate16)
		_, dst_is_mem := dst_opr.(MemoryAddr)
		_, dst_is_direct := dst_opr.(DirectAddress)
		shiftrot := inst.src == .ShiftRotate
		size_string := ""
		if (dst_is_mem || dst_is_direct) && (src_is_imm8 || src_is_imm16 || shiftrot) {
			size_string = word ? "word " : "byte "
		}

		// The shift/rotate count register is always the byte register CL, even
		// when the shifted operand is word-sized, so it must not be printed as "cx".
		dst_word, src_word := word, word
		if shiftrot {
			src_word = false
		}

		if flip {
			src_opr, dst_opr = dst_opr, src_opr
			src_word, dst_word = dst_word, src_word
		}

		dst_str := get_operand_string(dst_opr, dst_word)
		src_str := get_operand_string(src_opr, src_word)

		full_inst: string
		if dst_str == "" {
			// Single-operand form: a memory operand needs an explicit size
			// unless the instruction is always 16-bit.
			_, src_is_mem := src_opr.(MemoryAddr)
			_, src_is_direct := src_opr.(DirectAddress)
			if (src_is_mem || src_is_direct) && inst.word_size != .Always16 {
				size_string = word ? "word " : "byte "
			}
			full_inst = fmt.aprintf("%s %s%s", opname, size_string, src_str)
		} else if opname == "mov" {
			// NOTE: I don't know why this is the case, but only the move has the word/byte
			// keyword next to the immediate, but other instructions have it on the memory address
			full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str)
		} else {
			full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str)
		}

		processed += inst.consume_extra_bytes

		lock_string: string
		if has_lock {
			lock_string = "lock "
		}
		fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")

		// Dump only bytes that were actually read, so a truncated final
		// instruction cannot index past the input.
		dump_end := min(idx + processed, bytes_read)
		for i in idx..<dump_end {
			fmt.sbprintf(&instruction_builder, " %08b", data[i])
		}

		op2 := strings.to_string(instruction_builder)
		if op2[0:3] != string(last_opname[:]) {
			if repeating_op_count > 1 {
				fmt.println()
			}
			repeating_op_count = 0
		} else {
			repeating_op_count += 1
		}
		copy(last_opname[:], op2[0:3])
		fmt.println(op2)

		idx += processed
		strings.builder_reset(&instruction_builder)
		has_lock = false
		has_segment = nil
	}

	if print_at_end {
		for line in instruction_list {
			if !strings.has_prefix(line, string(last_opname[:])) {
				fmt.println()
			}
			copy(last_opname[:], line[0:3])
			fmt.println(line)
		}
	}
}
|