Major refactor to define both operands so either can be empty

This commit is contained in:
Joseph Ferano 2025-03-12 11:07:40 +07:00
parent 08ef7d901b
commit 8967be44a0
3 changed files with 579 additions and 310 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
/asm_files/*.bin
/8086_family_Users_Manual_1_.pdf
/decoder8086
/performance-aware

View File

@ -17,57 +17,6 @@ Register :: struct {
code: u8,
}
OpName :: enum {
TBD,
MOV,
PUSH,
POP,
XCHG,
IN,
OUT,
XLAT,
LEA,
LDS,
LES,
ADD,
ADC,
SUB,
CMP,
JMP,
JNZ,
JNGE,
JE,
JZ,
JL,
JLE,
JNG,
JB,
JNAE,
JP,
JPE,
JNA,
JBE,
JO,
JS,
JNE,
JNL,
JGE,
JNLE,
JG,
JNB,
JAE,
JNBE,
JA,
JNP,
JPO,
JNO,
JNS,
LOOP,
LOOPZ,
LOOPNZ,
JCXZ,
}
registers := [8]Register {
{fullname = "ax", bytename = "al", code = 0b000},
{fullname = "cx", bytename = "cl", code = 0b001},
@ -86,17 +35,13 @@ segment_registers := [4]Register {
{fullname = "ds", code = 0b011},
}
variable_port := registers[2]
RegInfo :: struct {
in_first_byte: bool,
shift_offset: u8,
}
OpCodeId :: enum {
None,
First,
Second,
}
LastBit :: struct{}
FourthBit :: struct{}
Force :: struct{}
@ -108,6 +53,15 @@ WordSize :: union {
Force,
}
// WordSize2 names the rule the decode loop uses to decide whether an
// instruction works on 16-bit (word) or 8-bit (byte) data.
WordSize2 :: enum {
// No size rule; the decode loop leaves its word flag at false.
None,
// The word flag is the lowest bit of the first instruction byte.
LastBit,
// The word flag is bit 3 (0b0000_1000) of the first instruction byte.
FourthBit,
// NOTE(review): presumably "always byte sized"; the decode loop has no explicit
// case for it, so the word flag simply stays false.
Always8,
// The instruction always works on 16-bit data; the word flag is forced to true.
Always16,
// NOTE(review): used by the fixed-port IN/OUT table entries; the decode loop has
// no explicit case for it, so it currently behaves like a byte-sized operand.
Unsigned8,
}
None :: struct {}
Disp8 :: i8
@ -145,15 +99,44 @@ MemoryAddr :: struct {
addr_id: u8,
displacement: Displacement
}
Accumulator :: distinct i16
DirectAddress :: distinct i16
SegmentRegister :: distinct i8
OperandType :: union {
Jump :: distinct i8
VariablePort :: struct {}
Repeat :: string
Operand :: union {
None,
RegisterId,
Immediate8,
Immediate16,
MemoryAddr,
Accumulator,
DirectAddress,
SegmentRegister,
Jump,
VariablePort,
Repeat,
}
// OperandInfo describes, per instruction table entry, what kind of operand sits
// in the source or destination slot. parse_operand switches on this value to
// decode the actual Operand from the instruction bytes.
OperandInfo :: enum {
// The slot is unused; parse_operand yields an empty (None) operand.
None,
// A general register whose 3-bit code is located via the entry's reg_info.
Register,
// A segment register whose code is located via the entry's reg_info.
SegmentRegister,
// A register or memory operand decoded from the mod and r/m fields of the second byte.
RegisterMemory,
// An 8-bit or 16-bit immediate read at the current "processed" offset.
Immediate,
// The accumulator, i.e. entry 0 of the registers table.
Accumulator,
// A 16-bit direct address read from the bytes following the first byte.
DirectAddress,
// A signed 8-bit jump displacement stored in the second byte.
Jump,
// The variable I/O port operand (rendered through variable_port).
VariablePort,
// Shift/rotate count: entry 1 of the registers table or the immediate 1,
// selected by bit 1 of the first byte.
ShiftRotate,
// The string operation named by the byte that follows a repeat prefix.
Repeat,
}
// RegisterEncodingBits says where the 3-bit register code of an instruction is
// stored, so that parse_operand knows which byte to read and how far to shift.
RegisterEncodingBits :: enum {
// No register field; parse_operand falls back to register code 0.
None,
// Bits 2..0 of the first instruction byte.
FirstByteLast3,
// Bits 5..3 of the second instruction byte.
SecondByteMiddle3,
// Bits 2..0 of the second instruction byte.
SecondByteLast3,
// Bits 5..3 of the first instruction byte.
FirstByteMiddle3,
}
InstructionInfo :: struct {
@ -161,129 +144,17 @@ InstructionInfo :: struct {
encoding: u8,
opname: OpName,
desc: string,
opcode_id: OpCodeId,
word_size: WordSize,
reg_info: Maybe(RegInfo),
has_data: bool,
has_address: bool,
uses_accumulator: bool,
has_segreg: bool,
src: OperandInfo,
dst: OperandInfo,
word_size: WordSize2,
reg_info: RegisterEncodingBits,
has_flip: bool,
has_sign_extension: bool,
is_jump: bool,
is_unary: bool,
check_second_encoding: bool,
consume_extra_bytes: int,
shift_rotate_flag: bool,
}
// TODO: Maybe we can get rid of it since I don't have to specify the shift_offset,
// not like it changes a lot
reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 }
reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 }
reg_first_middle := RegInfo{ in_first_byte = true, shift_offset = 3 }
instructions := [?]InstructionInfo {
{ opname = .MOV, desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000,
reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
{ opname = .MOV, desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110,
has_data = true, has_address = true, word_size = LastBit{}, },
{ opname = .MOV, desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000,
reg_info = reg_first_last, has_data = true, word_size = FourthBit{} },
{ opname = .MOV, desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000,
has_flip = true, word_size = LastBit{}, uses_accumulator = true },
{ opname = .MOV, desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010,
has_flip = true, word_size = LastBit{}, uses_accumulator = true },
{ opname = .MOV, desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110,
has_segreg = true, has_address = true, word_size = None{} },
{ opname = .MOV, desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100,
has_segreg = true, has_address = true, word_size = None{} },
{ opname = .PUSH, desc = "", mask = 0b11111111, encoding = 0b11111111,
has_address = true, word_size = None{}, is_unary = true },
{ opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000,
reg_info = reg_first_last, word_size = Force{}, is_unary = true },
{ opname = .PUSH, desc = "", mask = 0b11100111, encoding = 0b00000110,
has_segreg = true, reg_info = reg_first_middle, word_size = Force{}, is_unary = true },
{ opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111,
has_address = true, word_size = None{}, is_unary = true },
{ opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000,
reg_info = reg_first_last, word_size = Force{}, is_unary = true },
{ opname = .POP, desc = "", mask = 0b11100111, encoding = 0b00000111,
has_segreg = true, reg_info = reg_first_middle, word_size = None{}, is_unary = true },
{ opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110,
reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true},
{ opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000,
reg_info = reg_first_last, uses_accumulator = true, word_size = Force{}, },
{ opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11100100,
has_data = true, word_size = LastBit{}, },
{ opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11101100,
word_size = LastBit{}, },
{ opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11100110,
has_data = true, word_size = FourthBit{}, },
{ opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11101110,
word_size = LastBit{}, },
{ opname = .XLAT, desc = "", mask = 0b11111111, encoding = 0b11010111,
},
{ opname = .LEA, desc = "", mask = 0b11111111, encoding = 0b10001101,
has_address = true },
{ opname = .LDS, desc = "", mask = 0b11111111, encoding = 0b11000101,
has_address = true },
{ opname = .LES, desc = "", mask = 0b11111111, encoding = 0b11000100,
has_address = true },
{ opname = .TBD, desc = "Reg/memory with register to either", mask = 0b11000100, encoding = 0b00000000,
opcode_id = .First, reg_info = reg_second_middle, has_address = true, word_size = LastBit{}, has_flip = true },
{ opname = .TBD, desc = "Immediate to register/memory", mask = 0b11111100, encoding = 0b10000000,
opcode_id = .Second, has_data = true, has_address = true,
word_size = LastBit{}, has_sign_extension = true },
{ opname = .TBD, desc = "Immediate to accumulator", mask = 0b11000100, encoding = 0b00000100,
word_size = LastBit{}, has_data = true },
{ opname = .JE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
{ opname = .JZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110100, is_jump = true},
{ opname = .JL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
{ opname = .JNGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111100, is_jump = true},
{ opname = .JLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
{ opname = .JNG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111110, is_jump = true},
{ opname = .JB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
{ opname = .JNAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110010, is_jump = true},
{ opname = .JBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
{ opname = .JNA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110110, is_jump = true},
{ opname = .JP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
{ opname = .JPE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111010, is_jump = true},
{ opname = .JO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110000, is_jump = true},
{ opname = .JS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111000, is_jump = true},
{ opname = .JNE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
{ opname = .JNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110101, is_jump = true},
{ opname = .JNL, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
{ opname = .JGE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111101, is_jump = true},
{ opname = .JNLE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
{ opname = .JG, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111111, is_jump = true},
{ opname = .JNB, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
{ opname = .JAE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110011, is_jump = true},
{ opname = .JNBE, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
{ opname = .JA, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110111, is_jump = true},
{ opname = .JNP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
{ opname = .JPO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111011, is_jump = true},
{ opname = .JNO, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01110001, is_jump = true},
{ opname = .JNS, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b01111001, is_jump = true},
{ opname = .LOOP, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100010, is_jump = true},
{ opname = .LOOPZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100001, is_jump = true},
{ opname = .LOOPNZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100000, is_jump = true},
{ opname = .JCXZ, desc = "Jump on not zero", mask = 0b11111111, encoding = 0b11100011, is_jump = true},
}
inst_map := make(map[u8]InstructionInfo)
RIGHT_ALIGN_AMOUNT := 35
calculate_effective_address :: proc(r_m: u8) -> string {
@ -327,9 +198,11 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
return text
}
get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string {
get_operand_string :: proc(operand: Operand, is_word: bool) -> string {
string_val: string
switch val in mem_type {
switch val in operand {
case None:
string_val = ""
case RegisterId:
string_val = is_word ? registers[val].fullname : registers[val].bytename
case Immediate8:
@ -338,10 +211,16 @@ get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string {
string_val = fmt.aprintf("%d", val)
case MemoryAddr:
string_val = get_memory_string(val)
case Accumulator:
case DirectAddress:
string_val = fmt.aprintf("[%d]", val)
case SegmentRegister:
string_val = segment_registers[val].fullname
case Jump:
string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val)
case VariablePort:
string_val = variable_port.fullname
case Repeat:
string_val = (string)(val)
}
return string_val
}
@ -380,9 +259,22 @@ get_displacement_string :: proc(displacement: Displacement) -> string {
return disp
}
// get_repeat_op turns the byte that follows a repeat prefix into the text of
// the string instruction it encodes, e.g. "movsb" or "scasw".
//
// Bits 3..1 of `data` pick the mnemonic and bit 0 picks the size suffix
// ("w" when set, "b" otherwise). An unrecognised bit pattern leaves the
// mnemonic empty, so only the suffix is returned. The result is freshly
// allocated by fmt.aprintf.
get_repeat_op :: proc(data: u8) -> Repeat {
    mnemonic := ""
    switch (data >> 1) & 0b111 {
    case 0b010:
        mnemonic = "movs"
    case 0b011:
        mnemonic = "cmps"
    case 0b101:
        mnemonic = "stos"
    case 0b110:
        mnemonic = "lods"
    case 0b111:
        mnemonic = "scas"
    }

    suffix := "b"
    if data & 0b1 != 0 {
        suffix = "w"
    }

    return Repeat(fmt.aprintf("%s%s", mnemonic, suffix))
}
try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
for inst in instructions {
// fmt.print(inst.encoding, ",")
if inst.encoding == (b & inst.mask) {
return inst, true
}
@ -390,19 +282,149 @@ try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
return InstructionInfo{}, false
}
// get_opname resolves the mnemonic of a "group" opcode, i.e. a table entry
// whose real operation is chosen by a 3-bit selector instead of by the opcode
// byte as a whole.
//
// opname: the placeholder (.TBD1 .. .TBD6) taken from the instruction table.
// data:   the instruction bytes, starting at the opcode byte.
//
// For .TBD1 and .TBD3 the selector is bits 5..3 of the opcode byte (data[0]);
// for every other placeholder it is bits 5..3 of the following byte (data[1]).
// Returns the lower-case mnemonic, or "" when the selector value is not
// assigned in that group or when `data` is too short to contain the selector.
get_opname :: proc(opname: OpName, data: []u8) -> string {
    selector_in_opcode := opname == .TBD1 || opname == .TBD3
    needed := selector_in_opcode ? 1 : 2
    if len(data) < needed {
        // Truncated input: report "no name" instead of indexing out of bounds.
        return ""
    }

    selector_byte := selector_in_opcode ? data[0] : data[1]
    selector := (selector_byte >> 3) & 0b111

    name: string
    if opname == .TBD2 {
        // 0xFE/0xFF group.
        switch selector {
        case 0b000: name = "inc"
        case 0b001: name = "dec"
        case 0b010: name = "call"
        case 0b011: name = "call"
        case 0b100: name = "jmp"
        case 0b101: name = "jmp"
        case 0b110: name = "push"
        }
    } else if opname == .TBD5 {
        // 0xF6/0xF7 group. Selector 0b001 is not assigned here; it used to be
        // reported as "dec", which belongs to the 0xFE/0xFF group above.
        switch selector {
        case 0b000: name = "test"
        case 0b010: name = "not"
        case 0b011: name = "neg"
        case 0b100: name = "mul"
        case 0b101: name = "imul"
        case 0b110: name = "div"
        case 0b111: name = "idiv"
        }
    } else if opname == .TBD6 {
        // Shift/rotate group.
        switch selector {
        case 0b000: name = "rol"
        case 0b001: name = "ror"
        case 0b010: name = "rcl"
        case 0b011: name = "rcr"
        case 0b100: name = "shl"
        case 0b101: name = "shr"
        case 0b111: name = "sar"
        }
    } else {
        // Arithmetic/logic group shared by the remaining placeholders.
        switch selector {
        case 0b000: name = "add"
        case 0b001: name = "or"
        case 0b010: name = "adc"
        case 0b011: name = "sbb"
        case 0b100: name = "and"
        case 0b101: name = "sub"
        case 0b110: name = "xor"
        case 0b111: name = "cmp"
        }
    }
    return name
}
// read_register_field extracts the 3-bit register field stored at `location`.
// data[0] is the opcode byte and data[1] the byte after it. When no location is
// configured (.None) the field value is 0.
@(private="file")
read_register_field :: proc(location: RegisterEncodingBits, data: []u8) -> u8 {
    field: u8
    switch location {
    case .None:
    case .FirstByteLast3:
        field = data[0] & 0b111
    case .FirstByteMiddle3:
        field = (data[0] >> 3) & 0b111
    case .SecondByteMiddle3:
        field = (data[1] >> 3) & 0b111
    case .SecondByteLast3:
        field = data[1] & 0b111
    }
    return field
}

// parse_operand decodes one operand (source or destination) of an instruction.
//
// inst:      table entry of the instruction being decoded.
// opinfo:    kind of operand expected in this slot (inst.src or inst.dst).
// data:      instruction bytes, starting at the opcode byte.
// processed: running count of bytes consumed by this instruction; it is
//            advanced by however many bytes the operand occupies and is also
//            used as the read offset for immediates.
// word:      true when the instruction works on 16-bit data.
//
// Returns the decoded operand, or None{} when the slot is unused.
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool) -> Operand {
    operand: Operand = None{}
    switch opinfo {
    case .None:
    case .Register:
        reg := read_register_field(inst.reg_info, data)
        operand = (RegisterId)(registers[reg].code)
    case .SegmentRegister:
        // A segment register code is only 2 bits wide and segment_registers has
        // 4 entries, so drop the third bit of the field rather than risk an
        // out-of-range index.
        seg := read_register_field(inst.reg_info, data) & 0b11
        operand = (SegmentRegister)(segment_registers[seg].code)
    case .RegisterMemory:
        mod := data[1] >> 6
        rm := data[1] & 0b111
        // The mod/rm byte itself.
        processed^ += 1
        switch mod {
        case 0:
            if rm == 0b110 {
                // mod 00 with r/m 110 is a 16-bit direct address.
                operand = (DirectAddress)(get_i16(data[2:]))
                processed^ += 2
            } else {
                operand = MemoryAddr{ addr_id = rm , displacement = None{} }
            }
        case 1:
            // 8-bit signed displacement.
            operand = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
            processed^ += 1
        case 2:
            // 16-bit displacement.
            operand = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
            processed^ += 2
        case 3:
            // Register-to-register form: r/m names a register.
            operand = (RegisterId)(registers[rm].code)
        }
    case .Immediate:
        // The immediate starts right after everything consumed so far.
        at := processed^
        wide := word
        if inst.has_sign_extension && data[0] & 0b0000_0010 != 0 {
            // With bit 1 of the opcode byte set only one immediate byte is stored.
            wide = false
        }
        if wide {
            operand = (Immediate16)(get_i16(data[at:]))
            processed^ += 2
        } else {
            operand = (Immediate8)(data[at])
            processed^ += 1
        }
    case .Accumulator:
        operand = (RegisterId)(registers[0].code)
    case .DirectAddress:
        operand = (DirectAddress)(get_i16(data[1:]))
        processed^ += 2
    case .Jump:
        processed^ += 1
        // NOTE: In order to mimic the label offset, you have to take the value you got and add two
        // NOTE(review): the addition happens in i8, so displacements of 126 and 127
        // cannot be represented after the +2; widening Jump would be needed to fix that.
        operand = (Jump)((i8)(data[1]) + 2)
    case .VariablePort:
        operand = VariablePort{}
    case .ShiftRotate:
        // Bit 1 of the opcode byte selects a count held in register 1 of the
        // registers table; otherwise the count is the constant 1.
        // NOTE(review): the register is later printed using the instruction's word
        // flag, so a word-sized shift may show the 16-bit register name; confirm.
        v_flag := data[0] & 0b10 != 0
        operand = v_flag ? (RegisterId)(registers[1].code) : (Immediate8)(1)
    case .Repeat:
        operand = get_repeat_op(data[1])
        processed^ += 1
    }
    return operand
}
main :: proc() {
// f,err := os.open(len(os.args) > 1 ? os.args[1] : "./asm_files/01-02-39.bin")
f,err := os.open(os.args[1])
if err != os.ERROR_NONE {
fmt.eprintln("ERROR:", err)
@ -417,10 +439,6 @@ main :: proc() {
os.exit(1)
}
for inst in instructions {
inst_map[inst.encoding] = inst
}
if false {
os.exit(0)
}
@ -433,8 +451,10 @@ main :: proc() {
idx := 0
added_label := false
line_count := 0
// last_opname: string
has_lock: bool
has_segment: bool
last_opname: [3]byte
repeating_op_count := 0
instruction_builder := strings.builder_make()
instruction_list := make([dynamic]string, 512)
fmt.println("bits 16")
@ -442,150 +462,121 @@ main :: proc() {
processed := 1
curr_byte := data[idx]
instruction, ok := try_find_instruction(curr_byte)
inst, ok := try_find_instruction(curr_byte)
if !ok {
txt := "unknown instruction"
line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
instruction_list[line_count] = line
line_count += 1
if print_at_end {
line := fmt.aprintf("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
instruction_list[line_count] = line
line_count += 1
} else {
fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
}
idx += 1
continue
}
lhs2: OperandType
rhs2: OperandType
is_word: bool
is_immediate := false
flip_dst := false
has_memory_addr := false
has_immediate := false
rm: u8
mod: u8
reg: u8
// Here we check if the instruction affects the next instruction
if inst.opname == .LOCK {
has_lock = true
idx += 1
continue
} else if inst.opname == .SEGMENT {
has_segment = true
idx += 1
continue
}
has_segment = false
if instruction.has_flip {
flip_dst = curr_byte & 2 != 0
src_opr: Operand
dst_opr: Operand
word: bool
flip: bool
op: Operand
if inst.has_flip {
flip = curr_byte & 2 != 0
}
switch val in instruction.word_size {
case LastBit: is_word = curr_byte & 1 == 1
case FourthBit: is_word = curr_byte & 0b0000_1000 != 0
case Force: is_word = true
case None:
#partial switch inst.word_size {
case .LastBit: word = curr_byte & 1 == 1
case .FourthBit: word = curr_byte & 0b0000_1000 != 0
case .Always16: word = true
}
if reg_info, ok := instruction.reg_info.(RegInfo); ok {
b := reg_info.in_first_byte ? data[idx] : data[idx+1]
reg = (b >> reg_info.shift_offset) & 0b111
}
data_idx := idx + 1
if instruction.has_address {
mod = data[idx+1] >> 6
rm = data[idx+1] & 0b00000111
data_idx += 1 + ((int)(mod) % 3)
processed += 1 + ((int)(mod) % 3)
if mod == 0 {
if rm == 0b110 {
lhs2 = (Accumulator)(get_i16(data[idx+2:]))
processed += 2
data_idx += 2
} else {
lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} }
}
// NOTE: This also works when it's an Accumulator apparently
has_memory_addr = true
} else if mod == 1 {
lhs2 = MemoryAddr{ addr_id = rm , displacement = (i8)(data[idx+2]) }
has_memory_addr = true
} else if mod == 2 {
lhs2 = MemoryAddr{ addr_id = rm , displacement = get_i16(data[idx+2:]) }
has_memory_addr = true
} else if mod == 3 {
lhs2 = (RegisterId)(registers[rm].code)
}
} else if instruction.has_segreg {
lhs2 = (SegmentRegister)(segment_registers[reg].code)
} else if instruction.uses_accumulator {
lhs2 = (RegisterId)(registers[0].code)
} else {
lhs2 = (RegisterId)(registers[reg].code)
}
if instruction.has_data {
word_signed := is_word
if instruction.has_sign_extension {
word_signed = is_word && curr_byte & 0b0000_0010 == 0
}
processed += word_signed ? 2 : 1
rhs2 = (OperandType)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
has_immediate = true
} else if instruction.uses_accumulator {
if _, ok := instruction.word_size.(LastBit); ok {
processed += is_word ? 2 : 1
rhs2 = (OperandType)(is_word ? (Accumulator)(get_i16(data[data_idx:])) : (Accumulator)(data[data_idx]))
} else {
rhs2 = (RegisterId)(reg)
}
} else {
rhs2 = (RegisterId)(reg)
}
if flip_dst {
lhs2, rhs2 = rhs2, lhs2
}
lhs := get_memory_type_string(lhs2, is_word)
rhs := get_memory_type_string(rhs2, is_word)
size_string := has_immediate && has_memory_addr ? is_word ? "word " : "byte " : ""
full_inst: string
opname: string
if instruction.opname == .TBD {
if instruction.opcode_id == .Second {
opname = strings.to_lower(fmt.aprintf("%s", get_opname(data[idx+1])))
} else {
opname = strings.to_lower(fmt.aprintf("%s", get_opname(curr_byte)))
// TODO: Figure out a way to do this in the string builder
if inst.check_second_encoding {
opname = strings.to_lower(fmt.aprintf("%s", get_opname(inst.opname, data[idx:])))
// NOTE: This is a special case because it matches the bit pattern of .TBD5,
// but the instruction itself is different
if opname == "not" {
inst = not_inst
}
} else {
opname = strings.to_lower(fmt.aprintf("%s", instruction.opname))
opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
}
if instruction.is_jump {
// NOTE: In order to mimic the label offset, you have to take the value you got and add two
value := (i8)(data[idx+1]) + 2
full_inst = fmt.aprintf("%s $%s%d ; %d", strings.to_lower(opname), value >= 0 ? "+" : "", value, value - 2)
processed += 1
} else if instruction.is_unary {
if instruction.has_address {
size_string = "word "
dst_opr = parse_operand(inst, inst.dst, data[idx:], &processed, word)
src_opr = parse_operand(inst, inst.src, data[idx:], &processed, word)
// TODO: This is ugly as hell
_,ok_1 := src_opr.(Immediate8)
_,ok_2 := src_opr.(Immediate16)
_,ok_3 := dst_opr.(MemoryAddr);
_,ok_4 := dst_opr.(DirectAddress);
shiftrot := inst.src == .ShiftRotate
size_string := ""
if ((ok_1 || ok_2) && (ok_3 || ok_4)) || ((ok_3 || ok_4) && shiftrot) {
size_string = word ? "word " : "byte "
}
if flip {
src_opr, dst_opr = dst_opr, src_opr
}
dst_str := get_operand_string(dst_opr, word)
src_str := get_operand_string(src_opr, word)
full_inst: string
if dst_str == "" {
_,ok_1 := src_opr.(MemoryAddr);
_,ok_2 := src_opr.(DirectAddress);
if (ok_1 || ok_2) && inst.word_size != .Always16 {
size_string = word ? "word " : "byte "
}
full_inst = fmt.aprintf("%s %s%s", opname, size_string, lhs)
full_inst = fmt.aprintf("%s %s%s", opname, size_string, src_str)
} else {
opname = strings.to_lower(opname)
// NOTE: I don't know why this is the case, but only the move has the word/byte
// keyword next to the immediate, but other instructions have it on the memory address
if opname == "mov" {
full_inst = fmt.aprintf("%s %s, %s%s", opname, lhs, size_string, rhs)
full_inst = fmt.aprintf("%s %s, %s%s", opname, dst_str, size_string, src_str)
} else {
full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, lhs, rhs)
full_inst = fmt.aprintf("%s %s%s, %s", opname, size_string, dst_str, src_str)
}
}
processed += inst.consume_extra_bytes
// fmt.sbprintf(&instruction_builder, "%s%s%s %*[2]s", lock_string, seg_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
fmt.sbprintf(&instruction_builder, "%s %*[1]s", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
for i in 0..<processed {
fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
}
if print_at_end {
instruction_list[line_count] = strings.clone(strings.to_string(instruction_builder))
} else {
op := strings.to_string(instruction_builder)
if op[0:3] != string(last_opname[:]) {
op2 := strings.to_string(instruction_builder)
if op2[0:3] != string(last_opname[:]) {
if repeating_op_count > 1 {
fmt.println()
}
copy(last_opname[:], op[0:3])
fmt.println(op)
repeating_op_count = 0
} else {
repeating_op_count += 1
}
copy(last_opname[:], op2[0:3])
fmt.println(op2)
idx += processed
line_count += 1
strings.builder_reset(&instruction_builder)
}
if print_at_end {

277
instructions.odin Normal file
View File

@ -0,0 +1,277 @@
package decoder_8086
// OpName identifies the operation of an instruction table entry. For most
// entries the decoder prints the lower-cased member name as the mnemonic.
OpName :: enum {
// Placeholders for opcodes that share one encoding: the real mnemonic is
// resolved by get_opname from a 3-bit selector in the first or second byte.
TBD1,
TBD2,
TBD3,
TBD4,
TBD5,
TBD6,
// Data transfer.
MOV,
PUSH,
POP,
XCHG,
IN,
OUT,
XLAT,
LEA,
LDS,
LES,
LAHF,
SAHF,
PUSHF,
POPF,
// Arithmetic.
ADD,
ADC,
INC,
AAA,
DAA,
SUB,
SBB,
DEC,
NEG,
CMP,
AAS,
DAS,
AAM,
DIV,
IDIV,
AAD,
CBW,
CWD,
// Logic.
NOT,
TEST,
// String prefix and control transfer.
REP,
RET,
INT,
INT3,
INTO,
IRET,
// Processor control.
CLC,
CMC,
STC,
CLD,
STD,
CLI,
STI,
HLT,
WAIT,
ESC,
// Prefix bytes: the decode loop records them and moves on to the next byte
// instead of printing them as instructions of their own.
LOCK,
SEGMENT,
// Jumps and loops.
JMP,
JNZ,
JNGE,
JE,
JZ,
JL,
JLE,
JNG,
JB,
JNAE,
JP,
JPE,
JNA,
JBE,
JO,
JS,
JNE,
JNL,
JGE,
JNLE,
JG,
JNB,
JAE,
JNBE,
JA,
JNP,
JPO,
JNO,
JNS,
LOOP,
LOOPZ,
LOOPNZ,
JCXZ,
}
// Table entry for NOT. NOT shares its opcode pattern (0b1111011w) with the
// .TBD5 group entry but takes a single register/memory operand, so it is kept
// outside the `instructions` table and used in place of the group entry.
not_inst := InstructionInfo {
    opname    = .NOT,
    desc      = "",
    mask      = 0b11111110,
    encoding  = 0b11110110,
    word_size = .LastBit,
    src       = .RegisterMemory,
}
// Decoding table for the first byte of every supported instruction.
//
// An entry matches a byte `b` when `b & mask == encoding`. Entries are tried in
// order and the first match wins, so:
//   * the order of the entries is significant and must be preserved, and
//   * of two entries with the same mask/encoding (mnemonic aliases such as
//     JE/JZ) only the first one is ever selected.
//
// `dst` is decoded before `src`; an Immediate operand is read at the offset
// reached after the operands decoded before it, so a register/memory operand
// must come first whenever both appear in one instruction.
instructions := [?]InstructionInfo {
    // --- Group opcodes: the mnemonic is resolved by get_opname -------------
    { opname = .TBD1, desc = "Immediate to accumulator",
      mask = 0b11000110, encoding = 0b00000100, check_second_encoding = true,
      dst = .Accumulator, src = .Immediate,
      word_size = .LastBit, },
    { opname = .TBD2, desc = "", check_second_encoding = true,
      mask = 0b11111110, encoding = 0b11111110,
      src = .RegisterMemory,
      word_size = .LastBit, },
    { opname = .TBD3, desc = "", check_second_encoding = true,
      mask = 0b11000100, encoding = 0b00000000,
      dst = .RegisterMemory, src = .Register,
      word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true },
    { opname = .TBD4, desc = "", check_second_encoding = true,
      mask = 0b11111100, encoding = 0b10000000,
      dst = .RegisterMemory, src = .Immediate,
      word_size = .LastBit, has_sign_extension = true },
    // The register/memory operand has to be the destination: it is encoded
    // before the immediate, and decoding the immediate first would read the
    // mod/rm byte as data.
    { opname = .TBD5, desc = "", check_second_encoding = true,
      mask = 0b11111110, encoding = 0b11110110,
      dst = .RegisterMemory, src = .Immediate, word_size = .LastBit, },
    { opname = .TBD6, desc = "", check_second_encoding = true,
      mask = 0b11111100, encoding = 0b11010000,
      dst = .RegisterMemory, src = .ShiftRotate, word_size = .LastBit, },

    // --- Data transfer ------------------------------------------------------
    { opname = .MOV, desc = "Register/memory to/from register",
      mask = 0b11111100, encoding = 0b10001000,
      dst = .RegisterMemory, src = .Register,
      word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true },
    { opname = .MOV, desc = "Immediate to register/memory",
      mask = 0b11111110, encoding = 0b11000110,
      dst = .RegisterMemory, src = .Immediate,
      word_size = .LastBit, },
    { opname = .MOV, desc = "Immediate to register",
      mask = 0b11110000, encoding = 0b10110000,
      dst = .Register, src = .Immediate,
      word_size = .FourthBit, reg_info = .FirstByteLast3 },
    { opname = .MOV, desc = "Memory to accumulator",
      mask = 0b11111110, encoding = 0b10100000,
      dst = .Accumulator, src = .DirectAddress,
      word_size = .LastBit, },
    { opname = .MOV, desc = "Accumulator to memory",
      mask = 0b11111110, encoding = 0b10100010,
      dst = .DirectAddress, src = .Accumulator,
      word_size = .LastBit, },
    { opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000,
      src = .Register, reg_info = .FirstByteLast3,
      word_size = .Always16, },
    { opname = .PUSH, desc = "", mask = 0b11100111, encoding = 0b00000110,
      src = .SegmentRegister, reg_info = .FirstByteMiddle3,
      word_size = .Always16, },
    // The low bit of this opcode is always 1, so .LastBit marks the operand as
    // a word; without a word size the memory form was printed as "pop byte".
    { opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111,
      src = .RegisterMemory, word_size = .LastBit, },
    { opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000,
      src = .Register, reg_info = .FirstByteLast3,
      word_size = .Always16, },
    { opname = .POP, desc = "", mask = 0b11100111, encoding = 0b00000111,
      src = .SegmentRegister, reg_info = .FirstByteMiddle3,
      word_size = .Always16, },
    // The low opcode bit is the word flag; it must be read or 16-bit
    // exchanges are printed with byte register names.
    { opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110,
      dst = .RegisterMemory, src = .Register,
      word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true },
    // No has_flip here: the low three bits of this opcode are the register
    // code, so bit 1 is not a direction flag.
    { opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000,
      dst = .Accumulator, src = .Register,
      reg_info = .FirstByteLast3, word_size = .Always16 },
    { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11100100,
      dst = .Accumulator, src = .Immediate,
      // TODO: Everything works just fine, but the problem here is that if you want it to
      // show up as an unsigned int, then we have to change the types because the number
      // 200, for instance, will show up as a negative, we would have to create an unsigned
      // variant of the Immediate value. Maybe we can have the value and the sign as a struct
      word_size = .Unsigned8, },
    { opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11101100,
      dst = .Accumulator, src = .VariablePort,
      word_size = .LastBit, },
    { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11100110,
      dst = .Immediate, src = .Accumulator,
      word_size = .Unsigned8, },
    { opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11101110,
      dst = .VariablePort, src = .Accumulator,
      word_size = .LastBit, },
    { opname = .XLAT, desc = "", mask = 0b11111111, encoding = 0b11010111,},
    { opname = .LEA, desc = "", mask = 0b11111111, encoding = 0b10001101,
      dst = .Register, src = .RegisterMemory,
      reg_info = .SecondByteMiddle3, word_size = .Always16 },
    { opname = .LDS, desc = "", mask = 0b11111111, encoding = 0b11000101,
      dst = .Register, src = .RegisterMemory,
      reg_info = .SecondByteMiddle3, word_size = .Always16 },
    { opname = .LES, desc = "", mask = 0b11111111, encoding = 0b11000100,
      dst = .Register, src = .RegisterMemory,
      reg_info = .SecondByteMiddle3, word_size = .Always16 },
    { opname = .LAHF, desc = "", mask = 0b11111111, encoding = 0b10011111,},
    { opname = .SAHF, desc = "", mask = 0b11111111, encoding = 0b10011110,},
    { opname = .PUSHF, desc = "", mask = 0b11111111, encoding = 0b10011100,},
    { opname = .POPF, desc = "", mask = 0b11111111, encoding = 0b10011101,},

    // --- Arithmetic ---------------------------------------------------------
    { opname = .INC, desc = "", mask = 0b11111000, encoding = 0b01000000,
      src = .Register, reg_info = .FirstByteLast3, word_size = .Always16 },
    { opname = .AAA, desc = "", mask = 0b11111111, encoding = 0b00110111,},
    { opname = .DAA, desc = "", mask = 0b11111111, encoding = 0b00100111,},
    { opname = .DEC, desc = "", mask = 0b11111000, encoding = 0b01001000,
      src = .Register, reg_info = .FirstByteLast3, word_size = .Always16 },
    { opname = .AAS, desc = "", mask = 0b11111111, encoding = 0b00111111,},
    { opname = .DAS, desc = "", mask = 0b11111111, encoding = 0b00101111,},
    // AAM and AAD are both two-byte instructions (the second byte is
    // 0b00001010), so each has to swallow one extra byte.
    { opname = .AAM, desc = "", mask = 0b11111111, encoding = 0b11010100, consume_extra_bytes = 1 },
    { opname = .AAD, desc = "", mask = 0b11111111, encoding = 0b11010101, consume_extra_bytes = 1 },
    { opname = .CBW, desc = "", mask = 0b11111111, encoding = 0b10011000,},
    { opname = .CWD, desc = "", mask = 0b11111111, encoding = 0b10011001,},

    // --- Logic, string prefix and control transfer ---------------------------
    { opname = .TEST, desc = "", mask = 0b11111100, encoding = 0b10000100,
      dst = .RegisterMemory, src = .Register,
      word_size = .LastBit, reg_info = .SecondByteMiddle3, has_flip = true },
    { opname = .REP, desc = "", mask = 0b11111110, encoding = 0b11110010, src = .Repeat },
    { opname = .RET, desc = "", mask = 0b11111111, encoding = 0b11000011,},
    { opname = .RET, src = .Immediate, word_size = .Always16,
      desc = "", mask = 0b11111111, encoding = 0b11000010,},
    { opname = .INT, src = .Immediate, desc = "", mask = 0b11111111, encoding = 0b11001101,},
    { opname = .INT3, desc = "", mask = 0b11111111, encoding = 0b11001100,},
    { opname = .INTO, desc = "", mask = 0b11111111, encoding = 0b11001110,},
    { opname = .IRET, desc = "", mask = 0b11111111, encoding = 0b11001111,},

    // --- Processor control and prefixes ---------------------------------------
    { opname = .CLC, desc = "", mask = 0b11111111, encoding = 0b11111000,},
    { opname = .CMC, desc = "", mask = 0b11111111, encoding = 0b11110101,},
    { opname = .STC, desc = "", mask = 0b11111111, encoding = 0b11111001,},
    { opname = .CLD, desc = "", mask = 0b11111111, encoding = 0b11111100,},
    { opname = .STD, desc = "", mask = 0b11111111, encoding = 0b11111101,},
    { opname = .CLI, desc = "", mask = 0b11111111, encoding = 0b11111010,},
    { opname = .STI, desc = "", mask = 0b11111111, encoding = 0b11111011,},
    { opname = .HLT, desc = "", mask = 0b11111111, encoding = 0b11110100,},
    { opname = .WAIT, desc = "", mask = 0b11111111, encoding = 0b10011011,},
    // { opname = .ESC, desc = "", mask = 0b11111111, encoding = 0b11111000, dst = },
    { opname = .LOCK, desc = "", mask = 0b11111111, encoding = 0b11110000,},
    { opname = .SEGMENT, desc = "", mask = 0b11100111, encoding = 0b00100110,},

    // --- Conditional jumps and loops ------------------------------------------
    { opname = .JE, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on equal/zero", },
    { opname = .JZ, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on equal/zero", },
    { opname = .JL, mask = 0b11111111, encoding = 0b01111100, src = .Jump, desc = "Jump on less/not greater or equal", },
    { opname = .JNGE, mask = 0b11111111, encoding = 0b01111100, src = .Jump, desc = "Jump on less/not greater or equal", },
    { opname = .JLE, mask = 0b11111111, encoding = 0b01111110, src = .Jump, desc = "Jump on less or equal/not greater", },
    { opname = .JNG, mask = 0b11111111, encoding = 0b01111110, src = .Jump, desc = "Jump on less or equal/not greater", },
    { opname = .JB, mask = 0b11111111, encoding = 0b01110010, src = .Jump, desc = "Jump on below/not above or equal", },
    { opname = .JNAE, mask = 0b11111111, encoding = 0b01110010, src = .Jump, desc = "Jump on below/not above or equal", },
    { opname = .JBE, mask = 0b11111111, encoding = 0b01110110, src = .Jump, desc = "Jump on below or equal/not above", },
    { opname = .JNA, mask = 0b11111111, encoding = 0b01110110, src = .Jump, desc = "Jump on below or equal/not above", },
    { opname = .JP, mask = 0b11111111, encoding = 0b01111010, src = .Jump, desc = "Jump on parity/parity even", },
    { opname = .JPE, mask = 0b11111111, encoding = 0b01111010, src = .Jump, desc = "Jump on parity/parity even", },
    { opname = .JO, mask = 0b11111111, encoding = 0b01110000, src = .Jump, desc = "Jump on overflow", },
    { opname = .JS, mask = 0b11111111, encoding = 0b01111000, src = .Jump, desc = "Jump on sign", },
    { opname = .JNE, mask = 0b11111111, encoding = 0b01110101, src = .Jump, desc = "Jump on not equal/not zero", },
    { opname = .JNZ, mask = 0b11111111, encoding = 0b01110101, src = .Jump, desc = "Jump on not equal/not zero", },
    { opname = .JNL, mask = 0b11111111, encoding = 0b01111101, src = .Jump, desc = "Jump on not less/greater or equal", },
    { opname = .JGE, mask = 0b11111111, encoding = 0b01111101, src = .Jump, desc = "Jump on not less/greater or equal", },
    { opname = .JNLE, mask = 0b11111111, encoding = 0b01111111, src = .Jump, desc = "Jump on not less or equal/greater", },
    { opname = .JG, mask = 0b11111111, encoding = 0b01111111, src = .Jump, desc = "Jump on not less or equal/greater", },
    { opname = .JNB, mask = 0b11111111, encoding = 0b01110011, src = .Jump, desc = "Jump on not below/above or equal", },
    { opname = .JAE, mask = 0b11111111, encoding = 0b01110011, src = .Jump, desc = "Jump on not below/above or equal", },
    { opname = .JNBE, mask = 0b11111111, encoding = 0b01110111, src = .Jump, desc = "Jump on not below or equal/above", },
    { opname = .JA, mask = 0b11111111, encoding = 0b01110111, src = .Jump, desc = "Jump on not below or equal/above", },
    { opname = .JNP, mask = 0b11111111, encoding = 0b01111011, src = .Jump, desc = "Jump on not parity/parity odd", },
    { opname = .JPO, mask = 0b11111111, encoding = 0b01111011, src = .Jump, desc = "Jump on not parity/parity odd", },
    { opname = .JNO, mask = 0b11111111, encoding = 0b01110001, src = .Jump, desc = "Jump on not overflow", },
    { opname = .JNS, mask = 0b11111111, encoding = 0b01111001, src = .Jump, desc = "Jump on not sign", },
    { opname = .LOOP, mask = 0b11111111, encoding = 0b11100010, src = .Jump, desc = "Loop CX times", },
    { opname = .LOOPZ, mask = 0b11111111, encoding = 0b11100001, src = .Jump, desc = "Loop while zero/equal", },
    { opname = .LOOPNZ, mask = 0b11111111, encoding = 0b11100000, src = .Jump, desc = "Loop while not zero/equal", },
    { opname = .JCXZ, mask = 0b11111111, encoding = 0b11100011, src = .Jump, desc = "Jump on CX zero", },
}