New declarative style instruction definition, generalized parsing

This commit is contained in:
Joseph Ferano 2025-02-21 21:53:11 +07:00
parent f8f5744cd3
commit bc0a8b65eb

View File

@ -16,14 +16,7 @@ Register :: struct {
code: u8, code: u8,
} }
RegMemMode :: enum { OpName :: enum {
Memory00 = 0b00,
Memory08 = 0b01,
Memory16 = 0b10,
Register = 0b11,
};
OpCode :: enum {
MOV, MOV,
ADD, ADD,
SUB, SUB,
@ -42,35 +35,116 @@ registers := [8]Register {
{fullname = "di", bytename = "bh", code = 0b111}, {fullname = "di", bytename = "bh", code = 0b111},
} }
Instruction :: struct { RegInfo :: struct {
in_first_byte: bool,
shift_offset: u8,
}
LastBit :: struct{}
FourthBit :: struct{}
WordSize :: union {
None,
LastBit,
FourthBit,
}
InstructionInfo :: struct {
mask: u8, mask: u8,
encoding: u8, encoding: u8,
name: string, name: string,
desc: string, desc: string,
has_mod_rm: bool,
word_size: WordSize,
reg_info: Maybe(RegInfo),
has_data: bool,
has_displacement: bool,
has_segreg: bool,
has_flip: bool,
has_explicit_size: bool,
has_accumulator: bool,
} }
instructions := [?]Instruction { reg_first_last := RegInfo{ in_first_byte = true, shift_offset = 0 }
{ mask = 0b11111100, encoding = 0b10001000, name = "mov", desc = "Register/memory to/from register" }, reg_second_middle := RegInfo{ in_first_byte = false, shift_offset = 3 }
{ mask = 0b11111110, encoding = 0b11000110, name = "mov", desc = "Immediate to register/memory" },
{ mask = 0b11110000, encoding = 0b10110000, name = "mov", desc = "Immediate to register" }, instructions := [?]InstructionInfo {
{ mask = 0b11111110, encoding = 0b10100000, name = "mov", desc = "Memory to accumulator" }, { name = "mov", desc = "Register/memory to/from register", mask = 0b11111100, encoding = 0b10001000,
{ mask = 0b11111110, encoding = 0b10100010, name = "mov", desc = "Accumulator to memory" }, has_mod_rm = true, reg_info = reg_second_middle, has_data = false, has_displacement = true,
{ mask = 0b11111111, encoding = 0b10001110, name = "mov", desc = "Register/memory to segment register" }, word_size = LastBit{}, has_flip = true },
{ mask = 0b11111111, encoding = 0b10001100, name = "mov", desc = "Segment register to register/memory" }, { name = "mov", desc = "Immediate to register/memory", mask = 0b11111110, encoding = 0b11000110,
has_mod_rm = true, reg_info = nil, has_data = true, has_displacement = true,
word_size = LastBit{}, has_explicit_size = true },
{ name = "mov", desc = "Immediate to register", mask = 0b11110000, encoding = 0b10110000,
has_mod_rm = false, reg_info = reg_first_last, has_data = true, has_displacement = false,
word_size = FourthBit{} },
{ name = "mov", desc = "Memory to accumulator", mask = 0b11111110, encoding = 0b10100000,
has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true,
word_size = LastBit{}, has_accumulator = true },
{ name = "mov", desc = "Accumulator to memory", mask = 0b11111110, encoding = 0b10100010,
has_mod_rm = false, reg_info = nil, has_data = true, has_displacement = false, has_flip = true,
word_size = LastBit{}, has_accumulator = true },
{ name = "mov", desc = "Register/memory to segment register", mask = 0b11111111, encoding = 0b10001110,
has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true,
word_size = None{} },
{ name = "mov", desc = "Segment register to register/memory", mask = 0b11111111, encoding = 0b10001100,
has_mod_rm = true, reg_info = nil, has_segreg = true, has_displacement = true,
word_size = None{} },
} }
ParsedInstruction :: struct { None :: struct {}
code: OpCode,
displacement: DisplacementMode, Disp8 :: i8
Disp16 :: i16
Displacement :: union {
None,
Disp8,
Disp16
} }
inst_map := make(map[u8]Instruction) Value8 :: i8
Value16 :: i16
Data :: union {
None,
Value8,
Value16
}
ModMemory :: struct {}
Mod8BitDisp :: i8
Mod16BitDisp :: i16
ModRegister :: struct {}
ModMode :: union {
ModMemory,
Mod8BitDisp,
Mod16BitDisp,
ModRegister,
}
RegisterId :: distinct u8
Immediate8 :: distinct i8
Immediate16 :: distinct i16
MemoryAddr :: struct {
addr_id: u8,
displacement: Displacement
}
DirectAddress :: distinct i16
Accumulator8 :: distinct i8
Accumulator16 :: distinct i16
OperandType :: union {
RegisterId,
Immediate8,
Immediate16,
MemoryAddr,
DirectAddress,
Accumulator8,
Accumulator16,
}
inst_map := make(map[u8]InstructionInfo)
RIGHT_ALIGN_AMOUNT := 30 RIGHT_ALIGN_AMOUNT := 30
get_instruction :: proc(bytes: []u8) -> (Instruction, u8) {
return {}, 0
}
calculate_effective_address :: proc(r_m: u8) -> string { calculate_effective_address :: proc(r_m: u8) -> string {
val: string val: string
switch r_m { switch r_m {
@ -94,39 +168,6 @@ calculate_effective_address :: proc(r_m: u8) -> string {
return val return val
} }
ModMemory :: struct {}
Mod8BitDisp :: i8
Mod16BitDisp :: i16
ModRegister :: struct {}
DisplacementMode :: union {
ModMemory,
Mod8BitDisp,
Mod16BitDisp,
ModRegister,
}
ModField :: struct {
displacement: DisplacementMode
}
None :: struct {}
Disp8 :: i8
Disp16 :: i16
Displacement :: union {
None,
Disp8,
Disp16
}
RegisterId :: u8
Immediate8 :: i8
Immediate16 :: i16
MemoryAddr :: struct {
addr_id: u8,
displacement: Displacement
}
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string { get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
disp: string disp: string
switch value in memoryAddr.displacement { switch value in memoryAddr.displacement {
@ -145,42 +186,46 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
return text return text
} }
MemoryType :: union { get_memory_type_string :: proc(mem_type: OperandType, is_word: bool) -> string {
RegisterId, switch val in mem_type {
MemoryAddr case RegisterId:
return is_word ? registers[val].fullname : registers[val].bytename
case Immediate8:
return fmt.aprintf("%d", val)
case Immediate16:
return fmt.aprintf("%d", val)
case MemoryAddr:
return get_memory_string(val)
case DirectAddress:
return fmt.aprintf("[%d]", val)
case Accumulator8:
return fmt.aprintf("[%d]", val)
case Accumulator16:
return fmt.aprintf("[%d]", val)
} }
return ""
OperandType :: union {
RegisterId,
Immediate8,
Immediate16,
MemoryAddr
} }
get_i16 :: proc(data: []u8) -> i16 { get_i16 :: proc(data: []u8) -> i16 {
return (i16)(data[1]) << 8 | (i16)(data[0]) return (i16)(data[1]) << 8 | (i16)(data[0])
} }
parse_displacement :: proc(data: []u8) -> (displacement: DisplacementMode, disp_amount: int) { parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
mod := (data[0] & 0b11000000) >> 6 mod := (data[0] & 0b11000000) >> 6
disp: DisplacementMode disp: Displacement = None{}
amount: int amount: int
switch mod { switch mod {
case 0:
disp = ModMemory{}
case 1: case 1:
disp = (i8)(data[1]) disp = (i8)(data[1])
amount = 1 amount = 1
case 2: case 2:
disp = get_i16(data[1:]) disp = get_i16(data[1:])
amount = 2 amount = 2
case 3:
disp = ModRegister{}
} }
return disp, amount return disp, amount
} }
get_displacement_string :: proc(displacement: DisplacementMode) -> string { get_displacement_string :: proc(displacement: Displacement) -> string {
disp := "" disp := ""
#partial switch value in displacement { #partial switch value in displacement {
case i8: case i8:
@ -195,7 +240,7 @@ get_displacement_string :: proc(displacement: DisplacementMode) -> string {
return disp return disp
} }
try_find_instruction :: proc(b: u8) -> (Instruction, bool) { try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
mask: u8 = 0xFF mask: u8 = 0xFF
for j in 0..=4 { for j in 0..=4 {
encoding := b & mask encoding := b & mask
@ -204,7 +249,7 @@ try_find_instruction :: proc(b: u8) -> (Instruction, bool) {
} }
mask <<= 1 mask <<= 1
} }
return Instruction{}, false return InstructionInfo{}, false
} }
main :: proc() { main :: proc() {
@ -229,20 +274,20 @@ main :: proc() {
if false { if false {
os.exit(0) os.exit(0)
} }
// asdf :u16 = 0b1111_0000_1001_0100
// asdf2 :i16 = (i16)(asdf)
// fmt.printfln("%d", asdf2)
read_next := false read_next := false
src_dst := true src_dst := true
fmt.println("bits 16\n") fmt.println("bits 16\n")
idx := 0 idx := 0
for idx < bytes_read { for idx < bytes_read {
processed := 0 processed := 1
curr_byte := data[idx] curr_byte := data[idx]
inst_name: string instruction, ok := try_find_instruction(curr_byte)
if instruction, ok := try_find_instruction(curr_byte); ok { if !ok {
inst_name = instruction.name
} else {
txt := "unknown instruction" txt := "unknown instruction"
fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte) fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
idx += 1 idx += 1
@ -251,149 +296,78 @@ main :: proc() {
lhs2: OperandType lhs2: OperandType
rhs2: OperandType rhs2: OperandType
lhs: string
rhs: string
is_word: bool is_word: bool
is_immediate := false is_immediate := false
flip_dst := false flip_dst := false
rm: u8
mod: u8
reg: u8
if curr_byte & 0b11110000 == 0b10110000 { if instruction.has_flip {
is_word = curr_byte & 0b0000_1000 != 0
reg := registers[curr_byte & 0b00000111]
lhs = is_word ? reg.fullname : reg.bytename
processed += is_word ? 1 : 0
lhs2 := (RegisterId)(reg.code)
rhs2 := (OperandType)(is_word ? ((Immediate16)(get_i16(data[idx+1:]))) : ((Immediate8)(data[idx+1])))
} else if curr_byte & 0b11111000 == 0b10001000 {
mod_reg_rm := data[idx + 1]
is_word = curr_byte & 1 == 1
flip_dst = curr_byte & 2 != 0 flip_dst = curr_byte & 2 != 0
reg := (mod_reg_rm & 0b00111000) >> 3
rm := mod_reg_rm & 0b00000111
mod, disp_amount := parse_displacement(data[idx + 1:])
switch disp_val in mod {
case ModMemory:
lhs2 = (RegisterId)(reg)
rhs2 = MemoryAddr{ addr_id = rm , displacement = None{} }
processed += 1
case Mod8BitDisp:
lhs2 = (RegisterId)(reg)
rhs2 = MemoryAddr{ addr_id = rm , displacement = disp_val }
processed += 1
case Mod16BitDisp:
lhs2 = (RegisterId)(reg)
rhs2 = MemoryAddr{ addr_id = rm , displacement = disp_val }
processed += 2
case ModRegister:
lhs2 = (RegisterId)(rm)
rhs2 = (RegisterId)(reg)
processed += 1
} }
dst_reg := registers[rm]
switch val in instruction.word_size {
case LastBit: is_word = curr_byte & 1 == 1
case FourthBit: is_word = curr_byte & 0b0000_1000 != 0
case None:
}
if reg_info, ok := instruction.reg_info.(RegInfo); ok {
b := reg_info.in_first_byte ? data[idx] : data[idx+1]
reg = (b >> reg_info.shift_offset) & 0b111
}
if instruction.has_mod_rm {
mod = data[idx+1] >> 6
rm = data[idx+1] & 0b00000111
processed += 1 + ((int)(mod) % 3)
if mod == 0 {
if rm == 0b110 {
lhs2 = (DirectAddress)(get_i16(data[idx+2:]))
processed += 2
} else {
lhs2 = MemoryAddr{ addr_id = rm , displacement = None{} }
}
} else if mod == 1 {
lhs2 = MemoryAddr{ addr_id = rm , displacement = (i8)(data[idx+2]) }
} else if mod == 2 {
lhs2 = MemoryAddr{ addr_id = rm , displacement = get_i16(data[idx+2:]) }
} else if mod == 3 {
lhs2 = (RegisterId)(registers[rm].code)
}
if instruction.has_explicit_size {
imm_idx := idx + 2 + ((int)(mod) % 3)
rhs2 = (OperandType)(is_word ? (Immediate16)(get_i16(data[imm_idx:])) : (Immediate8)(data[imm_idx]))
processed += is_word ? 2 : 1
} else {
rhs2 = (RegisterId)(reg)
}
} else {
lhs2 = (RegisterId)(registers[reg].code)
if instruction.has_accumulator {
rhs2 = (OperandType)(is_word ? ((Accumulator16)(get_i16(data[idx+1:]))) : ((Accumulator8)(data[idx+1])))
} else {
rhs2 = (OperandType)(is_word ? ((Immediate16)(get_i16(data[idx+1:]))) : ((Immediate8)(data[idx+1])))
}
processed += is_word ? 2 : 1
} }
if flip_dst { if flip_dst {
lhs2, rhs2 = rhs2, lhs2 lhs2, rhs2 = rhs2, lhs2
} }
switch val in lhs2 {
case RegisterId: lhs := get_memory_type_string(lhs2, is_word)
lhs = fmt.aprintf("%s", is_word ? registers[val].fullname : registers[val].bytename) rhs := get_memory_type_string(rhs2, is_word)
case Immediate8: size_string := instruction.has_explicit_size ? is_word ? "word " : "byte " : ""
lhs = fmt.aprintf("%d", val) full_inst := fmt.aprintf("%s %s, %s%s", instruction.name, lhs, size_string, rhs)
case Immediate16: fmt.printf("%s %*[1]s a", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
lhs = fmt.aprintf("%d", val) for i in 0..<processed {
case MemoryAddr: fmt.printf(" %08b", data[idx + i])
lhs = get_memory_string(val)
}
switch val in rhs2 {
case RegisterId:
rhs = is_word ? registers[val].fullname : registers[val].bytename
case Immediate8:
rhs = fmt.aprintf("%d", val)
case Immediate16:
rhs = fmt.aprintf("%d", val)
case MemoryAddr:
rhs = get_memory_string(val)
}
full_inst := fmt.aprintf("%s %s, %s", inst_name, lhs, rhs)
processed += 1
fmt.printf("%s %*[1]s a %08b", full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;", curr_byte)
for i in 0..=processed {
fmt.printf(" %08b", data[processed + 1 + i])
} }
fmt.println() fmt.println()
idx += processed idx += processed
if true {
continue
}
if curr_byte & 0b11111000 == 0b10001000 || curr_byte & 0b11111110 == 0b11000110 {
is_imm_mode := curr_byte & 0b11111110 == 0b11000110
is_word := curr_byte & 1 == 1
flip_src := curr_byte & 2 != 0
next_byte := data[processed + 1]
reg := (next_byte & 0b00111000) >> 3
rm := next_byte & 0b00000111
dst_reg := registers[rm]
displacement, disp_amount := parse_displacement(data[processed + 1:])
src_name, dst_name: string
// switch disp_val in displacement {
// case DisplaceMemoryMode:
// src_name = is_word ? registers[rm].fullname : registers[rm].bytename
// if is_imm_mode {
// if is_word {
// dst_name = fmt.aprintf("word %d", get_i16(data[processed+2:]))
// } else {
// dst_name = fmt.aprintf("byte %d", (i8)(data[processed+2]))
// }
// }
// disp_amount += is_word ? 2 : 1
// case Displace8Bits:
// case Displace16Bits:
// case DisplaceRegisterMode:
// }
// if disp_val, ok := displacement.(DisplaceRegisterMode); ok {
// src_name = is_word ? registers[rm].fullname : registers[rm].bytename
// } else {
// src_name = fmt.aprintf("[%s%s]", calculate_effective_address(rm), get_displacement_string(displacement))
// }
if flip_src && !is_imm_mode { src_name, dst_name = dst_name, src_name }
inst_string := fmt.aprintf("mov %s, %s", src_name, dst_name)
fmt.printf("%s %*[1]s a %08b", inst_string, RIGHT_ALIGN_AMOUNT - len(inst_string), ";;", curr_byte)
for i in 0..=disp_amount {
fmt.printf(" %08b", data[processed + 1 + i])
}
fmt.println()
processed += 1 + disp_amount
} else if curr_byte & 0b11110000 == 0b10110000 {
is_word := curr_byte & 0b0000_1000 != 0
reg := curr_byte & 0b00000111
dst_name: string
imm: i16
if is_word {
dst_name = registers[reg].fullname
imm = (i16)(data[processed+2]) << 8 | (i16)(data[processed+1])
processed += 2
} else {
dst_name = registers[reg].bytename
imm = (i16)(data[processed+1])
processed += 1
}
inst_string := fmt.aprintf("mov %s, %d", dst_name, imm)
fmt.printfln("%s %*[1]s b %08b %08b", inst_string, RIGHT_ALIGN_AMOUNT - len(inst_string), ";; 2", curr_byte, data[processed + 1])
} else if curr_byte & 0b11111110 == 0b11000110 {
is_word := curr_byte & 1 != 0
fmt.printfln("mov [%s], asdf ;; %08b %8b %8b", "", curr_byte, data[processed + 1], data[processed + 2])
} else {
txt := "unknown instruction"
fmt.printfln("%s %*[1]s %8b", txt, RIGHT_ALIGN_AMOUNT - len(txt), ";;", curr_byte)
}
processed += 1
} }
} }