Last bunch of instructions to get listing 42 fully read

This commit is contained in:
Joseph Ferano 2025-03-12 23:46:47 +07:00
parent babb07cb43
commit fc02debf65
2 changed files with 113 additions and 54 deletions

View File

@ -37,23 +37,14 @@ segment_registers := [4]Register {
variable_port := registers[2]
total_bytes_processed := 0
// RegInfo pairs a boolean flag with an 8-bit shift amount.
// NOTE(review): nothing in the visible code reads this struct; presumably it
// describes where a register field sits inside an instruction encoding — confirm.
RegInfo :: struct {
// presumably true when the register bits are in the first instruction byte — TODO confirm
in_first_byte: bool,
// presumably the right-shift that moves the register bits down to bit 0 — TODO confirm
shift_offset: u8,
}
LastBit :: struct{}
FourthBit :: struct{}
Force :: struct{}
WordSize :: union {
None,
LastBit,
FourthBit,
Force,
}
WordSize2 :: enum {
WordSize :: enum {
None,
LastBit,
FourthBit,
@ -95,27 +86,38 @@ ModMode :: union {
// Register operand: holds a register's `code` and is used to index the
// `registers` table when the operand is printed.
RegisterId :: distinct u8
// Signed 8-bit immediate operand; printed in decimal.
Immediate8 :: distinct i8
// Signed 16-bit immediate operand; printed in decimal.
Immediate16 :: distinct i16
// Unsigned 8-bit immediate, produced for .ImmediateUnsigned operands (the
// IN/OUT port number) so that values above 127 do not print as negative.
ImmediateU8 :: distinct u8
// MemoryAddr is a memory operand: an address selector plus an optional displacement.
MemoryAddr :: struct {
// r/m value; calculate_effective_address turns it into the address expression text
addr_id: u8,
// None, an i8 or an i16, as filled in by parse_operand
displacement: Displacement,
// NOTE(review): the newer parse_operand constructors no longer set this field and
// get_memory_string receives the segment override as a parameter instead, so this
// field looks unused — confirm before relying on it.
segment: Maybe(Register),
}
// 16-bit direct memory address read with get_i16; printed as "[N]" with an
// optional "seg:" prefix.
DirectAddress :: distinct i16
// Index into the `segment_registers` table.
SegmentRegister :: distinct i8
// Signed 8-bit jump offset; printed as "$+N" or "$-N".
Jump :: distinct i8
// Marker operand with no payload; printed as variable_port.fullname.
VariablePort :: struct {}
// Shift/rotate count selector taken from bit 1 of the first instruction byte:
// true prints registers[1].bytename, false prints "1".
ShiftRotate :: distinct bool
// Text of the operation following a repeat prefix, as returned by get_repeat_op.
Repeat :: string
// Intersegment is the operand of a direct intersegment CALL/JMP.
// It is printed as "cs:ip".
Intersegment :: struct {
// 16-bit value read from instruction bytes 1-2
ip: i16,
// 16-bit value read from instruction bytes 3-4
cs: i16,
}
// Target of a direct within-segment CALL/JMP. parse_operand computes it as the
// 16-bit value from instruction bytes 1-2 plus total_bytes_processed plus 3;
// it is printed in decimal.
DirectWithinSegment :: distinct u16
// Operand is the decoded form of one instruction operand. parse_operand
// produces it and get_operand_string switches over the variants to print it.
// `None` is the "no operand" case and prints as an empty string.
Operand :: union {
None,
RegisterId,
Immediate8,
ImmediateU8,
Immediate16,
MemoryAddr,
DirectAddress,
SegmentRegister,
Jump,
VariablePort,
ShiftRotate,
Repeat,
DirectWithinSegment,
Intersegment,
}
OperandInfo :: enum {
@ -124,12 +126,15 @@ OperandInfo :: enum {
SegmentRegister,
RegisterMemory,
Immediate,
ImmediateUnsigned,
Accumulator,
DirectAddress,
Jump,
VariablePort,
ShiftRotate,
Repeat,
DirectWithinSegment,
Intersegment,
}
RegisterEncodingBits :: enum {
@ -147,7 +152,7 @@ InstructionInfo :: struct {
desc: string,
src: OperandInfo,
dst: OperandInfo,
word_size: WordSize2,
word_size: WordSize,
reg_info: RegisterEncodingBits,
has_flip: bool,
has_sign_extension: bool,
@ -158,6 +163,10 @@ InstructionInfo :: struct {
RIGHT_ALIGN_AMOUNT := 35
// get_i16 assembles a signed 16-bit value from the first two bytes of `data`,
// treating data[0] as the low byte and data[1] as the high byte (little-endian).
// `data` must hold at least two bytes; a shorter slice fails the bounds check.
get_i16 :: proc(data: []u8) -> i16 {
	high_byte := i16(data[1])
	low_byte := i16(data[0])
	return high_byte << 8 | low_byte
}
calculate_effective_address :: proc(r_m: u8) -> string {
val: string
switch r_m {
@ -181,7 +190,7 @@ calculate_effective_address :: proc(r_m: u8) -> string {
return val
}
get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
get_memory_string :: proc(memoryAddr: MemoryAddr, has_segment: Maybe(Register)) -> string {
disp: string
switch value in memoryAddr.displacement {
case None:
@ -196,17 +205,13 @@ get_memory_string :: proc(memoryAddr: MemoryAddr) -> string {
}
}
seg_string: string
if segreg, ok := memoryAddr.segment.?; ok {
if segreg, ok := has_segment.?; ok {
seg_string = fmt.aprintf("%s:", segreg.fullname)
}
text := fmt.aprintf("%s[%s%s]", seg_string, calculate_effective_address(memoryAddr.addr_id), disp)
return text
}
// get_i16 reads a signed 16-bit little-endian value: data[0] is the low byte
// and data[1] is the high byte. `data` must hold at least two bytes.
get_i16 :: proc(data: []u8) -> i16 {
return (i16)(data[1]) << 8 | (i16)(data[0])
}
parse_displacement :: proc(data: []u8) -> (displacement: Displacement, disp_amount: int) {
mod := (data[0] & 0b11000000) >> 6
disp: Displacement = None{}
@ -260,16 +265,17 @@ try_find_instruction :: proc(b: u8) -> (InstructionInfo, bool) {
return InstructionInfo{}, false
}
get_opname :: proc(opname: OpName, data: []u8) -> string {
get_opname :: proc(opname: OpName, data: []u8) -> (string, bool) {
name: string
interseg: bool
if opname == .TBD2 {
switch data[1] & 0b00111000 >> 3 {
case 0b000: name = "inc"
case 0b001: name = "dec"
case 0b010: name = "call"
case 0b011: name = "call"
case 0b011: name = "call"; interseg = true
case 0b100: name = "jmp"
case 0b101: name = "jmp"
case 0b101: name = "jmp"; interseg = true
case 0b110: name = "push"
}
} else if opname == .TBD5 {
@ -311,7 +317,7 @@ get_opname :: proc(opname: OpName, data: []u8) -> string {
case 0b111: name = "cmp"
}
}
return name
return name, interseg
}
parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, processed: ^int, word: bool, has_segreg: Maybe(Register)) -> Operand {
@ -361,13 +367,13 @@ parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, pr
op = (DirectAddress)(get_i16(data[2:]))
processed^ += 2
} else {
op = MemoryAddr{ addr_id = rm , displacement = None{} , segment = has_segreg }
op = MemoryAddr{ addr_id = rm , displacement = None{} }
}
} else if mod == 1 {
op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) , segment = has_segreg }
op = MemoryAddr{ addr_id = rm , displacement = (i8)(data[2]) }
processed^ += 1
} else if mod == 2 {
op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) , segment = has_segreg }
op = MemoryAddr{ addr_id = rm , displacement = get_i16(data[2:]) }
processed^ += 2
} else if mod == 3 {
op = (RegisterId)(registers[rm].code)
@ -381,6 +387,9 @@ parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, pr
}
operand = (Operand)(word_signed ? (Immediate16)(get_i16(data[data_idx:])) : (Immediate8)(data[data_idx]))
processed^ += word_signed ? 2 : 1
case .ImmediateUnsigned:
operand = (ImmediateU8)(data[processed^])
processed^ += 1
case .Accumulator:
operand = (RegisterId)(registers[0].code)
case .DirectAddress:
@ -394,37 +403,53 @@ parse_operand :: proc(inst: InstructionInfo, opinfo: OperandInfo, data: []u8, pr
operand = VariablePort{}
case .ShiftRotate:
v_flag := data[0] & 0b10 != 0
operand = v_flag ? (RegisterId)(registers[1].code) : (Immediate8)(1)
operand = (ShiftRotate)(v_flag)
case .Repeat:
operand = get_repeat_op(data[1])
processed^ += 1
case .DirectWithinSegment:
value := (int)(get_i16(data[1:])) + total_bytes_processed + 3
operand = (DirectWithinSegment)(value)
processed^ += 2
case .Intersegment:
operand = Intersegment {
ip = get_i16(data[1:]),
cs = get_i16(data[3:]),
}
processed^ += 4
}
return operand
}
// get_operand_string renders one decoded operand as assembly text.
//
// operand:     the decoded operand to print
// is_word:     selects a register's fullname (true) or bytename (false)
// has_segment: optional segment-override register; when set, its fullname and
//              a ":" are placed in front of memory and direct-address operands
//
// Returns the operand text; a `None` operand yields "".
// Most branches build the text with fmt.aprintf and nothing here frees it.
get_operand_string :: proc(operand: Operand, is_word: bool) -> string {
get_operand_string :: proc(operand: Operand, is_word: bool, has_segment: Maybe(Register)) -> string {
string_val: string
switch val in operand {
case None:
string_val = ""
case RegisterId:
// the operand value indexes the registers table
string_val = is_word ? registers[val].fullname : registers[val].bytename
case Immediate8:
string_val = fmt.aprintf("%d", val)
case Immediate16:
// all plain numeric operands share the same decimal formatting
case Immediate8, ImmediateU8, Immediate16, DirectWithinSegment:
string_val = fmt.aprintf("%d", val)
case MemoryAddr:
string_val = get_memory_string(val)
string_val = get_memory_string(val, has_segment)
case DirectAddress:
string_val = fmt.aprintf("[%d]", val)
// prefix is "<segment>:" when an override is present, otherwise empty
seg_string: string
if segreg, ok := has_segment.?; ok {
seg_string = fmt.aprintf("%s:", segreg.fullname)
}
string_val = fmt.aprintf("%s[%d]", seg_string, val)
case SegmentRegister:
string_val = segment_registers[val].fullname
case Jump:
// explicit "+" for non-negative offsets; negative values already carry "-"
string_val = fmt.aprintf("$%s%d", val >= 0 ? "+" : "", val)
case VariablePort:
string_val = variable_port.fullname
case ShiftRotate:
// true: count comes from registers[1] (byte name); false: literal count of 1
string_val = val ? registers[1].bytename : "1"
case Repeat:
string_val = (string)(val)
case Intersegment:
// segment first, then offset
string_val = fmt.aprintf("%d:%d", val.cs, val.ip)
}
return string_val
}
@ -447,7 +472,8 @@ main :: proc() {
if false {
os.exit(0)
}
// asdf :u16 = 0b00000011_11101000
// asdf :u16 = 0b00000110_11011101
// asdf2 :i16 = (i16)(asdf)
// fmt.printfln("%d", asdf2)
print_at_end := false
@ -462,7 +488,7 @@ main :: proc() {
repeating_op_count := 0
instruction_builder := strings.builder_make()
instruction_list := make([dynamic]string, 512)
fmt.println("bits 16")
fmt.println("bits 16\n")
for idx < bytes_read {
processed := 1
curr_byte := data[idx]
@ -491,6 +517,8 @@ main :: proc() {
has_segment = segment_registers[reg]
idx += 1
continue
} else if inst.opname == .AAM {
processed += 1
}
src_opr: Operand
@ -498,6 +526,7 @@ main :: proc() {
word: bool
flip: bool
indirect_intersegment: bool
op: Operand
if inst.has_flip {
@ -513,11 +542,15 @@ main :: proc() {
opname: string
// TODO: Figure out a way to do this in the string builder
if inst.check_second_encoding {
opname = strings.to_lower(fmt.aprintf("%s", get_opname(inst.opname, data[idx:])))
op,interseg := get_opname(inst.opname, data[idx:])
indirect_intersegment = interseg
opname = strings.to_lower(fmt.aprintf("%s", op))
// NOTE: This is a special case because it matches the bit pattern of .TBD5,
// but the instruction itself is different
if opname == "not" {
inst = not_inst
if opname == "test" && (curr_byte & 0xFF) == 0b11110110 {
inst = test_inst
// } else if opname == "neg" {
// inst = neg_inst
}
} else {
opname = strings.to_lower(fmt.aprintf("%s", inst.opname))
@ -541,16 +574,20 @@ main :: proc() {
src_opr, dst_opr = dst_opr, src_opr
}
dst_str := get_operand_string(dst_opr, word)
src_str := get_operand_string(src_opr, word)
dst_str := get_operand_string(dst_opr, word, has_segment)
src_str := get_operand_string(src_opr, word, has_segment)
full_inst: string
if dst_str == "" {
_,ok_1 := src_opr.(MemoryAddr);
_,ok_2 := src_opr.(DirectAddress);
if (ok_1 || ok_2) && inst.word_size != .Always16 {
if (ok_1 || ok_2) {
size_string = word ? "word " : "byte "
}
full_inst = fmt.aprintf("%s %s%s", opname, size_string, src_str)
interseg_string: string
if indirect_intersegment {
interseg_string = " far"
}
full_inst = fmt.aprintf("%s%s %s%s", opname, interseg_string, size_string, src_str)
} else {
// NOTE: I don't know why this is the case, but only the move has the word/byte
// keyword next to the immediate, but other instructions have it on the memory address
@ -568,13 +605,19 @@ main :: proc() {
lock_string = "lock "
}
fmt.sbprintf(&instruction_builder, "%s%s %*[2]s", lock_string, full_inst, RIGHT_ALIGN_AMOUNT - len(full_inst), ";;")
if has_lock {
fmt.sbprintf(&instruction_builder, " lock")
}
if _,ok := has_segment.?; ok {
fmt.sbprintf(&instruction_builder, " segment")
}
for i in 0..<processed {
fmt.sbprintf(&instruction_builder, " %08b", data[idx + i])
}
op2 := strings.to_string(instruction_builder)
if op2[0:3] != string(last_opname[:]) {
if repeating_op_count > 1 {
if repeating_op_count > 0 {
fmt.println()
}
repeating_op_count = 0
@ -588,6 +631,7 @@ main :: proc() {
strings.builder_reset(&instruction_builder)
has_lock = false
has_segment = nil
total_bytes_processed = idx
}
if print_at_end {
for i in 0..<line_count {

View File

@ -43,6 +43,7 @@ OpName :: enum {
TEST,
REP,
RET,
RETF,
INT,
INT3,
INTO,
@ -59,6 +60,7 @@ OpName :: enum {
ESC,
LOCK,
SEGMENT,
CALL,
JMP,
JNZ,
JNGE,
@ -94,12 +96,11 @@ OpName :: enum {
JCXZ,
}
// Replacement InstructionInfo that main swaps in when the second-byte lookup
// resolves a .TBD5 instruction to "test". It uses the same mask and encoding as
// the .TBD5 table entry but takes a register/memory destination and an
// immediate source.
// NOTE(review): opname is still .NOT although the value is named test_inst;
// main has already computed the printed name before the swap, but confirm
// nothing else reads inst.opname afterwards, or change it to .TEST.
not_inst := InstructionInfo {
test_inst := InstructionInfo {
opname = .NOT, desc = "", mask = 0b11111110, encoding = 0b11110110,
src = .RegisterMemory, word_size = .LastBit
dst = .RegisterMemory, src = .Immediate, word_size = .LastBit
}
instructions := [?]InstructionInfo {
{ opname = .TBD1, desc = "Immediate to accumulator",
mask = 0b11000110, encoding = 0b00000100, check_second_encoding = true,
@ -119,7 +120,8 @@ instructions := [?]InstructionInfo {
word_size = .LastBit, has_sign_extension = true },
{ opname = .TBD5, desc = "", check_second_encoding = true,
mask = 0b11111110, encoding = 0b11110110,
dst = .Immediate, src = .RegisterMemory, word_size = .LastBit, },
// dst = .Immediate, src = .RegisterMemory, word_size = .LastBit, },
src = .RegisterMemory, word_size = .LastBit, },
{ opname = .TBD6, desc = "", check_second_encoding = true,
mask = 0b11111100, encoding = 0b11010000,
dst = .RegisterMemory, src = .ShiftRotate, word_size = .LastBit, },
@ -143,6 +145,10 @@ instructions := [?]InstructionInfo {
mask = 0b11111110, encoding = 0b10100010,
dst = .DirectAddress, src = .Accumulator,
word_size = .LastBit, },
// MOV with a segment-register source (opcode 0b10001100): the segment register
// is selected by the middle bits of the second byte and the destination is a
// register/memory operand.
// NOTE(review): no word_size is set here. Segment registers are 16 bits wide,
// so a register destination (mod == 3) may need word_size = .Always16 to print
// the full register name — confirm against how `word` is derived in main.
{ opname = .MOV, desc = "Segment register to register/memory",
mask = 0b11111111, encoding = 0b10001100,
dst = .RegisterMemory, src = .SegmentRegister,
reg_info = .SecondByteMiddle3 },
{ opname = .PUSH, desc = "", mask = 0b11111000, encoding = 0b01010000,
src = .Register, reg_info = .FirstByteLast3,
word_size = .Always16, },
@ -150,7 +156,7 @@ instructions := [?]InstructionInfo {
src = .SegmentRegister, reg_info = .FirstByteMiddle3,
word_size = .Always16, },
{ opname = .POP, desc = "", mask = 0b11111111, encoding = 0b10001111,
src = .RegisterMemory,},
src = .RegisterMemory, word_size = .Always16 },
{ opname = .POP, desc = "", mask = 0b11111000, encoding = 0b01011000,
src = .Register, reg_info = .FirstByteLast3,
word_size = .Always16, },
@ -159,12 +165,12 @@ instructions := [?]InstructionInfo {
word_size = .Always16, },
{ opname = .XCHG, desc = "", mask = 0b11111110, encoding = 0b10000110,
dst = .RegisterMemory, src = .Register,
reg_info = .SecondByteMiddle3, has_flip = true },
reg_info = .SecondByteMiddle3, word_size = .LastBit, has_flip = true },
{ opname = .XCHG, desc = "", mask = 0b11111000, encoding = 0b10010000,
dst = .Accumulator, src = .Register,
reg_info = .FirstByteLast3, has_flip = true, word_size = .Always16 },
{ opname = .IN, desc = "", mask = 0b11111110, encoding = 0b11100100,
dst = .Accumulator, src = .Immediate,
dst = .Accumulator, src = .ImmediateUnsigned,
// TODO: Everything works just fine, but the problem here is that if you want it to
// show up as an unsigned int, then we have to change the types because the number
// 200, for instance, will show up as a negative, we would have to create an unsigned
@ -174,11 +180,13 @@ instructions := [?]InstructionInfo {
dst = .Accumulator, src = .VariablePort,
word_size = .LastBit, },
{ opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11100110,
dst = .Immediate, src = .Accumulator,
word_size = .Unsigned8, },
dst = .ImmediateUnsigned, src = .Accumulator,
word_size = .LastBit, },
{ opname = .OUT, desc = "", mask = 0b11111110, encoding = 0b11101110,
dst = .VariablePort, src = .Accumulator,
word_size = .LastBit, },
{ opname = .TEST, desc = "", mask = 0b11111110, encoding = 0b10101000,
dst = .Accumulator, src = .Immediate, word_size = .LastBit },
{ opname = .XLAT, desc = "", mask = 0b11111111, encoding = 0b11010111,},
{ opname = .LEA, desc = "", mask = 0b11111111, encoding = 0b10001101,
dst = .Register, src = .RegisterMemory,
@ -212,6 +220,9 @@ instructions := [?]InstructionInfo {
{ opname = .RET, desc = "", mask = 0b11111111, encoding = 0b11000011,},
{ opname = .RET, src = .Immediate, word_size = .Always16,
desc = "", mask = 0b11111111, encoding = 0b11000010,},
{ opname = .RETF, desc = "", mask = 0b11111111, encoding = 0b11001011,},
{ opname = .RETF, desc = "", mask = 0b11111111, encoding = 0b11001010,
src = .Immediate, word_size = .Always16 },
{ opname = .INT, src = .Immediate, desc = "", mask = 0b11111111, encoding = 0b11001101,},
{ opname = .INT3, desc = "", mask = 0b11111111, encoding = 0b11001100,},
{ opname = .INTO, desc = "", mask = 0b11111111, encoding = 0b11001110,},
@ -228,6 +239,10 @@ instructions := [?]InstructionInfo {
// { opname = .ESC, desc = "", mask = 0b11111111, encoding = 0b11111000, dst = },
{ opname = .LOCK, desc = "", mask = 0b11111111, encoding = 0b11110000,},
{ opname = .SEGMENT, desc = "", mask = 0b11100111, encoding = 0b00100110,},
{ opname = .CALL, desc = "", mask = 0b11111111, encoding = 0b10011010, src = .Intersegment },
{ opname = .JMP, desc = "", mask = 0b11111111, encoding = 0b11101010, src = .Intersegment },
{ opname = .JMP, desc = "", mask = 0b11111111, encoding = 0b11101001, src = .DirectWithinSegment },
{ opname = .CALL, desc = "", mask = 0b11111111, encoding = 0b11101000, src = .DirectWithinSegment },
// JE and JZ are two mnemonics for the same opcode (0b01110100): the jump is
// taken when the zero flag is set, i.e. on equal / zero.
{ opname = .JE, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on equal/zero", },
{ opname = .JZ, mask = 0b11111111, encoding = 0b01110100, src = .Jump, desc = "Jump on equal/zero", },