performance improvement

This commit is contained in:
Mwa
2026-03-20 11:08:18 +01:00
parent c72e133cde
commit ff6427b020
2 changed files with 297 additions and 282 deletions

View File

@@ -396,7 +396,7 @@ impl Computer {
new
}
#[inline(always)]
pub fn step(&mut self) {
pub fn step(&mut self, s: usize) {
match self.interupts {
InteruptState::Disabled => {}
InteruptState::Enabled => {
@@ -415,311 +415,326 @@ impl Computer {
}
InteruptState::Serving(..) => {}
}
for _ in 0..s {
//potentially just changed by interupt.
let next_opcode = self.ram[self.pc];
//potentially just changed by interupt.
let next_opcode = self.ram[self.pc];
let instruction = Instruction::try_from(next_opcode);
let instruction = Instruction::try_from(next_opcode);
match instruction {
Ok(instruction) => {
match instruction {
Instruction::Copy(reg, op2) => {
self[reg] = self.resolve(op2);
self.pc += 1;
}
Instruction::Add(reg, reg1, op2) => {
self[reg] = self[reg1] + self.resolve(op2);
self.pc += 1;
}
Instruction::Sub(reg, reg1, op2) => {
self[reg] = self[reg1] - self.resolve(op2);
self.pc += 1;
}
Instruction::Or(reg, reg1, op2) => {
self[reg] = self[reg1] | self.resolve(op2);
self.pc += 1;
}
Instruction::And(reg, reg1, op2) => {
self[reg] = self[reg1] & self.resolve(op2);
self.pc += 1;
}
Instruction::Xor(reg, reg1, op2) => {
self[reg] = self[reg1] ^ self.resolve(op2);
self.pc += 1;
}
Instruction::Lsl(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Lsr(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Asr(reg, reg1, op2) => {
self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Umull(reg, reg1, op2) => {
self[reg] = self[reg1].wrapping_mul(self.resolve(op2));
self.pc += 1;
}
Instruction::Smull(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32;
self.pc += 1;
}
Instruction::Umulh(reg, reg1, op2) => {
self[reg] = self[reg1].widening_mul(self.resolve(op2)).1;
self.pc += 1;
}
Instruction::Smulh(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32;
self.pc += 1;
}
Instruction::Div(reg, reg1, op2) => {
self.pc += 1;
let d = self.resolve(op2);
if unlikely(d == 0) {
self.serve_interupt(
InteruptKind::DivByZero,
[reg.0.into(), self[reg1]],
);
return;
match instruction {
Ok(instruction) => {
match instruction {
Instruction::Copy(reg, op2) => {
self[reg] = self.resolve(op2);
self.pc += 1;
}
self[reg] = self[reg1] / d;
}
Instruction::Mod(reg, reg1, op2) => {
self[reg] = self[reg1] % self.resolve(op2);
self.pc += 1
}
Instruction::Store(reg, op2, reg1) => {
self.pc += 1;
let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[1, addr as u32, self[reg1]],
);
return;
Instruction::Add(reg, reg1, op2) => {
self[reg] = self[reg1] + self.resolve(op2);
self.pc += 1;
}
if addr <= 0x00ffffff {
self.ram[addr / 4] = self[reg1];
} else if addr <= 0x00ff_ffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
let dat = if cfg!(feature = "rgba") {
self[reg1]
} else {
self[reg1] & 0x00FF_FFFF
};
(&SHARED.screen_buf[buf_addr])
.store(dat, std::sync::atomic::Ordering::Relaxed);
} else if addr == 0x0120_1000 {
(&SHARED.external_enabled_interupts)
.store(self[reg1], std::sync::atomic::Ordering::Relaxed);
} else {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]);
Instruction::Sub(reg, reg1, op2) => {
self[reg] = self[reg1] - self.resolve(op2);
self.pc += 1;
}
}
Instruction::Load(reg, reg1, op2) => {
self.pc += 1;
let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[0, addr as u32, reg.0 as u32],
);
return;
Instruction::Or(reg, reg1, op2) => {
self[reg] = self[reg1] | self.resolve(op2);
self.pc += 1;
}
self[reg] = if addr <= 0x00ffffff {
self.ram[addr / 4]
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
(&SHARED.screen_buf[buf_addr])
.load(std::sync::atomic::Ordering::Relaxed)
} else {
match addr as isize - 0x0120_0000 {
#[cfg(feature = "rich_keyboard")]
-12 => {
SHARED.keyboard[0].load(std::sync::atomic::Ordering::Relaxed)
}
#[cfg(feature = "rich_keyboard")]
-8 => SHARED.keyboard[1].load(std::sync::atomic::Ordering::Relaxed),
#[cfg(feature = "rich_keyboard")]
-4 => SHARED.keyboard[2].load(std::sync::atomic::Ordering::Relaxed),
0 => SHARED.keyboard[3].load(std::sync::atomic::Ordering::Relaxed),
4 => time::Instant::now()
.duration_since(self.creation)
.as_millis() as u32,
8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed),
12 => SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed),
16 => SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed),
//guaranted by the inequality and is multiple of 4
_ => {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]);
return;
}
Instruction::And(reg, reg1, op2) => {
self[reg] = self[reg1] & self.resolve(op2);
self.pc += 1;
}
Instruction::Xor(reg, reg1, op2) => {
self[reg] = self[reg1] ^ self.resolve(op2);
self.pc += 1;
}
Instruction::Lsl(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Lsr(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Asr(reg, reg1, op2) => {
self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Umull(reg, reg1, op2) => {
self[reg] = self[reg1].wrapping_mul(self.resolve(op2));
self.pc += 1;
}
Instruction::Smull(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32;
self.pc += 1;
}
Instruction::Umulh(reg, reg1, op2) => {
self[reg] = self[reg1].widening_mul(self.resolve(op2)).1;
self.pc += 1;
}
Instruction::Smulh(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32;
self.pc += 1;
}
Instruction::Div(reg, reg1, op2) => {
self.pc += 1;
let d = self.resolve(op2);
if unlikely(d == 0) {
self.serve_interupt(
InteruptKind::DivByZero,
[reg.0.into(), self[reg1]],
);
return;
}
};
}
Instruction::Push(op2) => {
self.sp -= 1;
self.ram[self.sp] = self.resolve(op2);
self.pc += 1;
}
Instruction::Pop(reg) => {
self[reg] = self.ram[self.sp];
self.sp += 1;
self.pc += 1;
}
Instruction::Skip(d, cond, reg, op2) => {
self.pc += 1;
if cond.eval(self[reg], self.resolve(op2)) {
self.pc += d as usize
self[reg] = self[reg1] / d;
}
}
Instruction::Jump(mut addr) => {
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if addr == 0 {
#[cfg(feature = "debug")]
{
match self.interupts {
InteruptState::Disabled => {
println!("program terminated");
Instruction::Mod(reg, reg1, op2) => {
self[reg] = self[reg1] % self.resolve(op2);
self.pc += 1
}
Instruction::Store(reg, op2, reg1) => {
self.pc += 1;
let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[1, addr as u32, self[reg1]],
);
return;
}
if addr <= 0x00ffffff {
self.ram[addr / 4] = self[reg1];
} else if addr <= 0x00ff_ffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
let dat = if cfg!(feature = "rgba") {
self[reg1]
} else {
self[reg1] & 0x00FF_FFFF
};
(&SHARED.screen_buf[buf_addr])
.store(dat, std::sync::atomic::Ordering::Relaxed);
} else if addr == 0x0120_1000 {
(&SHARED.external_enabled_interupts)
.store(self[reg1], std::sync::atomic::Ordering::Relaxed);
} else {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]);
}
}
Instruction::Load(reg, reg1, op2) => {
self.pc += 1;
let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[0, addr as u32, reg.0 as u32],
);
return;
}
self[reg] = if addr <= 0x00ffffff {
self.ram[addr / 4]
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
(&SHARED.screen_buf[buf_addr])
.load(std::sync::atomic::Ordering::Relaxed)
} else {
match addr as isize - 0x0120_0000 {
#[cfg(feature = "rich_keyboard")]
-12 => SHARED.keyboard[0]
.load(std::sync::atomic::Ordering::Relaxed),
#[cfg(feature = "rich_keyboard")]
-8 => SHARED.keyboard[1]
.load(std::sync::atomic::Ordering::Relaxed),
#[cfg(feature = "rich_keyboard")]
-4 => SHARED.keyboard[2]
.load(std::sync::atomic::Ordering::Relaxed),
0 => SHARED.keyboard[3]
.load(std::sync::atomic::Ordering::Relaxed),
4 => time::Instant::now()
.duration_since(self.creation)
.as_millis()
as u32,
8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed),
12 => {
SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed)
}
16 => {
SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed)
}
//guaranted by the inequality and is multiple of 4
_ => {
self.serve_interupt(
InteruptKind::IllegalOpcode,
[next_opcode],
);
return;
}
}
};
}
Instruction::Push(op2) => {
self.sp -= 1;
self.ram[self.sp] = self.resolve(op2);
self.pc += 1;
}
Instruction::Pop(reg) => {
self[reg] = self.ram[self.sp];
self.sp += 1;
self.pc += 1;
}
Instruction::Skip(d, cond, reg, op2) => {
self.pc += 1;
if cond.eval(self[reg], self.resolve(op2)) {
self.pc += d as usize
}
}
Instruction::Jump(mut addr) => {
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if addr == 0 {
#[cfg(feature = "debug")]
{
match self.interupts {
InteruptState::Disabled => {
println!("program terminated");
self.error = true;
return;
}
_ => (),
}
if SHARED
.external_enabled_interupts
.load(std::sync::atomic::Ordering::Relaxed)
== 0
{
println!("Program terminated");
self.error = true;
return;
}
_ => (),
println!("awaiting interupt...");
}
if SHARED
.external_enabled_interupts
.load(std::sync::atomic::Ordering::Relaxed)
== 0
SHARED.external_interupts.wait(0);
}
self.pc = (addr + self.pc as u32) as usize;
}
Instruction::Call(mut addr) => {
self.sp -= 1;
self.ram[self.sp] = ((self.pc << 2) + 4) as u32;
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if unlikely(addr == 0) {
#[cfg(feature = "debug")]
{
println!("Program terminated");
println!("program terminated");
self.error = true;
return;
}
println!("awaiting interupt...");
#[cfg(not(feature = "debug"))]
exit(0);
}
SHARED.external_interupts.wait(0);
self.pc = (addr + self.pc as u32) as usize;
}
self.pc = (addr + self.pc as u32) as usize;
}
Instruction::Call(mut addr) => {
self.sp -= 1;
self.ram[self.sp] = ((self.pc << 2) + 4) as u32;
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if unlikely(addr == 0) {
#[cfg(feature = "debug")]
{
println!("program terminated");
self.error = true;
return;
}
#[cfg(not(feature = "debug"))]
exit(0);
Instruction::Ret() => {
self.pc = (self.ram[self.sp] >> 2) as usize;
self.sp += 1;
}
self.pc = (addr + self.pc as u32) as usize;
}
Instruction::Ret() => {
self.pc = (self.ram[self.sp] >> 2) as usize;
self.sp += 1;
}
Instruction::Reti() => {
let mut ret_index = None;
let mut ret_value = 0;
Instruction::Reti() => {
let mut ret_index = None;
let mut ret_value = 0;
match self.interupts {
InteruptState::Serving(kind, prev) => {
match prev.highest_one() {
None => self.interupts = InteruptState::Enabled,
Some(i) => {
self.interupts =
InteruptState::Serving(i.into(), prev ^ (1 << i))
}
}
match kind {
InteruptKind::MMIO => {
(&SHARED.external_interupts)
.store(0, std::sync::atomic::Ordering::Release);
SHARED.external_interupts.signal();
//no need to check prev because MMIO is the lowest priority
self.interupts = InteruptState::Enabled
}
InteruptKind::Swi => {}
InteruptKind::DivByZero | InteruptKind::UnsupportedOpcode => {
ret_index = Some(self.regs[0]);
ret_value = self.regs[1];
}
InteruptKind::IllegalLoadStore => {
if self.regs[0] == 0 {
ret_value = self.regs[1];
ret_index = Some(self.regs[2]);
match self.interupts {
InteruptState::Serving(kind, prev) => {
match prev.highest_one() {
None => self.interupts = InteruptState::Enabled,
Some(i) => {
self.interupts =
InteruptState::Serving(i.into(), prev ^ (1 << i))
}
}
InteruptKind::IllegalOpcode => {}
match kind {
InteruptKind::MMIO => {
(&SHARED.external_interupts)
.store(0, std::sync::atomic::Ordering::Release);
SHARED.external_interupts.signal();
//no need to check prev because MMIO is the lowest priority
self.interupts = InteruptState::Enabled
}
InteruptKind::Swi => {}
InteruptKind::DivByZero
| InteruptKind::UnsupportedOpcode => {
ret_index = Some(self.regs[0]);
ret_value = self.regs[1];
}
InteruptKind::IllegalLoadStore => {
if self.regs[0] == 0 {
ret_value = self.regs[1];
ret_index = Some(self.regs[2]);
}
}
InteruptKind::IllegalOpcode => {}
}
}
_ => { /* This is a troubling case but ... well it's ok */ }
}
let ret = self.ram[self.sp];
self.pc = (ret & 0x0FFF_FFFF) as usize;
self.sp += 1 as usize;
for i in (0..(ret >> 28) as usize).rev() {
self.regs[i] = self.ram[self.sp];
self.sp += 1
}
if let Some(idx) = ret_index {
self.regs[idx as usize] = ret_value;
}
_ => { /* This is a troubling case but ... well it's ok */ }
}
let ret = self.ram[self.sp];
self.pc = (ret & 0x0FFF_FFFF) as usize;
self.sp += 1 as usize;
for i in (0..(ret >> 28) as usize).rev() {
self.regs[i] = self.ram[self.sp];
self.sp += 1
Instruction::Eint() => {
match self.interupts {
InteruptState::Disabled => self.interupts = InteruptState::Enabled,
_ => {}
}
self.pc += 1;
}
if let Some(idx) = ret_index {
self.regs[idx as usize] = ret_value;
}
}
Instruction::Eint() => {
match self.interupts {
InteruptState::Disabled => self.interupts = InteruptState::Enabled,
_ => {}
}
self.pc += 1;
}
Instruction::Dint() => {
self.interupts = InteruptState::Disabled;
self.pc += 1;
Instruction::Dint() => {
self.interupts = InteruptState::Disabled;
self.pc += 1;
(&SHARED.external_enabled_interupts)
.store(0, std::sync::atomic::Ordering::Relaxed);
(&SHARED.external_interupts).store(0, std::sync::atomic::Ordering::Relaxed);
SHARED.external_interupts.signal();
}
Instruction::Swi() => {
self.pc += 1;
self.serve_interupt(InteruptKind::Swi, []);
}
Instruction::GetStack(reg) => {
self[reg] = (self.sp << 2) as u32;
self.pc += 1;
}
Instruction::SetStack(op2) => {
let v = self.resolve(op2);
if likely(v.is_multiple_of(4)) {
self.sp = (v >> 2) as usize;
} else {
self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it
(&SHARED.external_enabled_interupts)
.store(0, std::sync::atomic::Ordering::Relaxed);
(&SHARED.external_interupts)
.store(0, std::sync::atomic::Ordering::Relaxed);
SHARED.external_interupts.signal();
}
self.pc += 1;
Instruction::Swi() => {
self.pc += 1;
self.serve_interupt(InteruptKind::Swi, []);
}
Instruction::GetStack(reg) => {
self[reg] = (self.sp << 2) as u32;
self.pc += 1;
}
Instruction::SetStack(op2) => {
let v = self.resolve(op2);
if likely(v.is_multiple_of(4)) {
self.sp = (v >> 2) as usize;
} else {
self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it
}
self.pc += 1;
}
};
}
Err((kind, rx, ry, op2, opcode)) => {
self.pc += 1;
match kind {
InteruptKind::UnsupportedOpcode => self.serve_interupt(
kind,
[rx.0.into(), self[ry], self.resolve(op2), opcode],
),
InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]),
_ => unsafe { unreachable_unchecked() },
}
};
}
Err((kind, rx, ry, op2, opcode)) => {
self.pc += 1;
match kind {
InteruptKind::UnsupportedOpcode => self
.serve_interupt(kind, [rx.0.into(), self[ry], self.resolve(op2), opcode]),
InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]),
_ => unsafe { unreachable_unchecked() },
}
}
}

View File

@@ -303,7 +303,7 @@ fn main() -> Result<(), Error> {
let mut simulation = Computer::new(program);
#[cfg(not(feature = "debug"))]
loop {
simulation.step();
simulation.step(64);
}
#[cfg(feature = "debug")]
{
@@ -371,13 +371,13 @@ fn main() -> Result<(), Error> {
println!("cannot step, cpu killed");
break;
}
simulation.step();
simulation.step(1);
}
false
}
"r" | "run" => {
while !simulation.error {
simulation.step();
simulation.step(64);
}
false
}
@@ -408,7 +408,7 @@ fn main() -> Result<(), Error> {
while !simulation.error
&& simulation.ram[simulation.pc] != 0x8800_0000
{
simulation.step();
simulation.step(1);
}
false
}
@@ -419,7 +419,7 @@ fn main() -> Result<(), Error> {
while !simulation.error
&& simulation.pc != (v as usize / 4)
{
simulation.step();
simulation.step(1);
}
false
}