From ff6427b02083216093f67dcb9c22f9ecb5088f45 Mon Sep 17 00:00:00 2001 From: Mwa Date: Fri, 20 Mar 2026 11:08:18 +0100 Subject: [PATCH] performance improvement --- simu/src/cpu.rs | 569 ++++++++++++++++++++++++----------------------- simu/src/main.rs | 10 +- 2 files changed, 297 insertions(+), 282 deletions(-) diff --git a/simu/src/cpu.rs b/simu/src/cpu.rs index b8ecc3a..9fd92ed 100644 --- a/simu/src/cpu.rs +++ b/simu/src/cpu.rs @@ -396,7 +396,7 @@ impl Computer { new } #[inline(always)] - pub fn step(&mut self) { + pub fn step(&mut self, s: usize) { match self.interupts { InteruptState::Disabled => {} InteruptState::Enabled => { @@ -415,311 +415,326 @@ impl Computer { } InteruptState::Serving(..) => {} } + for _ in 0..s { + //potentially just changed by interupt. + let next_opcode = self.ram[self.pc]; - //potentially just changed by interupt. - let next_opcode = self.ram[self.pc]; + let instruction = Instruction::try_from(next_opcode); - let instruction = Instruction::try_from(next_opcode); - - match instruction { - Ok(instruction) => { - match instruction { - Instruction::Copy(reg, op2) => { - self[reg] = self.resolve(op2); - self.pc += 1; - } - Instruction::Add(reg, reg1, op2) => { - self[reg] = self[reg1] + self.resolve(op2); - self.pc += 1; - } - Instruction::Sub(reg, reg1, op2) => { - self[reg] = self[reg1] - self.resolve(op2); - self.pc += 1; - } - Instruction::Or(reg, reg1, op2) => { - self[reg] = self[reg1] | self.resolve(op2); - self.pc += 1; - } - Instruction::And(reg, reg1, op2) => { - self[reg] = self[reg1] & self.resolve(op2); - self.pc += 1; - } - Instruction::Xor(reg, reg1, op2) => { - self[reg] = self[reg1] ^ self.resolve(op2); - self.pc += 1; - } - Instruction::Lsl(reg, reg1, op2) => { - self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32; - self.pc += 1; - } - Instruction::Lsr(reg, reg1, op2) => { - self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32; - self.pc += 1; - } - Instruction::Asr(reg, reg1, op2) => { - self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32; - self.pc += 1; - } - Instruction::Umull(reg, reg1, op2) => { - self[reg] = self[reg1].wrapping_mul(self.resolve(op2)); - self.pc += 1; - } - Instruction::Smull(reg, reg1, op2) => { - self[reg] = - (self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32; - self.pc += 1; - } - Instruction::Umulh(reg, reg1, op2) => { - self[reg] = self[reg1].widening_mul(self.resolve(op2)).1; - self.pc += 1; - } - Instruction::Smulh(reg, reg1, op2) => { - self[reg] = - (self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32; - self.pc += 1; - } - Instruction::Div(reg, reg1, op2) => { - self.pc += 1; - let d = self.resolve(op2); - if unlikely(d == 0) { - self.serve_interupt( - InteruptKind::DivByZero, - [reg.0.into(), self[reg1]], - ); - return; + match instruction { + Ok(instruction) => { + match instruction { + Instruction::Copy(reg, op2) => { + self[reg] = self.resolve(op2); + self.pc += 1; } - self[reg] = self[reg1] / d; - } - Instruction::Mod(reg, reg1, op2) => { - self[reg] = self[reg1] % self.resolve(op2); - self.pc += 1 - } - Instruction::Store(reg, op2, reg1) => { - self.pc += 1; - let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize; - if !addr.is_multiple_of(4) { - self.serve_interupt( - InteruptKind::IllegalLoadStore, - [1, addr as u32, self[reg1]], - ); - return; + Instruction::Add(reg, reg1, op2) => { + self[reg] = self[reg1] + self.resolve(op2); + self.pc += 1; } - if addr <= 0x00ffffff { - self.ram[addr / 4] = self[reg1]; - } else if addr <= 0x00ff_ffff + 480 * 640 * 4 { - let buf_addr = (addr - 0x0100_0000) / 4; - let dat = if cfg!(feature = "rgba") { - self[reg1] - } else { - self[reg1] & 0x00FF_FFFF - }; - (&SHARED.screen_buf[buf_addr]) - .store(dat, std::sync::atomic::Ordering::Relaxed); - } else if addr == 0x0120_1000 { - (&SHARED.external_enabled_interupts) - .store(self[reg1], std::sync::atomic::Ordering::Relaxed); - } else { - self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]); + Instruction::Sub(reg, reg1, op2) => { + self[reg] = self[reg1] - self.resolve(op2); + self.pc += 1; } - } - Instruction::Load(reg, reg1, op2) => { - self.pc += 1; - let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize; - if !addr.is_multiple_of(4) { - self.serve_interupt( - InteruptKind::IllegalLoadStore, - [0, addr as u32, reg.0 as u32], - ); - return; + Instruction::Or(reg, reg1, op2) => { + self[reg] = self[reg1] | self.resolve(op2); + self.pc += 1; } - self[reg] = if addr <= 0x00ffffff { - self.ram[addr / 4] - } else if addr <= 0x00ffffff + 480 * 640 * 4 { - let buf_addr = (addr - 0x0100_0000) / 4; - (&SHARED.screen_buf[buf_addr]) - .load(std::sync::atomic::Ordering::Relaxed) - } else { - match addr as isize - 0x0120_0000 { - #[cfg(feature = "rich_keyboard")] - -12 => { - SHARED.keyboard[0].load(std::sync::atomic::Ordering::Relaxed) - } - #[cfg(feature = "rich_keyboard")] - -8 => SHARED.keyboard[1].load(std::sync::atomic::Ordering::Relaxed), - #[cfg(feature = "rich_keyboard")] - -4 => SHARED.keyboard[2].load(std::sync::atomic::Ordering::Relaxed), - 0 => SHARED.keyboard[3].load(std::sync::atomic::Ordering::Relaxed), - 4 => time::Instant::now() - .duration_since(self.creation) - .as_millis() as u32, - 8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed), - 12 => SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed), - 16 => SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed), - //guaranted by the inequality and is multiple of 4 - _ => { - self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]); - return; - } + Instruction::And(reg, reg1, op2) => { + self[reg] = self[reg1] & self.resolve(op2); + self.pc += 1; + } + Instruction::Xor(reg, reg1, op2) => { + self[reg] = self[reg1] ^ self.resolve(op2); + self.pc += 1; + } + Instruction::Lsl(reg, reg1, op2) => { + self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32; + self.pc += 1; + } + Instruction::Lsr(reg, reg1, op2) => { + self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32; + self.pc += 1; + } + Instruction::Asr(reg, reg1, op2) => { + self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32; + self.pc += 1; + } + Instruction::Umull(reg, reg1, op2) => { + self[reg] = self[reg1].wrapping_mul(self.resolve(op2)); + self.pc += 1; + } + Instruction::Smull(reg, reg1, op2) => { + self[reg] = + (self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32; + self.pc += 1; + } + Instruction::Umulh(reg, reg1, op2) => { + self[reg] = self[reg1].widening_mul(self.resolve(op2)).1; + self.pc += 1; + } + Instruction::Smulh(reg, reg1, op2) => { + self[reg] = + (self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32; + self.pc += 1; + } + Instruction::Div(reg, reg1, op2) => { + self.pc += 1; + let d = self.resolve(op2); + if unlikely(d == 0) { + self.serve_interupt( + InteruptKind::DivByZero, + [reg.0.into(), self[reg1]], + ); + return; } - }; - } - Instruction::Push(op2) => { - self.sp -= 1; - self.ram[self.sp] = self.resolve(op2); - self.pc += 1; - } - Instruction::Pop(reg) => { - self[reg] = self.ram[self.sp]; - self.sp += 1; - self.pc += 1; - } - Instruction::Skip(d, cond, reg, op2) => { - self.pc += 1; - if cond.eval(self[reg], self.resolve(op2)) { - self.pc += d as usize + self[reg] = self[reg1] / d; } - } - Instruction::Jump(mut addr) => { - if addr & (1 << 28) != 0 { - addr += 7 << 29; - } else if addr == 0 { - #[cfg(feature = "debug")] - { - match self.interupts { - InteruptState::Disabled => { - println!("program terminated"); + Instruction::Mod(reg, reg1, op2) => { + self[reg] = self[reg1] % self.resolve(op2); + self.pc += 1 + } + Instruction::Store(reg, op2, reg1) => { + self.pc += 1; + let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize; + if !addr.is_multiple_of(4) { + self.serve_interupt( + InteruptKind::IllegalLoadStore, + [1, addr as u32, self[reg1]], + ); + return; + } + if addr <= 0x00ffffff { + self.ram[addr / 4] = self[reg1]; + } else if addr <= 0x00ff_ffff + 480 * 640 * 4 { + let buf_addr = (addr - 0x0100_0000) / 4; + let dat = if cfg!(feature = "rgba") { + self[reg1] + } else { + self[reg1] & 0x00FF_FFFF + }; + (&SHARED.screen_buf[buf_addr]) + .store(dat, std::sync::atomic::Ordering::Relaxed); + } else if addr == 0x0120_1000 { + (&SHARED.external_enabled_interupts) + .store(self[reg1], std::sync::atomic::Ordering::Relaxed); + } else { + self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]); + } + } + Instruction::Load(reg, reg1, op2) => { + self.pc += 1; + let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize; + if !addr.is_multiple_of(4) { + self.serve_interupt( + InteruptKind::IllegalLoadStore, + [0, addr as u32, reg.0 as u32], + ); + return; + } + self[reg] = if addr <= 0x00ffffff { + self.ram[addr / 4] + } else if addr <= 0x00ffffff + 480 * 640 * 4 { + let buf_addr = (addr - 0x0100_0000) / 4; + (&SHARED.screen_buf[buf_addr]) + .load(std::sync::atomic::Ordering::Relaxed) + } else { + match addr as isize - 0x0120_0000 { + #[cfg(feature = "rich_keyboard")] + -12 => SHARED.keyboard[0] + .load(std::sync::atomic::Ordering::Relaxed), + #[cfg(feature = "rich_keyboard")] + -8 => SHARED.keyboard[1] + .load(std::sync::atomic::Ordering::Relaxed), + #[cfg(feature = "rich_keyboard")] + -4 => SHARED.keyboard[2] + .load(std::sync::atomic::Ordering::Relaxed), + 0 => SHARED.keyboard[3] + .load(std::sync::atomic::Ordering::Relaxed), + 4 => time::Instant::now() + .duration_since(self.creation) + .as_millis() + as u32, + 8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed), + 12 => { + SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed) + } + 16 => { + SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed) + } + //guaranted by the inequality and is multiple of 4 + _ => { + self.serve_interupt( + InteruptKind::IllegalOpcode, + [next_opcode], + ); + return; + } + } + }; + } + Instruction::Push(op2) => { + self.sp -= 1; + self.ram[self.sp] = self.resolve(op2); + self.pc += 1; + } + Instruction::Pop(reg) => { + self[reg] = self.ram[self.sp]; + self.sp += 1; + self.pc += 1; + } + Instruction::Skip(d, cond, reg, op2) => { + self.pc += 1; + if cond.eval(self[reg], self.resolve(op2)) { + self.pc += d as usize + } + } + Instruction::Jump(mut addr) => { + if addr & (1 << 28) != 0 { + addr += 7 << 29; + } else if addr == 0 { + #[cfg(feature = "debug")] + { + match self.interupts { + InteruptState::Disabled => { + println!("program terminated"); + self.error = true; + return; + } + _ => (), + } + if SHARED + .external_enabled_interupts + .load(std::sync::atomic::Ordering::Relaxed) + == 0 + { + println!("Program terminated"); self.error = true; return; } - _ => (), + println!("awaiting interupt..."); } - if SHARED - .external_enabled_interupts - .load(std::sync::atomic::Ordering::Relaxed) - == 0 + SHARED.external_interupts.wait(0); + } + self.pc = (addr + self.pc as u32) as usize; + } + Instruction::Call(mut addr) => { + self.sp -= 1; + self.ram[self.sp] = ((self.pc << 2) + 4) as u32; + + if addr & (1 << 28) != 0 { + addr += 7 << 29; + } else if unlikely(addr == 0) { + #[cfg(feature = "debug")] { - println!("Program terminated"); + println!("program terminated"); self.error = true; return; } - println!("awaiting interupt..."); + #[cfg(not(feature = "debug"))] + exit(0); } - SHARED.external_interupts.wait(0); + self.pc = (addr + self.pc as u32) as usize; } - self.pc = (addr + self.pc as u32) as usize; - } - Instruction::Call(mut addr) => { - self.sp -= 1; - self.ram[self.sp] = ((self.pc << 2) + 4) as u32; - - if addr & (1 << 28) != 0 { - addr += 7 << 29; - } else if unlikely(addr == 0) { - #[cfg(feature = "debug")] - { - println!("program terminated"); - self.error = true; - return; - } - #[cfg(not(feature = "debug"))] - exit(0); + Instruction::Ret() => { + self.pc = (self.ram[self.sp] >> 2) as usize; + self.sp += 1; } - self.pc = (addr + self.pc as u32) as usize; - } - Instruction::Ret() => { - self.pc = (self.ram[self.sp] >> 2) as usize; - self.sp += 1; - } - Instruction::Reti() => { - let mut ret_index = None; - let mut ret_value = 0; + Instruction::Reti() => { + let mut ret_index = None; + let mut ret_value = 0; - match self.interupts { - InteruptState::Serving(kind, prev) => { - match prev.highest_one() { - None => self.interupts = InteruptState::Enabled, - Some(i) => { - self.interupts = - InteruptState::Serving(i.into(), prev ^ (1 << i)) - } - } - match kind { - InteruptKind::MMIO => { - (&SHARED.external_interupts) - .store(0, std::sync::atomic::Ordering::Release); - SHARED.external_interupts.signal(); - //no need to check prev because MMIO is the lowest priority - self.interupts = InteruptState::Enabled - } - InteruptKind::Swi => {} - InteruptKind::DivByZero | InteruptKind::UnsupportedOpcode => { - ret_index = Some(self.regs[0]); - ret_value = self.regs[1]; - } - InteruptKind::IllegalLoadStore => { - if self.regs[0] == 0 { - ret_value = self.regs[1]; - ret_index = Some(self.regs[2]); + match self.interupts { + InteruptState::Serving(kind, prev) => { + match prev.highest_one() { + None => self.interupts = InteruptState::Enabled, + Some(i) => { + self.interupts = + InteruptState::Serving(i.into(), prev ^ (1 << i)) } } - InteruptKind::IllegalOpcode => {} + match kind { + InteruptKind::MMIO => { + (&SHARED.external_interupts) + .store(0, std::sync::atomic::Ordering::Release); + SHARED.external_interupts.signal(); + //no need to check prev because MMIO is the lowest priority + self.interupts = InteruptState::Enabled + } + InteruptKind::Swi => {} + InteruptKind::DivByZero + | InteruptKind::UnsupportedOpcode => { + ret_index = Some(self.regs[0]); + ret_value = self.regs[1]; + } + InteruptKind::IllegalLoadStore => { + if self.regs[0] == 0 { + ret_value = self.regs[1]; + ret_index = Some(self.regs[2]); + } + } + InteruptKind::IllegalOpcode => {} + } } + _ => { /* This is a troubling case but ... well it's ok */ } + } + let ret = self.ram[self.sp]; + self.pc = (ret & 0x0FFF_FFFF) as usize; + self.sp += 1 as usize; + for i in (0..(ret >> 28) as usize).rev() { + self.regs[i] = self.ram[self.sp]; + self.sp += 1 + } + if let Some(idx) = ret_index { + self.regs[idx as usize] = ret_value; } - _ => { /* This is a troubling case but ... well it's ok */ } } - let ret = self.ram[self.sp]; - self.pc = (ret & 0x0FFF_FFFF) as usize; - self.sp += 1 as usize; - for i in (0..(ret >> 28) as usize).rev() { - self.regs[i] = self.ram[self.sp]; - self.sp += 1 + Instruction::Eint() => { + match self.interupts { + InteruptState::Disabled => self.interupts = InteruptState::Enabled, + _ => {} + } + self.pc += 1; } - if let Some(idx) = ret_index { - self.regs[idx as usize] = ret_value; - } - } - Instruction::Eint() => { - match self.interupts { - InteruptState::Disabled => self.interupts = InteruptState::Enabled, - _ => {} - } - self.pc += 1; - } - Instruction::Dint() => { - self.interupts = InteruptState::Disabled; - self.pc += 1; + Instruction::Dint() => { + self.interupts = InteruptState::Disabled; + self.pc += 1; - (&SHARED.external_enabled_interupts) - .store(0, std::sync::atomic::Ordering::Relaxed); - (&SHARED.external_interupts).store(0, std::sync::atomic::Ordering::Relaxed); - SHARED.external_interupts.signal(); - } - Instruction::Swi() => { - self.pc += 1; - self.serve_interupt(InteruptKind::Swi, []); - } - Instruction::GetStack(reg) => { - self[reg] = (self.sp << 2) as u32; - self.pc += 1; - } - Instruction::SetStack(op2) => { - let v = self.resolve(op2); - if likely(v.is_multiple_of(4)) { - self.sp = (v >> 2) as usize; - } else { - self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it + (&SHARED.external_enabled_interupts) + .store(0, std::sync::atomic::Ordering::Relaxed); + (&SHARED.external_interupts) + .store(0, std::sync::atomic::Ordering::Relaxed); + SHARED.external_interupts.signal(); } - self.pc += 1; + Instruction::Swi() => { + self.pc += 1; + self.serve_interupt(InteruptKind::Swi, []); + } + Instruction::GetStack(reg) => { + self[reg] = (self.sp << 2) as u32; + self.pc += 1; + } + Instruction::SetStack(op2) => { + let v = self.resolve(op2); + if likely(v.is_multiple_of(4)) { + self.sp = (v >> 2) as usize; + } else { + self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it + } + self.pc += 1; + } + }; + } + Err((kind, rx, ry, op2, opcode)) => { + self.pc += 1; + match kind { + InteruptKind::UnsupportedOpcode => self.serve_interupt( + kind, + [rx.0.into(), self[ry], self.resolve(op2), opcode], + ), + InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]), + _ => unsafe { unreachable_unchecked() }, } - }; - } - Err((kind, rx, ry, op2, opcode)) => { - self.pc += 1; - match kind { - InteruptKind::UnsupportedOpcode => self - .serve_interupt(kind, [rx.0.into(), self[ry], self.resolve(op2), opcode]), - InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]), - _ => unsafe { unreachable_unchecked() }, } } } diff --git a/simu/src/main.rs b/simu/src/main.rs index b6f4c0c..74bc09a 100644 --- a/simu/src/main.rs +++ b/simu/src/main.rs @@ -303,7 +303,7 @@ fn main() -> Result<(), Error> { let mut simulation = Computer::new(program); #[cfg(not(feature = "debug"))] loop { - simulation.step(); + simulation.step(64); } #[cfg(feature = "debug")] { @@ -371,13 +371,13 @@ fn main() -> Result<(), Error> { println!("cannot step, cpu killed"); break; } - simulation.step(); + simulation.step(1); } false } "r" | "run" => { while !simulation.error { - simulation.step(); + simulation.step(64); } false } @@ -408,7 +408,7 @@ fn main() -> Result<(), Error> { while !simulation.error && simulation.ram[simulation.pc] != 0x8800_0000 { - simulation.step(); + simulation.step(1); } false } @@ -419,7 +419,7 @@ fn main() -> Result<(), Error> { while !simulation.error && simulation.pc != (v as usize / 4) { - simulation.step(); + simulation.step(1); } false }