performance improvement

This commit is contained in:
Mwa
2026-03-20 11:08:18 +01:00
parent c72e133cde
commit ff6427b020
2 changed files with 297 additions and 282 deletions

View File

@@ -396,7 +396,7 @@ impl Computer {
new new
} }
#[inline(always)] #[inline(always)]
pub fn step(&mut self) { pub fn step(&mut self, s: usize) {
match self.interupts { match self.interupts {
InteruptState::Disabled => {} InteruptState::Disabled => {}
InteruptState::Enabled => { InteruptState::Enabled => {
@@ -415,311 +415,326 @@ impl Computer {
} }
InteruptState::Serving(..) => {} InteruptState::Serving(..) => {}
} }
for _ in 0..s {
//potentially just changed by interupt.
let next_opcode = self.ram[self.pc];
//potentially just changed by interupt. let instruction = Instruction::try_from(next_opcode);
let next_opcode = self.ram[self.pc];
let instruction = Instruction::try_from(next_opcode); match instruction {
Ok(instruction) => {
match instruction { match instruction {
Ok(instruction) => { Instruction::Copy(reg, op2) => {
match instruction { self[reg] = self.resolve(op2);
Instruction::Copy(reg, op2) => { self.pc += 1;
self[reg] = self.resolve(op2);
self.pc += 1;
}
Instruction::Add(reg, reg1, op2) => {
self[reg] = self[reg1] + self.resolve(op2);
self.pc += 1;
}
Instruction::Sub(reg, reg1, op2) => {
self[reg] = self[reg1] - self.resolve(op2);
self.pc += 1;
}
Instruction::Or(reg, reg1, op2) => {
self[reg] = self[reg1] | self.resolve(op2);
self.pc += 1;
}
Instruction::And(reg, reg1, op2) => {
self[reg] = self[reg1] & self.resolve(op2);
self.pc += 1;
}
Instruction::Xor(reg, reg1, op2) => {
self[reg] = self[reg1] ^ self.resolve(op2);
self.pc += 1;
}
Instruction::Lsl(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Lsr(reg, reg1, op2) => {
self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Asr(reg, reg1, op2) => {
self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Umull(reg, reg1, op2) => {
self[reg] = self[reg1].wrapping_mul(self.resolve(op2));
self.pc += 1;
}
Instruction::Smull(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32;
self.pc += 1;
}
Instruction::Umulh(reg, reg1, op2) => {
self[reg] = self[reg1].widening_mul(self.resolve(op2)).1;
self.pc += 1;
}
Instruction::Smulh(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32;
self.pc += 1;
}
Instruction::Div(reg, reg1, op2) => {
self.pc += 1;
let d = self.resolve(op2);
if unlikely(d == 0) {
self.serve_interupt(
InteruptKind::DivByZero,
[reg.0.into(), self[reg1]],
);
return;
} }
self[reg] = self[reg1] / d; Instruction::Add(reg, reg1, op2) => {
} self[reg] = self[reg1] + self.resolve(op2);
Instruction::Mod(reg, reg1, op2) => { self.pc += 1;
self[reg] = self[reg1] % self.resolve(op2);
self.pc += 1
}
Instruction::Store(reg, op2, reg1) => {
self.pc += 1;
let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[1, addr as u32, self[reg1]],
);
return;
} }
if addr <= 0x00ffffff { Instruction::Sub(reg, reg1, op2) => {
self.ram[addr / 4] = self[reg1]; self[reg] = self[reg1] - self.resolve(op2);
} else if addr <= 0x00ff_ffff + 480 * 640 * 4 { self.pc += 1;
let buf_addr = (addr - 0x0100_0000) / 4;
let dat = if cfg!(feature = "rgba") {
self[reg1]
} else {
self[reg1] & 0x00FF_FFFF
};
(&SHARED.screen_buf[buf_addr])
.store(dat, std::sync::atomic::Ordering::Relaxed);
} else if addr == 0x0120_1000 {
(&SHARED.external_enabled_interupts)
.store(self[reg1], std::sync::atomic::Ordering::Relaxed);
} else {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]);
} }
} Instruction::Or(reg, reg1, op2) => {
Instruction::Load(reg, reg1, op2) => { self[reg] = self[reg1] | self.resolve(op2);
self.pc += 1; self.pc += 1;
let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[0, addr as u32, reg.0 as u32],
);
return;
} }
self[reg] = if addr <= 0x00ffffff { Instruction::And(reg, reg1, op2) => {
self.ram[addr / 4] self[reg] = self[reg1] & self.resolve(op2);
} else if addr <= 0x00ffffff + 480 * 640 * 4 { self.pc += 1;
let buf_addr = (addr - 0x0100_0000) / 4; }
(&SHARED.screen_buf[buf_addr]) Instruction::Xor(reg, reg1, op2) => {
.load(std::sync::atomic::Ordering::Relaxed) self[reg] = self[reg1] ^ self.resolve(op2);
} else { self.pc += 1;
match addr as isize - 0x0120_0000 { }
#[cfg(feature = "rich_keyboard")] Instruction::Lsl(reg, reg1, op2) => {
-12 => { self[reg] = (self[reg1] as u64).wrapping_shl(self.resolve(op2)) as u32;
SHARED.keyboard[0].load(std::sync::atomic::Ordering::Relaxed) self.pc += 1;
} }
#[cfg(feature = "rich_keyboard")] Instruction::Lsr(reg, reg1, op2) => {
-8 => SHARED.keyboard[1].load(std::sync::atomic::Ordering::Relaxed), self[reg] = (self[reg1] as u64).wrapping_shr(self.resolve(op2)) as u32;
#[cfg(feature = "rich_keyboard")] self.pc += 1;
-4 => SHARED.keyboard[2].load(std::sync::atomic::Ordering::Relaxed), }
0 => SHARED.keyboard[3].load(std::sync::atomic::Ordering::Relaxed), Instruction::Asr(reg, reg1, op2) => {
4 => time::Instant::now() self[reg] = (self[reg1] as i64).wrapping_shr(self.resolve(op2)) as u32;
.duration_since(self.creation) self.pc += 1;
.as_millis() as u32, }
8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed), Instruction::Umull(reg, reg1, op2) => {
12 => SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed), self[reg] = self[reg1].wrapping_mul(self.resolve(op2));
16 => SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed), self.pc += 1;
//guaranted by the inequality and is multiple of 4 }
_ => { Instruction::Smull(reg, reg1, op2) => {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]); self[reg] =
return; (self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32;
} self.pc += 1;
}
Instruction::Umulh(reg, reg1, op2) => {
self[reg] = self[reg1].widening_mul(self.resolve(op2)).1;
self.pc += 1;
}
Instruction::Smulh(reg, reg1, op2) => {
self[reg] =
(self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32;
self.pc += 1;
}
Instruction::Div(reg, reg1, op2) => {
self.pc += 1;
let d = self.resolve(op2);
if unlikely(d == 0) {
self.serve_interupt(
InteruptKind::DivByZero,
[reg.0.into(), self[reg1]],
);
return;
} }
}; self[reg] = self[reg1] / d;
}
Instruction::Push(op2) => {
self.sp -= 1;
self.ram[self.sp] = self.resolve(op2);
self.pc += 1;
}
Instruction::Pop(reg) => {
self[reg] = self.ram[self.sp];
self.sp += 1;
self.pc += 1;
}
Instruction::Skip(d, cond, reg, op2) => {
self.pc += 1;
if cond.eval(self[reg], self.resolve(op2)) {
self.pc += d as usize
} }
} Instruction::Mod(reg, reg1, op2) => {
Instruction::Jump(mut addr) => { self[reg] = self[reg1] % self.resolve(op2);
if addr & (1 << 28) != 0 { self.pc += 1
addr += 7 << 29; }
} else if addr == 0 { Instruction::Store(reg, op2, reg1) => {
#[cfg(feature = "debug")] self.pc += 1;
{ let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize;
match self.interupts { if !addr.is_multiple_of(4) {
InteruptState::Disabled => { self.serve_interupt(
println!("program terminated"); InteruptKind::IllegalLoadStore,
[1, addr as u32, self[reg1]],
);
return;
}
if addr <= 0x00ffffff {
self.ram[addr / 4] = self[reg1];
} else if addr <= 0x00ff_ffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
let dat = if cfg!(feature = "rgba") {
self[reg1]
} else {
self[reg1] & 0x00FF_FFFF
};
(&SHARED.screen_buf[buf_addr])
.store(dat, std::sync::atomic::Ordering::Relaxed);
} else if addr == 0x0120_1000 {
(&SHARED.external_enabled_interupts)
.store(self[reg1], std::sync::atomic::Ordering::Relaxed);
} else {
self.serve_interupt(InteruptKind::IllegalOpcode, [next_opcode]);
}
}
Instruction::Load(reg, reg1, op2) => {
self.pc += 1;
let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
self.serve_interupt(
InteruptKind::IllegalLoadStore,
[0, addr as u32, reg.0 as u32],
);
return;
}
self[reg] = if addr <= 0x00ffffff {
self.ram[addr / 4]
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
(&SHARED.screen_buf[buf_addr])
.load(std::sync::atomic::Ordering::Relaxed)
} else {
match addr as isize - 0x0120_0000 {
#[cfg(feature = "rich_keyboard")]
-12 => SHARED.keyboard[0]
.load(std::sync::atomic::Ordering::Relaxed),
#[cfg(feature = "rich_keyboard")]
-8 => SHARED.keyboard[1]
.load(std::sync::atomic::Ordering::Relaxed),
#[cfg(feature = "rich_keyboard")]
-4 => SHARED.keyboard[2]
.load(std::sync::atomic::Ordering::Relaxed),
0 => SHARED.keyboard[3]
.load(std::sync::atomic::Ordering::Relaxed),
4 => time::Instant::now()
.duration_since(self.creation)
.as_millis()
as u32,
8 => SHARED.mouse[0].load(std::sync::atomic::Ordering::Relaxed),
12 => {
SHARED.mouse[1].load(std::sync::atomic::Ordering::Relaxed)
}
16 => {
SHARED.mouse[2].load(std::sync::atomic::Ordering::Relaxed)
}
//guaranted by the inequality and is multiple of 4
_ => {
self.serve_interupt(
InteruptKind::IllegalOpcode,
[next_opcode],
);
return;
}
}
};
}
Instruction::Push(op2) => {
self.sp -= 1;
self.ram[self.sp] = self.resolve(op2);
self.pc += 1;
}
Instruction::Pop(reg) => {
self[reg] = self.ram[self.sp];
self.sp += 1;
self.pc += 1;
}
Instruction::Skip(d, cond, reg, op2) => {
self.pc += 1;
if cond.eval(self[reg], self.resolve(op2)) {
self.pc += d as usize
}
}
Instruction::Jump(mut addr) => {
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if addr == 0 {
#[cfg(feature = "debug")]
{
match self.interupts {
InteruptState::Disabled => {
println!("program terminated");
self.error = true;
return;
}
_ => (),
}
if SHARED
.external_enabled_interupts
.load(std::sync::atomic::Ordering::Relaxed)
== 0
{
println!("Program terminated");
self.error = true; self.error = true;
return; return;
} }
_ => (), println!("awaiting interupt...");
} }
if SHARED SHARED.external_interupts.wait(0);
.external_enabled_interupts }
.load(std::sync::atomic::Ordering::Relaxed) self.pc = (addr + self.pc as u32) as usize;
== 0 }
Instruction::Call(mut addr) => {
self.sp -= 1;
self.ram[self.sp] = ((self.pc << 2) + 4) as u32;
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if unlikely(addr == 0) {
#[cfg(feature = "debug")]
{ {
println!("Program terminated"); println!("program terminated");
self.error = true; self.error = true;
return; return;
} }
println!("awaiting interupt..."); #[cfg(not(feature = "debug"))]
exit(0);
} }
SHARED.external_interupts.wait(0); self.pc = (addr + self.pc as u32) as usize;
} }
self.pc = (addr + self.pc as u32) as usize; Instruction::Ret() => {
} self.pc = (self.ram[self.sp] >> 2) as usize;
Instruction::Call(mut addr) => { self.sp += 1;
self.sp -= 1;
self.ram[self.sp] = ((self.pc << 2) + 4) as u32;
if addr & (1 << 28) != 0 {
addr += 7 << 29;
} else if unlikely(addr == 0) {
#[cfg(feature = "debug")]
{
println!("program terminated");
self.error = true;
return;
}
#[cfg(not(feature = "debug"))]
exit(0);
} }
self.pc = (addr + self.pc as u32) as usize; Instruction::Reti() => {
} let mut ret_index = None;
Instruction::Ret() => { let mut ret_value = 0;
self.pc = (self.ram[self.sp] >> 2) as usize;
self.sp += 1;
}
Instruction::Reti() => {
let mut ret_index = None;
let mut ret_value = 0;
match self.interupts { match self.interupts {
InteruptState::Serving(kind, prev) => { InteruptState::Serving(kind, prev) => {
match prev.highest_one() { match prev.highest_one() {
None => self.interupts = InteruptState::Enabled, None => self.interupts = InteruptState::Enabled,
Some(i) => { Some(i) => {
self.interupts = self.interupts =
InteruptState::Serving(i.into(), prev ^ (1 << i)) InteruptState::Serving(i.into(), prev ^ (1 << i))
}
}
match kind {
InteruptKind::MMIO => {
(&SHARED.external_interupts)
.store(0, std::sync::atomic::Ordering::Release);
SHARED.external_interupts.signal();
//no need to check prev because MMIO is the lowest priority
self.interupts = InteruptState::Enabled
}
InteruptKind::Swi => {}
InteruptKind::DivByZero | InteruptKind::UnsupportedOpcode => {
ret_index = Some(self.regs[0]);
ret_value = self.regs[1];
}
InteruptKind::IllegalLoadStore => {
if self.regs[0] == 0 {
ret_value = self.regs[1];
ret_index = Some(self.regs[2]);
} }
} }
InteruptKind::IllegalOpcode => {} match kind {
InteruptKind::MMIO => {
(&SHARED.external_interupts)
.store(0, std::sync::atomic::Ordering::Release);
SHARED.external_interupts.signal();
//no need to check prev because MMIO is the lowest priority
self.interupts = InteruptState::Enabled
}
InteruptKind::Swi => {}
InteruptKind::DivByZero
| InteruptKind::UnsupportedOpcode => {
ret_index = Some(self.regs[0]);
ret_value = self.regs[1];
}
InteruptKind::IllegalLoadStore => {
if self.regs[0] == 0 {
ret_value = self.regs[1];
ret_index = Some(self.regs[2]);
}
}
InteruptKind::IllegalOpcode => {}
}
} }
_ => { /* This is a troubling case but ... well it's ok */ }
}
let ret = self.ram[self.sp];
self.pc = (ret & 0x0FFF_FFFF) as usize;
self.sp += 1 as usize;
for i in (0..(ret >> 28) as usize).rev() {
self.regs[i] = self.ram[self.sp];
self.sp += 1
}
if let Some(idx) = ret_index {
self.regs[idx as usize] = ret_value;
} }
_ => { /* This is a troubling case but ... well it's ok */ }
} }
let ret = self.ram[self.sp]; Instruction::Eint() => {
self.pc = (ret & 0x0FFF_FFFF) as usize; match self.interupts {
self.sp += 1 as usize; InteruptState::Disabled => self.interupts = InteruptState::Enabled,
for i in (0..(ret >> 28) as usize).rev() { _ => {}
self.regs[i] = self.ram[self.sp]; }
self.sp += 1 self.pc += 1;
} }
if let Some(idx) = ret_index { Instruction::Dint() => {
self.regs[idx as usize] = ret_value; self.interupts = InteruptState::Disabled;
} self.pc += 1;
}
Instruction::Eint() => {
match self.interupts {
InteruptState::Disabled => self.interupts = InteruptState::Enabled,
_ => {}
}
self.pc += 1;
}
Instruction::Dint() => {
self.interupts = InteruptState::Disabled;
self.pc += 1;
(&SHARED.external_enabled_interupts) (&SHARED.external_enabled_interupts)
.store(0, std::sync::atomic::Ordering::Relaxed); .store(0, std::sync::atomic::Ordering::Relaxed);
(&SHARED.external_interupts).store(0, std::sync::atomic::Ordering::Relaxed); (&SHARED.external_interupts)
SHARED.external_interupts.signal(); .store(0, std::sync::atomic::Ordering::Relaxed);
} SHARED.external_interupts.signal();
Instruction::Swi() => {
self.pc += 1;
self.serve_interupt(InteruptKind::Swi, []);
}
Instruction::GetStack(reg) => {
self[reg] = (self.sp << 2) as u32;
self.pc += 1;
}
Instruction::SetStack(op2) => {
let v = self.resolve(op2);
if likely(v.is_multiple_of(4)) {
self.sp = (v >> 2) as usize;
} else {
self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it
} }
self.pc += 1; Instruction::Swi() => {
self.pc += 1;
self.serve_interupt(InteruptKind::Swi, []);
}
Instruction::GetStack(reg) => {
self[reg] = (self.sp << 2) as u32;
self.pc += 1;
}
Instruction::SetStack(op2) => {
let v = self.resolve(op2);
if likely(v.is_multiple_of(4)) {
self.sp = (v >> 2) as usize;
} else {
self.sp = usize::MAX //Yes, that means that clever program using sp to store information wont work on my emulator. Deal with it
}
self.pc += 1;
}
};
}
Err((kind, rx, ry, op2, opcode)) => {
self.pc += 1;
match kind {
InteruptKind::UnsupportedOpcode => self.serve_interupt(
kind,
[rx.0.into(), self[ry], self.resolve(op2), opcode],
),
InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]),
_ => unsafe { unreachable_unchecked() },
} }
};
}
Err((kind, rx, ry, op2, opcode)) => {
self.pc += 1;
match kind {
InteruptKind::UnsupportedOpcode => self
.serve_interupt(kind, [rx.0.into(), self[ry], self.resolve(op2), opcode]),
InteruptKind::IllegalOpcode => self.serve_interupt(kind, [next_opcode]),
_ => unsafe { unreachable_unchecked() },
} }
} }
} }

View File

@@ -303,7 +303,7 @@ fn main() -> Result<(), Error> {
let mut simulation = Computer::new(program); let mut simulation = Computer::new(program);
#[cfg(not(feature = "debug"))] #[cfg(not(feature = "debug"))]
loop { loop {
simulation.step(); simulation.step(64);
} }
#[cfg(feature = "debug")] #[cfg(feature = "debug")]
{ {
@@ -371,13 +371,13 @@ fn main() -> Result<(), Error> {
println!("cannot step, cpu killed"); println!("cannot step, cpu killed");
break; break;
} }
simulation.step(); simulation.step(1);
} }
false false
} }
"r" | "run" => { "r" | "run" => {
while !simulation.error { while !simulation.error {
simulation.step(); simulation.step(64);
} }
false false
} }
@@ -408,7 +408,7 @@ fn main() -> Result<(), Error> {
while !simulation.error while !simulation.error
&& simulation.ram[simulation.pc] != 0x8800_0000 && simulation.ram[simulation.pc] != 0x8800_0000
{ {
simulation.step(); simulation.step(1);
} }
false false
} }
@@ -419,7 +419,7 @@ fn main() -> Result<(), Error> {
while !simulation.error while !simulation.error
&& simulation.pc != (v as usize / 4) && simulation.pc != (v as usize / 4)
{ {
simulation.step(); simulation.step(1);
} }
false false
} }