From aead858727c7f0fcae17d3f879ae090acc5c4084 Mon Sep 17 00:00:00 2001 From: Mwa Date: Sat, 14 Mar 2026 00:48:03 +0100 Subject: [PATCH] performance improvement --- Cargo.toml | 3 + simu/src/cpu.rs | 143 +++++++++++++-------------- simu/src/main.rs | 250 +++++++++++++++++++++++++++++------------------ 3 files changed, 226 insertions(+), 170 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3e54e67..90ab2cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,6 @@ [workspace] resolver = "3" members = ["simu","asm"] + +[profile.release] +debug = "line-tables-only" diff --git a/simu/src/cpu.rs b/simu/src/cpu.rs index d9011bb..5fe9c63 100644 --- a/simu/src/cpu.rs +++ b/simu/src/cpu.rs @@ -1,13 +1,22 @@ use std::{ hint::{likely, unlikely}, io::Read, + ops::{Index, IndexMut}, process::exit, - sync::{Mutex, atomic::AtomicU32}, + sync::atomic::AtomicU32, thread::sleep, time::{self, Duration, Instant}, }; -use pixels::Pixels; +pub(crate) struct SharedState { + pub(crate) keyboard: AtomicU32, + pub(crate) screen_buf: [AtomicU32; 480 * 640], +} + +pub(crate) static SHARED: SharedState = SharedState { + keyboard: AtomicU32::new(0), + screen_buf: [const { AtomicU32::new(0) }; 480 * 640], +}; enum Op2 { Direct(u32), @@ -156,30 +165,40 @@ impl From for Instruction { } } -pub struct Computer<'a, 'b> { +pub struct Computer { creation: Instant, ram: Box<[u32; 0x01000000 / 4]>, regs: [u32; 16], pc: usize, sp: usize, - screen: &'b Mutex>, - key: &'b AtomicU32, +} + +impl Index for Computer { + type Output = u32; + + fn index(&self, index: Reg) -> &Self::Output { + &self.regs[index.0 as usize] + } +} + +impl IndexMut for Computer { + fn index_mut(&mut self, index: Reg) -> &mut Self::Output { + &mut self.regs[index.0 as usize] + } } fn iot() -> ! { exit(1); } -impl<'a, 'b> Computer<'a, 'b> { - pub fn new(filename: String, screen: &'b Mutex>, key: &'b AtomicU32) -> Self { +impl Computer { + pub fn new(filename: String) -> Self { let mut new = Self { creation: Instant::now(), ram: unsafe { Box::new_zeroed().assume_init() }, regs: [0; 16], pc: 0, sp: 0, - screen, - key, }; let mut buf = String::new(); std::fs::File::open(filename) @@ -198,122 +217,100 @@ impl<'a, 'b> Computer<'a, 'b> { let next_opcode = self.ram[self.pc]; match Instruction::from(next_opcode) { Instruction::Copy(reg, op2) => { - self.rg_wr(reg, self.resolve(op2)); + self[reg] = self.resolve(op2); self.pc += 1; } Instruction::Add(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) + self.resolve(op2)); + self[reg] = self[reg1] + self.resolve(op2); self.pc += 1; } Instruction::Sub(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) - self.resolve(op2)); + self[reg] = self[reg1] - self.resolve(op2); self.pc += 1; } Instruction::Or(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) | self.resolve(op2)); + self[reg] = self[reg1] | self.resolve(op2); self.pc += 1; } Instruction::And(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) & self.resolve(op2)); + self[reg] = self[reg1] & self.resolve(op2); self.pc += 1; } Instruction::Xor(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) ^ self.resolve(op2)); + self[reg] = self[reg1] ^ self.resolve(op2); self.pc += 1; } Instruction::Lsl(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) << self.resolve(op2)); + self[reg] = self[reg1] << self.resolve(op2); self.pc += 1; } Instruction::Lsr(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) >> self.resolve(op2)); + self[reg] = self[reg1] >> self.resolve(op2); self.pc += 1; } Instruction::Asr(reg, reg1, op2) => { - self.rg_wr(reg, (self.rg_r(reg1) as i32 >> self.resolve(op2)) as u32); + self[reg] = (self[reg1] as i32 >> self.resolve(op2)) as u32; self.pc += 1; } Instruction::Umull(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1).wrapping_mul(self.resolve(op2))); + self[reg] = self[reg1].wrapping_mul(self.resolve(op2)); self.pc += 1; } Instruction::Smull(reg, reg1, op2) => { - self.rg_wr( - reg, - (self.rg_r(reg1) as i32).wrapping_mul(self.resolve(op2) as i32) as u32, - ); + self[reg] = (self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32; self.pc += 1; } Instruction::Umulh(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1).widening_mul(self.resolve(op2)).1); + self[reg] = self[reg1].widening_mul(self.resolve(op2)).1; self.pc += 1; } Instruction::Smulh(reg, reg1, op2) => { - self.rg_wr( - reg, - (self.rg_r(reg1) as i32) - .widening_mul(self.resolve(op2) as i32) - .1 as u32, - ); + self[reg] = (self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32; self.pc += 1; } Instruction::Div(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) / self.resolve(op2)); + self[reg] = self[reg1] / self.resolve(op2); self.pc += 1 } Instruction::Mod(reg, reg1, op2) => { - self.rg_wr(reg, self.rg_r(reg1) % self.resolve(op2)); + self[reg] = self[reg1] % self.resolve(op2); self.pc += 1 } Instruction::Store(reg, op2, reg1) => { - let addr = (self.rg_r(reg).wrapping_add(self.resolve(op2))) as usize; + let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize; if !addr.is_multiple_of(4) { iot(); } if addr <= 0x00ffffff { - self.ram[addr / 4] = self.rg_r(reg1); - } else if addr <= 0x00ffffff + 480 * 640 * 4 { - let addr_screen = addr - 0x01000000; - let ubgr = self.rg_r(reg1); - let rgba = [ubgr as u8, (ubgr >> 8) as u8, (ubgr >> 16) as u8, 0xff]; - let mut pixels = self.screen.lock().unwrap(); - let screen = pixels.frame_mut(); - for i in 0..4 { - screen[addr_screen + i] = rgba[i]; - } + self.ram[addr / 4] = self[reg1]; + } else if addr <= 0x00ff_ffff + 480 * 640 * 4 { + let buf_addr = (addr - 0x0100_0000) / 4; + let dat = self[reg1] & 0x00FF_FFFF; + (&SHARED.screen_buf[buf_addr]).store(dat, std::sync::atomic::Ordering::Relaxed); } else { iot(); } self.pc += 1; } Instruction::Load(reg, reg1, op2) => { - let addr = (self.rg_r(reg1).wrapping_add(self.resolve(op2))) as usize; + let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize; if !addr.is_multiple_of(4) { iot(); } - self.rg_wr( - reg, - if addr <= 0x00ffffff { - self.ram[addr / 4] - } else if addr <= 0x00ffffff + 480 * 640 * 4 { - let pixels = self.screen.lock().unwrap(); - let buffer = pixels.frame(); - let mut res = 0; - let addr_screen = addr - 0x01000000; - for i in 0..3 { - res += (buffer[addr_screen + i] as u32) << (i as u32 * 8) - } - res - } else if addr == 0x01200000 { - self.key.load(std::sync::atomic::Ordering::Relaxed) - } else if addr == 0x01200004 { - time::Instant::now() - .duration_since(self.creation) - .as_millis() as u32 - } else { - iot(); - }, - ); + self[reg] = if addr <= 0x00ffffff { + self.ram[addr / 4] + } else if addr <= 0x00ffffff + 480 * 640 * 4 { + let buf_addr = (addr - 0x0100_0000) / 4; + (&SHARED.screen_buf[buf_addr]).load(std::sync::atomic::Ordering::Relaxed) + } else if addr == 0x01200000 { + SHARED.keyboard.load(std::sync::atomic::Ordering::Relaxed) + } else if addr == 0x01200004 { + time::Instant::now() + .duration_since(self.creation) + .as_millis() as u32 + } else { + iot(); + }; self.pc += 1; } Instruction::Push(op2) => { @@ -322,13 +319,13 @@ impl<'a, 'b> Computer<'a, 'b> { self.pc += 1; } Instruction::Pop(reg) => { - self.rg_wr(reg, self.ram[self.sp]); + self[reg] = self.ram[self.sp]; self.sp += 1; self.pc += 1; } Instruction::Skip(d, cond, reg, op2) => { self.pc += 1; - if cond.eval(self.rg_r(reg), self.resolve(op2)) { + if cond.eval(self[reg], self.resolve(op2)) { self.pc += d as usize } } @@ -356,7 +353,7 @@ impl<'a, 'b> Computer<'a, 'b> { self.sp += 1; } Instruction::GetStack(reg) => { - self.rg_wr(reg, (self.sp << 2) as u32); + self[reg] = (self.sp << 2) as u32; self.pc += 1; } Instruction::SetStack(op2) => { @@ -376,10 +373,4 @@ impl<'a, 'b> Computer<'a, 'b> { Op2::Register(r) => self.regs[r as usize], } } - fn rg_wr(&mut self, reg: Reg, v: u32) { - self.regs[reg.0 as usize] = v - } - fn rg_r(&self, reg: Reg) -> u32 { - self.regs[reg.0 as usize] - } } diff --git a/simu/src/main.rs b/simu/src/main.rs index 068c29e..2efa722 100644 --- a/simu/src/main.rs +++ b/simu/src/main.rs @@ -1,130 +1,192 @@ -#![feature(likely_unlikely, widening_mul)] +#![feature(likely_unlikely, widening_mul, sync_unsafe_cell)] #![deny(clippy::all)] use std::env::args; use std::process::exit; -use std::sync::atomic::AtomicU32; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::thread::{scope, sleep}; -use std::time::Duration; +use std::time::{Duration, Instant}; use pixels::{Error, Pixels, SurfaceTexture}; +use winit::application::ApplicationHandler; use winit::dpi::LogicalSize; -use winit::event::{Event, WindowEvent}; +use winit::event::WindowEvent; use winit::event_loop::EventLoop; use winit::platform::scancode::PhysicalKeyExtScancode; use winit::window::Window; -use winit_input_helper::WinitInputHelper; +// use winit_input_helper::WinitInputHelper; use crate::cpu::Computer; +mod cpu; +use cpu::SHARED; + const WIDTH: u32 = 640; const HEIGHT: u32 = 480; -mod cpu; +struct App<'a> { + w: Option>, + pixels: Option>, +} + +impl<'a> ApplicationHandler for App<'a> { + fn resumed(&mut self, event_loop: &winit::event_loop::ActiveEventLoop) { + let window = { + let size = LogicalSize::new(WIDTH as f64, HEIGHT as f64); + Arc::new( + event_loop + .create_window( + Window::default_attributes() + .with_title("bisare screen") + .with_min_inner_size(size) + .with_maximized(true), + ) + .unwrap(), + ) + }; + self.w = Some(window.clone()); + let size = window.inner_size(); + let surface_texture = SurfaceTexture::new(size.width, size.height, window); + self.pixels = Some(Pixels::new(WIDTH, HEIGHT, surface_texture).unwrap()); + } + + fn window_event( + &mut self, + elwt: &winit::event_loop::ActiveEventLoop, + _: winit::window::WindowId, + event: WindowEvent, + ) { + // Draw the current frame + match event { + WindowEvent::KeyboardInput { event, .. } => match event.state { + winit::event::ElementState::Pressed => { + if let Some(val) = event.physical_key.to_scancode() { + cpu::SHARED + .keyboard + .store(val + 8, std::sync::atomic::Ordering::Relaxed); + } + } + winit::event::ElementState::Released => SHARED + .keyboard + .store(0, std::sync::atomic::Ordering::Relaxed), + }, + WindowEvent::CursorMoved { + device_id, + position, + } => {} + WindowEvent::MouseWheel { + device_id, + delta, + phase, + } => {} + WindowEvent::MouseInput { + device_id, + state, + button, + } => {} + WindowEvent::ScaleFactorChanged { + scale_factor, + inner_size_writer, + } => {} + //handling redraws and other graphical events + WindowEvent::RedrawRequested => { + let pix = self.pixels.as_mut().unwrap(); + + let screen = pix.frame_mut(); + for (addr, ubgr) in { cpu::SHARED.screen_buf.iter().enumerate() } { + let ubgr = ubgr.load(std::sync::atomic::Ordering::Relaxed); + let rgba = [ubgr as u8, (ubgr >> 8) as u8, (ubgr >> 16) as u8, 0xff]; + for i in 0..4 { + screen[addr * 4 + i] = rgba[i]; + } + } + + if let Err(_) = pix.render() { + elwt.exit(); + return; + } + } + WindowEvent::Resized(size) => { + if self + .pixels + .as_mut() + .unwrap() + .resize_surface(size.width, size.height) + .is_err() + { + println!("Error while resising pixels, exiting!"); + elwt.exit(); + return; + } + self.w.as_ref().unwrap().request_redraw(); + } + WindowEvent::CloseRequested => { + elwt.exit(); + return; + } + WindowEvent::Destroyed => { + println!("Windows destroyed, exiting!"); + elwt.exit(); + return; + } + //do nothing for the other events + _ => {} + } + } + fn new_events( + &mut self, + event_loop: &winit::event_loop::ActiveEventLoop, + cause: winit::event::StartCause, + ) { + match cause { + winit::event::StartCause::ResumeTimeReached { + requested_resume, .. + } => { + let mut next = requested_resume + Duration::from_secs_f64(1. / 60.); + let now = Instant::now(); + if next < now { + next = now + Duration::from_secs_f64(1. / 30.); + } + event_loop.set_control_flow(winit::event_loop::ControlFlow::WaitUntil(next)); + if let Some(w) = self.w.as_ref() { + w.request_redraw(); + } + } + winit::event::StartCause::WaitCancelled { .. } => {} + winit::event::StartCause::Poll => { + let next = Instant::now() + Duration::from_secs_f64(1. / 60.); + event_loop.set_control_flow(winit::event_loop::ControlFlow::WaitUntil(next)); + } + winit::event::StartCause::Init => { + let next = Instant::now() + Duration::from_secs_f64(1. / 60.); + event_loop.set_control_flow(winit::event_loop::ControlFlow::WaitUntil(next)); + } + } + } +} fn main() -> Result<(), Error> { let event_loop = EventLoop::new().unwrap(); - let mut input = WinitInputHelper::new(); - let window = { - let size = LogicalSize::new((WIDTH * 3) as f64, (HEIGHT * 3) as f64); - #[allow(deprecated)] - Arc::new( - event_loop - .create_window( - Window::default_attributes() - .with_title("bisare screen") - .with_inner_size(size) - .with_min_inner_size(size), - ) - .unwrap(), - ) + // let mut input = WinitInputHelper::new(); + + let mut app = App { + w: None, + pixels: None, }; - let pixels = Mutex::new({ - let window_size = window.inner_size(); - let surface_texture = SurfaceTexture::new(window_size.width, window_size.height, &window); - Pixels::new(WIDTH, HEIGHT, surface_texture)? - }); - - let keyboard = AtomicU32::new(0); let program = args() .nth(1) .expect("you must supply the exec name as the first argument"); - let kbref = &keyboard; - let pixelref = &pixels; scope(|sc| { sc.spawn(|| { - let mut simulation = Computer::new(program, pixelref, kbref); + let mut simulation = Computer::new(program); loop { simulation.step(); } }); #[allow(deprecated)] - let res = event_loop.run(|event, elwt| { - match event { - Event::Resumed => {} - Event::NewEvents(_) => input.step(), - Event::AboutToWait => input.end_step(), - Event::DeviceEvent { event, .. } => { - input.process_device_event(&event); - } - Event::WindowEvent { event, .. } => { - // Draw the current frame - if event == WindowEvent::RedrawRequested { - if let Err(_) = pixels.lock().unwrap().render() { - elwt.exit(); - return; - } - sleep(Duration::from_millis(30)); - } - - if let WindowEvent::KeyboardInput { - device_id: _, - ref event, - is_synthetic: _, - } = event - { - match event.state { - winit::event::ElementState::Pressed => { - if let Some(val) = event.physical_key.to_scancode() { - kbref.store(val + 8, std::sync::atomic::Ordering::Relaxed); - } - } - winit::event::ElementState::Released => { - kbref.store(0, std::sync::atomic::Ordering::Relaxed) - } - } - } - - // Handle input events - if input.process_window_event(&event) { - // Close events - if input.close_requested() { - elwt.exit(); - return; - } - - // Resize the window - if let Some(size) = input.window_resized() { - if let Err(_) = pixels - .lock() - .unwrap() - .resize_surface(size.width, size.height) - { - elwt.exit(); - return; - } - } - - // Update internal state and request a redraw - window.request_redraw(); - } - } - _ => {} - } - }); + let res = event_loop.run_app(&mut app); match res { Ok(_) => exit(0), Err(e) => {