performance improvement

This commit is contained in:
Mwa
2026-03-14 00:48:03 +01:00
parent 3f97a352cc
commit aead858727
3 changed files with 226 additions and 170 deletions

View File

@@ -1,3 +1,6 @@
[workspace]
resolver = "3"
members = ["simu","asm"]
[profile.release]
debug = "line-tables-only"

View File

@@ -1,13 +1,22 @@
use std::{
hint::{likely, unlikely},
io::Read,
ops::{Index, IndexMut},
process::exit,
sync::{Mutex, atomic::AtomicU32},
sync::atomic::AtomicU32,
thread::sleep,
time::{self, Duration, Instant},
};
use pixels::Pixels;
/// Width of the emulated screen, in pixels.
const SCREEN_WIDTH: usize = 640;
/// Height of the emulated screen, in pixels.
const SCREEN_HEIGHT: usize = 480;
/// Number of pixels (one `AtomicU32` cell each) in the shared framebuffer.
const SCREEN_PIXELS: usize = SCREEN_WIDTH * SCREEN_HEIGHT;

/// State exchanged between the emulated CPU and the window event handler.
///
/// Every field is an atomic, so both sides can access it through the shared
/// `SHARED` static without taking a lock.
pub(crate) struct SharedState {
    /// Key value published by the window event handler: a non-zero code while
    /// a key is pressed, 0 once it is released. The CPU reads it back on a
    /// load from address `0x01200000`.
    pub(crate) keyboard: AtomicU32,
    /// One cell per screen pixel. The CPU stores the low 24 bits of the
    /// written word here; the redraw handler unpacks each cell into RGBA.
    pub(crate) screen_buf: [AtomicU32; SCREEN_PIXELS],
}

/// The single process-wide instance of [`SharedState`], zero-initialised.
pub(crate) static SHARED: SharedState = SharedState {
    keyboard: AtomicU32::new(0),
    // An inline `const` block is required because `AtomicU32` is not `Copy`.
    screen_buf: [const { AtomicU32::new(0) }; SCREEN_PIXELS],
};
enum Op2 {
Direct(u32),
@@ -156,30 +165,40 @@ impl From<u32> for Instruction {
}
}
pub struct Computer<'a, 'b> {
pub struct Computer {
creation: Instant,
ram: Box<[u32; 0x01000000 / 4]>,
regs: [u32; 16],
pc: usize,
sp: usize,
screen: &'b Mutex<Pixels<'a>>,
key: &'b AtomicU32,
}
/// Read access to a general-purpose register via `computer[reg]`.
impl Index<Reg> for Computer {
    type Output = u32;

    /// Returns a reference to the register selected by `index`.
    ///
    /// Panics if the register number is outside the register file.
    fn index(&self, index: Reg) -> &Self::Output {
        let slot = index.0 as usize;
        &self.regs[slot]
    }
}
/// Write access to a general-purpose register via `computer[reg] = value`.
impl IndexMut<Reg> for Computer {
    /// Returns a mutable reference to the register selected by `index`.
    ///
    /// Panics if the register number is outside the register file.
    fn index_mut(&mut self, index: Reg) -> &mut Self::Output {
        let slot = index.0 as usize;
        &mut self.regs[slot]
    }
}
/// Stops the emulation by terminating the whole process with exit status 1.
///
/// The load/store paths call this when an address is misaligned or falls
/// outside every mapped region. It never returns.
// Marked cold: this is only reached from error branches, so the optimiser
// should keep it out of the interpreter's hot path.
#[cold]
fn iot() -> ! {
    exit(1)
}
impl<'a, 'b> Computer<'a, 'b> {
pub fn new(filename: String, screen: &'b Mutex<Pixels<'a>>, key: &'b AtomicU32) -> Self {
impl Computer {
pub fn new(filename: String) -> Self {
let mut new = Self {
creation: Instant::now(),
ram: unsafe { Box::new_zeroed().assume_init() },
regs: [0; 16],
pc: 0,
sp: 0,
screen,
key,
};
let mut buf = String::new();
std::fs::File::open(filename)
@@ -198,122 +217,100 @@ impl<'a, 'b> Computer<'a, 'b> {
let next_opcode = self.ram[self.pc];
match Instruction::from(next_opcode) {
Instruction::Copy(reg, op2) => {
self.rg_wr(reg, self.resolve(op2));
self[reg] = self.resolve(op2);
self.pc += 1;
}
Instruction::Add(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) + self.resolve(op2));
self[reg] = self[reg1] + self.resolve(op2);
self.pc += 1;
}
Instruction::Sub(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) - self.resolve(op2));
self[reg] = self[reg1] - self.resolve(op2);
self.pc += 1;
}
Instruction::Or(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) | self.resolve(op2));
self[reg] = self[reg1] | self.resolve(op2);
self.pc += 1;
}
Instruction::And(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) & self.resolve(op2));
self[reg] = self[reg1] & self.resolve(op2);
self.pc += 1;
}
Instruction::Xor(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) ^ self.resolve(op2));
self[reg] = self[reg1] ^ self.resolve(op2);
self.pc += 1;
}
Instruction::Lsl(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) << self.resolve(op2));
self[reg] = self[reg1] << self.resolve(op2);
self.pc += 1;
}
Instruction::Lsr(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) >> self.resolve(op2));
self[reg] = self[reg1] >> self.resolve(op2);
self.pc += 1;
}
Instruction::Asr(reg, reg1, op2) => {
self.rg_wr(reg, (self.rg_r(reg1) as i32 >> self.resolve(op2)) as u32);
self[reg] = (self[reg1] as i32 >> self.resolve(op2)) as u32;
self.pc += 1;
}
Instruction::Umull(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1).wrapping_mul(self.resolve(op2)));
self[reg] = self[reg1].wrapping_mul(self.resolve(op2));
self.pc += 1;
}
Instruction::Smull(reg, reg1, op2) => {
self.rg_wr(
reg,
(self.rg_r(reg1) as i32).wrapping_mul(self.resolve(op2) as i32) as u32,
);
self[reg] = (self[reg1] as i32).wrapping_mul(self.resolve(op2) as i32) as u32;
self.pc += 1;
}
Instruction::Umulh(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1).widening_mul(self.resolve(op2)).1);
self[reg] = self[reg1].widening_mul(self.resolve(op2)).1;
self.pc += 1;
}
Instruction::Smulh(reg, reg1, op2) => {
self.rg_wr(
reg,
(self.rg_r(reg1) as i32)
.widening_mul(self.resolve(op2) as i32)
.1 as u32,
);
self[reg] = (self[reg1] as i32).widening_mul(self.resolve(op2) as i32).1 as u32;
self.pc += 1;
}
Instruction::Div(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) / self.resolve(op2));
self[reg] = self[reg1] / self.resolve(op2);
self.pc += 1
}
Instruction::Mod(reg, reg1, op2) => {
self.rg_wr(reg, self.rg_r(reg1) % self.resolve(op2));
self[reg] = self[reg1] % self.resolve(op2);
self.pc += 1
}
Instruction::Store(reg, op2, reg1) => {
let addr = (self.rg_r(reg).wrapping_add(self.resolve(op2))) as usize;
let addr = (self[reg].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
iot();
}
if addr <= 0x00ffffff {
self.ram[addr / 4] = self.rg_r(reg1);
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let addr_screen = addr - 0x01000000;
let ubgr = self.rg_r(reg1);
let rgba = [ubgr as u8, (ubgr >> 8) as u8, (ubgr >> 16) as u8, 0xff];
let mut pixels = self.screen.lock().unwrap();
let screen = pixels.frame_mut();
for i in 0..4 {
screen[addr_screen + i] = rgba[i];
}
self.ram[addr / 4] = self[reg1];
} else if addr <= 0x00ff_ffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
let dat = self[reg1] & 0x00FF_FFFF;
(&SHARED.screen_buf[buf_addr]).store(dat, std::sync::atomic::Ordering::Relaxed);
} else {
iot();
}
self.pc += 1;
}
Instruction::Load(reg, reg1, op2) => {
let addr = (self.rg_r(reg1).wrapping_add(self.resolve(op2))) as usize;
let addr = (self[reg1].wrapping_add(self.resolve(op2))) as usize;
if !addr.is_multiple_of(4) {
iot();
}
self.rg_wr(
reg,
if addr <= 0x00ffffff {
self.ram[addr / 4]
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let pixels = self.screen.lock().unwrap();
let buffer = pixels.frame();
let mut res = 0;
let addr_screen = addr - 0x01000000;
for i in 0..3 {
res += (buffer[addr_screen + i] as u32) << (i as u32 * 8)
}
res
} else if addr == 0x01200000 {
self.key.load(std::sync::atomic::Ordering::Relaxed)
} else if addr == 0x01200004 {
time::Instant::now()
.duration_since(self.creation)
.as_millis() as u32
} else {
iot();
},
);
self[reg] = if addr <= 0x00ffffff {
self.ram[addr / 4]
} else if addr <= 0x00ffffff + 480 * 640 * 4 {
let buf_addr = (addr - 0x0100_0000) / 4;
(&SHARED.screen_buf[buf_addr]).load(std::sync::atomic::Ordering::Relaxed)
} else if addr == 0x01200000 {
SHARED.keyboard.load(std::sync::atomic::Ordering::Relaxed)
} else if addr == 0x01200004 {
time::Instant::now()
.duration_since(self.creation)
.as_millis() as u32
} else {
iot();
};
self.pc += 1;
}
Instruction::Push(op2) => {
@@ -322,13 +319,13 @@ impl<'a, 'b> Computer<'a, 'b> {
self.pc += 1;
}
Instruction::Pop(reg) => {
self.rg_wr(reg, self.ram[self.sp]);
self[reg] = self.ram[self.sp];
self.sp += 1;
self.pc += 1;
}
Instruction::Skip(d, cond, reg, op2) => {
self.pc += 1;
if cond.eval(self.rg_r(reg), self.resolve(op2)) {
if cond.eval(self[reg], self.resolve(op2)) {
self.pc += d as usize
}
}
@@ -356,7 +353,7 @@ impl<'a, 'b> Computer<'a, 'b> {
self.sp += 1;
}
Instruction::GetStack(reg) => {
self.rg_wr(reg, (self.sp << 2) as u32);
self[reg] = (self.sp << 2) as u32;
self.pc += 1;
}
Instruction::SetStack(op2) => {
@@ -376,10 +373,4 @@ impl<'a, 'b> Computer<'a, 'b> {
Op2::Register(r) => self.regs[r as usize],
}
}
fn rg_wr(&mut self, reg: Reg, v: u32) {
self.regs[reg.0 as usize] = v
}
fn rg_r(&self, reg: Reg) -> u32 {
self.regs[reg.0 as usize]
}
}

View File

@@ -1,130 +1,192 @@
#![feature(likely_unlikely, widening_mul)]
#![feature(likely_unlikely, widening_mul, sync_unsafe_cell)]
#![deny(clippy::all)]
use std::env::args;
use std::process::exit;
use std::sync::atomic::AtomicU32;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use std::thread::{scope, sleep};
use std::time::Duration;
use std::time::{Duration, Instant};
use pixels::{Error, Pixels, SurfaceTexture};
use winit::application::ApplicationHandler;
use winit::dpi::LogicalSize;
use winit::event::{Event, WindowEvent};
use winit::event::WindowEvent;
use winit::event_loop::EventLoop;
use winit::platform::scancode::PhysicalKeyExtScancode;
use winit::window::Window;
use winit_input_helper::WinitInputHelper;
// use winit_input_helper::WinitInputHelper;
use crate::cpu::Computer;
mod cpu;
use cpu::SHARED;
const WIDTH: u32 = 640;
const HEIGHT: u32 = 480;
mod cpu;
/// Window-side state owned by the winit event loop.
///
/// Both fields start out as `None` and are filled in by `resumed`.
struct App<'a> {
    // Handle to the window; used to request redraws.
    w: Option<Arc<Window>>,
    // Pixel surface attached to the window; the redraw handler fills its
    // frame from the shared screen buffer and renders it.
    pixels: Option<Pixels<'a>>,
}
/// winit callbacks for the emulator window.
///
/// Keyboard events are published to `SHARED.keyboard`, redraws copy
/// `SHARED.screen_buf` into the `Pixels` frame, and `new_events` schedules
/// periodic redraw requests.
impl ApplicationHandler for App<'_> {
    /// Creates the window and the `Pixels` surface bound to it.
    ///
    /// Panics if the window or the pixel surface cannot be created.
    fn resumed(&mut self, event_loop: &winit::event_loop::ActiveEventLoop) {
        let min_size = LogicalSize::new(f64::from(WIDTH), f64::from(HEIGHT));
        let window = Arc::new(
            event_loop
                .create_window(
                    Window::default_attributes()
                        .with_title("bisare screen")
                        .with_min_inner_size(min_size)
                        .with_maximized(true),
                )
                .unwrap(),
        );
        self.w = Some(Arc::clone(&window));
        let size = window.inner_size();
        let surface_texture = SurfaceTexture::new(size.width, size.height, window);
        self.pixels = Some(Pixels::new(WIDTH, HEIGHT, surface_texture).unwrap());
    }

    /// Handles keyboard, redraw, resize and close events for the window.
    /// Every other event is ignored.
    fn window_event(
        &mut self,
        elwt: &winit::event_loop::ActiveEventLoop,
        _: winit::window::WindowId,
        event: WindowEvent,
    ) {
        match event {
            WindowEvent::KeyboardInput { event, .. } => match event.state {
                winit::event::ElementState::Pressed => {
                    if let Some(val) = event.physical_key.to_scancode() {
                        // NOTE(review): the +8 offset presumably maps the raw
                        // scancode onto X11-style keycodes; confirm against
                        // the programs that read the keyboard register.
                        SHARED
                            .keyboard
                            .store(val + 8, std::sync::atomic::Ordering::Relaxed);
                    }
                }
                winit::event::ElementState::Released => SHARED
                    .keyboard
                    .store(0, std::sync::atomic::Ordering::Relaxed),
            },
            WindowEvent::RedrawRequested => {
                let pix = self
                    .pixels
                    .as_mut()
                    .expect("pixels is created together with the window in resumed");
                // Each shared cell holds one pixel with red in the low byte,
                // then green, then blue; the frame wants RGBA bytes with an
                // opaque alpha.
                for (rgba, cell) in pix
                    .frame_mut()
                    .chunks_exact_mut(4)
                    .zip(SHARED.screen_buf.iter())
                {
                    let [r, g, b, _] = cell
                        .load(std::sync::atomic::Ordering::Relaxed)
                        .to_le_bytes();
                    rgba.copy_from_slice(&[r, g, b, 0xff]);
                }
                if pix.render().is_err() {
                    elwt.exit();
                }
            }
            WindowEvent::Resized(size) => {
                // A minimised window can report a zero-sized surface, which
                // cannot be resized to; keep the previous surface instead of
                // treating it as a fatal error.
                if size.width == 0 || size.height == 0 {
                    return;
                }
                let resized = self
                    .pixels
                    .as_mut()
                    .expect("pixels is created together with the window in resumed")
                    .resize_surface(size.width, size.height);
                if resized.is_err() {
                    println!("Error while resising pixels, exiting!");
                    elwt.exit();
                } else if let Some(window) = self.w.as_ref() {
                    window.request_redraw();
                }
            }
            WindowEvent::CloseRequested => elwt.exit(),
            WindowEvent::Destroyed => {
                println!("Windows destroyed, exiting!");
                elwt.exit();
            }
            // Cursor, mouse, scale-factor and all remaining events need no handling.
            _ => {}
        }
    }

    /// Schedules redraws: arms a `WaitUntil` timer one 60 Hz frame ahead and
    /// requests a redraw each time that timer fires.
    fn new_events(
        &mut self,
        event_loop: &winit::event_loop::ActiveEventLoop,
        cause: winit::event::StartCause,
    ) {
        let frame = Duration::from_secs_f64(1. / 60.);
        match cause {
            winit::event::StartCause::ResumeTimeReached {
                requested_resume, ..
            } => {
                // Schedule relative to the requested time so frames do not
                // drift; if that deadline has already passed, back off to a
                // 30 Hz step from now.
                let now = Instant::now();
                let mut next = requested_resume + frame;
                if next < now {
                    next = now + Duration::from_secs_f64(1. / 30.);
                }
                event_loop.set_control_flow(winit::event_loop::ControlFlow::WaitUntil(next));
                if let Some(w) = self.w.as_ref() {
                    w.request_redraw();
                }
            }
            // Woken early by another event: the pending timer stays in force.
            winit::event::StartCause::WaitCancelled { .. } => {}
            winit::event::StartCause::Poll | winit::event::StartCause::Init => {
                let next = Instant::now() + frame;
                event_loop.set_control_flow(winit::event_loop::ControlFlow::WaitUntil(next));
            }
        }
    }
}
fn main() -> Result<(), Error> {
let event_loop = EventLoop::new().unwrap();
let mut input = WinitInputHelper::new();
let window = {
let size = LogicalSize::new((WIDTH * 3) as f64, (HEIGHT * 3) as f64);
#[allow(deprecated)]
Arc::new(
event_loop
.create_window(
Window::default_attributes()
.with_title("bisare screen")
.with_inner_size(size)
.with_min_inner_size(size),
)
.unwrap(),
)
// let mut input = WinitInputHelper::new();
let mut app = App {
w: None,
pixels: None,
};
let pixels = Mutex::new({
let window_size = window.inner_size();
let surface_texture = SurfaceTexture::new(window_size.width, window_size.height, &window);
Pixels::new(WIDTH, HEIGHT, surface_texture)?
});
let keyboard = AtomicU32::new(0);
let program = args()
.nth(1)
.expect("you must supply the exec name as the first argument");
let kbref = &keyboard;
let pixelref = &pixels;
scope(|sc| {
sc.spawn(|| {
let mut simulation = Computer::new(program, pixelref, kbref);
let mut simulation = Computer::new(program);
loop {
simulation.step();
}
});
#[allow(deprecated)]
let res = event_loop.run(|event, elwt| {
match event {
Event::Resumed => {}
Event::NewEvents(_) => input.step(),
Event::AboutToWait => input.end_step(),
Event::DeviceEvent { event, .. } => {
input.process_device_event(&event);
}
Event::WindowEvent { event, .. } => {
// Draw the current frame
if event == WindowEvent::RedrawRequested {
if let Err(_) = pixels.lock().unwrap().render() {
elwt.exit();
return;
}
sleep(Duration::from_millis(30));
}
if let WindowEvent::KeyboardInput {
device_id: _,
ref event,
is_synthetic: _,
} = event
{
match event.state {
winit::event::ElementState::Pressed => {
if let Some(val) = event.physical_key.to_scancode() {
kbref.store(val + 8, std::sync::atomic::Ordering::Relaxed);
}
}
winit::event::ElementState::Released => {
kbref.store(0, std::sync::atomic::Ordering::Relaxed)
}
}
}
// Handle input events
if input.process_window_event(&event) {
// Close events
if input.close_requested() {
elwt.exit();
return;
}
// Resize the window
if let Some(size) = input.window_resized() {
if let Err(_) = pixels
.lock()
.unwrap()
.resize_surface(size.width, size.height)
{
elwt.exit();
return;
}
}
// Update internal state and request a redraw
window.request_redraw();
}
}
_ => {}
}
});
let res = event_loop.run_app(&mut app);
match res {
Ok(_) => exit(0),
Err(e) => {