#![feature(file_buffered)] use std::{ borrow::Cow, collections::HashMap, env::args, io::{BufRead, BufReader, Write, stdin}, ops::Index, process::exit, }; use regex::Regex; #[derive(Clone, Copy)] enum Op2 { Direct(i32), Register(u8), } #[derive(Clone, Copy)] struct Reg(u8); impl Into for Reg { fn into(self) -> u32 { self.0 as u32 } } impl Into for &Reg { fn into(self) -> u32 { self.0 as u32 } } #[derive(Clone, Copy)] enum Cond { Ifeq, Ifne, Iflt, Ifge, Ifgt, Ifle, Ifult, Ifuge, Ifugt, Ifule, } impl From for u32 { fn from(value: Cond) -> Self { match value { Cond::Ifeq => 0b0000, Cond::Ifne => 0b0001, Cond::Iflt => 0b1000, Cond::Ifge => 0b1001, Cond::Ifgt => 0b1010, Cond::Ifle => 0b1011, Cond::Ifult => 0b1100, Cond::Ifuge => 0b1101, Cond::Ifugt => 0b1110, Cond::Ifule => 0b1111, } } } impl From<&Cond> for u32 { fn from(value: &Cond) -> Self { (*value).into() } } impl TryFrom<&str> for Cond { type Error = (); fn try_from(value: &str) -> Result { Ok(match value { "ifeq" => Cond::Ifeq, "ifne" => Cond::Ifne, "iflt" => Cond::Iflt, "ifge" => Cond::Ifge, "ifgt" => Cond::Ifgt, "ifle" => Cond::Ifle, "ifult" => Cond::Ifult, "ifuge" => Cond::Ifuge, "ifugt" => Cond::Ifugt, "ifule" => Cond::Ifule, s => { println!("unrecognised condition: {s}"); return Err(()); } }) } } enum Labeli32 { Value(i32), Label(String), } enum Labeli17 { Register(Reg), Value(i32), LabelLow(String), LabelHigh(String), } enum Instruction { Copy(Reg, Labeli17), Add(Reg, Reg, Labeli17), Sub(Reg, Reg, Op2), Or(Reg, Reg, Op2), And(Reg, Reg, Op2), Xor(Reg, Reg, Op2), Lsl(Reg, Reg, Op2), Lsr(Reg, Reg, Op2), Asr(Reg, Reg, Op2), Smull(Reg, Reg, Op2), Smulh(Reg, Reg, Op2), Umull(Reg, Reg, Op2), Umulh(Reg, Reg, Op2), Div(Reg, Reg, Op2), Mod(Reg, Reg, Op2), Store(Reg, Op2, Reg), Load(Reg, Reg, Op2), Push(Op2), Pop(Reg), Skip(Labeli32, Cond, Reg, Op2), Jump(Labeli32), //address / 4 Call(Labeli32), //address / 4 Ret(), Reti(), Swi(), Dint(), Eint(), GetStack(Reg), SetStack(Op2), Data(Labeli32), } fn encode_reg(fmt: u32, function: u32, d: u32, x: u32, y: u32) -> u32 { (fmt << 30) | (function << 24) | (d << 20) | (x << 16) | (y << 12) } fn encode_imm(fmt: u32, function: u32, d: u32, x: u32, c: i32) -> u32 { let c_sign = if c < 0 { 1 } else { 0 }; let c_mask = c as u32 & 0xFFFF; (fmt << 30) | (c_sign << 29) | (1 << 28) | (function << 24) | (d << 20) | (x << 16) | (c_mask << 0) } fn encode_op2(fmt: u32, function: u32, d: u32, x: u32, y_or_c: Op2) -> u32 { match y_or_c { Op2::Direct(c) => encode_imm(fmt, function, d, x, c), Op2::Register(y) => encode_reg(fmt, function, d, x, y as u32), } } impl Instruction { fn encode(&self, prgm: &Program, self_addr: u32) -> Result { Ok(match self { Instruction::Copy(dest, value) => match value { Labeli17::Value(v) => encode_imm(1, 0, dest.into(), 0, *v), Labeli17::LabelLow(label) => { encode_imm(1, 0, dest.into(), 0, (prgm[label] & 0xFFFF) as i32) } Labeli17::LabelHigh(label) => { encode_imm(1, 0, dest.into(), 0, (prgm[label] as u32 >> 16) as i32) } Labeli17::Register(reg) => encode_reg(1, 0, dest.into(), 0, reg.into()), }, Instruction::Add(reg, reg1, value) => match value { Labeli17::Register(regy) => encode_reg(1, 1, reg.into(), reg1.into(), regy.into()), Labeli17::Value(v) => encode_imm(1, 1, reg.into(), reg1.into(), *v), Labeli17::LabelLow(l) => { encode_imm(1, 1, reg.into(), reg1.into(), (prgm[l] & 0xFFFF) as i32) } Labeli17::LabelHigh(l) => { encode_imm(1, 1, reg.into(), reg1.into(), (prgm[l] as u32 >> 16) as i32) } }, Instruction::Sub(reg, reg1, op2) => encode_op2(01, 2, reg.into(), reg1.into(), *op2), Instruction::Or(reg, reg1, op2) => encode_op2(01, 3, reg.into(), reg1.into(), *op2), Instruction::And(reg, reg1, op2) => encode_op2(01, 4, reg.into(), reg1.into(), *op2), Instruction::Xor(reg, reg1, op2) => encode_op2(01, 5, reg.into(), reg1.into(), *op2), Instruction::Lsl(reg, reg1, op2) => encode_op2(01, 6, reg.into(), reg1.into(), *op2), Instruction::Lsr(reg, reg1, op2) => encode_op2(01, 7, reg.into(), reg1.into(), *op2), Instruction::Asr(reg, reg1, op2) => encode_op2(01, 8, reg.into(), reg1.into(), *op2), Instruction::Smull(reg, reg1, op2) => encode_op2(01, 9, reg.into(), reg1.into(), *op2), Instruction::Smulh(reg, reg1, op2) => encode_op2(01, 10, reg.into(), reg1.into(), *op2), Instruction::Umull(reg, reg1, op2) => encode_op2(01, 11, reg.into(), reg1.into(), *op2), Instruction::Umulh(reg, reg1, op2) => encode_op2(01, 12, reg.into(), reg1.into(), *op2), Instruction::Div(reg, reg1, op2) => encode_op2(01, 13, reg.into(), reg1.into(), *op2), Instruction::Mod(reg, reg1, op2) => encode_op2(01, 14, reg.into(), reg1.into(), *op2), Instruction::Store(addr, op2, val) => encode_op2(2, 0, val.into(), addr.into(), *op2), Instruction::Load(dest, addr, op2) => encode_op2(2, 1, dest.into(), addr.into(), *op2), Instruction::Push(op2) => encode_op2(2, 2, 0, 0, *op2), Instruction::Pop(reg) => encode_reg(2, 3, reg.into(), 0, 0), Instruction::Skip(labeli32, cond, reg, op2) => { let jump_distance = match labeli32 { Labeli32::Value(v) => *v as u32, Labeli32::Label(label) => { let dest = prgm[label]; dest.wrapping_sub(self_addr + 4) / 4 } }; if jump_distance > 15 { println!("Error, cannot skip more than 15 instructions"); return Err(()); } encode_op2(11, cond.into(), jump_distance, reg.into(), *op2) } Instruction::Jump(labeli32) => { let dest = match labeli32 { Labeli32::Value(v) => *v as u32, Labeli32::Label(label) => { let dest = prgm[label]; dest.wrapping_sub(self_addr) >> 2 } }; dest & 0x1FFF_FFFF } Instruction::Call(labeli32) => { let dest = match labeli32 { Labeli32::Value(v) => *v as u32, Labeli32::Label(label) => { let dest = prgm[label]; dest.wrapping_sub(self_addr) >> 2 } }; (dest & 0x1FFF_FFFF) | (1 << 29) } Instruction::Ret() => encode_reg(2, 0b1000, 0, 0, 0), Instruction::Reti() => encode_reg(2, 0b1000, 0, 0, 0) | 1 << 29, Instruction::Swi() => encode_reg(2, 0b1001, 0, 0, 0), Instruction::Dint() => encode_reg(2, 0b1011, 0, 0, 0), Instruction::Eint() => encode_reg(2, 0b1100, 0, 0, 0), Instruction::GetStack(reg) => encode_reg(2, 0b1101, reg.into(), 0, 0), Instruction::SetStack(op2) => encode_op2(2, 0b1110, 0, 0, *op2), Instruction::Data(labeli32) => match labeli32 { Labeli32::Value(v) => *v as u32, Labeli32::Label(label) => prgm[label], }, }) } } struct Program { locations: HashMap, instructions: Vec<(Instruction, usize)>, } impl Index<&String> for &Program { type Output = u32; fn index(&self, index: &String) -> &Self::Output { match self.locations.get(index) { Some(v) => v, None => { println!("Could not resolve label {index}"); exit(1); } } } } fn reg(s: &str, n: usize, l: &str) -> Reg { match s { "r0" => Reg(0), "r1" => Reg(1), "r2" => Reg(2), "r3" => Reg(3), "r4" => Reg(4), "r5" => Reg(5), "r6" => Reg(6), "r7" => Reg(7), "r8" => Reg(8), "r9" => Reg(9), "r10" => Reg(10), "r11" => Reg(11), "r12" => Reg(12), "r13" => Reg(13), "r14" => Reg(14), "r15" => Reg(15), _ => { println!("Error: illegal register name: {s}. Error in {l} at line {n}"); exit(1) } } } fn _reg(s: &str, n: usize, l: &str) -> Reg { reg(s, n, l) } fn op2(s: &str, n: usize, l: &str) -> Op2 { Op2::Register(match s { "r0" => 0, "r1" => 1, "r2" => 2, "r3" => 3, "r4" => 4, "r5" => 5, "r6" => 6, "r7" => 7, "r8" => 8, "r9" => 9, "r10" => 10, "r11" => 11, "r12" => 12, "r13" => 13, "r14" => 14, "r15" => 15, _ => match parse_int::parse::(s) { Ok(v) => { if -0x10000 <= v && v <= 0xFFFF { return Op2::Direct(v as i32); } else { println!("Error: constant {s} is too large to fit in {l} at line {n}"); exit(1) } } Err(_) => { println!( "Error: neither a register name nor a constant: {s}. Error in {l} at line {n}" ); exit(1) } }, }) } fn memaddr<'a>(n: usize, l: &'a str, rgx: &RegexCollection) -> (Reg, Op2, Cow<'a, str>) { let f = || { println!("invalid memory operand in {l} at line {n}"); exit(1); }; let (rx, op) = match rgx.memop.captures(l) { Some(c) => { let rx = reg(c.get(1).unwrap().as_str(), n, l); let sign = c.get(2); match sign { Some(s) => { let sign = s.as_str(); let op = op2(c.get(3).unwrap().as_str(), n, l); match (sign, op) { ("+", o) => (rx, o), ("-", Op2::Direct(v)) => (rx, Op2::Direct(-v)), _ => f(), } } None => (rx, Op2::Direct(0)), } } None => f(), }; let replace = rgx.memop.replace(l, "0"); (rx, op, replace) } struct RegexCollection { label: Regex, memop: Regex, } fn process_line(prgm: &mut Program, (linenum, line): (usize, String), rgx: &RegexCollection) { let linenum = linenum + 1; let mut no_comment = line.split(';').next().unwrap_or(""); let binding = no_comment.to_ascii_lowercase(); no_comment = binding.as_str(); let comma = no_comment.find(':'); match comma { Some(index) => { let (label, next) = no_comment.split_at(index); let label = label.trim(); if !rgx.label.is_match(label) { println!("The label {label} on line {linenum} is illegal") } prgm.locations .insert(label.to_string(), (prgm.instructions.len() * 4) as u32); no_comment = &next[1..] } None => {} } let mut words = no_comment.split_ascii_whitespace(); let mnemonic = words.next(); let args: Vec<&str> = words.collect(); if mnemonic.is_none() { return; } let reg = |i| reg(args[i], linenum, &line); let op2 = |i| op2(args[i], linenum, &line); let labi32 = |i| { if rgx.label.is_match(args[i]) { Labeli32::Label((args[i] as &str).to_string()) } else { Labeli32::Value(parse_int::parse::(args[i]).unwrap_or_else(|e| { println!( "Malformated lable/constant: '{}' in {line} at line {linenum}: {e}", args[i] ); exit(1); }) as i32) } }; let chk = |n| { if args.len() != n { println!( "Wrong number of argument for {} in {line} at line {linenum}", mnemonic.unwrap() ); exit(1); } }; let next = match mnemonic.unwrap() { "copy" => { chk(2); if args[0] == "sp" { Instruction::SetStack(op2(1)) } else if args[1] == "sp" { Instruction::GetStack(reg(0)) } else { Instruction::Copy( reg(0), match op2(1) { Op2::Direct(v) => Labeli17::Value(v), Op2::Register(r) => Labeli17::Register(Reg(r)), }, ) } } "add" => { chk(3); Instruction::Add( reg(0), reg(1), match op2(2) { Op2::Direct(v) => Labeli17::Value(v), Op2::Register(r) => Labeli17::Register(Reg(r)), }, ) } "sub" => { chk(3); Instruction::Sub(reg(0), reg(1), op2(2)) } "or" => { chk(3); Instruction::Or(reg(0), reg(1), op2(2)) } "and" => { chk(3); Instruction::And(reg(0), reg(1), op2(2)) } "xor" => { chk(3); Instruction::Xor(reg(0), reg(1), op2(2)) } "lsl" => { chk(3); Instruction::Lsl(reg(0), reg(1), op2(2)) } "lsr" => { chk(3); Instruction::Lsr(reg(0), reg(1), op2(2)) } "smull" => { chk(3); Instruction::Smull(reg(0), reg(1), op2(2)) } "smulh" => { chk(3); Instruction::Smulh(reg(0), reg(1), op2(2)) } "umull" => { chk(3); Instruction::Umull(reg(0), reg(1), op2(2)) } "umulh" => { chk(3); Instruction::Umulh(reg(0), reg(1), op2(2)) } "mod" => { chk(3); Instruction::Mod(reg(0), reg(1), op2(2)) } "div" => { chk(3); Instruction::Div(reg(0), reg(1), op2(2)) } "asr" => { chk(3); Instruction::Asr(reg(0), reg(1), op2(2)) } "load" => { let (rx, op2, new_line) = memaddr(linenum, no_comment, rgx); let args: Vec<_> = new_line.split_ascii_whitespace().collect(); if args.len() != 3 { println!("wrong number of args in {line} at line {linenum}"); exit(1); } Instruction::Load(_reg(args[1], linenum, &line), rx, op2) } "store" => { let (rx, op2, new_line) = memaddr(linenum, no_comment, rgx); let args: Vec<_> = new_line.split_ascii_whitespace().collect(); if args.len() != 3 { println!("wrong number of args in {line} at line {linenum}"); exit(1); } Instruction::Store(rx, op2, _reg(args[2], linenum, &line)) } "push" => { chk(1); Instruction::Push(op2(0)) } "pop" => { chk(1); Instruction::Pop(reg(0)) } "skip" => { chk(4); let d = labi32(0); match args[1].try_into() { Ok(c) => Instruction::Skip(d, c, reg(2), op2(3)), Err(_) => { println!("in {line} at line {linenum}"); exit(1) } } } "skipto" => { chk(4); let d = labi32(0); match args[1].try_into() { Ok(c) => Instruction::Skip(d, c, reg(2), op2(3)), Err(_) => { println!("in {line} at line {linenum}"); exit(1) } } } "jump" => { chk(1); Instruction::Jump(labi32(0)) } "call" => { chk(1); Instruction::Call(labi32(0)) } "ret" => { chk(0); Instruction::Ret() } "reti" => { chk(0); Instruction::Reti() } "eint" => { chk(0); Instruction::Eint() } "dint" => { chk(0); Instruction::Dint() } "halt" => { chk(0); Instruction::Jump(Labeli32::Value(0)) } "nop" => { chk(0); Instruction::Add(Reg(0), Reg(0), Labeli17::Value(0)) } "not" => { chk(2); Instruction::Xor(reg(0), reg(1), Op2::Direct(-1)) } "swi" => { chk(0); Instruction::Swi() } "let" => { chk(2); let r = reg(0); match labi32(1) { Labeli32::Label(l) => { prgm.instructions.push(( Instruction::Copy(r, Labeli17::LabelHigh(l.clone())), linenum, )); prgm.instructions .push((Instruction::Lsl(r, r, Op2::Direct(16)), linenum)); Instruction::Add(r, r, Labeli17::LabelLow(l)) } Labeli32::Value(v) => { if -0xFFFF <= v && v <= 0xFFFF { Instruction::Copy(r, Labeli17::Value(v)) } else { prgm.instructions.push(( Instruction::Copy(r, Labeli17::Value((v as u32 >> 16) as i32)), linenum, )); prgm.instructions .push((Instruction::Lsl(r, r, Op2::Direct(16)), linenum)); Instruction::Add(r, r, Labeli17::Value(v & 0xFFFF)) } } } } "d" => { chk(1); Instruction::Data(labi32(0)) } m => { println!("Unknown mnemnonic {m} in {line} at line {linenum}"); exit(1); } }; prgm.instructions.push((next, linenum)); } const HELP: &str = "usage: cargo run --release INPUT [OUTPUT] where INPUT is a file name or - for stdin and the optional OUTPUT is a output file name if stdin is used and no output is used, out.bin is used"; fn main() { let progname = args().nth(1).expect(HELP); let outname = args().nth(2); let mut prgm = Program { locations: HashMap::new(), instructions: Vec::new(), }; let rgx = RegexCollection { label: Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(), memop: Regex::new(r"\[ *(r[0-9]{1,2}) *(?:(\+|-) *(r[0-9]{1,2}|0x[0-9a-f]*|[0-9]*) *)?\]") .unwrap(), }; if progname == "-" { for line in stdin() .lines() .map(|x| x.expect("error while reading stdin")) .enumerate() { process_line(&mut prgm, line, &rgx); } } else { let infile = std::fs::File::open(progname.as_str()).expect("Cannot open input file"); for line in BufReader::new(infile) .lines() .map(|x| x.expect("Erro while reading input file")) .enumerate() { process_line(&mut prgm, line, &rgx); } } let outname = match outname { Some(s) => s, None => { if progname == "-" { "out.bin".to_string() } else { progname .strip_suffix(".asm") .unwrap_or(progname.as_str()) .to_string() + ".bin" } } }; let mut outfile = std::fs::File::create_buffered(outname).expect("could not open out file"); for (num, (instruction, linenum)) in prgm.instructions.iter().enumerate() { let v = instruction.encode(&prgm, num as u32 * 4); match v { Ok(v) => writeln!(outfile, "{v:08x}").expect("error writing"), Err(_) => { println!("in source at line {linenum}"); exit(1); } } } if prgm.locations.len() != 0 { writeln!(outfile, "SYMBOL TABLE:").expect("error writing"); } let mut assocs: Vec<_> = prgm.locations.iter().collect(); assocs.sort_by_key(|(_, v)| **v); for (s, v) in assocs { writeln!(outfile, "{v:08x} {s}").expect("error writing"); } }