aarch64_sim/lib.rs

//! Tiny AArch64 multi-core simulator.
//!
//! v0.1 — MOVZ, ADD (imm), LDR/STR (unsigned offset), B + MMIO UART.
//! v0.2 — Stage-1 MMU translation walk (4 KiB granule, 39-bit VA).
//! v0.3 — MSR/MRS + SCTLR_EL1.M=1 routes fetch/load/store through MMU.
//! v0.4 — EL2 boot, ERET drops to EL1.
//! v0.5 — SVC + ESR_EL1 (full EL0 ↔ EL1 syscall round trip).
//! v0.6 — Two cores (P-core / E-core) sharing physical memory + UART.
//!        Per-core register file, EL state, and sysregs (incl. MPIDR_EL1).

use serde::Serialize;

#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::wasm_bindgen;

#[cfg(feature = "wasm")]
mod wasm;

const MEM_SIZE: usize = 0x10000;
const UART_OUT: u64 = 0x1000;
const ENTRY_PC: u64 = 0x4000;

// Demo page-table layout (4 KiB granule, T0SZ=25 → 39-bit VA, start at level 1).
const L1_TABLE_PA: u64 = 0x8000;
const L2_TABLE_PA: u64 = 0x9000;
const L3_TABLE_PA: u64 = 0xA000;

const NUM_CORES: usize = 2;

// MPIDR_EL1 values per core. Bit 31 is RES1 in ARMv8. We model two clusters:
// core 0 in cluster 0 (P-core, Aff1=0), core 1 in cluster 1 (E-core, Aff1=1).
const MPIDR_VALUES: [u64; NUM_CORES] = [0x8000_0000, 0x8000_0100];
const CORE_KIND: [&str; NUM_CORES] = ["P-core", "E-core"];

// AIC timer: fires an IRQ on every core every TIMER_PERIOD system steps.
// Sized so the per-core kernel boot (~35 inst) completes before the first
// tick, with room for several task iterations between ticks.
const TIMER_PERIOD: u64 = 80;

// AIC MMIO layout (loosely modelled on Apple's per-core AIC view): software
// reads from one MMIO base and the controller routes the call by which core
// issued it. We expose two registers:
//   AIC_BASE + 0x00  → ACK (read-only): returns the lowest pending IRQ id for
//                      the calling core and clears that bit. 0xFFFF_FFFF when
//                      nothing pending.
//   AIC_BASE + 0x10  → IPI_SET (write-only): target core id; raises IRQ_IPI on
//                      that core.
const AIC_BASE: u64 = 0x2000;
const AIC_END: u64 = 0x2100;
const AIC_REG_ACK: u64 = 0x00;
const AIC_REG_IPI_SET: u64 = 0x10;

const IRQ_TIMER: u32 = 0;
const IRQ_IPI: u32 = 1;
const IRQ_NONE: u32 = 0xFFFF_FFFF;
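
// Illustrative handler fragment (hypothetical labels, not necessarily what
// the demo emits), using only instructions this simulator implements and
// assuming the AIC range is identity-mapped:
//   movz x0, #0x2000        // AIC_BASE
//   ldr  x1, [x0]           // ACK: lowest pending IRQ id, clears that bit
//   cbz  x1, timer_handler  // IRQ_TIMER == 0
//   sub  x1, x1, #1
//   cbz  x1, ipi_handler    // IRQ_IPI == 1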

// Block-device MMIO. Loosely virtio-blk-shaped but with a fixed-size 64-byte
// sector and a synchronous "write CMD → transfer happens before the STR
// returns". 8 sectors × 64 bytes = 512 bytes total disk image.
const BLK_BASE: u64 = 0x3000;
const BLK_END: u64 = 0x3100;
const BLK_REG_SECTOR: u64 = 0x00;
const BLK_REG_BUF_ADDR: u64 = 0x08;
const BLK_REG_CMD: u64 = 0x10;
const BLK_REG_STATUS: u64 = 0x18;
const SECTOR_SIZE: u64 = 64;
const NUM_SECTORS: u64 = 8;
const DISK_SIZE: u64 = SECTOR_SIZE * NUM_SECTORS;
const BLK_CMD_READ: u64 = 0;
const BLK_CMD_WRITE: u64 = 1;
const BLK_STATUS_IDLE: u64 = 0;
const BLK_STATUS_OK: u64 = 1;
const BLK_STATUS_FAULT: u64 = 2;
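
// Illustrative driver sequence (hypothetical buffer address, assuming the
// BLK range is identity-mapped): program the request registers, then kick
// CMD; the transfer is synchronous, so STATUS is valid on the next load.
//   movz x0, #0x3000        // BLK_BASE
//   movz x1, #2
//   str  x1, [x0]           // SECTOR = 2
//   movz x1, #0x6000
//   str  x1, [x0, #8]       // BUF_ADDR = 0x6000
//   movz x1, #0
//   str  x1, [x0, #16]      // CMD = BLK_CMD_READ; 64 bytes copied here
//   ldr  x2, [x0, #24]      // STATUS == BLK_STATUS_OK (1)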

#[derive(Serialize, Clone)]
pub struct CoreState {
    pub id: u8,
    pub kind: String,
    pub mpidr: u64,
    pub x: [u64; 31],
    pub sp: u64,
    pub pc: u64,
    pub nzcv: u8,
    pub halted: bool,
    pub last_trap: Option<String>,
    pub steps: u64,
    pub current_el: u8,
    pub daif: u8,
    pub wfi_halted: bool,
    pub ttbr0_el1: u64,
    pub tcr_el1: u64,
    pub sctlr_el1: u64,
    pub vbar_el1: u64,
    pub elr_el1: u64,
    pub spsr_el1: u64,
    pub esr_el1: u64,
    pub vbar_el2: u64,
    pub elr_el2: u64,
    pub spsr_el2: u64,
    pub esr_el2: u64,
}

#[derive(Serialize, Clone)]
pub struct PageAttrs {
    pub af: bool,
    pub ap: u8,
    pub attr_idx: u8,
    pub sh: u8,
}

#[derive(Serialize, Clone)]
#[serde(tag = "kind")]
pub enum WalkOutcome {
    Table { next_table: u64 },
    Page { pa: u64, attrs: PageAttrs },
    Block { pa: u64, attrs: PageAttrs, span: u64 },
    Invalid,
    Fault { reason: String },
}

#[derive(Serialize, Clone)]
pub struct WalkStep {
    pub level: u8,
    pub table_addr: u64,
    pub index: u32,
    pub entry_addr: u64,
    pub descriptor: u64,
    pub outcome: WalkOutcome,
}

#[derive(Serialize, Clone)]
pub struct TranslationResult {
    pub va: u64,
    pub steps: Vec<WalkStep>,
    pub pa: Option<u64>,
    pub fault: Option<String>,
    pub mmu_enabled: bool,
}

#[derive(Serialize, Clone)]
pub struct AicState {
    /// Per-core pending bitmap. Bit 0 = IRQ_TIMER, bit 1 = IRQ_IPI.
    pub pending: Vec<u32>,
    pub total_acks: u64,
    /// Cumulative number of IPIs the AIC has dispatched (every successful
    /// MMIO write to AIC_REG_IPI_SET counts once).
    pub total_ipis: u64,
    /// Index of the most recent IPI's target core, or None if no IPI yet.
    /// Cleared back to None at reset.
    pub last_ipi_target: Option<u32>,
}

// === Aic: tiny Apple-style interrupt controller ===============================

struct Aic {
    pending: [u32; NUM_CORES],
    total_acks: u64,
    total_ipis: u64,
    last_ipi_target: Option<u32>,
}

impl Aic {
    fn new() -> Self {
        Aic {
            pending: [0; NUM_CORES],
            total_acks: 0,
            total_ipis: 0,
            last_ipi_target: None,
        }
    }

    fn reset(&mut self) {
        for p in self.pending.iter_mut() {
            *p = 0;
        }
        self.total_acks = 0;
        self.total_ipis = 0;
        self.last_ipi_target = None;
    }

    fn set_irq(&mut self, core: usize, irq_id: u32) {
        if core < NUM_CORES && irq_id < 32 {
            self.pending[core] |= 1u32 << irq_id;
        }
    }

    fn has_pending(&self, core: usize) -> bool {
        core < NUM_CORES && self.pending[core] != 0
    }

    /// MMIO ACK read by `core`: lowest pending IRQ id, clears it. Returns
    /// IRQ_NONE if nothing pending.
    fn read_ack(&mut self, core: usize) -> u32 {
        if core >= NUM_CORES || self.pending[core] == 0 {
            return IRQ_NONE;
        }
        let irq = self.pending[core].trailing_zeros();
        self.pending[core] &= !(1u32 << irq);
        self.total_acks = self.total_acks.saturating_add(1);
        irq
    }

    fn mmio_read(&mut self, core: usize, offset: u64) -> u64 {
        match offset {
            AIC_REG_ACK => self.read_ack(core) as u64,
            _ => 0,
        }
    }

    fn mmio_write(&mut self, _core: usize, offset: u64, val: u64) {
        match offset {
            AIC_REG_IPI_SET => {
                let target = val as u32;
                if (target as usize) < NUM_CORES {
                    self.set_irq(target as usize, IRQ_IPI);
                    self.total_ipis = self.total_ipis.saturating_add(1);
                    self.last_ipi_target = Some(target);
                }
            }
            _ => {}
        }
    }

    fn snapshot(&self) -> AicState {
        AicState {
            pending: self.pending.to_vec(),
            total_acks: self.total_acks,
            total_ipis: self.total_ipis,
            last_ipi_target: self.last_ipi_target,
        }
    }
}
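
// A minimal sketch (test-only, not used by the simulator) of the ACK
// priority rule: the lowest pending IRQ id is returned and cleared first.
#[cfg(test)]
mod aic_example {
    use super::*;

    #[test]
    fn ack_returns_lowest_pending_irq_first() {
        let mut aic = Aic::new();
        aic.set_irq(0, IRQ_IPI); // bit 1
        aic.set_irq(0, IRQ_TIMER); // bit 0
        assert_eq!(aic.read_ack(0), IRQ_TIMER); // lowest id wins
        assert_eq!(aic.read_ack(0), IRQ_IPI);
        assert_eq!(aic.read_ack(0), IRQ_NONE); // nothing left pending
    }
}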

#[derive(Serialize, Clone)]
pub struct BlockState {
    pub sector: u64,
    pub buf_addr: u64,
    pub last_command: u64,
    pub status: u64,
    pub total_reads: u64,
    pub total_writes: u64,
    pub disk: Vec<u8>,
}

// === Block: a tiny synchronous "virtio-blk"-shaped device ====================

struct Block {
    disk: Vec<u8>,
    sector: u64,
    buf_addr: u64,
    last_command: u64,
    status: u64,
    total_reads: u64,
    total_writes: u64,
}

impl Block {
    fn new() -> Self {
        let mut disk = vec![0u8; DISK_SIZE as usize];
        // Pre-populate sectors with text so a read produces visible content.
        let inscribe = |b: &mut [u8], sector: u64, text: &str| {
            let off = (sector * SECTOR_SIZE) as usize;
            let bytes = text.as_bytes();
            let n = bytes.len().min(SECTOR_SIZE as usize);
            b[off..off + n].copy_from_slice(&bytes[..n]);
        };
        inscribe(&mut disk, 0, "AArch64 disk image - sector 0\n");
        inscribe(&mut disk, 1, "Sector 1: kernels run on top of devices\n");
        inscribe(&mut disk, 2, "Sector 2: virtio is the lingua franca\n");
        inscribe(&mut disk, 3, "Sector 3: this is a 64-byte sector\n");
        Self {
            disk,
            sector: 0,
            buf_addr: 0,
            last_command: 0,
            status: BLK_STATUS_IDLE,
            total_reads: 0,
            total_writes: 0,
        }
    }

    fn reset(&mut self) {
        // Preserve the disk image (the user's textarea edits live there) and
        // only clear ephemeral controller state.
        self.sector = 0;
        self.buf_addr = 0;
        self.last_command = 0;
        self.status = BLK_STATUS_IDLE;
        self.total_reads = 0;
        self.total_writes = 0;
    }

    fn mmio_read(&self, offset: u64) -> u64 {
        match offset {
            BLK_REG_SECTOR => self.sector,
            BLK_REG_BUF_ADDR => self.buf_addr,
            BLK_REG_CMD => self.last_command,
            BLK_REG_STATUS => self.status,
            _ => 0,
        }
    }

    fn mmio_write(&mut self, mem: &mut [u8], offset: u64, val: u64) {
        match offset {
            BLK_REG_SECTOR => self.sector = val,
            BLK_REG_BUF_ADDR => self.buf_addr = val,
            BLK_REG_CMD => {
                self.last_command = val;
                let sec = self.sector as usize * SECTOR_SIZE as usize;
                let buf = self.buf_addr as usize;
                let len = SECTOR_SIZE as usize;
                let in_disk = sec + len <= self.disk.len();
                let in_mem = buf + len <= mem.len();
                if !in_disk || !in_mem {
                    self.status = BLK_STATUS_FAULT;
                    return;
                }
                match val {
                    BLK_CMD_READ => {
                        mem[buf..buf + len].copy_from_slice(&self.disk[sec..sec + len]);
                        self.status = BLK_STATUS_OK;
                        self.total_reads = self.total_reads.saturating_add(1);
                    }
                    BLK_CMD_WRITE => {
                        self.disk[sec..sec + len].copy_from_slice(&mem[buf..buf + len]);
                        self.status = BLK_STATUS_OK;
                        self.total_writes = self.total_writes.saturating_add(1);
                    }
                    _ => {
                        self.status = BLK_STATUS_FAULT;
                    }
                }
            }
            _ => {}
        }
    }

    fn snapshot(&self) -> BlockState {
        BlockState {
            sector: self.sector,
            buf_addr: self.buf_addr,
            last_command: self.last_command,
            status: self.status,
            total_reads: self.total_reads,
            total_writes: self.total_writes,
            disk: self.disk.clone(),
        }
    }
}
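
// A minimal sketch (test-only) of the synchronous contract: by the time the
// CMD write returns, the sector bytes are already in guest memory.
#[cfg(test)]
mod block_example {
    use super::*;

    #[test]
    fn read_command_copies_sector_zero_into_memory() {
        let mut blk = Block::new();
        let mut mem = vec![0u8; MEM_SIZE];
        blk.mmio_write(&mut mem, BLK_REG_SECTOR, 0);
        blk.mmio_write(&mut mem, BLK_REG_BUF_ADDR, 0x6000);
        blk.mmio_write(&mut mem, BLK_REG_CMD, BLK_CMD_READ);
        assert_eq!(blk.status, BLK_STATUS_OK);
        assert!(mem[0x6000..].starts_with(b"AArch64 disk image"));
    }
}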

// === Core: per-core register file + EL state + sysregs ===========================

struct Core {
    id: u8,
    mpidr: u64,
    x: [u64; 31],
    sp: u64,
    pc: u64,
    nzcv: u8,
    halted: bool,
    last_trap: Option<String>,
    steps: u64,
    current_el: u8,
    /// Low 4 bits of PSTATE.DAIF — D, A, I, F (bit 3 → bit 0). 1 = masked.
    /// On reset (EL2) we mask everything; SVC/IRQ entries also re-mask.
    daif: u8,
    /// Set by WFI: the core stops fetching until an unmasked IRQ wakes it.
    /// Cleared when take_irq runs.
    wfi_halted: bool,
    /// Address reserved by the most-recent LDXR. STXR succeeds only while
    /// this matches; cleared by CLREX, by IRQ entry, and by a store from
    /// any other core to the same address.
    exclusive_monitor: Option<u64>,
    /// PA of the last store this core performed in this step. Cpu::step
    /// reads it and invalidates the matching monitor on the other cores.
    last_store_pa: Option<u64>,
    ttbr0_el1: u64,
    tcr_el1: u64,
    sctlr_el1: u64,
    vbar_el1: u64,
    elr_el1: u64,
    spsr_el1: u64,
    esr_el1: u64,
    vbar_el2: u64,
    elr_el2: u64,
    spsr_el2: u64,
    esr_el2: u64,
}

enum StepResult {
    Continue,
    Halt,
}

impl Core {
    fn new(id: u8, mpidr: u64) -> Self {
        Core {
            id,
            mpidr,
            x: [0; 31],
            sp: 0,
            pc: ENTRY_PC,
            nzcv: 0,
            halted: false,
            last_trap: None,
            steps: 0,
            current_el: 2,
            daif: 0xF, // boot at EL2 with all interrupts masked
            wfi_halted: false,
            exclusive_monitor: None,
            last_store_pa: None,
            ttbr0_el1: 0,
            tcr_el1: 0,
            sctlr_el1: 0,
            vbar_el1: 0,
            elr_el1: 0,
            spsr_el1: 0,
            esr_el1: 0,
            vbar_el2: 0,
            elr_el2: 0,
            spsr_el2: 0,
            esr_el2: 0,
        }
    }

    fn reset(&mut self) {
        let saved_id = self.id;
        let saved_mpidr = self.mpidr;
        *self = Core::new(saved_id, saved_mpidr);
    }

    fn snapshot(&self) -> CoreState {
        CoreState {
            id: self.id,
            kind: CORE_KIND[self.id as usize].to_string(),
            mpidr: self.mpidr,
            x: self.x,
            sp: self.sp,
            pc: self.pc,
            nzcv: self.nzcv,
            halted: self.halted,
            last_trap: self.last_trap.clone(),
            steps: self.steps,
            current_el: self.current_el,
            daif: self.daif,
            wfi_halted: self.wfi_halted,
            ttbr0_el1: self.ttbr0_el1,
            tcr_el1: self.tcr_el1,
            sctlr_el1: self.sctlr_el1,
            vbar_el1: self.vbar_el1,
            elr_el1: self.elr_el1,
            spsr_el1: self.spsr_el1,
            esr_el1: self.esr_el1,
            vbar_el2: self.vbar_el2,
            elr_el2: self.elr_el2,
            spsr_el2: self.spsr_el2,
            esr_el2: self.esr_el2,
        }
    }

    fn read_x(&self, idx: usize) -> u64 {
        if idx == 31 { 0 } else { self.x[idx] }
    }

    fn write_x(&mut self, idx: usize, val: u64) {
        if idx < 31 {
            self.x[idx] = val;
        }
    }

    fn trap(&mut self, msg: String) {
        self.last_trap = Some(msg);
        self.halted = true;
    }

    /// Pack the current EL+SP context and DAIF into an SPSR-shaped value, suitable
    /// for storing in SPSR_EL<x> on exception entry.
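    /// Worked example: at EL1 with DAIF=0b1111 this packs M[3:0]=0b0101 (EL1h)
    /// and DAIF into bits 9:6, i.e. 0b11_1100_0101 = 0x3C5.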
    fn build_spsr(&self) -> u64 {
        let m_low: u64 = match self.current_el {
            0 => 0b0000,           // EL0t (no SP_ELx)
            1 => 0b0101,           // EL1h (uses SP_EL1)
            2 => 0b1001,           // EL2h
            _ => 0,
        };
        m_low | ((self.daif as u64) << 6)
    }

    /// Take an asynchronous IRQ exception into EL1, vectoring to VBAR_EL1+0x480.
    /// Caller is responsible for verifying DAIF.I is clear and irq_pending is set.
    fn take_irq(&mut self) {
        self.elr_el1 = self.pc; // resume at the not-yet-fetched instruction
        self.spsr_el1 = self.build_spsr();
        self.esr_el1 = 0; // IRQ has no syndrome
        self.current_el = 1;
        self.daif = 0xF; // exception entry masks everything
        self.pc = self.vbar_el1.wrapping_add(0x480);
        self.exclusive_monitor = None; // exception entry clears the local monitor
        self.wfi_halted = false;
    }

    fn irq_masked(&self) -> bool {
        self.daif & 0b0010 != 0 // I bit (bit 1 of low nibble: D=8, A=4, I=2, F=1)
    }

    fn read_sysreg(&self, sr: (u32, u32, u32, u32, u32)) -> Result<u64, String> {
        Ok(match sr {
            (3, 0, 2, 0, 0) => self.ttbr0_el1,
            (3, 0, 2, 0, 2) => self.tcr_el1,
            (3, 0, 1, 0, 0) => self.sctlr_el1,
            (3, 0, 12, 0, 0) => self.vbar_el1,
            (3, 0, 4, 0, 0) => self.spsr_el1,
            (3, 0, 4, 0, 1) => self.elr_el1,
            (3, 0, 5, 2, 0) => self.esr_el1,
            (3, 4, 12, 0, 0) => self.vbar_el2,
            (3, 4, 4, 0, 0) => self.spsr_el2,
            (3, 4, 4, 0, 1) => self.elr_el2,
            (3, 4, 5, 2, 0) => self.esr_el2,
            // CurrentEL[3:2] = current_el.
            (3, 0, 4, 2, 2) => (self.current_el as u64) << 2,
            // MPIDR_EL1 — read-only, identifies this core.
            (3, 0, 0, 0, 5) => self.mpidr,
            _ => return Err(unsupported_sysreg("MRS", sr, self.pc)),
        })
    }

    fn write_sysreg(&mut self, sr: (u32, u32, u32, u32, u32), val: u64) -> Result<(), String> {
        match sr {
            (3, 0, 2, 0, 0) => self.ttbr0_el1 = val,
            (3, 0, 2, 0, 2) => self.tcr_el1 = val,
            (3, 0, 1, 0, 0) => self.sctlr_el1 = val,
            (3, 0, 12, 0, 0) => self.vbar_el1 = val,
            (3, 0, 4, 0, 0) => self.spsr_el1 = val,
            (3, 0, 4, 0, 1) => self.elr_el1 = val,
            (3, 0, 5, 2, 0) => self.esr_el1 = val,
            (3, 4, 12, 0, 0) => self.vbar_el2 = val,
            (3, 4, 4, 0, 0) => self.spsr_el2 = val,
            (3, 4, 4, 0, 1) => self.elr_el2 = val,
            (3, 4, 5, 2, 0) => self.esr_el2 = val,
            (3, 0, 4, 2, 2) => return Err("MSR to CurrentEL (read-only)".into()),
            (3, 0, 0, 0, 5) => return Err("MSR to MPIDR_EL1 (read-only)".into()),
            _ => return Err(unsupported_sysreg("MSR", sr, self.pc)),
        }
        Ok(())
    }

    fn translate_for_access(&self, mem: &[u8], va: u64) -> Result<u64, String> {
        if self.sctlr_el1 & 1 == 0 {
            return Ok(va);
        }
        let r = self.do_translate(mem, va);
        let pa = r.pa.ok_or_else(|| r.fault.clone().unwrap_or_else(|| "MMU fault".into()))?;

        // AP-bit enforcement: at EL0, the leaf descriptor's AP[0] must be set
        // (user-accessible). AP=00 / AP=10 are kernel-only and fault for EL0.
        // Higher ELs are unrestricted in this toy.
        if self.current_el == 0 {
            let ap = match r.steps.last().map(|s| &s.outcome) {
                Some(WalkOutcome::Page { attrs, .. }) | Some(WalkOutcome::Block { attrs, .. }) => {
                    attrs.ap
                }
                _ => return Ok(pa),
            };
            if ap & 1 == 0 {
                return Err(format!(
                    "permission fault at VA {:#x} (AP={:#04b}, EL0)",
                    va, ap
                ));
            }
        }
        Ok(pa)
    }

    fn fetch_u32(&self, mem: &[u8], va: u64) -> Result<u32, String> {
        let pa = self.translate_for_access(mem, va)?;
        read_pa_u32(mem, pa)
    }

    fn load64(&self, mem: &[u8], aic: &mut Aic, block: &Block, va: u64) -> Result<u64, String> {
        let pa = self.translate_for_access(mem, va)?;
        if (AIC_BASE..AIC_END).contains(&pa) {
            return Ok(aic.mmio_read(self.id as usize, pa - AIC_BASE));
        }
        if (BLK_BASE..BLK_END).contains(&pa) {
            return Ok(block.mmio_read(pa - BLK_BASE));
        }
        read_pa_u64(mem, pa)
    }

    fn store64(
        &mut self,
        mem: &mut [u8],
        out: &mut Vec<u8>,
        aic: &mut Aic,
        block: &mut Block,
        va: u64,
        val: u64,
    ) -> Result<(), String> {
        let pa = self.translate_for_access(mem, va)?;
        self.last_store_pa = Some(pa);
        if (AIC_BASE..AIC_END).contains(&pa) {
            aic.mmio_write(self.id as usize, pa - AIC_BASE, val);
            return Ok(());
        }
        if (BLK_BASE..BLK_END).contains(&pa) {
            block.mmio_write(mem, pa - BLK_BASE, val);
            return Ok(());
        }
        if pa == UART_OUT {
            out.push((val & 0xFF) as u8);
        }
        write_pa_u64(mem, pa, val)
    }

    /// Public translation entry: walk + AP-bit enforcement based on this core's
    /// current_el. translate_for_access (instruction path) and the JS-callable
    /// translate() both go through here.
    fn do_translate(&self, mem: &[u8], va: u64) -> TranslationResult {
        let mut r = self.do_translate_walk(mem, va);
        if r.pa.is_some() && self.current_el == 0 {
            let ap = match r.steps.last().map(|s| &s.outcome) {
                Some(WalkOutcome::Page { attrs, .. })
                | Some(WalkOutcome::Block { attrs, .. }) => attrs.ap,
                _ => return r,
            };
            if ap & 1 == 0 {
                r.fault = Some(format!(
                    "permission fault at VA {:#x} (AP={:#04b}, EL0)",
                    va, ap
                ));
                r.pa = None;
            }
        }
        r
    }

    fn do_translate_walk(&self, mem: &[u8], va: u64) -> TranslationResult {
        let mmu_enabled = self.sctlr_el1 & 1 != 0;
        let t0sz = self.tcr_el1 & 0x3F;
        if t0sz == 0 || self.ttbr0_el1 == 0 {
            return TranslationResult {
                va,
                steps: Vec::new(),
                pa: None,
                fault: Some("MMU not configured (TTBR0_EL1 or TCR_EL1.T0SZ unset)".into()),
                mmu_enabled,
            };
        }

        let va_bits = 64 - t0sz;
        let start_level: u8 = if va_bits >= 40 {
            0
        } else if va_bits >= 31 {
            1
        } else if va_bits >= 22 {
            2
        } else {
            3
        };

        let mut steps: Vec<WalkStep> = Vec::new();
        let mut table_addr = self.ttbr0_el1 & 0x0000_FFFF_FFFF_F000;
        let mut level = start_level;

        loop {
            let shift = 12 + 9 * (3 - level as u32);
            let index = ((va >> shift) & 0x1FF) as u32;
            let entry_addr = table_addr + (index as u64) * 8;
            let descriptor = match read_pa_u64(mem, entry_addr) {
                Ok(d) => d,
                Err(e) => {
                    steps.push(WalkStep {
                        level,
                        table_addr,
                        index,
                        entry_addr,
                        descriptor: 0,
                        outcome: WalkOutcome::Fault { reason: e.clone() },
                    });
                    return TranslationResult {
                        va,
                        steps,
                        pa: None,
                        fault: Some(e),
                        mmu_enabled,
                    };
                }
            };

            let valid = descriptor & 1 != 0;
            let typ = (descriptor >> 1) & 1;
            if !valid {
                steps.push(WalkStep {
                    level,
                    table_addr,
                    index,
                    entry_addr,
                    descriptor,
                    outcome: WalkOutcome::Invalid,
                });
                return TranslationResult {
                    va,
                    steps,
                    pa: None,
                    fault: Some(format!(
                        "translation fault at level {} (invalid descriptor)",
                        level
                    )),
                    mmu_enabled,
                };
            }

            if level == 3 {
                let pa_base = descriptor & 0x0000_FFFF_FFFF_F000;
                let pa = pa_base | (va & 0xFFF);
                let attrs = decode_attrs(descriptor);
                let af = attrs.af;
                steps.push(WalkStep {
                    level: 3,
                    table_addr,
                    index,
                    entry_addr,
                    descriptor,
                    outcome: WalkOutcome::Page { pa, attrs },
                });
                if !af {
                    return TranslationResult {
                        va,
                        steps,
                        pa: None,
                        fault: Some("access flag fault".into()),
                        mmu_enabled,
                    };
                }
                return TranslationResult {
                    va,
                    steps,
                    pa: Some(pa),
                    fault: None,
                    mmu_enabled,
                };
            }

            if typ == 1 {
                let next = descriptor & 0x0000_FFFF_FFFF_F000;
                steps.push(WalkStep {
                    level,
                    table_addr,
                    index,
                    entry_addr,
                    descriptor,
                    outcome: WalkOutcome::Table { next_table: next },
                });
                table_addr = next;
                level += 1;
            } else {
                let block_size = 1u64 << shift;
                let pa_base = descriptor & !(block_size - 1) & 0x0000_FFFF_FFFF_FFFF;
                let pa = pa_base | (va & (block_size - 1));
                let attrs = decode_attrs(descriptor);
                let af = attrs.af;
                steps.push(WalkStep {
                    level,
                    table_addr,
                    index,
                    entry_addr,
                    descriptor,
                    outcome: WalkOutcome::Block {
                        pa,
                        attrs,
                        span: block_size,
                    },
                });
                if !af {
                    return TranslationResult {
                        va,
                        steps,
                        pa: None,
                        fault: Some("access flag fault".into()),
                        mmu_enabled,
                    };
                }
                return TranslationResult {
                    va,
                    steps,
                    pa: Some(pa),
                    fault: None,
                    mmu_enabled,
                };
            }
        }
    }

    fn step(
        &mut self,
        mem: &mut [u8],
        out: &mut Vec<u8>,
        aic: &mut Aic,
        block: &mut Block,
    ) -> bool {
        if self.halted {
            return false;
        }
        let pc = self.pc;
        let insn = match self.fetch_u32(mem, pc) {
            Ok(v) => v,
            Err(e) => {
                self.trap(format!("{e} at pc={:#x}", pc));
                return false;
            }
        };
        match self.execute(insn, mem, out, aic, block) {
            Ok(StepResult::Continue) => true,
            Ok(StepResult::Halt) => {
                self.halted = true;
                false
            }
            Err(e) => {
                self.trap(e);
                false
            }
        }
    }

    fn execute(
        &mut self,
        insn: u32,
        mem: &mut [u8],
        out: &mut Vec<u8>,
        aic: &mut Aic,
        block: &mut Block,
    ) -> Result<StepResult, String> {
        self.steps = self.steps.saturating_add(1);

        // MOVZ Xd, #imm16{, LSL #hw*16} :: 1 10 100101 hw imm16 Rd
        if insn & 0xFF80_0000 == 0xD280_0000 {
            let rd = (insn & 0x1F) as usize;
            let hw = ((insn >> 21) & 0x3) as u32;
            let imm = ((insn >> 5) & 0xFFFF) as u64;
            self.write_x(rd, imm << (hw * 16));
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // ADD Xd, Xn, #imm12{, LSL #12} :: 1 00 10001 sh imm12 Rn Rd
        if insn & 0xFF80_0000 == 0x9100_0000 {
            let rd = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let imm12 = ((insn >> 10) & 0xFFF) as u64;
            let sh = ((insn >> 22) & 0x1) as u32;
            let imm = if sh == 1 { imm12 << 12 } else { imm12 };
            let val = self.read_x(rn).wrapping_add(imm);
            self.write_x(rd, val);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // SUB Xd, Xn, #imm12{, LSL #12} :: 1 10 10001 sh imm12 Rn Rd
        if insn & 0xFF80_0000 == 0xD100_0000 {
            let rd = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let imm12 = ((insn >> 10) & 0xFFF) as u64;
            let sh = ((insn >> 22) & 0x1) as u32;
            let imm = if sh == 1 { imm12 << 12 } else { imm12 };
            let val = self.read_x(rn).wrapping_sub(imm);
            self.write_x(rd, val);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // ADD Xd, Xn, Xm  (shifted register, LSL #0) :: 1 0 0 01011 00 0 Rm 000000 Rn Rd
        if insn & 0xFF20_FC00 == 0x8B00_0000 {
            let rd = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let rm = ((insn >> 16) & 0x1F) as usize;
            let val = self.read_x(rn).wrapping_add(self.read_x(rm));
            self.write_x(rd, val);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // SUB Xd, Xn, Xm  (shifted register, LSL #0) :: 1 1 0 01011 00 0 Rm 000000 Rn Rd
        if insn & 0xFF20_FC00 == 0xCB00_0000 {
            let rd = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let rm = ((insn >> 16) & 0x1F) as usize;
            let val = self.read_x(rn).wrapping_sub(self.read_x(rm));
            self.write_x(rd, val);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // STR Xt, [Xn, #imm12]
        if insn & 0xFFC0_0000 == 0xF900_0000 {
            let rt = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let imm12 = ((insn >> 10) & 0xFFF) as u64;
            let addr = self.read_x(rn).wrapping_add(imm12 * 8);
            let val = self.read_x(rt);
            self.store64(mem, out, aic, block, addr, val)?;
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // LDR Xt, [Xn, #imm12]
        if insn & 0xFFC0_0000 == 0xF940_0000 {
            let rt = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let imm12 = ((insn >> 10) & 0xFFF) as u64;
            let addr = self.read_x(rn).wrapping_add(imm12 * 8);
            let val = self.load64(mem, aic, block, addr)?;
            self.write_x(rt, val);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // LDRB Wt, [Xn, #imm12]  (byte load, zero-extend) :: 0011 1001 01 imm12 Rn Rt
        if insn & 0xFFC0_0000 == 0x3940_0000 {
            let rt = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let imm12 = ((insn >> 10) & 0xFFF) as u64;
            let va = self.read_x(rn).wrapping_add(imm12);
            let pa = self.translate_for_access(mem, va)?;
            // For the AIC/Block MMIO ranges, a byte load returns the low
            // byte of the 64-bit mmio_read result.
            let byte = if (AIC_BASE..AIC_END).contains(&pa) {
                (aic.mmio_read(self.id as usize, pa - AIC_BASE) & 0xFF) as u8
            } else if (BLK_BASE..BLK_END).contains(&pa) {
                (block.mmio_read(pa - BLK_BASE) & 0xFF) as u8
            } else {
                let a = pa as usize;
                if a >= mem.len() {
                    return Err(format!("byte load fault at PA {:#x}", pa));
                }
                mem[a]
            };
            self.write_x(rt, byte as u64);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // CBZ / CBNZ Xt, label :: sf 011010 op imm19 Rt
        if insn & 0xFE00_0000 == 0xB400_0000 {
            let op = (insn >> 24) & 1; // 0 = CBZ, 1 = CBNZ
            let imm19_raw = ((insn >> 5) & 0x7_FFFF) as u32;
            let imm19 = ((imm19_raw as i32) << 13) >> 13; // sign-extend 19-bit
            let offset = (imm19 as i64) * 4;
            let rt = (insn & 0x1F) as usize;
            let val = self.read_x(rt);
            let take = if op == 0 { val == 0 } else { val != 0 };
            if take {
                let target = (self.pc as i64).wrapping_add(offset) as u64;
                if target == self.pc {
                    return Ok(StepResult::Halt);
                }
                self.pc = target;
            } else {
                self.pc = self.pc.wrapping_add(4);
            }
            return Ok(StepResult::Continue);
        }

        // LDP/STP (signed offset, 64-bit)
        // 1 0 1 0 1 0 0 1 0 L imm7 Rt2 Rn Rt1
        //   STP: 0xA9000000  |  LDP: 0xA9400000
        if (insn & 0xFFC0_0000 == 0xA900_0000) || (insn & 0xFFC0_0000 == 0xA940_0000) {
            let l = (insn >> 22) & 1;
            // imm7 is signed, scaled by 8.
            let imm7_raw = ((insn >> 15) & 0x7F) as u32;
            let imm7 = ((imm7_raw as i32) << 25) >> 25; // sign-extend 7-bit
            let offset_bytes = (imm7 as i64) * 8;
            let rt2 = ((insn >> 10) & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let rt1 = (insn & 0x1F) as usize;
            let base = self.read_x(rn);
            let addr = (base as i64).wrapping_add(offset_bytes) as u64;
            if l == 0 {
                let v1 = self.read_x(rt1);
                let v2 = self.read_x(rt2);
                self.store64(mem, out, aic, block, addr, v1)?;
                self.store64(mem, out, aic, block, addr.wrapping_add(8), v2)?;
            } else {
                let v1 = self.load64(mem, aic, block, addr)?;
                let v2 = self.load64(mem, aic, block, addr.wrapping_add(8))?;
                self.write_x(rt1, v1);
                self.write_x(rt2, v2);
            }
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // LDXR Xt, [Xn] :: 1100_1000_0101_1111_0111_1100_Rn_Rt   (size=11, L=1, excl)
        if insn & 0xFFFF_FC00 == 0xC85F_7C00 {
            let rt = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let va = self.read_x(rn);
            let pa = self.translate_for_access(mem, va)?;
            let val = read_pa_u64(mem, pa)?;
            self.write_x(rt, val);
            self.exclusive_monitor = Some(pa);
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // STXR Ws, Xt, [Xn] :: 1100_1000_000_Rs_0_11111_Rn_Rt   (size=11, L=0, excl)
        if insn & 0xFFE0_FC00 == 0xC800_7C00 {
            let rt = (insn & 0x1F) as usize;
            let rn = ((insn >> 5) & 0x1F) as usize;
            let rs = ((insn >> 16) & 0x1F) as usize;
            let va = self.read_x(rn);
            let pa = self.translate_for_access(mem, va)?;
            let succeeded = self.exclusive_monitor == Some(pa);
            self.exclusive_monitor = None;
            if succeeded {
                let val = self.read_x(rt);
                self.last_store_pa = Some(pa);
                write_pa_u64(mem, pa, val)?;
                self.write_x(rs, 0);
            } else {
                self.write_x(rs, 1);
            }
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // CLREX :: canonical encoding 0xD503_3F5F (CRm=1111); the mask below
        // accepts any CRm, matching the disassembler.
        if insn & 0xFFFF_F0FF == 0xD503_305F {
            self.exclusive_monitor = None;
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // MSR/MRS sysreg
        if insn & 0xFFC0_0000 == 0xD500_0000 {
            let l = (insn >> 21) & 1;
            let op0 = (insn >> 19) & 0x3;
            let op1 = (insn >> 16) & 0x7;
            let crn = (insn >> 12) & 0xF;
            let crm = (insn >> 8) & 0xF;
            let op2 = (insn >> 5) & 0x7;
            let rt = (insn & 0x1F) as usize;

            if op0 < 2 {
                // WFI :: 0xD503_207F — set wfi_halted, advance PC.
                if insn == 0xD503_207F {
                    self.wfi_halted = true;
                    self.pc = self.pc.wrapping_add(4);
                    return Ok(StepResult::Continue);
                }
                // Other hints (NOP/YIELD/WFE/SEV…) and barriers — no-op.
                if insn & 0xFFFF_F01F == 0xD503_201F || insn & 0xFFFF_F01F == 0xD503_301F {
                    self.pc = self.pc.wrapping_add(4);
                    return Ok(StepResult::Continue);
                }
                // MSR <pstatefield>, #imm — same major class with op0=00, op1=011,
                // CRn=0100. Distinguished from DAIFSet/DAIFClr by op2.
                //   op2=110 → MSR DAIFSet, #imm4
                //   op2=111 → MSR DAIFClr, #imm4
                // The imm4 (D=8/A=4/I=2/F=1) maps directly onto our internal
                // 4-bit daif representation.
                if op0 == 0 && op1 == 3 && crn == 4 && (op2 == 6 || op2 == 7) {
                    let imm4 = (crm & 0xF) as u8;
                    if op2 == 6 {
                        self.daif |= imm4; // DAIFSet
                    } else {
                        self.daif &= !imm4; // DAIFClr
                    }
                    self.pc = self.pc.wrapping_add(4);
                    return Ok(StepResult::Continue);
                }
                return Err(format!(
                    "unsupported system instruction {:#010x} at pc={:#x}",
                    insn, self.pc
                ));
            }

            let sr = (op0, op1, crn, crm, op2);
            if l == 0 {
                let val = self.read_x(rt);
                self.write_sysreg(sr, val)?;
            } else {
                let val = self.read_sysreg(sr)?;
                self.write_x(rt, val);
            }
            self.pc = self.pc.wrapping_add(4);
            return Ok(StepResult::Continue);
        }

        // SVC #imm16 — sync exception from EL0 to EL1.
        if insn & 0xFFE0_001F == 0xD400_0001 {
            let imm16 = ((insn >> 5) & 0xFFFF) as u16;
            if self.current_el != 0 {
                return Err(format!(
                    "SVC from EL{} not modeled at pc={:#x}",
                    self.current_el, self.pc
                ));
            }
            self.elr_el1 = self.pc.wrapping_add(4);
            self.spsr_el1 = self.build_spsr();
            self.esr_el1 = (0x15u64 << 26) | (1 << 25) | (imm16 as u64);
            self.current_el = 1;
            self.daif = 0xF; // exception entry masks DAIF
            self.pc = self.vbar_el1.wrapping_add(0x400);
            self.exclusive_monitor = None; // any exception entry clears the monitor
            return Ok(StepResult::Continue);
        }

        // ERET
        if insn == 0xD69F_03E0 {
            let (elr, spsr) = match self.current_el {
                2 => (self.elr_el2, self.spsr_el2),
                1 => (self.elr_el1, self.spsr_el1),
                _ => {
                    return Err(format!(
                        "ERET from EL{} (no exception state) at pc={:#x}",
                        self.current_el, self.pc
                    ));
                }
            };
            let new_el = ((spsr >> 2) & 0x3) as u8;
            if new_el > self.current_el {
                return Err(format!(
                    "ERET would raise EL{} → EL{} (illegal) at pc={:#x}",
                    self.current_el, new_el, self.pc
                ));
            }
            let new_daif = ((spsr >> 6) & 0xF) as u8;
            self.current_el = new_el;
            self.daif = new_daif;
            self.pc = elr;
            return Ok(StepResult::Continue);
        }

        // B label
        if insn & 0xFC00_0000 == 0x1400_0000 {
            let imm26_raw = (insn & 0x03FF_FFFF) as i32;
            let imm26 = (imm26_raw << 6) >> 6;
            let offset = (imm26 as i64) * 4;
            let target = (self.pc as i64).wrapping_add(offset) as u64;
            if target == self.pc {
                return Ok(StepResult::Halt);
            }
            self.pc = target;
            return Ok(StepResult::Continue);
        }

        Err(format!("undefined instruction {:#010x} at pc={:#x}", insn, self.pc))
    }
}
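
// A minimal sketch (test-only) of a three-level walk over hand-written
// descriptors; table/leaf encodings follow do_translate_walk above.
#[cfg(test)]
mod walk_example {
    use super::*;

    #[test]
    fn three_level_walk_resolves_a_page() {
        let mut mem = vec![0u8; MEM_SIZE];
        // L1[0] → L2 table, L2[0] → L3 table (valid table descriptors, 0b11).
        write_u64(&mut mem, L1_TABLE_PA, L2_TABLE_PA | 0b11);
        write_u64(&mut mem, L2_TABLE_PA, L3_TABLE_PA | 0b11);
        // L3[4]: page at PA 0x4000 with AF (bit 10) and AP[0] (bit 6) set.
        write_u64(&mut mem, L3_TABLE_PA + 4 * 8, 0x4000 | (1 << 10) | (1 << 6) | 0b11);
        let mut core = Core::new(0, MPIDR_VALUES[0]);
        core.ttbr0_el1 = L1_TABLE_PA;
        core.tcr_el1 = 25; // T0SZ=25 → 39-bit VA → start at level 1
        let r = core.do_translate(&mem, 0x4123);
        assert_eq!(r.pa, Some(0x4123));
        assert_eq!(r.steps.len(), 3);
    }
}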

// === Cpu: the system shell — N cores + shared memory + UART buffer ============

#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub struct Cpu {
    cores: Vec<Core>,
    mem: Vec<u8>,
    output_buf: Vec<u8>,
    aic: Aic,
    block: Block,
    /// Number of Cpu::step() calls since reset.
    system_steps: u64,
    /// system_steps value at which the next timer IRQ fires.
    timer_next: u64,
    /// Total number of timer ticks observed; mostly for the UI.
    timer_ticks: u64,
}

impl Cpu {
    pub fn new() -> Cpu {
        let cores = (0..NUM_CORES as u8)
            .map(|i| Core::new(i, MPIDR_VALUES[i as usize]))
            .collect();
        let mut sys = Cpu {
            cores,
            mem: vec![0u8; MEM_SIZE],
            output_buf: Vec::new(),
            aic: Aic::new(),
            block: Block::new(),
            system_steps: 0,
            timer_next: TIMER_PERIOD,
            timer_ticks: 0,
        };
        load_demo(&mut sys.mem);
        setup_demo_pgtable(&mut sys.mem);
        sys
    }

    pub fn reset(&mut self) {
        for c in self.cores.iter_mut() {
            c.reset();
        }
        for b in self.mem.iter_mut() {
            *b = 0;
        }
        self.output_buf.clear();
        self.aic.reset();
        self.block.reset();
        self.system_steps = 0;
        self.timer_next = TIMER_PERIOD;
        self.timer_ticks = 0;
        load_demo(&mut self.mem);
        setup_demo_pgtable(&mut self.mem);
    }

    /// Step every core once. On the way in: bump system_steps; if the timer
    /// is due, broadcast IRQ_TIMER to all cores via AIC. Each core then either
    /// takes a pending IRQ (when DAIF.I is clear) or executes one instruction.
    pub fn step(&mut self) -> bool {
        self.system_steps = self.system_steps.saturating_add(1);
        if self.system_steps >= self.timer_next {
            for i in 0..NUM_CORES {
                self.aic.set_irq(i, IRQ_TIMER);
            }
            self.timer_next = self.system_steps + TIMER_PERIOD;
            self.timer_ticks = self.timer_ticks.saturating_add(1);
        }

        let mut any_runnable = false;
        for i in 0..self.cores.len() {
            if self.cores[i].halted {
                continue;
            }
            // A WFI'd core counts as "runnable" — the system still needs to
            // tick because a future timer IRQ can wake it. Only fully halted
            // cores stop the run() loop.
            any_runnable = true;
            if self.aic.has_pending(i) && !self.cores[i].irq_masked() {
                self.cores[i].take_irq();
            } else if self.cores[i].wfi_halted {
                // sleeping — no fetch this step
            } else {
                self.cores[i].step(
                    &mut self.mem,
                    &mut self.output_buf,
                    &mut self.aic,
                    &mut self.block,
                );
            }
            self.invalidate_remote_monitors(i);
        }
        any_runnable
    }

    /// Step a single core. Honours pending IRQs on that core (set either by
    /// the system timer in `step()` or by another core via IPI MMIO).
    pub fn step_core(&mut self, idx: u32) -> bool {
        let i = idx as usize;
        if i >= self.cores.len() {
            return false;
        }
        if self.cores[i].halted {
            return false;
        }
        let progressed = if self.aic.has_pending(i) && !self.cores[i].irq_masked() {
            self.cores[i].take_irq();
            true
        } else if self.cores[i].wfi_halted {
            false
        } else {
            self.cores[i].step(
                &mut self.mem,
                &mut self.output_buf,
                &mut self.aic,
                &mut self.block,
            )
        };
        self.invalidate_remote_monitors(i);
        progressed
    }

    /// Implements the cross-core half of the exclusive monitor: a store from
    /// core `i` clears every *other* core's reservation if it matches the
    /// stored address. This is what makes LDXR/STXR a real concurrency
    /// primitive — without it two cores could both think their CAS won.
    fn invalidate_remote_monitors(&mut self, i: usize) {
        let Some(pa) = self.cores[i].last_store_pa.take() else {
            return;
        };
        for (j, c) in self.cores.iter_mut().enumerate() {
            if j != i && c.exclusive_monitor == Some(pa) {
                c.exclusive_monitor = None;
            }
        }
    }

    pub fn run(&mut self, max: u32) -> u32 {
        let mut n = 0u32;
        while n < max && self.step() {
            n += 1;
        }
        n
    }

    pub fn system_steps(&self) -> u64 {
        self.system_steps
    }

    pub fn timer_period(&self) -> u64 {
        TIMER_PERIOD
    }

    /// System steps until the next timer IRQ fires (0 if it's due now).
    pub fn timer_remaining(&self) -> u64 {
        self.timer_next.saturating_sub(self.system_steps)
    }

    pub fn timer_ticks(&self) -> u64 {
        self.timer_ticks
    }

    pub fn aic_state(&self) -> AicState {
        self.aic.snapshot()
    }

    pub fn block_state(&self) -> BlockState {
        self.block.snapshot()
    }

    /// Replace disk sector 0 with the given UTF-8 text (padded with zeros to
    /// SECTOR_SIZE bytes). Also patches the live disk-buffer page at PA 0x6000
    /// so task B's printer reflects the change without a reset.
    pub fn set_disk_text(&mut self, text: &str) {
        let bytes = text.as_bytes();
        let n = bytes.len().min(SECTOR_SIZE as usize);
        for i in 0..(SECTOR_SIZE as usize) {
            let b = if i < n { bytes[i] } else { 0 };
            self.block.disk[i] = b;
            self.mem[0x6000 + i] = b;
        }
    }

    pub fn disk_text(&self) -> String {
        let mut end = SECTOR_SIZE as usize;
        for i in 0..(SECTOR_SIZE as usize) {
            if self.block.disk[i] == 0 {
                end = i;
                break;
            }
        }
        String::from_utf8_lossy(&self.block.disk[..end]).into_owned()
    }

    /// Returns one CoreState per core.
    pub fn state(&self) -> Vec<CoreState> {
        self.cores.iter().map(|c| c.snapshot()).collect()
    }

    pub fn mem_slice(&self, start: u32, len: u32) -> Vec<u8> {
        let s = (start as usize).min(self.mem.len());
        let e = (s + len as usize).min(self.mem.len());
        self.mem[s..e].to_vec()
    }

    pub fn output(&self) -> String {
        String::from_utf8_lossy(&self.output_buf).into_owned()
    }

    /// Walk page tables for `va` using the sysregs of `core_idx`.
    /// Returns `None` if `core_idx` is out of range.
    pub fn translate(&self, va: u64, core_idx: u32) -> Option<TranslationResult> {
        self.cores
            .get(core_idx as usize)
            .map(|c| c.do_translate(&self.mem, va))
    }

    pub fn entry_pc(&self) -> u64 {
        ENTRY_PC
    }

    pub fn uart_addr(&self) -> u64 {
        UART_OUT
    }

    pub fn l1_table_pa(&self) -> u64 {
        L1_TABLE_PA
    }

    pub fn num_cores(&self) -> u32 {
        self.cores.len() as u32
    }

    /// Reads the shared u64 atomic counter at PA 0x6FF8 — task A's LDXR/STXR
    /// loop bumps it once per scheduling round.
    pub fn atomic_counter(&self) -> u64 {
        read_pa_u64(&self.mem, ATOMIC_COUNTER_PA_PUB).unwrap_or(0)
    }
}
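
// A minimal usage sketch (test-only): construct the system and step it. The
// demo program's exact behaviour isn't assumed; we only check that stepping
// advances system time and that the configured core count is exposed.
#[cfg(test)]
mod cpu_example {
    use super::*;

    #[test]
    fn stepping_advances_system_time() {
        let mut cpu = Cpu::new();
        assert_eq!(cpu.system_steps(), 0);
        cpu.run(16);
        assert!(cpu.system_steps() > 0);
        assert_eq!(cpu.num_cores() as usize, NUM_CORES);
    }
}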

const ATOMIC_COUNTER_PA_PUB: u64 = 0x6FF8;

impl Default for Cpu {
    fn default() -> Self {
        Self::new()
    }
}
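
// A minimal sketch (test-only) of the cross-core monitor rule implemented by
// invalidate_remote_monitors: a store by core 1 to the address core 0 has
// reserved clears core 0's reservation, so core 0's STXR would fail.
#[cfg(test)]
mod monitor_example {
    use super::*;

    #[test]
    fn remote_store_clears_local_reservation() {
        let mut cpu = Cpu::new();
        cpu.cores[0].exclusive_monitor = Some(ATOMIC_COUNTER_PA_PUB);
        cpu.cores[1].last_store_pa = Some(ATOMIC_COUNTER_PA_PUB);
        cpu.invalidate_remote_monitors(1);
        assert_eq!(cpu.cores[0].exclusive_monitor, None);
    }
}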

// === Shared-memory helpers (free functions) ===================================

fn read_pa_u32(mem: &[u8], pa: u64) -> Result<u32, String> {
    let a = pa as usize;
    if a + 4 > mem.len() {
        return Err(format!("fetch fault at PA {:#x}", pa));
    }
    Ok(u32::from_le_bytes([mem[a], mem[a + 1], mem[a + 2], mem[a + 3]]))
}

fn read_pa_u64(mem: &[u8], pa: u64) -> Result<u64, String> {
    let a = pa as usize;
    if a + 8 > mem.len() {
        return Err(format!("load fault at PA {:#x}", pa));
    }
    Ok(u64::from_le_bytes([
        mem[a],
        mem[a + 1],
        mem[a + 2],
        mem[a + 3],
        mem[a + 4],
        mem[a + 5],
        mem[a + 6],
        mem[a + 7],
    ]))
}

fn write_pa_u64(mem: &mut [u8], pa: u64, val: u64) -> Result<(), String> {
    let a = pa as usize;
    if a + 8 > mem.len() {
        return Err(format!("store fault at PA {:#x}", pa));
    }
    mem[a..a + 8].copy_from_slice(&val.to_le_bytes());
    Ok(())
}

// Demo-loader helper: unchecked on purpose; callers pass fixed, in-range PAs
// (an out-of-range address panics on the slice index rather than faulting).
fn write_u64(mem: &mut [u8], addr: u64, val: u64) {
    let a = addr as usize;
    mem[a..a + 8].copy_from_slice(&val.to_le_bytes());
}

fn write_words(mem: &mut [u8], base: u64, words: &[u32]) {
    let mut off = base as usize;
    for w in words.iter() {
        mem[off..off + 4].copy_from_slice(&w.to_le_bytes());
        off += 4;
    }
}

fn decode_attrs(desc: u64) -> PageAttrs {
    PageAttrs {
        af: (desc >> 10) & 1 != 0,
        ap: ((desc >> 6) & 0x3) as u8,
        attr_idx: ((desc >> 2) & 0x7) as u8,
        sh: ((desc >> 8) & 0x3) as u8,
    }
}
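
// Worked example (test-only) of the descriptor bit layout decode_attrs reads:
// AF at bit 10, SH at bits 9:8, AP at bits 7:6, AttrIdx at bits 4:2.
#[cfg(test)]
mod attrs_example {
    use super::*;

    #[test]
    fn decodes_the_documented_fields() {
        let desc = (1u64 << 10) | (0b11 << 8) | (0b01 << 6) | (0b010 << 2);
        let a = decode_attrs(desc);
        assert!(a.af);
        assert_eq!(a.sh, 0b11);
        assert_eq!(a.ap, 0b01);
        assert_eq!(a.attr_idx, 0b010);
    }
}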

fn unsupported_sysreg(op: &str, sr: (u32, u32, u32, u32, u32), pc: u64) -> String {
    format!(
        "{op} of unsupported sysreg S{}_{}_C{}_C{}_{} at pc={:#x}",
        sr.0, sr.1, sr.2, sr.3, sr.4, pc
    )
}
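
// A couple of spot checks (test-only) against the disassembler below; the
// expected strings mirror its own format! calls.
#[cfg(test)]
mod disasm_example {
    use super::*;

    #[test]
    fn renders_known_encodings() {
        assert_eq!(disassemble(0xD280_0020, 0), "movz x0, #0x1");
        assert_eq!(disassemble(0xD69F_03E0, 0), "eret");
    }
}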
1459
1460// === Disassembler ============================================================
1461// Mirrors the decoder in Core::execute, producing ARM-style mnemonics. Used
1462// by the JS UI to render a Disassembly panel; not used by execution itself.
1463
pub fn disassemble(insn: u32, pc: u64) -> String {
    // MOVZ Xd, #imm16 {, LSL #hw*16}
    if insn & 0xFF80_0000 == 0xD280_0000 {
        let rd = insn & 0x1F;
        let hw = (insn >> 21) & 0x3;
        let imm = (insn >> 5) & 0xFFFF;
        return if hw == 0 {
            format!("movz x{rd}, #{imm:#x}")
        } else {
            format!("movz x{rd}, #{imm:#x}, lsl #{}", hw * 16)
        };
    }
    // ADD Xd, Xn, #imm12 {, LSL #12}
    if insn & 0xFF80_0000 == 0x9100_0000 {
        let rd = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let imm = (insn >> 10) & 0xFFF;
        let sh = (insn >> 22) & 1;
        return if sh == 1 {
            format!("add x{rd}, x{rn}, #{imm:#x}, lsl #12")
        } else {
            format!("add x{rd}, x{rn}, #{imm:#x}")
        };
    }
    // SUB Xd, Xn, #imm12
    if insn & 0xFF80_0000 == 0xD100_0000 {
        let rd = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let imm = (insn >> 10) & 0xFFF;
        return format!("sub x{rd}, x{rn}, #{imm:#x}");
    }
    // ADD Xd, Xn, Xm
    if insn & 0xFF20_FC00 == 0x8B00_0000 {
        let rd = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let rm = (insn >> 16) & 0x1F;
        return format!("add x{rd}, x{rn}, x{rm}");
    }
    // SUB Xd, Xn, Xm
    if insn & 0xFF20_FC00 == 0xCB00_0000 {
        let rd = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let rm = (insn >> 16) & 0x1F;
        return format!("sub x{rd}, x{rn}, x{rm}");
    }
    // STR Xt, [Xn, #imm12*8]
    if insn & 0xFFC0_0000 == 0xF900_0000 {
        let rt = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let imm = (insn >> 10) & 0xFFF;
        let off = imm * 8;
        return if off == 0 {
            format!("str x{rt}, [x{rn}]")
        } else {
            format!("str x{rt}, [x{rn}, #{off}]")
        };
    }
    // LDR Xt, [Xn, #imm12*8]
    if insn & 0xFFC0_0000 == 0xF940_0000 {
        let rt = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let imm = (insn >> 10) & 0xFFF;
        let off = imm * 8;
        return if off == 0 {
            format!("ldr x{rt}, [x{rn}]")
        } else {
            format!("ldr x{rt}, [x{rn}, #{off}]")
        };
    }
    // LDRB Wt, [Xn, #imm12]
    if insn & 0xFFC0_0000 == 0x3940_0000 {
        let rt = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let imm = (insn >> 10) & 0xFFF;
        return if imm == 0 {
            format!("ldrb w{rt}, [x{rn}]")
        } else {
            format!("ldrb w{rt}, [x{rn}, #{imm}]")
        };
    }
    // LDP/STP signed offset
    if insn & 0xFFC0_0000 == 0xA900_0000 || insn & 0xFFC0_0000 == 0xA940_0000 {
        let l = (insn >> 22) & 1;
        let imm7_raw = (insn >> 15) & 0x7F;
        let imm7 = ((imm7_raw as i32) << 25) >> 25;
        let off = imm7 * 8;
        let rt2 = (insn >> 10) & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let rt1 = insn & 0x1F;
        let mnem = if l == 0 { "stp" } else { "ldp" };
        return if off == 0 {
            format!("{mnem} x{rt1}, x{rt2}, [x{rn}]")
        } else {
            format!("{mnem} x{rt1}, x{rt2}, [x{rn}, #{off}]")
        };
    }
    // LDXR Xt, [Xn]
    if insn & 0xFFFF_FC00 == 0xC85F_7C00 {
        let rt = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        return format!("ldxr x{rt}, [x{rn}]");
    }
    // STXR Ws, Xt, [Xn]
    if insn & 0xFFE0_FC00 == 0xC800_7C00 {
        let rt = insn & 0x1F;
        let rn = (insn >> 5) & 0x1F;
        let rs = (insn >> 16) & 0x1F;
        return format!("stxr w{rs}, x{rt}, [x{rn}]");
    }
    // CLREX (any CRm — canonical CRm=1111)
    if insn & 0xFFFF_F0FF == 0xD503_305F {
        return "clrex".into();
    }
    // SVC #imm16
    if insn & 0xFFE0_001F == 0xD400_0001 {
        let imm = (insn >> 5) & 0xFFFF;
        return format!("svc #{imm:#x}");
    }
    // ERET
    if insn == 0xD69F_03E0 {
        return "eret".into();
    }
    // WFI
    if insn == 0xD503_207F {
        return "wfi".into();
    }
    // Hint class
    if insn & 0xFFFF_F01F == 0xD503_201F {
        let hint = (insn >> 5) & 0x7F;
        return match hint {
            0 => "nop".into(),
            1 => "yield".into(),
            2 => "wfe".into(),
            3 => "wfi".into(),
            n => format!("hint #{n}"),
        };
    }
    // Barrier class
    if insn & 0xFFFF_F01F == 0xD503_301F {
        let crm = (insn >> 8) & 0xF;
        let op2 = (insn >> 5) & 0x7;
        let mnem = match op2 {
            4 => Some("dsb"),
            5 => Some("dmb"),
            6 => Some("isb"),
            _ => None,
        };
        let domain = match crm {
            0xF => Some("sy"),
            0xE => Some("st"),
            0xD => Some("ld"),
            0xB => Some("ish"),
            _ => None,
        };
        if let (Some(m), Some(d)) = (mnem, domain) {
            return format!("{m} {d}");
        }
    }
    // MSR / MRS sysreg or DAIFSet/Clr
    if insn & 0xFFC0_0000 == 0xD500_0000 {
        let l = (insn >> 21) & 1;
        let op0 = (insn >> 19) & 0x3;
        let op1 = (insn >> 16) & 0x7;
        let crn = (insn >> 12) & 0xF;
        let crm = (insn >> 8) & 0xF;
        let op2 = (insn >> 5) & 0x7;
        let rt = insn & 0x1F;
        if op0 == 0 && op1 == 3 && crn == 4 && (op2 == 6 || op2 == 7) {
            let pf = if op2 == 6 { "DAIFSet" } else { "DAIFClr" };
            return format!("msr {pf}, #{crm:#x}");
        }
        if op0 >= 2 {
            let sr = sysreg_name(op0, op1, crn, crm, op2);
            return if l == 0 {
                format!("msr {sr}, x{rt}")
            } else {
                format!("mrs x{rt}, {sr}")
            };
        }
    }
    // CBZ / CBNZ Xt, label
    if insn & 0xFE00_0000 == 0xB400_0000 {
        let op = (insn >> 24) & 1;
        let imm19_raw = (insn >> 5) & 0x7_FFFF;
        let imm19 = ((imm19_raw as i32) << 13) >> 13;
        let target = (pc as i64).wrapping_add((imm19 as i64) * 4) as u64;
        let rt = insn & 0x1F;
        let mnem = if op == 0 { "cbz" } else { "cbnz" };
        return format!("{mnem} x{rt}, {target:#x}");
    }
    // B label
    if insn & 0xFC00_0000 == 0x1400_0000 {
        let imm26_raw = (insn & 0x03FF_FFFF) as i32;
        let imm26 = (imm26_raw << 6) >> 6;
        let target = (pc as i64).wrapping_add((imm26 as i64) * 4) as u64;
        return format!("b {target:#x}");
    }
    // Unknown
    format!(".word {insn:#010x}")
}

fn sysreg_name(op0: u32, op1: u32, crn: u32, crm: u32, op2: u32) -> String {
    match (op0, op1, crn, crm, op2) {
        (3, 0, 0, 0, 5) => "mpidr_el1".into(),
        (3, 0, 1, 0, 0) => "sctlr_el1".into(),
        (3, 0, 2, 0, 0) => "ttbr0_el1".into(),
        (3, 0, 2, 0, 2) => "tcr_el1".into(),
        (3, 0, 4, 0, 0) => "spsr_el1".into(),
        (3, 0, 4, 0, 1) => "elr_el1".into(),
        (3, 0, 4, 2, 2) => "currentel".into(),
        (3, 0, 5, 2, 0) => "esr_el1".into(),
        (3, 0, 12, 0, 0) => "vbar_el1".into(),
        (3, 4, 4, 0, 0) => "spsr_el2".into(),
        (3, 4, 4, 0, 1) => "elr_el2".into(),
        (3, 4, 5, 2, 0) => "esr_el2".into(),
        (3, 4, 12, 0, 0) => "vbar_el2".into(),
        _ => format!("s{op0}_{op1}_c{crn}_c{crm}_{op2}"),
    }
}

// === Demo program =============================================================

fn load_demo(mem: &mut [u8]) {
    // Memory regions; both cores execute the same code:
    //   PA 0x4000  kernel boot
    //   PA 0x4800  sync handler (SVC from task A → IPI to core 1 via the AIC)
    //   PA 0x4880  IRQ handler (minimal: ACK the AIC, ERET to the preempted task)
    //   PA 0x4D00  task A — atomic counter + IPI generator (pinned to core 0)
    //   PA 0x4E00  task B — disk printer (walks 0x6000 byte by byte)
    //   PA 0x4F00  core 0's slot region (entry + save_ptr + 2 save areas)
    //   PA 0x5000  core 1's slot region (same layout)
    //
    // Each core derives "my slot region" from MPIDR_EL1: bit 8 of MPIDR
    // distinguishes our two clusters (P-core 0x80000000, E-core 0x80000100),
    // so mpidr_offset = mpidr - 0x80000000 ∈ {0, 0x100} maps directly:
    //   slot_base = 0x4F00 + mpidr_offset → 0x4F00 / 0x5000
    //   initial_task = TASK_A_ENTRY + mpidr_offset → 0x4D00 / 0x4E00
    // So core 0 boots into task A and core 1 boots into task B; they run
    // concurrently with independent state and the UART output truly
    // interleaves instead of duplicating.
    //
    // Within a slot region:
    //   +0x00  current task entry (8 bytes)
    //   +0x08  current task save-area pointer (8 bytes)
    //   +0x10  save area 0 (32 bytes — X0..X3)
    //   +0x30  save area 1 (32 bytes — X0..X3)
    // A task-switching scheduler would toggle between the two save areas via
    // (2*slot_base + 0x40 - current_save_ptr) — no per-core constants baked
    // into the handler. The minimal IRQ handler below keeps tasks pinned, so
    // the save areas stay at their boot values.
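    // Worked instance of the toggle (illustrative numbers, core 0's slot):
    // with slot_base = 0x4F00, 2*slot_base + 0x40 = 0x9E40, and
    //   0x9E40 - 0x4F10 = 0x4F30,   0x9E40 - 0x4F30 = 0x4F10,
    // i.e. the subtraction flips save area 0 ↔ save area 1.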
    const SPSR_EL1H_DAIF: u32 = 0x3C5;
    const VBAR: u32 = 0x4400;
    const SYNC_HANDLER_PA: u64 = 0x4800;
    const IRQ_HANDLER_PA: u64 = 0x4880;
    const TASK_A_ENTRY: u32 = 0x4D00;
    const TASK_B_ENTRY: u32 = 0x4E00;
    const SLOT_BASE_BASE: u32 = 0x4F00; // base for core 0
    const MPIDR_BASE_HI: u32 = 0x8000; // moved to the upper half via LSL #16
    const DISK_BUF_PA: u32 = 0x6000;
    // Atomic counter sits in the same user-mapped page as the disk buffer
    // (0x6000–0x6FFF), well past the 64-byte sector. Task A bumps it via
    // LDXR/STXR; the UI reads it from RAM.
    const ATOMIC_COUNTER_PA: u32 = 0x6FF8;
    // Sync handler dispatches IPIs to core 1 via AIC_REG_IPI_SET (offset
    // 0x10 = imm12 #2 in an 8-byte-scaled STR).
    const IPI_TARGET: u32 = 1;
    const IPI_OFFSET_WORDS: u32 = (AIC_REG_IPI_SET / 8) as u32;
    const EL1_ENTRY: u32 = ENTRY_PC as u32 + 5 * 4;

    let kernel: [u32; 35] = [
        // --- EL2 prologue → ERET to EL1 ---
        movz(9, EL1_ENTRY, 0),
        msr_elr_el2(9),
        movz(9, SPSR_EL1H_DAIF, 0),
        msr_spsr_el2(9),
        eret(),
        // --- EL1: MMU bring-up ---
        movz(9, L1_TABLE_PA as u32, 0),
        msr_ttbr0(9),
        movz(9, 25, 0),
        msr_tcr(9),
        movz(9, 1, 0),
        msr_sctlr(9),
        isb(),
        // --- EL1: synchronous disk read sector 0 → DISK_BUF_PA ---
        movz(9, BLK_BASE as u32, 0),
        movz(10, 0, 0),
        str_imm(10, 9, 0), // SECTOR = 0
        movz(10, DISK_BUF_PA, 0),
        str_imm(10, 9, 1), // BUF_ADDR = 0x6000
        movz(10, BLK_CMD_READ as u32, 0),
        str_imm(10, 9, 2), // CMD = READ
        // --- EL1: install vector base ---
        movz(9, VBAR, 0),
        msr_vbar_el1(9),
        // --- EL1: derive per-core slot region from MPIDR_EL1 ---
        // X9 = mpidr_offset ∈ {0, 0x100}
        mrs_mpidr(9),
        movz(10, MPIDR_BASE_HI, 1), // X10 = 0x80000000 (LSL #16)
        sub_reg(9, 9, 10),
        // X14 = my slot base
        movz(10, SLOT_BASE_BASE, 0),
        add_reg(14, 10, 9),
        // X11 = my initial task entry
        movz(10, TASK_A_ENTRY, 0),
        add_reg(11, 10, 9),
        // X12 = my initial save area = slot_base + 0x10
        add_imm(12, 14, 0x10),
        // Initialise my slot: [slot_base+0]=entry, [slot_base+8]=save_ptr
        str_imm(11, 14, 0),
        str_imm(12, 14, 1),
        // --- EL1: ERET into my initial task at EL0t with DAIF=0 ---
        msr_elr_el1(11),
        movz(10, 0, 0),
        msr_spsr_el1(10),
        eret(),
    ];
    // Sync handler at VBAR+0x400 — kernel-side IPI service. Task A calls
    // SVC #0 to ask the kernel to ping core 1 via the AIC, then ERETs.
    // Real systems route IPIs through a privileged path because the AIC
    // page is AP=00 (kernel-only); user code must trap to ask.
    let sync_handler: [u32; 4] = [
        movz(9, AIC_BASE as u32, 0),      // 0: X9 = AIC_BASE
        movz(10, IPI_TARGET, 0),          // 1: X10 = peer core id (1)
        str_imm(10, 9, IPI_OFFSET_WORDS), // 2: STR X10, [X9, #0x10]
        eret(),                           // 3: back to task A
    ];
    // IRQ handler at VBAR+0x480 — minimal: ack the AIC and ERET back to the
    // *same* preempted task. Tasks are pinned per core (core 0 = task A,
    // core 1 = task B), so no context switch is needed and the UART output
    // stays monotonic (only one core ever writes it).
    let irq_handler: [u32; 3] = [
        movz(9, AIC_BASE as u32, 0), // 0: X9 = AIC_BASE
        ldr_imm(10, 9, 0),           // 1: X10 = irq id (read clears pending)
        eret(),                      // 2: return to preempted task
    ];
    // Task A — atomic counter + IPI generator. Each scheduling round it
    // (1) atomically bumps the u64 at PA 0x6FF8 via an LDXR/STXR pair,
    // (2) issues SVC #0 to ask the kernel to ping core 1 via the AIC, and
    // (3) WFIs until the next IRQ. The MOVZ that loads the counter
    // address runs once at first entry; subsequent resumes loop back to
    // the LDXR. Does not touch the UART, so task B's output stays clean.
    let task_a: [u32; 8] = [
        movz(4, ATOMIC_COUNTER_PA, 0), // 0: X4 = &counter (PA 0x6FF8)
        ldxr(5, 4),                    // 1: X5 = *X4, set local monitor
        add_imm(5, 5, 1),              // 2: X5 += 1
        stxr(6, 5, 4),                 // 3: store-exclusive, W6 = 0/1 (ok/retry)
        cbnz(6, -3),                   // 4: if W6 != 0 → branch back to LDXR
        svc_imm(0),                    // 5: kernel: please ping core 1
        wfi(),                         // 6: sleep until next IRQ
        b_offset(-6),                  // 7: on resume → back to LDXR
    ];
    // Task B — "disk printer". Walks the 64-byte disk buffer at PA 0x6000
    // once, emitting each byte to the UART. On hitting the null terminator
    // it parks itself in WFI forever so the disk content is printed exactly
    // once. The trailing `b -1` keeps us pinned at the WFI on every IRQ
    // resume (otherwise PC advances past WFI into UDF).
    let task_b: [u32; 10] = [
        movz(1, UART_OUT as u32, 0), // 0: X1 = UART
        movz(4, DISK_BUF_PA, 0),     // 1: X4 = disk buffer base
        add_reg(4, 4, 3),            // 2: X4 += X3
        ldrb_imm(0, 4, 0),           // 3: W0 = byte at X4
        cbz(0, 4),                   // 4: if zero, branch to inst 8 (wfi)
        str_imm(0, 1, 0),            // 5: STR X0, [X1] — emit byte to UART
        add_imm(3, 3, 1),            // 6: X3 += 1
        b_offset(-6),                // 7: → inst 1 (re-init X4 = 0x6000 + X3)
        wfi(),                       // 8: parked — printed once, sleep
        b_offset(-1),                // 9: on IRQ resume, jump back to wfi
    ];

    write_words(mem, ENTRY_PC, &kernel);
    write_words(mem, SYNC_HANDLER_PA, &sync_handler);
    write_words(mem, IRQ_HANDLER_PA, &irq_handler);
    write_words(mem, TASK_A_ENTRY as u64, &task_a);
    write_words(mem, TASK_B_ENTRY as u64, &task_b);
}

fn setup_demo_pgtable(mem: &mut [u8]) {
    write_u64(mem, L1_TABLE_PA, L2_TABLE_PA | 0b11);
    write_u64(mem, L2_TABLE_PA, L3_TABLE_PA | 0b11);
    // AF=1, valid+page (b11). AP at bits [7:6]:
    //   AP=00 → kernel R/W, EL0 no access.
    //   AP=01 → kernel R/W, EL0 R/W.
    let kern = (1u64 << 10) | 0b11; // AP=00, kernel-only
    let user = (1u64 << 10) | (0b01 << 6) | 0b11; // AP=01, user-accessible
    // User-accessible pages — tasks fetch / load / store from these.
    write_u64(mem, L3_TABLE_PA + 8, 0x1000 | user); // UART
    write_u64(mem, L3_TABLE_PA + 4 * 8, 0x4000 | user); // program/code
    write_u64(mem, L3_TABLE_PA + 6 * 8, 0x6000 | user); // disk buffer (task B)
    // Kernel-only pages — only EL1 (kernel handlers) can touch these.
    write_u64(mem, L3_TABLE_PA + 2 * 8, 0x2000 | kern); // AIC MMIO
    write_u64(mem, L3_TABLE_PA + 3 * 8, 0x3000 | kern); // Block MMIO
    write_u64(mem, L3_TABLE_PA + 5 * 8, 0x5000 | kern); // core 1 slot region
    write_u64(mem, L3_TABLE_PA + 8 * 8, 0x8000 | kern); // L1 table page
    write_u64(mem, L3_TABLE_PA + 9 * 8, 0x9000 | kern); // L2 table page
    write_u64(mem, L3_TABLE_PA + 0xA * 8, 0xA000 | kern); // L3 table page
}

// === Instruction encoders =====================================================

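/// MOVZ Xd, #imm16 {, LSL #(hw*16)}: loads imm16 into 16-bit slice `hw`,
/// zeroing the rest of the register.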
const fn movz(rd: u32, imm16: u32, hw: u32) -> u32 {
    0xD280_0000 | ((hw & 0x3) << 21) | ((imm16 & 0xFFFF) << 5) | (rd & 0x1F)
}

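/// ADD Xd, Xn, #imm12 (64-bit, unshifted form).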
const fn add_imm(rd: u32, rn: u32, imm12: u32) -> u32 {
    0x9100_0000 | ((imm12 & 0xFFF) << 10) | ((rn & 0x1F) << 5) | (rd & 0x1F)
}

#[allow(dead_code)] // only exercised by the tests
const fn sub_imm(rd: u32, rn: u32, imm12: u32) -> u32 {
    0xD100_0000 | ((imm12 & 0xFFF) << 10) | ((rn & 0x1F) << 5) | (rd & 0x1F)
}

const fn add_reg(rd: u32, rn: u32, rm: u32) -> u32 {
    0x8B00_0000 | ((rm & 0x1F) << 16) | ((rn & 0x1F) << 5) | (rd & 0x1F)
}

const fn sub_reg(rd: u32, rn: u32, rm: u32) -> u32 {
    0xCB00_0000 | ((rm & 0x1F) << 16) | ((rn & 0x1F) << 5) | (rd & 0x1F)
}

const fn ldrb_imm(rt: u32, rn: u32, imm12: u32) -> u32 {
    0x3940_0000 | ((imm12 & 0xFFF) << 10) | ((rn & 0x1F) << 5) | (rt & 0x1F)
}

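/// CBZ Xt, label: `words` is a signed branch offset in 4-byte instruction
/// words, relative to this instruction (same convention as `cbnz` and
/// `b_offset`).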
const fn cbz(rt: u32, words: i32) -> u32 {
    let imm19 = (words as u32) & 0x7_FFFF;
    0xB400_0000 | (imm19 << 5) | (rt & 0x1F)
}

const fn cbnz(rt: u32, words: i32) -> u32 {
    let imm19 = (words as u32) & 0x7_FFFF;
    0xB500_0000 | (imm19 << 5) | (rt & 0x1F)
}

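/// STR Xt, [Xn, #imm12*8]: unsigned offset, scaled to 8-byte units.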
const fn str_imm(rt: u32, rn: u32, imm12: u32) -> u32 {
    0xF900_0000 | ((imm12 & 0xFFF) << 10) | ((rn & 0x1F) << 5) | (rt & 0x1F)
}

const fn ldr_imm(rt: u32, rn: u32, imm12: u32) -> u32 {
    0xF940_0000 | ((imm12 & 0xFFF) << 10) | ((rn & 0x1F) << 5) | (rt & 0x1F)
}

/// STP Xt1, Xt2, [Xn, #imm7*8] (signed-offset).
#[allow(dead_code)] // only exercised by the tests
const fn stp_imm(rt1: u32, rt2: u32, rn: u32, imm7: i32) -> u32 {
    let imm = (imm7 as u32) & 0x7F;
    0xA900_0000 | (imm << 15) | ((rt2 & 0x1F) << 10) | ((rn & 0x1F) << 5) | (rt1 & 0x1F)
}

/// LDP Xt1, Xt2, [Xn, #imm7*8] (signed-offset).
#[allow(dead_code)] // only exercised by the tests
const fn ldp_imm(rt1: u32, rt2: u32, rn: u32, imm7: i32) -> u32 {
    let imm = (imm7 as u32) & 0x7F;
    0xA940_0000 | (imm << 15) | ((rt2 & 0x1F) << 10) | ((rn & 0x1F) << 5) | (rt1 & 0x1F)
}

/// Branch-to-self (`B .`), the demo's halt/park idiom.
#[allow(dead_code)] // only exercised by the tests
const fn b_self() -> u32 {
    0x1400_0000
}

/// Encode `B label` with a signed instruction-word offset (-1 = previous insn).
const fn b_offset(words: i32) -> u32 {
    let imm26 = (words as u32) & 0x03FF_FFFF;
    0x1400_0000 | imm26
}

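/// Generic MSR (register) encoding: the (op0, op1, CRn, CRm, op2) tuple names
/// the system register; `mrs_sysreg` sets bit 21 (L) for the read direction.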
const fn msr_sysreg(rt: u32, op0: u32, op1: u32, crn: u32, crm: u32, op2: u32) -> u32 {
    0xD500_0000
        | ((op0 & 0x3) << 19)
        | ((op1 & 0x7) << 16)
        | ((crn & 0xF) << 12)
        | ((crm & 0xF) << 8)
        | ((op2 & 0x7) << 5)
        | (rt & 0x1F)
}

const fn msr_ttbr0(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 2, 0, 0)
}
const fn msr_tcr(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 2, 0, 2)
}
const fn msr_sctlr(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 1, 0, 0)
}
const fn msr_elr_el2(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 4, 4, 0, 1)
}
const fn msr_spsr_el2(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 4, 4, 0, 0)
}
const fn msr_vbar_el1(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 12, 0, 0)
}
const fn msr_elr_el1(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 4, 0, 1)
}
const fn msr_spsr_el1(rt: u32) -> u32 {
    msr_sysreg(rt, 3, 0, 4, 0, 0)
}

/// MRS Xt, sysreg :: MSR with L=1 (bit 21 set).
const fn mrs_sysreg(rt: u32, op0: u32, op1: u32, crn: u32, crm: u32, op2: u32) -> u32 {
    msr_sysreg(rt, op0, op1, crn, crm, op2) | (1 << 21)
}

const fn mrs_mpidr(rt: u32) -> u32 {
    mrs_sysreg(rt, 3, 0, 0, 0, 5)
}

const fn isb() -> u32 {
    0xD503_3FDF
}

const fn wfi() -> u32 {
    0xD503_207F
}

/// LDXR Xt, [Xn] — load-exclusive 64-bit. Sets the local monitor to PA(Xn).
const fn ldxr(rt: u32, rn: u32) -> u32 {
    0xC85F_7C00 | ((rn & 0x1F) << 5) | (rt & 0x1F)
}

/// STXR Ws, Xt, [Xn] — store-exclusive 64-bit. Writes Ws=0 on success, 1 on
/// monitor mismatch. Always clears the local monitor.
const fn stxr(rs: u32, rt: u32, rn: u32) -> u32 {
    0xC800_7C00 | ((rs & 0x1F) << 16) | ((rn & 0x1F) << 5) | (rt & 0x1F)
}

/// CLREX — clear the local monitor unconditionally.
#[allow(dead_code)]
const fn clrex() -> u32 {
    0xD503_3F5F
}

const fn eret() -> u32 {
    0xD69F_03E0
}

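/// SVC #imm16: supervisor call; the demo's EL0 → EL1 syscall entry (lands in
/// the sync vector at VBAR+0x400).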
const fn svc_imm(imm16: u32) -> u32 {
    0xD400_0001 | ((imm16 & 0xFFFF) << 5)
}

// === Tests ====================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn disassemble_covers_supported_instructions() {
        assert_eq!(disassemble(movz(9, 0x4014, 0), 0x4000), "movz x9, #0x4014");
        assert_eq!(
            disassemble(movz(9, 0x8000, 1), 0x4000),
            "movz x9, #0x8000, lsl #16"
        );
        assert_eq!(disassemble(add_imm(0, 0, 0x1), 0x4000), "add x0, x0, #0x1");
        assert_eq!(disassemble(sub_imm(3, 3, 1), 0x4000), "sub x3, x3, #0x1");
        assert_eq!(disassemble(add_reg(14, 9, 14), 0x4000), "add x14, x9, x14");
        assert_eq!(disassemble(sub_reg(13, 13, 11), 0x4000), "sub x13, x13, x11");
        assert_eq!(disassemble(str_imm(0, 1, 0), 0x4000), "str x0, [x1]");
        assert_eq!(disassemble(str_imm(0, 1, 1), 0x4000), "str x0, [x1, #8]");
        assert_eq!(disassemble(ldr_imm(11, 14, 0), 0x4000), "ldr x11, [x14]");
        assert_eq!(disassemble(ldrb_imm(0, 4, 0), 0x4000), "ldrb w0, [x4]");
        assert_eq!(disassemble(stp_imm(0, 1, 12, 0), 0x4000), "stp x0, x1, [x12]");
        assert_eq!(disassemble(svc_imm(0), 0x4000), "svc #0x0");
        assert_eq!(disassemble(eret(), 0x4000), "eret");
        assert_eq!(disassemble(wfi(), 0x4000), "wfi");
        assert_eq!(disassemble(isb(), 0x4000), "isb sy");
        assert_eq!(disassemble(ldxr(5, 4), 0x4000), "ldxr x5, [x4]");
        assert_eq!(disassemble(stxr(6, 5, 4), 0x4000), "stxr w6, x5, [x4]");
        assert_eq!(disassemble(clrex(), 0x4000), "clrex");
        assert_eq!(disassemble(msr_ttbr0(9), 0x4000), "msr ttbr0_el1, x9");
        assert_eq!(disassemble(mrs_mpidr(14), 0x4000), "mrs x14, mpidr_el1");
        assert_eq!(disassemble(0xD503_42FFu32, 0x4000), "msr DAIFClr, #0x2");
        // Branch with PC-relative target.
        assert_eq!(disassemble(b_offset(-1), 0x4020), "b 0x401c");
        // Unknown instruction.
        assert_eq!(disassemble(0xDEAD_BEEF, 0), ".word 0xdeadbeef");
    }
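
    // Supplementary decode coverage: branches with negative offsets, register
    // pairs, barriers, and hints; same pure encoder-to-string style as above.
    #[test]
    fn disassemble_covers_branches_hints_and_barriers() {
        assert_eq!(disassemble(cbz(0, 4), 0x4D10), "cbz x0, 0x4d20");
        assert_eq!(disassemble(cbnz(6, -3), 0x4D10), "cbnz x6, 0x4d04");
        assert_eq!(disassemble(stp_imm(0, 1, 12, 2), 0x4000), "stp x0, x1, [x12, #16]");
        assert_eq!(disassemble(ldp_imm(2, 3, 4, -1), 0x4000), "ldp x2, x3, [x4, #-8]");
        // Barriers: DSB SY = 0xD503_3F9F, DMB ISH = 0xD503_3BBF.
        assert_eq!(disassemble(0xD503_3F9F, 0x4000), "dsb sy");
        assert_eq!(disassemble(0xD503_3BBF, 0x4000), "dmb ish");
        // Hint class: YIELD, WFE, and an unnamed hint number.
        assert_eq!(disassemble(0xD503_203F, 0x4000), "yield");
        assert_eq!(disassemble(0xD503_205F, 0x4000), "wfe");
        assert_eq!(disassemble(0xD503_211F, 0x4000), "hint #8");
        // MSR DAIFSet immediate form.
        assert_eq!(disassemble(0xD503_4FDF, 0x4000), "msr DAIFSet, #0xf");
    }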

    #[test]
    fn boots_two_cores_at_el2() {
        let cpu = Cpu::new();
        assert_eq!(cpu.cores.len(), 2);
        assert_eq!(cpu.cores[0].current_el, 2);
        assert_eq!(cpu.cores[1].current_el, 2);
        assert_eq!(cpu.cores[0].pc, ENTRY_PC);
        assert_eq!(cpu.cores[1].pc, ENTRY_PC);
        assert_eq!(cpu.cores[0].mpidr, 0x8000_0000);
        assert_eq!(cpu.cores[1].mpidr, 0x8000_0100);
    }

    #[test]
    fn cores_split_to_different_tasks_before_first_tick() {
        // 35-inst kernel, timer at step 80. After 60 steps both cores have
        // ERETed: core 0 into task A (silent WFI), core 1 into task B (the
        // disk printer that walks "AArch64 disk image - sector 0\n").
        let mut cpu = Cpu::new();
        cpu.run(60);
        let out = cpu.output();
        assert!(!out.is_empty(), "no output yet: {:?}", out);
        // Only task B prints; output begins with the disk content prefix.
        assert!(out.starts_with('A'), "expected disk-content prefix, got: {:?}", out);
        assert!(!cpu.cores[0].halted);
        assert!(!cpu.cores[1].halted);
    }

    #[test]
    fn tasks_pinned_per_core_after_irqs() {
        let mut cpu = Cpu::new();
        // Run long enough for ≥ 2 timer ticks. With the minimal scheduler
        // each core's task entry stays the same across IRQs.
        cpu.run(200);
        assert!(cpu.timer_ticks >= 2);
        let out = cpu.output();
        // Task B (core 1) printed once and parked in WFI. Output is the disk
        // content with no task-A interleaving, length capped at 64 bytes.
        assert!(out.starts_with('A'), "expected disk-content prefix, got: {:?}", out);
        assert!(out.len() <= 64, "task B should print once, got {} bytes", out.len());
    }

    #[test]
    fn each_core_uses_its_own_slot_region() {
        let mut cpu = Cpu::new();
        cpu.run(400);
        let read_u64 = |mem: &[u8], pa: usize| -> u64 {
            u64::from_le_bytes(mem[pa..pa + 8].try_into().unwrap())
        };
        // Core 0's slot is at 0x4F00, core 1's at 0x5000. With pinned tasks
        // they stay on their initial entries (A on core 0, B on core 1).
        let core0_entry = read_u64(&cpu.mem, 0x4F00);
        let core1_entry = read_u64(&cpu.mem, 0x5000);
        assert_eq!(core0_entry, 0x4D00, "core 0 should be on task A");
        assert_eq!(core1_entry, 0x4E00, "core 1 should be on task B");
        // Save-area pointers stay inside their own slot region.
        let core0_save = read_u64(&cpu.mem, 0x4F08);
        let core1_save = read_u64(&cpu.mem, 0x5008);
        assert!((0x4F00..0x5000).contains(&core0_save), "core0 save_ptr escaped: {:#x}", core0_save);
        assert!((0x5000..0x6000).contains(&core1_save), "core1 save_ptr escaped: {:#x}", core1_save);
    }

    #[test]
    fn kernel_disk_read_populates_buffer() {
        let mut cpu = Cpu::new();
        // 35-inst kernel; run 50 to ensure both cores finished the disk read.
        cpu.run(50);
        let buf = &cpu.mem[0x6000..0x6010];
        assert_eq!(&buf[..7], b"AArch64");
        let snap = cpu.block.snapshot();
        assert!(snap.total_reads >= 2, "total_reads = {}", snap.total_reads);
        assert_eq!(snap.status, BLK_STATUS_OK);
    }

    #[test]
    fn aic_acks_clear_pending_bits() {
        let mut cpu = Cpu::new();
        cpu.run(200);
        // Many ticks have fired and software has ACKed each one. An ACK
        // clears the pending bit, so pending state must not accumulate
        // without bound: at any sample point the bit is either clear or
        // freshly raised.
        let aic_state = cpu.aic.snapshot();
        // Both cores ACK every tick, so total_acks ≈ 2 × timer_ticks minus
        // any in flight. Conservative check: at least timer_ticks ACKs.
        assert!(
            aic_state.total_acks >= cpu.timer_ticks,
            "total_acks {} < timer_ticks {}",
            aic_state.total_acks,
            cpu.timer_ticks
        );
    }

    #[test]
    fn daif_restored_through_eret() {
        let mut cpu = Cpu::new();
        cpu.run(5);
        assert_eq!(cpu.cores[0].current_el, 1);
        assert_eq!(cpu.cores[0].daif, 0xF);
        // Remaining kernel boot is 30 more instructions (35 total per core)
        // before ERETing into EL0 with SPSR_EL1=0.
        cpu.run(30);
        assert_eq!(cpu.cores[0].current_el, 0);
        assert_eq!(cpu.cores[0].daif, 0);
    }

    #[test]
    fn ap_bits_block_user_access_to_kernel_pages() {
        let mut cpu = Cpu::new();
        // Boot through the kernel so the MMU is active on both cores.
        cpu.run(40);
        // Core 0 is at EL0 (running task A). Translation of the UART page
        // (user-accessible) should succeed.
        let r_user = cpu.cores[0].do_translate(&cpu.mem, 0x1000);
        assert_eq!(r_user.pa, Some(0x1000));
        // Translation of the AIC at EL0 should fail with a permission fault
        // when checked through translate_for_access.
        let res = cpu.cores[0].translate_for_access(&cpu.mem, 0x2000);
        assert!(res.is_err(), "EL0 access to AIC should fault: {:?}", res);
        let err = res.unwrap_err();
        assert!(
            err.contains("permission fault"),
            "expected permission fault, got: {}",
            err
        );

        // Force core 0 into EL1 — kernel can access the AIC freely.
        cpu.cores[0].current_el = 1;
        let res = cpu.cores[0].translate_for_access(&cpu.mem, 0x2000);
        assert_eq!(res, Ok(0x2000));
    }

    #[test]
    fn wfi_sleeps_with_irqs_masked() {
        // Hand-built program: print 'X', WFI, B-back. With DAIF.I masked the
        // timer won't wake us — we test that WFI parks the core indefinitely
        // and the rest of the system keeps ticking.
        let mut cpu = Cpu::new();
        let prog = [
            movz(1, UART_OUT as u32, 0),
            movz(0, b'X' as u32, 0),
            str_imm(0, 1, 0),
            wfi(),
            b_offset(-1),
        ];
        write_words(&mut cpu.mem, ENTRY_PC, &prog);
        cpu.cores[0].sctlr_el1 = 0;
        cpu.cores[0].daif = 0xF; // IRQs masked, WFI is permanent here
        cpu.cores[1].halted = true;

        cpu.run(20);
        assert!(cpu.cores[0].wfi_halted, "WFI not entered");
        assert_eq!(cpu.output(), "X");

        // The system_steps clock keeps advancing while a core is WFI'd.
        let before = cpu.system_steps;
        cpu.run(50);
        assert!(cpu.system_steps > before, "system_steps didn't advance during WFI");
        assert!(cpu.cores[0].wfi_halted, "WFI lifted with IRQs masked?");
    }

    #[test]
    fn ldrb_cbnz_sub_imm_loop() {
        // Tiny program: count down X0 from 5 to 0 using SUB imm + CBNZ.
        let mut cpu = Cpu::new();
        let prog = [
            movz(0, 5, 0),                // X0 = 5
            sub_imm(0, 0, 1),             // X0 -= 1
            cbnz(0, -1),                  // if X0 != 0, branch back -1 word
            b_self(),                     // halt
        ];
        write_words(&mut cpu.mem, ENTRY_PC, &prog);
        cpu.cores[0].sctlr_el1 = 0;
        cpu.cores[1].sctlr_el1 = 0;
        cpu.run(50);
        assert_eq!(cpu.cores[0].x[0], 0);
        // LDRB from plain RAM at PA 0x100 (only stores to the UART have side
        // effects): make sure the byte zero-extends into a u64.
        cpu.mem[0x100] = 0xAB;
        let prog2 = [
            movz(1, 0x100, 0),
            ldrb_imm(2, 1, 0),
            b_self(),
        ];
        cpu.cores[0].pc = ENTRY_PC;
        cpu.cores[0].halted = false;
        cpu.cores[0].steps = 0;
        write_words(&mut cpu.mem, ENTRY_PC, &prog2);
        cpu.run(20);
        assert_eq!(cpu.cores[0].x[2], 0xAB);
    }

    #[test]
    fn daifclr_clears_i_bit() {
        // Verify the MSR DAIFClr immediate path — start at EL2 with daif=0xF,
        // run a hand-built program that does DAIFClr #2 (clear I).
        let mut cpu = Cpu::new();
        // Encode MSR DAIFClr #2: 0xD503_42FF.
        let prog = [0xD503_42FFu32, b_self()];
        write_words(&mut cpu.mem, ENTRY_PC, &prog);
        cpu.cores[0].sctlr_el1 = 0; // identity-map by bypassing MMU
        cpu.cores[0].daif = 0xF;
        cpu.run(5);
        // DAIFClr #2 = clear bit 1 (I) → daif = 0xD.
        assert_eq!(cpu.cores[0].daif, 0xD);
    }

    #[test]
    fn mpidr_is_per_core() {
        let mut cpu = Cpu::new();
        // Run until both cores enter EL1. After 5 steps the EL2 prologue is done.
        cpu.run(5);
        // Core 0 reads MPIDR_EL1 (S3_0_C0_C0_5) — using the read_sysreg path directly.
        let v0 = cpu.cores[0].read_sysreg((3, 0, 0, 0, 5)).unwrap();
        let v1 = cpu.cores[1].read_sysreg((3, 0, 0, 0, 5)).unwrap();
        assert_eq!(v0, 0x8000_0000);
        assert_eq!(v1, 0x8000_0100);
        // Writing MPIDR is forbidden.
        assert!(cpu.cores[0].write_sysreg((3, 0, 0, 0, 5), 0).is_err());
    }
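
    // Supplementary check of the generic sysreg-name fallback and the
    // unsupported-sysreg trap message (pure string formatting, no CPU state).
    #[test]
    fn sysreg_name_falls_back_to_generic_form() {
        assert_eq!(sysreg_name(3, 3, 9, 12, 1), "s3_3_c9_c12_1");
        assert_eq!(
            unsupported_sysreg("MSR", (3, 3, 9, 12, 1), 0x4000),
            "MSR of unsupported sysreg S3_3_C9_C12_1 at pc=0x4000"
        );
    }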

    #[test]
    fn translate_after_boot() {
        let mut cpu = Cpu::new();
        // Run kernel boot prefix on both cores: 5 EL2 + 7 MMU = 12 instructions per core.
        cpu.run(12);
        // Both cores share the same page tables (same TTBR0 PA).
        let r0 = cpu.cores[0].do_translate(&cpu.mem, 0x4000);
        let r1 = cpu.cores[1].do_translate(&cpu.mem, 0x4000);
        assert_eq!(r0.pa, Some(0x4000));
        assert_eq!(r1.pa, Some(0x4000));
    }
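
    // Direct check of the stage-1 descriptor field decode (bit positions per
    // decode_attrs: AF=10, SH=[9:8], AP=[7:6], AttrIndx=[4:2]).
    #[test]
    fn decode_attrs_extracts_descriptor_fields() {
        let desc = (1u64 << 10) | (0b11 << 8) | (0b01 << 6) | (0b010 << 2) | 0b11;
        let attrs = decode_attrs(desc);
        assert!(attrs.af);
        assert_eq!(attrs.sh, 0b11);
        assert_eq!(attrs.ap, 0b01);
        assert_eq!(attrs.attr_idx, 0b010);
    }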

    #[test]
    fn movz_then_add() {
        let mut cpu = Cpu::new();
        // Overwrite memory with our own tiny program that runs at EL2 (no MMU).
        let prog = [movz(0, 5, 0), add_imm(0, 0, 7), b_self()];
        write_words(&mut cpu.mem, ENTRY_PC, &prog);
        // Disable the MMU on core 0 so this program can run identity-mapped.
        cpu.cores[0].sctlr_el1 = 0;
        cpu.run(20);
        // Core 0 should have computed X0 = 12, then parked on the B-to-self.
        assert_eq!(cpu.cores[0].x[0], 12);
    }

    /// Helper for the LL/SC tests below — a tiny program that does
    /// LDXR/ADD/STXR on the counter at PA 0x100, then halts.
    fn ll_sc_program() -> [u32; 5] {
        [
            movz(4, 0x100, 0),       // X4 = &counter
            ldxr(5, 4),              // X5 = *X4
            add_imm(5, 5, 1),        // X5 += 1
            stxr(6, 5, 4),           // conditional store
            b_self(),                // halt
        ]
    }

    #[test]
    fn stxr_succeeds_after_clean_ldxr() {
        let mut cpu = Cpu::new();
        write_words(&mut cpu.mem, ENTRY_PC, &ll_sc_program());
        cpu.mem[0x100..0x108].copy_from_slice(&41u64.to_le_bytes());
        cpu.cores[0].sctlr_el1 = 0;
        cpu.cores[1].halted = true;
        cpu.run(20);
        let cnt = u64::from_le_bytes(cpu.mem[0x100..0x108].try_into().unwrap());
        assert_eq!(cnt, 42, "STXR did not commit");
        assert_eq!(cpu.cores[0].x[6], 0, "STXR success flag should be 0");
    }

    #[test]
    fn stxr_fails_after_clrex() {
        // LDXR; CLREX; STXR — the STXR must fail because CLREX wiped the monitor.
        let mut cpu = Cpu::new();
        let prog = [
            movz(4, 0x100, 0),
            ldxr(5, 4),
            clrex(),
            stxr(6, 5, 4),
            b_self(),
        ];
        write_words(&mut cpu.mem, ENTRY_PC, &prog);
        cpu.mem[0x100..0x108].copy_from_slice(&7u64.to_le_bytes());
        cpu.cores[0].sctlr_el1 = 0;
        cpu.cores[1].halted = true;
        cpu.run(20);
        let cnt = u64::from_le_bytes(cpu.mem[0x100..0x108].try_into().unwrap());
        assert_eq!(cnt, 7, "STXR after CLREX must NOT commit");
        assert_eq!(cpu.cores[0].x[6], 1, "STXR fail flag should be 1");
    }

    #[test]
    fn irq_clears_local_monitor() {
        // After LDXR the monitor is set; IRQ entry must clear it so a later
        // STXR fails. We simulate by calling take_irq() directly.
        let mut cpu = Cpu::new();
        cpu.cores[0].exclusive_monitor = Some(0x100);
        cpu.cores[0].vbar_el1 = 0x4400;
        cpu.cores[0].take_irq();
        assert_eq!(cpu.cores[0].exclusive_monitor, None);
    }

    #[test]
    fn cross_core_store_invalidates_monitor() {
        // Core 0 reserves PA 0x100. Core 1 stores to 0x100. Core 0's
        // subsequent STXR must fail and the counter must reflect core 1's
        // store, not core 0's.
        let mut cpu = Cpu::new();
        cpu.mem[0x100..0x108].copy_from_slice(&100u64.to_le_bytes());
        cpu.cores[0].sctlr_el1 = 0;
        cpu.cores[1].sctlr_el1 = 0;
        // Core 0: LDXR; B-here (busy-wait until core 1 stores); STXR; halt.
        // We sequence the cores by setting them up at different PCs and
        // stepping each manually.
        let core0_prog = [
            movz(4, 0x100, 0),
            ldxr(5, 4),
            add_imm(5, 5, 1),
            // Pause here — caller advances core 1 first.
            b_self(),
            stxr(6, 5, 4),
            b_self(),
        ];
        let core1_prog = [
            movz(4, 0x100, 0),
            movz(5, 999, 0),
            str_imm(5, 4, 0),
            b_self(),
        ];
        const C0_BASE: u64 = ENTRY_PC;
        const C1_BASE: u64 = ENTRY_PC + 0x80;
        write_words(&mut cpu.mem, C0_BASE, &core0_prog);
        write_words(&mut cpu.mem, C1_BASE, &core1_prog);
        cpu.cores[1].pc = C1_BASE;

        // Step core 0 through MOVZ + LDXR + ADD; reservation now held.
        for _ in 0..3 {
            cpu.step_core(0);
        }
        assert_eq!(cpu.cores[0].exclusive_monitor, Some(0x100));

        // Step core 1 through MOVZ + MOVZ + STR — its store invalidates
        // core 0's monitor.
        for _ in 0..3 {
            cpu.step_core(1);
        }
        assert_eq!(cpu.cores[0].exclusive_monitor, None,
            "core 1's store should have wiped core 0's reservation");

        // Now run core 0's STXR — must fail (W6 = 1) and leave counter at 999.
        cpu.cores[0].pc = C0_BASE + 4 * 4; // skip the b_self
        cpu.step_core(0);
        let cnt = u64::from_le_bytes(cpu.mem[0x100..0x108].try_into().unwrap());
        assert_eq!(cnt, 999, "core 1's value must survive — STXR failed");
        assert_eq!(cpu.cores[0].x[6], 1, "STXR fail flag should be 1");
    }

    #[test]
    fn task_a_atomic_counter_increments() {
        // Boot the demo and let task A churn for many cycles; the atomic
        // counter at 0x6FF8 must be strictly increasing.
        let mut cpu = Cpu::new();
        cpu.run(2000);
        let v1 = cpu.atomic_counter();
        cpu.run(2000);
        let v2 = cpu.atomic_counter();
        assert!(v1 > 0, "task A's counter never advanced past zero");
        assert!(v2 > v1, "counter stalled: {v1} -> {v2}");
    }

    #[test]
    fn aic_ipi_raises_target_only() {
        // Direct AIC.mmio_write to IPI_SET should raise pending on the
        // target core only, and bump total_ipis / last_ipi_target.
        let mut aic = Aic::new();
        aic.mmio_write(0, AIC_REG_IPI_SET, 1);
        assert!(aic.has_pending(1), "IPI did not raise pending on core 1");
        assert!(!aic.has_pending(0), "IPI leaked onto sender");
        let snap = aic.snapshot();
        assert_eq!(snap.total_ipis, 1);
        assert_eq!(snap.last_ipi_target, Some(1));
        // ACK from core 1 returns IRQ_IPI and clears the bit.
        let id = aic.read_ack(1);
        assert_eq!(id, IRQ_IPI);
        assert!(!aic.has_pending(1));
    }

    #[test]
    fn aic_ipi_to_invalid_core_is_dropped() {
        let mut aic = Aic::new();
        aic.mmio_write(0, AIC_REG_IPI_SET, 99);
        assert_eq!(aic.snapshot().total_ipis, 0);
        assert!(aic.snapshot().last_ipi_target.is_none());
    }

    #[test]
    fn task_a_svc_dispatches_ipis() {
        // End-to-end demo run: task A's SVC #0 handler should write to
        // AIC_REG_IPI_SET and the AIC counter should grow.
        let mut cpu = Cpu::new();
        cpu.run(3000);
        let snap = cpu.aic.snapshot();
        assert!(snap.total_ipis > 0, "no IPIs dispatched after 3000 steps");
        assert_eq!(snap.last_ipi_target, Some(1), "expected IPIs to target core 1");
    }

    #[test]
    fn ipi_wakes_a_wfi_d_core() {
        // Park core 1 in WFI with IRQs unmasked (DAIF=0). Have core 0 send
        // an IPI directly via the AIC. Core 1 must take the IRQ.
        let mut cpu = Cpu::new();
        // Boot far enough that core 1 is parked in task B's WFI.
        cpu.run(500);
        assert!(cpu.cores[1].wfi_halted, "core 1 should be parked in WFI");
        let pc_before = cpu.cores[1].pc;
        cpu.aic.mmio_write(0, AIC_REG_IPI_SET, 1);
        // One step is enough for take_irq to fire on core 1.
        cpu.step();
        assert!(!cpu.cores[1].wfi_halted, "WFI not lifted by IPI");
        assert_ne!(cpu.cores[1].pc, pc_before, "core 1 PC unchanged after IPI");
    }
}