Skip to main content

esp_p4_eth/
dma.rs

1//! DMA helpers for descriptor ownership, cache synchronization and interrupt status.
2//!
3//! ESP32-P4 uses a write-back data cache, so DMA-visible descriptor and packet
4//! memory must be explicitly flushed before ownership is handed to hardware and
5//! invalidated before the CPU reads data produced by DMA.
6
7#[cfg(target_arch = "riscv32")]
8use core::sync::atomic::{compiler_fence, fence, Ordering};
9
10use crate::{descriptors::BUF_SIZE, regs, RDes, TDes};
11
/// Cache line granularity used to compute aligned ranges for ROM cache ops.
/// Set to the wider of the two P4 cache lines (L2 = 128 byte), so a single
/// `Cache_Invalidate_Addr` never operates on a partial L2 line that happens
/// to share storage with an unrelated descriptor / buffer.
///
/// NOTE(review): `Dma::init_l2_cache_mode` programs the L2 with a 64-byte
/// line (`CACHE_LINE_SIZE_64B`); 128 here is then a conservative upper bound
/// rather than the live line size — confirm this is intentional.
const CACHE_LINE_SIZE: usize = 128;
/// Low-bit mask (`CACHE_LINE_SIZE - 1`) that `aligned_range` uses to round an
/// address down, and an end address up, to a line boundary. Only meaningful
/// while `CACHE_LINE_SIZE` is a power of two.
const CACHE_LINE_MASK: usize = CACHE_LINE_SIZE - 1;
18
// Cache map bits per ESP32-P4 ROM cache.h. The ROM helpers must be invoked
// ONCE PER LEVEL — passing a combined `L1|L2` bitmask returns success (0) but
// silently no-ops in practice, leaving descriptors invisible to DMA. IDF's
// `cache_ll_l2_writeback_cache_addr` calls L2 only for HP SRAM; we tried
// L2-only here on 2026-04-28 and it broke the data plane (DMA read garbage,
// `regs::read(MAC.CONFIG)` after `start()` hung the AHB), so on this boot
// path (espflash --ram --no-stub) HP SRAM also has live L1 D-cache state
// that needs flushing. Both maps are kept.
//
// Each constant is passed on its own as the `map` argument of a ROM cache
// helper; `sync_cache_region` issues the L1 call first, then the L2 call.
/// `map` value selecting the L1 data cache.
#[cfg(target_arch = "riscv32")]
const CACHE_MAP_L1_DCACHE: u32 = 1 << 4;
/// `map` value selecting the L2 cache.
#[cfg(target_arch = "riscv32")]
const CACHE_MAP_L2_CACHE: u32 = 1 << 5;
31
// ROM-resident cache helpers exported by the ESP32-P4 boot ROM. Addresses come from
// `components/esp_rom/esp32p4/ld/esp32p4.rom.ld` in ESP-IDF. Signature is
// `int fn(uint32_t map, uint32_t addr, uint32_t size)` returning 0 on success and 1 if the
// address is outside the cacheable address space (in which case the ROM silently does nothing).
//
// We prefer the ROM helpers over driving the cache MMIO directly because they take care of
// autoload suspend, line alignment and atomic L1/L2 ordering on our behalf.
//
// The addresses below are turned into callable function pointers with
// `core::mem::transmute` at each call site, so they must stay in sync with the ROM
// linker script quoted above.
/// Function-pointer type for the address-ranged ROM helpers (`map`, `addr`, `size`).
#[cfg(target_arch = "riscv32")]
type RomCacheOp = unsafe extern "C" fn(map: u32, addr: u32, size: u32) -> i32;
/// ROM entry point used by `sync_cache_region` for ranged invalidation.
#[cfg(target_arch = "riscv32")]
const ROM_CACHE_INVALIDATE_ADDR: usize = 0x4FC0_03E4;
/// ROM entry point used by `Dma::init_l2_cache_mode` to invalidate a whole cache level.
#[cfg(target_arch = "riscv32")]
const ROM_CACHE_INVALIDATE_ALL: usize = 0x4FC0_0404;
/// ROM entry point used by `sync_cache_region` to write back a whole cache level.
#[cfg(target_arch = "riscv32")]
const ROM_CACHE_WRITEBACK_ALL: usize = 0x4FC0_0414;
/// Function-pointer type for the whole-cache ROM helpers, which take only `map`.
#[cfg(target_arch = "riscv32")]
type RomCacheAll = unsafe extern "C" fn(map: u32) -> i32;
49
// `Cache_Set_L2_Cache_Mode(size, ways, line_size)` — must be called once
// before any other L2 cache ROM helper after a CPU reset, because the L2
// controller's internal mode/ways/line-size shadow registers are NOT
// re-initialised by a warm reset. IDF runs this from `cache_hal_init`
// in the second-stage bootloader; our `--ram --no-stub` boot path skips
// it, which is the strongest theory for the intermittent
// `Cache_WriteBack_All` hang observed 2026-04-28.
/// ROM entry point called from `Dma::init_l2_cache_mode`.
#[cfg(target_arch = "riscv32")]
const ROM_CACHE_SET_L2_CACHE_MODE: usize = 0x4FC0_03D4;
/// Function-pointer type for the L2 mode setter; unlike the other ROM cache
/// helper types in this file it is declared without a return value.
#[cfg(target_arch = "riscv32")]
type RomCacheSetL2Mode = unsafe extern "C" fn(size: u32, ways: u32, line_size: u32);
61
// Enum values from `components/esp_rom/esp32p4/include/esp32p4/rom/cache.h`.
// IDF default sdkconfig for ESP32-P4: 256 KB / 8-way / 64-byte line.
//
// These are the three arguments `Dma::init_l2_cache_mode` passes, in this
// order, to the ROM L2 mode setter. They are raw C enum discriminants, not
// sizes in bytes — NOTE(review): verify against the header when updating IDF.
/// `size` argument: 256 KB L2 cache.
#[cfg(target_arch = "riscv32")]
const CACHE_SIZE_256K: u32 = 10;
/// `ways` argument: 8-way associativity.
#[cfg(target_arch = "riscv32")]
const CACHE_8WAYS_ASSOC: u32 = 2;
/// `line_size` argument: 64-byte cache line.
#[cfg(target_arch = "riscv32")]
const CACHE_LINE_SIZE_64B: u32 = 3;
70
/// Byte range that has been widened to whole cache lines by `aligned_range`,
/// ready to be handed to a cache maintenance operation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct CacheAlignedRange {
    /// Address of the first byte, rounded down to a `CACHE_LINE_SIZE` boundary.
    start: usize,
    /// Length in bytes from `start` to the rounded-up end; a non-zero multiple
    /// of `CACHE_LINE_SIZE` when produced by `aligned_range`.
    size: usize,
}
76
/// Maximum number of `BUS_MODE` reads `wait_for_dma_reset_clear` performs
/// before giving up with `DmaError::ResetTimeout`. This is a poll count, not
/// a time: the loop contains no delay.
const DMA_RESET_TIMEOUT_POLLS: usize = 1_000;
/// Burst length that `Dma::dma_init` feeds to `dma_bus_mode_value`, where it
/// is written into both the `PBL` and `RPBL` fields of the bus mode register.
const DMA_BURST_LENGTH: u32 = 8;
79
/// DMA-level errors surfaced during initialization.
///
/// Currently only produced by [`Dma::dma_reset`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DmaError {
    /// Software reset bit did not clear before the polling timeout expired.
    ///
    /// The timeout is a fixed number of register polls
    /// (`DMA_RESET_TIMEOUT_POLLS`), not a wall-clock duration.
    ResetTimeout,
}
86
87/// Decoded snapshot of the DMA status register.
88#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
89pub struct DmaInterruptStatus {
90    raw: u32,
91}
92
93impl DmaInterruptStatus {
94    /// Creates a status view from the raw DMA status register value.
95    pub const fn from_raw(raw: u32) -> Self {
96        Self { raw }
97    }
98
99    /// Returns the original register value.
100    pub const fn raw(self) -> u32 {
101        self.raw
102    }
103
104    /// Returns `true` when a transmit completion interrupt is pending.
105    pub const fn has_tx_interrupt(self) -> bool {
106        self.raw & regs::bits::dmastatus::TI != 0
107    }
108
109    /// Returns `true` when TX ran out of descriptors or buffers.
110    pub const fn has_tx_buffer_unavailable(self) -> bool {
111        self.raw & regs::bits::dmastatus::TU != 0
112    }
113
114    /// Returns `true` when the TX DMA state machine entered the stopped state.
115    pub const fn has_tx_process_stopped(self) -> bool {
116        self.raw & regs::bits::dmastatus::TPS != 0
117    }
118
119    /// Returns `true` when the MAC reported a transmit jabber timeout.
120    pub const fn has_tx_jabber_timeout(self) -> bool {
121        self.raw & regs::bits::dmastatus::TJT != 0
122    }
123
124    /// Returns `true` when RX FIFO overflow was reported.
125    pub const fn has_rx_overflow(self) -> bool {
126        self.raw & regs::bits::dmastatus::OVF != 0
127    }
128
129    /// Returns `true` when TX underflow was reported.
130    pub const fn has_tx_underflow(self) -> bool {
131        self.raw & regs::bits::dmastatus::UNF != 0
132    }
133
134    /// Returns `true` when a receive completion interrupt is pending.
135    pub const fn has_rx_interrupt(self) -> bool {
136        self.raw & regs::bits::dmastatus::RI != 0
137    }
138
139    /// Returns `true` when RX ran out of available descriptors or buffers.
140    pub const fn has_rx_buffer_unavailable(self) -> bool {
141        self.raw & regs::bits::dmastatus::RU != 0
142    }
143
144    /// Returns `true` when the RX DMA state machine entered the stopped state.
145    pub const fn has_rx_process_stopped(self) -> bool {
146        self.raw & regs::bits::dmastatus::RPS != 0
147    }
148
149    /// Returns `true` when RX watchdog timeout was reported.
150    pub const fn has_rx_watchdog_timeout(self) -> bool {
151        self.raw & regs::bits::dmastatus::RWT != 0
152    }
153
154    /// Returns `true` when an early-transmit interrupt is pending.
155    pub const fn has_early_transmit_interrupt(self) -> bool {
156        self.raw & regs::bits::dmastatus::ETI != 0
157    }
158
159    /// Returns `true` when a fatal bus error was reported.
160    pub const fn has_fatal_bus_error(self) -> bool {
161        self.raw & regs::bits::dmastatus::FBI != 0
162    }
163
164    /// Returns `true` when an early-receive interrupt is pending.
165    pub const fn has_early_receive_interrupt(self) -> bool {
166        self.raw & regs::bits::dmastatus::ERI != 0
167    }
168
169    /// Returns `true` when the normal interrupt summary bit is set.
170    pub const fn is_normal_summary(self) -> bool {
171        self.raw & regs::bits::dmastatus::NIS != 0
172    }
173
174    /// Returns `true` when the abnormal interrupt summary bit is set.
175    pub const fn is_abnormal_summary(self) -> bool {
176        self.raw & regs::bits::dmastatus::AIS != 0
177    }
178
179    /// Returns the encoded DMA bus error subtype.
180    pub const fn error_bits(self) -> u32 {
181        (self.raw & regs::bits::dmastatus::EB_MASK) >> regs::bits::dmastatus::EB_SHIFT
182    }
183
184    /// Returns `true` when the RX path should be nudged after handling this status.
185    pub const fn should_kick_rx(self) -> bool {
186        self.has_rx_interrupt()
187            || self.has_early_receive_interrupt()
188            || self.has_rx_overflow()
189            || self.has_rx_buffer_unavailable()
190            || self.has_rx_process_stopped()
191            || self.has_rx_watchdog_timeout()
192    }
193
194    /// Returns `true` when the TX path should be nudged after handling this status.
195    pub const fn should_kick_tx(self) -> bool {
196        self.has_tx_interrupt()
197            || self.has_tx_buffer_unavailable()
198            || self.has_tx_underflow()
199            || self.has_tx_process_stopped()
200            || self.has_tx_jabber_timeout()
201            || self.has_early_transmit_interrupt()
202    }
203
204    /// Returns the sticky status bits that must be written back to clear this snapshot.
205    pub const fn clear_mask(self) -> u32 {
206        self.raw
207            & (regs::bits::dmastatus::TI
208                | regs::bits::dmastatus::TPS
209                | regs::bits::dmastatus::TU
210                | regs::bits::dmastatus::TJT
211                | regs::bits::dmastatus::OVF
212                | regs::bits::dmastatus::UNF
213                | regs::bits::dmastatus::RI
214                | regs::bits::dmastatus::RU
215                | regs::bits::dmastatus::RPS
216                | regs::bits::dmastatus::RWT
217                | regs::bits::dmastatus::ETI
218                | regs::bits::dmastatus::FBI
219                | regs::bits::dmastatus::ERI
220                | regs::bits::dmastatus::AIS
221                | regs::bits::dmastatus::NIS)
222    }
223}
224
225/// Safe entry points for cache sync, DMA setup and interrupt bookkeeping.
226pub struct Dma;
227
228impl Dma {
229    /// Resets the DMA engine and waits until the reset bit self-clears.
230    pub fn dma_reset() -> Result<(), DmaError> {
231        regs::write(regs::dma::BUS_MODE, regs::bits::dmabusmode::SWR);
232        #[cfg(test)]
233        regs::write(regs::dma::BUS_MODE, 0);
234        wait_for_dma_reset_clear(|| regs::read(regs::dma::BUS_MODE))
235    }
236
237    /// Applies baseline DMA bus mode, op mode and interrupt mask configuration.
238    pub fn dma_init() {
239        regs::write(regs::dma::BUS_MODE, dma_bus_mode_value(DMA_BURST_LENGTH));
240        regs::write(regs::dma::OP_MODE, dma_op_mode_value());
241        regs::write(regs::dma::INT_EN, dma_interrupt_enable_value());
242    }
243
244    /// Programs the TX and RX descriptor base addresses.
245    pub fn set_descriptor_lists(tx_descriptors: &[TDes], rx_descriptors: &[RDes]) {
246        regs::write(
247            regs::dma::TX_DESC_LIST,
248            tx_descriptors.as_ptr() as usize as u32,
249        );
250        regs::write(
251            regs::dma::RX_DESC_LIST,
252            rx_descriptors.as_ptr() as usize as u32,
253        );
254    }
255
256    /// Nudges the DMA engine to resume transmit descriptor processing.
257    pub fn demand_tx_poll() {
258        regs::write(regs::dma::TX_POLL_DEMAND, 1);
259    }
260
261    /// Nudges the DMA engine to resume receive descriptor processing.
262    pub fn demand_rx_poll() {
263        regs::write(regs::dma::RX_POLL_DEMAND, 1);
264    }
265
266    /// Reads the current DMA interrupt/status register.
267    pub fn read_interrupt_status() -> DmaInterruptStatus {
268        DmaInterruptStatus::from_raw(regs::read(regs::dma::STATUS))
269    }
270
271    /// Clears all sticky interrupt bits present in `status`.
272    pub fn clear_interrupt_status(status: DmaInterruptStatus) {
273        let clear_mask = status.clear_mask();
274        if clear_mask != 0 {
275            regs::write(regs::dma::STATUS, clear_mask);
276        }
277    }
278
279    /// Disables all DMA interrupt sources.
280    pub fn disable_interrupts() {
281        regs::write(regs::dma::INT_EN, 0);
282    }
283
284    /// Invalidates cache lines covering one descriptor object.
285    pub fn invalidate_descriptor<T>(descriptor: &T) {
286        // SAFETY: `descriptor` is a live shared reference, so the pointed-to memory is valid
287        // for reads for `size_of::<T>()` bytes. The cache operation does not create aliases or
288        // outlive the reference; it only synchronizes cache lines covering this object.
289        unsafe {
290            Self::cache_invalidate(
291                descriptor as *const T as *const u8,
292                core::mem::size_of::<T>(),
293            );
294        }
295    }
296
297    /// Invalidates the used prefix of a DMA packet buffer.
298    pub fn invalidate_buffer_prefix(buffer: &[u8; BUF_SIZE], used: usize) {
299        let used = used.min(BUF_SIZE);
300        // SAFETY: `buffer` is a live reference to a fixed-size DMA buffer. Invalidating at most
301        // `BUF_SIZE` bytes starting from `buffer.as_ptr()` stays within that allocation.
302        unsafe {
303            Self::cache_invalidate(buffer.as_ptr(), used);
304        }
305    }
306
307    /// Debug helper: invalidate cache and read the 16 bytes (rdes0..rdes3)
308    /// of an RX descriptor at the given absolute address. Use to peek at DMA
309    /// state from diagnostic code that lives outside the EMAC Mutex.
310    ///
311    /// # Safety
312    /// `addr` must point to live RX descriptor memory.
313    pub unsafe fn peek_rdes(addr: u32) -> [u32; 4] {
314        Self::cache_invalidate(addr as *const u8, 16);
315        let p = addr as *const u32;
316        [
317            core::ptr::read_volatile(p),
318            core::ptr::read_volatile(p.offset(1)),
319            core::ptr::read_volatile(p.offset(2)),
320            core::ptr::read_volatile(p.offset(3)),
321        ]
322    }
323
324    /// Debug helper: read raw 16 bytes WITHOUT invalidating cache. Returns the
325    /// CPU-side cache view (so this should match the snapshot the writer just
326    /// stored). Compare against [`Dma::peek_rdes`] to tell whether
327    /// `cache_invalidate` is dropping CPU dirty lines (cache holds writes,
328    /// memory does not).
329    ///
330    /// # Safety
331    /// `addr` must point to live RX descriptor memory.
332    pub unsafe fn peek_rdes_cached(addr: u32) -> [u32; 4] {
333        let p = addr as *const u32;
334        [
335            core::ptr::read_volatile(p),
336            core::ptr::read_volatile(p.offset(1)),
337            core::ptr::read_volatile(p.offset(2)),
338            core::ptr::read_volatile(p.offset(3)),
339        ]
340    }
341
342    /// # Safety
343    ///
344    /// `addr..addr+size` must describe a valid memory range owned by the caller for the full
345    /// duration of the cache operation. The pointed region must be DMA-visible memory.
346    pub unsafe fn cache_writeback(addr: *const u8, size: usize) {
347        let Some(range) = aligned_range(addr, size) else {
348            return;
349        };
350
351        sync_cache_region(CacheOp::Writeback, range);
352    }
353
354    /// # Safety
355    ///
356    /// `addr..addr+size` must describe a valid memory range owned by the caller for the full
357    /// duration of the cache operation. Any Rust references into that region must not be used to
358    /// observe stale cached contents across the invalidation boundary.
359    pub unsafe fn cache_invalidate(addr: *const u8, size: usize) {
360        let Some(range) = aligned_range(addr, size) else {
361            return;
362        };
363
364        sync_cache_region(CacheOp::Invalidate, range);
365    }
366
367    /// Flushes one descriptor object out of cache into DMA-visible memory.
368    pub fn flush_descriptor<T>(descriptor: &T) {
369        // SAFETY: `descriptor` is a valid reference, and flushing its exact byte range does not
370        // mutate Rust-visible state; it only makes CPU writes observable to DMA.
371        unsafe {
372            Self::cache_writeback(
373                descriptor as *const T as *const u8,
374                core::mem::size_of::<T>(),
375            );
376        }
377    }
378
379    /// Flushes a full packet buffer out of cache into DMA-visible memory.
380    pub fn flush_buffer(buffer: &[u8; BUF_SIZE]) {
381        // SAFETY: `buffer` is a valid reference to a DMA backing allocation. Flushing `BUF_SIZE`
382        // bytes from its base pointer remains within the buffer and only synchronizes caches.
383        unsafe {
384            Self::cache_writeback(buffer.as_ptr(), BUF_SIZE);
385        }
386    }
387
388    /// Initialises the ESP32-P4 L2 cache controller mode shadow registers
389    /// the way IDF's `cache_hal_init` does at second-stage bootloader.
390    /// Must be called once at driver init **before** any
391    /// `Cache_WriteBack_*` / `Cache_Invalidate_*` ROM helper, otherwise
392    /// the helpers walk the L2 with stale `mode`/`ways`/`line_size`
393    /// state inherited from a previous binary and intermittently hang
394    /// the AHB bus. After the mode is set, a `Cache_Invalidate_All`
395    /// drops any stale dirty lines so subsequent writebacks have nothing
396    /// stale to chase.
397    ///
398    /// No-op on host targets.
399    pub fn init_l2_cache_mode() {
400        #[cfg(target_arch = "riscv32")]
401        unsafe {
402            let set_mode: RomCacheSetL2Mode =
403                core::mem::transmute(ROM_CACHE_SET_L2_CACHE_MODE);
404            set_mode(CACHE_SIZE_256K, CACHE_8WAYS_ASSOC, CACHE_LINE_SIZE_64B);
405            let inv_all: RomCacheAll = core::mem::transmute(ROM_CACHE_INVALIDATE_ALL);
406            let _ = inv_all(CACHE_MAP_L2_CACHE);
407        }
408    }
409}
410
411fn aligned_range(addr: *const u8, size: usize) -> Option<CacheAlignedRange> {
412    if size == 0 {
413        return None;
414    }
415
416    let start = (addr as usize) & !CACHE_LINE_MASK;
417    let end = (addr as usize)
418        .checked_add(size)
419        .expect("cache sync range overflow")
420        .checked_add(CACHE_LINE_MASK)
421        .expect("cache sync range overflow")
422        & !CACHE_LINE_MASK;
423
424    Some(CacheAlignedRange {
425        start,
426        size: end - start,
427    })
428}
429
430fn wait_for_dma_reset_clear<F>(mut read_bus_mode: F) -> Result<(), DmaError>
431where
432    F: FnMut() -> u32,
433{
434    for _ in 0..DMA_RESET_TIMEOUT_POLLS {
435        if read_bus_mode() & regs::bits::dmabusmode::SWR == 0 {
436            return Ok(());
437        }
438    }
439
440    Err(DmaError::ResetTimeout)
441}
442
443fn dma_bus_mode_value(pbl: u32) -> u32 {
444    let pbl = pbl & 0x3f;
445
446    regs::bits::dmabusmode::AAL
447        | regs::bits::dmabusmode::FB
448        | regs::bits::dmabusmode::USP
449        | (pbl << regs::bits::dmabusmode::PBL_SHIFT)
450        | (pbl << regs::bits::dmabusmode::RPBL_SHIFT)
451}
452
/// Builds the value written to the DMA operation mode register: the `TSF`
/// and `FUF` bits, with every other bit (including `RSF`) left clear.
fn dma_op_mode_value() -> u32 {
    // RSF intentionally NOT set: P4 EMAC RX FIFO can't hold full frames, so
    // store-and-forward silently drops anything > ~256 bytes wire size. We use
    // cut-through receive (RTC threshold 64 bytes by default).
    regs::bits::dmaopmode::TSF | regs::bits::dmaopmode::FUF
}
459
460fn dma_interrupt_enable_value() -> u32 {
461    regs::bits::dmainten::TIE
462        | regs::bits::dmainten::TSE
463        | regs::bits::dmainten::TUE
464        | regs::bits::dmainten::TJE
465        | regs::bits::dmainten::OVE
466        | regs::bits::dmainten::UNE
467        | regs::bits::dmainten::RIE
468        | regs::bits::dmainten::RUE
469        | regs::bits::dmainten::RSE
470        | regs::bits::dmainten::RWE
471        | regs::bits::dmainten::ETE
472        | regs::bits::dmainten::FBE
473        | regs::bits::dmainten::ERE
474        | regs::bits::dmainten::AIE
475        | regs::bits::dmainten::NIE
476}
477
/// Cache operations that DMA helpers can request.
///
/// `sync_cache_region` currently services `Writeback` with the whole-cache
/// ROM helper (`ROM_CACHE_WRITEBACK_ALL`, i.e. not the ranged `_Addr`
/// variant) and `Invalidate` with the ranged `Cache_Invalidate_Addr` helper.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CacheOp {
    /// Push CPU-side dirty data out to memory so DMA can read it.
    Writeback,
    /// Drop cached lines so the CPU re-reads what DMA wrote to memory.
    Invalidate,
}
485
/// Return code of the most recent writeback performed by `sync_cache_region`
/// (which uses the whole-cache ROM writeback helper, once for L1 and once for
/// L2). Holds the L1 result if it was non-zero, otherwise the L2 result.
/// 0 means success per IDF rom/cache.h. Anything else is an error path we'd
/// otherwise miss. The initial value -999 means no writeback has run yet.
#[cfg(target_arch = "riscv32")]
pub static LAST_WRITEBACK_RC: core::sync::atomic::AtomicI32 =
    core::sync::atomic::AtomicI32::new(-999);
/// Return code of the most recent `Cache_Invalidate_Addr` pair (L1, then L2)
/// issued by `sync_cache_region`. Holds the L1 result if it was non-zero,
/// otherwise the L2 result. The initial value -999 means no invalidate has
/// run yet.
#[cfg(target_arch = "riscv32")]
pub static LAST_INVALIDATE_RC: core::sync::atomic::AtomicI32 =
    core::sync::atomic::AtomicI32::new(-999);
495
// --- Wall-clock instrumentation for cache ROM calls --------------------------
//
// SYSTIMER ticks at 16 MHz (62.5 ns per tick). Counters wrap at 2^32 ticks
// ≈ 268 s, so per-second deltas via `wrapping_sub` are safe. Calls counters
// are u32 incremented per ROM call. Atomics are unconditional (16 bytes
// total) so host tests can exercise the recording path; on host
// `sync_cache_region` is a no-op so they stay at zero in production.
//
// All four are updated only through `record_cache_call`, with relaxed
// ordering: they are statistics, not synchronisation points.
/// Accumulated tick count spent in invalidate operations (wraps on overflow).
pub static CACHE_INV_TICKS: core::sync::atomic::AtomicU32 =
    core::sync::atomic::AtomicU32::new(0);
/// Number of invalidate operations recorded (wraps on overflow).
pub static CACHE_INV_CALLS: core::sync::atomic::AtomicU32 =
    core::sync::atomic::AtomicU32::new(0);
/// Accumulated tick count spent in writeback operations (wraps on overflow).
pub static CACHE_WB_TICKS: core::sync::atomic::AtomicU32 =
    core::sync::atomic::AtomicU32::new(0);
/// Number of writeback operations recorded (wraps on overflow).
pub static CACHE_WB_CALLS: core::sync::atomic::AtomicU32 =
    core::sync::atomic::AtomicU32::new(0);
511
512/// Adds a per-call (ticks, calls += 1) sample for one ROM cache op. Extracted
513/// from `sync_cache_region` so the recording path is testable without a real
514/// SYSTIMER or ROM table on the host target.
515#[allow(dead_code)]
516#[inline]
517fn record_cache_call(op: CacheOp, dt_ticks: u32) {
518    use core::sync::atomic::Ordering::Relaxed;
519    match op {
520        CacheOp::Writeback => {
521            CACHE_WB_TICKS.fetch_add(dt_ticks, Relaxed);
522            CACHE_WB_CALLS.fetch_add(1, Relaxed);
523        }
524        CacheOp::Invalidate => {
525            CACHE_INV_TICKS.fetch_add(dt_ticks, Relaxed);
526            CACHE_INV_CALLS.fetch_add(1, Relaxed);
527        }
528    }
529}
530
/// Performs the requested cache maintenance operation through the boot ROM.
///
/// On `riscv32`:
/// - `Writeback` ignores `_range` and writes back the whole L1 D-cache, then
///   the whole L2.
/// - `Invalidate` invalidates `_range` in the L1 D-cache, then in the L2.
/// - The combined return code (L1 result if non-zero, else L2 result) is
///   stored in `LAST_WRITEBACK_RC` / `LAST_INVALIDATE_RC`.
/// - The elapsed SYSTIMER ticks, including the time spent entering and
///   leaving the critical section, are fed to `record_cache_call`.
///
/// On every other target the body compiles to nothing, which is why both
/// parameters carry a leading underscore.
#[inline]
fn sync_cache_region(_op: CacheOp, _range: CacheAlignedRange) {
    #[cfg(target_arch = "riscv32")]
    {
        // SYSTIMER tick timestamps before/after the ROM call. We sample the
        // 32-bit low word only — at 16 MHz it wraps every ~268 s, but each
        // cache call is microseconds so wrapping_sub gives the correct delta.
        let t0 = crate::systimer::now_ticks() as u32;
        // Hold a critical_section across the ROM call so a SYSTIMER (or
        // EMAC) interrupt cannot trap the CPU while the cache controller
        // is mid-transaction. 2026-04-28 stress confirmed CS suppression
        // buys ~5 percentage points of reliability on warm reboots.
        //
        // SAFETY: the transmutes turn fixed ROM addresses into function
        // pointers; `usize` and fn pointers have the same size on riscv32.
        // The calls are sound only if those constants are the real ROM entry
        // points with the declared C signatures (see the constants' comments).
        // The fences on both sides order CPU memory accesses around the cache
        // maintenance calls.
        let rc = critical_section::with(|_| unsafe {
            fence(Ordering::SeqCst);
            compiler_fence(Ordering::SeqCst);
            let rc = match _op {
                CacheOp::Writeback => {
                    // _Addr writeback variant returns success but does NOT
                    // actually flush data to RAM on `--ram --no-stub` boots
                    // (verified 2026-04-26 + 2026-04-28: with _Addr-only
                    // path the data plane breaks — `regs::read(MAC_CONFIG)`
                    // after `start()` hangs the AHB because the DMA read
                    // garbage). Use `_All` for both L1 and L2 so the
                    // descriptors actually become DMA-visible.
                    let f: RomCacheAll = core::mem::transmute(ROM_CACHE_WRITEBACK_ALL);
                    let rc1 = f(CACHE_MAP_L1_DCACHE);
                    let rc2 = f(CACHE_MAP_L2_CACHE);
                    // Report the first failure: L1 takes precedence over L2.
                    if rc1 != 0 {
                        rc1
                    } else {
                        rc2
                    }
                }
                CacheOp::Invalidate => {
                    let f: RomCacheOp = core::mem::transmute(ROM_CACHE_INVALIDATE_ADDR);
                    let rc1 = f(CACHE_MAP_L1_DCACHE, _range.start as u32, _range.size as u32);
                    let rc2 = f(CACHE_MAP_L2_CACHE, _range.start as u32, _range.size as u32);
                    // Report the first failure: L1 takes precedence over L2.
                    if rc1 != 0 {
                        rc1
                    } else {
                        rc2
                    }
                }
            };
            compiler_fence(Ordering::SeqCst);
            fence(Ordering::SeqCst);
            rc
        });
        let t1 = crate::systimer::now_ticks() as u32;
        let dt = t1.wrapping_sub(t0);
        // Publish the return code for diagnostics before recording the timing sample.
        match _op {
            CacheOp::Writeback => {
                LAST_WRITEBACK_RC.store(rc, core::sync::atomic::Ordering::Relaxed);
            }
            CacheOp::Invalidate => {
                LAST_INVALIDATE_RC.store(rc, core::sync::atomic::Ordering::Relaxed);
            }
        }
        record_cache_call(_op, dt);
    }
}
592
593#[cfg(test)]
594mod tests {
595    use super::{
596        aligned_range, dma_bus_mode_value, dma_interrupt_enable_value, dma_op_mode_value,
597        record_cache_call, wait_for_dma_reset_clear, CacheAlignedRange, CacheOp, Dma, DmaError,
598        DmaInterruptStatus, CACHE_INV_CALLS, CACHE_INV_TICKS, CACHE_LINE_SIZE, CACHE_WB_CALLS,
599        CACHE_WB_TICKS, DMA_BURST_LENGTH, DMA_RESET_TIMEOUT_POLLS,
600    };
601    use crate::{regs, zeroed_rx_descriptors, zeroed_tx_descriptors};
602    use core::sync::atomic::Ordering;
603    use std::sync::Mutex;
604
605    /// Cache counters live in `static`s so parallel tests would race. Tests
606    /// that touch them must take this lock, then call [`reset_cache_counters`]
607    /// before observing.
608    static CACHE_COUNTER_LOCK: Mutex<()> = Mutex::new(());
609
610    fn reset_cache_counters() {
611        CACHE_INV_TICKS.store(0, Ordering::Relaxed);
612        CACHE_INV_CALLS.store(0, Ordering::Relaxed);
613        CACHE_WB_TICKS.store(0, Ordering::Relaxed);
614        CACHE_WB_CALLS.store(0, Ordering::Relaxed);
615    }
616
617    #[test]
618    fn zero_size_range_is_skipped() {
619        assert_eq!(aligned_range(0x2000 as *const u8, 0), None);
620    }
621
622    #[test]
623    fn aligned_range_is_left_unchanged() {
624        assert_eq!(
625            aligned_range(0x2000 as *const u8, CACHE_LINE_SIZE),
626            Some(CacheAlignedRange {
627                start: 0x2000,
628                size: CACHE_LINE_SIZE,
629            })
630        );
631    }
632
633    #[test]
634    fn unaligned_range_is_expanded_to_cache_lines() {
635        // 0x2033..0x2083 expanded to 128-byte alignment → 0x2000..0x2100.
636        assert_eq!(
637            aligned_range(0x2033 as *const u8, 80),
638            Some(CacheAlignedRange {
639                start: 0x2000,
640                size: 256,
641            })
642        );
643    }
644
645    #[test]
646    fn dma_reset_succeeds_when_swr_clears() {
647        let mut polls = 0usize;
648        let result = wait_for_dma_reset_clear(|| {
649            polls += 1;
650            if polls < 3 {
651                regs::bits::dmabusmode::SWR
652            } else {
653                0
654            }
655        });
656
657        assert_eq!(result, Ok(()));
658        assert_eq!(polls, 3);
659    }
660
661    #[test]
662    fn dma_reset_times_out_when_swr_stays_set() {
663        let mut polls = 0usize;
664        let result = wait_for_dma_reset_clear(|| {
665            polls += 1;
666            regs::bits::dmabusmode::SWR
667        });
668
669        assert_eq!(result, Err(DmaError::ResetTimeout));
670        assert_eq!(polls, DMA_RESET_TIMEOUT_POLLS);
671    }
672
673    #[test]
674    fn dma_reset_programs_software_reset_and_observes_clear_on_host() {
675        regs::reset_test_registers();
676
677        assert_eq!(Dma::dma_reset(), Ok(()));
678
679        assert_eq!(regs::read(regs::dma::BUS_MODE), 0);
680    }
681
682    #[test]
683    fn dma_bus_mode_enables_burst_and_alignment() {
684        let bus_mode = dma_bus_mode_value(DMA_BURST_LENGTH);
685
686        assert_ne!(bus_mode & regs::bits::dmabusmode::AAL, 0);
687        assert_ne!(bus_mode & regs::bits::dmabusmode::FB, 0);
688        assert_ne!(bus_mode & regs::bits::dmabusmode::USP, 0);
689        assert_eq!(
690            bus_mode & regs::bits::dmabusmode::PBL_MASK,
691            DMA_BURST_LENGTH << regs::bits::dmabusmode::PBL_SHIFT
692        );
693        assert_eq!(
694            bus_mode & regs::bits::dmabusmode::RPBL_MASK,
695            DMA_BURST_LENGTH << regs::bits::dmabusmode::RPBL_SHIFT
696        );
697        assert_eq!(bus_mode & regs::bits::dmabusmode::DSL_MASK, 0);
698    }
699
700    #[test]
701    fn dma_op_mode_uses_tx_store_forward_and_cut_through_rx() {
702        let op_mode = dma_op_mode_value();
703
704        assert_eq!(
705            op_mode,
706            regs::bits::dmaopmode::TSF | regs::bits::dmaopmode::FUF
707        );
708        assert_eq!(op_mode & regs::bits::dmaopmode::RSF, 0,
709            "RSF must stay 0 on P4 — RX FIFO too small for full frames");
710    }
711
712    #[test]
713    fn dma_interrupt_enable_value_covers_normal_and_abnormal_paths() {
714        let inten = dma_interrupt_enable_value();
715
716        assert_ne!(inten & regs::bits::dmainten::TIE, 0);
717        assert_ne!(inten & regs::bits::dmainten::RIE, 0);
718        assert_ne!(inten & regs::bits::dmainten::OVE, 0);
719        assert_ne!(inten & regs::bits::dmainten::UNE, 0);
720        assert_ne!(inten & regs::bits::dmainten::FBE, 0);
721        assert_ne!(inten & regs::bits::dmainten::AIE, 0);
722        assert_ne!(inten & regs::bits::dmainten::NIE, 0);
723    }
724
725    #[test]
726    fn dma_init_programs_bus_op_mode_and_interrupt_mask() {
727        regs::reset_test_registers();
728
729        Dma::dma_init();
730
731        assert_eq!(
732            regs::read(regs::dma::BUS_MODE),
733            dma_bus_mode_value(DMA_BURST_LENGTH)
734        );
735        assert_eq!(regs::read(regs::dma::OP_MODE), dma_op_mode_value());
736        assert_eq!(regs::read(regs::dma::INT_EN), dma_interrupt_enable_value());
737    }
738
739    #[test]
740    fn dma_descriptor_lists_are_programmed_from_slice_addresses() {
741        regs::reset_test_registers();
742        let tx_descriptors = zeroed_tx_descriptors();
743        let rx_descriptors = zeroed_rx_descriptors();
744
745        Dma::set_descriptor_lists(&tx_descriptors, &rx_descriptors);
746
747        assert_eq!(
748            regs::read(regs::dma::TX_DESC_LIST),
749            tx_descriptors.as_ptr() as usize as u32
750        );
751        assert_eq!(
752            regs::read(regs::dma::RX_DESC_LIST),
753            rx_descriptors.as_ptr() as usize as u32
754        );
755    }
756
757    #[test]
758    fn dma_poll_demands_and_interrupt_disable_update_registers() {
759        regs::reset_test_registers();
760        regs::write(regs::dma::INT_EN, u32::MAX);
761
762        Dma::demand_tx_poll();
763        Dma::demand_rx_poll();
764        Dma::disable_interrupts();
765
766        assert_eq!(regs::read(regs::dma::TX_POLL_DEMAND), 1);
767        assert_eq!(regs::read(regs::dma::RX_POLL_DEMAND), 1);
768        assert_eq!(regs::read(regs::dma::INT_EN), 0);
769    }
770
/// A raw status word carrying OVF, RU, UNF, FBI, both summary bits and an
/// all-ones error field must be reflected by every accessor checked below.
#[test]
fn dma_interrupt_status_extracts_recovery_flags() {
    let event_bits = regs::bits::dmastatus::OVF
        | regs::bits::dmastatus::RU
        | regs::bits::dmastatus::UNF
        | regs::bits::dmastatus::FBI;
    let summary_bits = regs::bits::dmastatus::AIS | regs::bits::dmastatus::NIS;
    let error_field = 0b111 << regs::bits::dmastatus::EB_SHIFT;

    let decoded = DmaInterruptStatus::from_raw(event_bits | summary_bits | error_field);

    // Per-condition flags.
    assert!(decoded.has_rx_overflow());
    assert!(decoded.has_rx_buffer_unavailable());
    assert!(decoded.has_tx_underflow());
    assert!(decoded.has_fatal_bus_error());
    // Derived recovery hints.
    assert!(decoded.should_kick_rx());
    assert!(decoded.should_kick_tx());
    // Summary bits and the extracted error field.
    assert!(decoded.is_abnormal_summary());
    assert!(decoded.is_normal_summary());
    assert_eq!(decoded.error_bits(), 0b111);
}
793
/// `clear_mask` must return only the interrupt bits that were set and drop
/// whatever sits in the RS / TS / EB fields of the raw word.
#[test]
fn dma_interrupt_status_clear_mask_does_not_touch_state_fields() {
    let interrupt_bits = regs::bits::dmastatus::TI
        | regs::bits::dmastatus::OVF
        | regs::bits::dmastatus::AIS
        | regs::bits::dmastatus::NIS;
    // Non-zero values in every state field so a leak into the mask is visible.
    let state_fields = (0b101 << regs::bits::dmastatus::RS_SHIFT)
        | (0b110 << regs::bits::dmastatus::TS_SHIFT)
        | (0b011 << regs::bits::dmastatus::EB_SHIFT);

    let decoded = DmaInterruptStatus::from_raw(interrupt_bits | state_fields);

    assert_eq!(decoded.clear_mask(), interrupt_bits);
}
814
/// `Dma::clear_interrupt_status` must write RI | AIS to `STATUS` and leave
/// out the RS field that was also present in the raw word.
#[test]
fn dma_clear_interrupt_status_writes_only_clearable_bits() {
    regs::reset_test_registers();
    let expected_write = regs::bits::dmastatus::RI | regs::bits::dmastatus::AIS;
    let rx_state = 0b101 << regs::bits::dmastatus::RS_SHIFT;
    let decoded = DmaInterruptStatus::from_raw(expected_write | rx_state);

    Dma::clear_interrupt_status(decoded);

    assert_eq!(regs::read(regs::dma::STATUS), expected_write);
}
830
/// With only RS-field bits in the raw word, `Dma::clear_interrupt_status`
/// must leave `STATUS` untouched.
#[test]
fn dma_clear_interrupt_status_skips_zero_clear_mask() {
    regs::reset_test_registers();
    // Sentinel that any write to STATUS would overwrite.
    let sentinel = 0xDEAD_BEEF;
    regs::write(regs::dma::STATUS, sentinel);
    let state_only = 0b101 << regs::bits::dmastatus::RS_SHIFT;

    Dma::clear_interrupt_status(DmaInterruptStatus::from_raw(state_only));

    assert_eq!(regs::read(regs::dma::STATUS), sentinel);
}
841
/// With every listed interrupt bit set alongside non-zero RS / TS / EB
/// fields, `clear_mask` must return exactly the union of the listed bits.
#[test]
fn dma_interrupt_status_clear_mask_covers_all_sticky_bits() {
    let sticky_bits = [
        regs::bits::dmastatus::TI,
        regs::bits::dmastatus::TPS,
        regs::bits::dmastatus::TU,
        regs::bits::dmastatus::TJT,
        regs::bits::dmastatus::OVF,
        regs::bits::dmastatus::UNF,
        regs::bits::dmastatus::RI,
        regs::bits::dmastatus::RU,
        regs::bits::dmastatus::RPS,
        regs::bits::dmastatus::RWT,
        regs::bits::dmastatus::ETI,
        regs::bits::dmastatus::FBI,
        regs::bits::dmastatus::ERI,
        regs::bits::dmastatus::AIS,
        regs::bits::dmastatus::NIS,
    ];
    // OR the individual bits together into the expected mask.
    let clearable = sticky_bits.iter().copied().fold(0, |mask, bit| mask | bit);
    let state_fields = (0b101 << regs::bits::dmastatus::RS_SHIFT)
        | (0b110 << regs::bits::dmastatus::TS_SHIFT)
        | (0b011 << regs::bits::dmastatus::EB_SHIFT);

    let decoded = DmaInterruptStatus::from_raw(clearable | state_fields);

    assert_eq!(decoded.clear_mask(), clearable);
}
867
/// One `Invalidate` record must add its ticks and one call to the
/// invalidate counters and leave the writeback counters at zero.
#[test]
fn record_cache_call_invalidate_increments_only_invalidate_counters() {
    // Hold the counter lock for the whole test; the counters are statics.
    let _guard = CACHE_COUNTER_LOCK.lock().unwrap();
    reset_cache_counters();

    record_cache_call(CacheOp::Invalidate, 100);

    let inv_ticks = CACHE_INV_TICKS.load(Ordering::Relaxed);
    let inv_calls = CACHE_INV_CALLS.load(Ordering::Relaxed);
    let wb_ticks = CACHE_WB_TICKS.load(Ordering::Relaxed);
    let wb_calls = CACHE_WB_CALLS.load(Ordering::Relaxed);
    assert_eq!(inv_ticks, 100);
    assert_eq!(inv_calls, 1);
    assert_eq!(wb_ticks, 0);
    assert_eq!(wb_calls, 0);
}
880
/// One `Writeback` record must add its ticks and one call to the
/// writeback counters and leave the invalidate counters at zero.
#[test]
fn record_cache_call_writeback_increments_only_writeback_counters() {
    // Hold the counter lock for the whole test; the counters are statics.
    let _guard = CACHE_COUNTER_LOCK.lock().unwrap();
    reset_cache_counters();

    record_cache_call(CacheOp::Writeback, 200);

    let wb_ticks = CACHE_WB_TICKS.load(Ordering::Relaxed);
    let wb_calls = CACHE_WB_CALLS.load(Ordering::Relaxed);
    let inv_ticks = CACHE_INV_TICKS.load(Ordering::Relaxed);
    let inv_calls = CACHE_INV_CALLS.load(Ordering::Relaxed);
    assert_eq!(wb_ticks, 200);
    assert_eq!(wb_calls, 1);
    assert_eq!(inv_ticks, 0);
    assert_eq!(inv_calls, 0);
}
893
/// Interleaved invalidate and writeback records must accumulate into
/// their own tick and call counters independently.
#[test]
fn record_cache_call_accumulates_across_multiple_calls() {
    // Hold the counter lock for the whole test; the counters are statics.
    let _guard = CACHE_COUNTER_LOCK.lock().unwrap();
    reset_cache_counters();

    // Recorded in this exact order: 3 invalidates (50 + 75 + 25 = 150 ticks)
    // interleaved with 2 writebacks (1000 + 500 = 1500 ticks).
    let sequence = [
        (CacheOp::Invalidate, 50),
        (CacheOp::Invalidate, 75),
        (CacheOp::Writeback, 1000),
        (CacheOp::Invalidate, 25),
        (CacheOp::Writeback, 500),
    ];
    for (op, ticks) in sequence {
        record_cache_call(op, ticks);
    }

    assert_eq!(CACHE_INV_TICKS.load(Ordering::Relaxed), 150);
    assert_eq!(CACHE_INV_CALLS.load(Ordering::Relaxed), 3);
    assert_eq!(CACHE_WB_TICKS.load(Ordering::Relaxed), 1500);
    assert_eq!(CACHE_WB_CALLS.load(Ordering::Relaxed), 2);
}
910
/// A tick delta computed with `wrapping_sub` stays correct when the 32-bit
/// timer snapshot rolls over between the two reads: a start value just
/// below `u32::MAX` and an end value just above zero must still yield the
/// small positive delta that is then recorded.
#[test]
fn record_cache_call_handles_systimer_wraparound() {
    // Hold the counter lock for the whole test; the counters are statics.
    let _guard = CACHE_COUNTER_LOCK.lock().unwrap();
    reset_cache_counters();

    // The caller is expected to compute `end.wrapping_sub(start)` and pass
    // the delta in: 11 ticks up to the wrap plus 50 after it.
    let start: u32 = u32::MAX - 10;
    let end: u32 = 50;
    let elapsed = end.wrapping_sub(start);
    assert_eq!(elapsed, 61, "wrapping_sub across u32 boundary must give 61");

    record_cache_call(CacheOp::Invalidate, elapsed);
    assert_eq!(CACHE_INV_TICKS.load(Ordering::Relaxed), 61);
}
928
/// The tick counter itself must wrap modulo 2^32 rather than saturate or
/// panic. Guards against a well-meant change to `saturating_add` or an
/// overflow check that would break the wrapping convention.
#[test]
fn record_cache_call_counters_wrap_cleanly_on_overflow() {
    // Hold the counter lock for the whole test; the counters are statics.
    let _guard = CACHE_COUNTER_LOCK.lock().unwrap();
    reset_cache_counters();

    // Park the counter 100 ticks short of `u32::MAX`, then add 200.
    let almost_full = u32::MAX - 100;
    CACHE_INV_TICKS.store(almost_full, Ordering::Relaxed);
    record_cache_call(CacheOp::Invalidate, 200);

    // (u32::MAX - 100) + 200 reduced modulo 2^32 is 99.
    assert_eq!(CACHE_INV_TICKS.load(Ordering::Relaxed), 99);
}
943}