Skip to main content

squib_snapshot/
dirty.rs

1//! Dirty page tracking — `Box<[AtomicU64]>` shadow bitmap, adaptive heuristic.
2//!
3//! Per [16-snapshots.md § 4](../../../specs/16-snapshots.md#4-dirty-page-tracking):
4//! after a clean checkpoint, the VMM strips `HV_MEMORY_WRITE` from the entire tracked
5//! range; guest writes generate ESR EC=0x24, WnR=1 exits; the vCPU exit handler sets
6//! the corresponding bit in this bitmap and re-grants WRITE on the page (or 2 MiB
7//! block).
8//!
9//! Three sizes interact (D21, codified in [`squib_arch::PageGeometry`]):
10//! - `HOST_PAGE_SIZE` = 16 KiB on Apple Silicon.
11//! - `HVF_STAGE2_GRANULE` = 16 KiB (matches the host page).
12//! - `TRACKING_PAGE_SIZE` = 2 MiB default; adaptive step-down to 16 KiB for hot regions per the
13//!   heuristic in 16 § 4.2.
14//!
15//! The shadow bitmap is per-RAM-region, sized for the **default** (2 MiB) tracking
16//! granularity. When the heuristic steps down, we promote the same region to a
17//! finer-grained bitmap; the coarse bitmap's set bits propagate as "all pages in
18//! that 2 MiB block are dirty" so the diff snapshot doesn't drop coverage during the
19//! transition.
20
21use std::{
22    sync::atomic::{AtomicU32, AtomicU64, Ordering},
23    time::{Duration, Instant},
24};
25
26use squib_arch::PageGeometry;
27use tracing::{debug, info};
28
29use crate::error::SnapshotError;
30
31/// Default fault-rate threshold for adaptive step-down: 32 faults per 100 ms per
32/// 2 MiB tracking block. Matches 16 § 4.2.
33pub const DEFAULT_STEP_DOWN_THRESHOLD: u32 = 32;
34
35/// Sliding-window length for the step-down heuristic.
36pub const ADAPTIVE_WINDOW: Duration = Duration::from_millis(100);
37
38/// Tracking granularity for one RAM region.
39#[derive(Debug, Clone, Copy, PartialEq, Eq)]
40pub enum TrackingGranule {
41    /// 2 MiB tracking pages — default cheap mode.
42    Coarse,
43    /// 16 KiB (host-page) tracking pages — adaptive hot mode.
44    Fine,
45}
46
47impl TrackingGranule {
48    /// Page size in bytes for this granule.
49    #[must_use]
50    pub const fn page_size(self, geom: PageGeometry) -> u64 {
51        match self {
52            Self::Coarse => geom.tracking_page_default,
53            Self::Fine => geom.tracking_page_hot,
54        }
55    }
56}
57
58/// `Box<[AtomicU64]>` shadow bitmap of dirty tracking pages.
59///
60/// The map covers `[ram_start, ram_start + ram_size)`. `set_dirty` is called from
61/// the vCPU exit handler; `drain` is called by the snapshot writer. Both are
62/// lock-free.
63#[derive(Debug)]
64pub struct DirtyBitmap {
65    /// Inclusive base of the tracked range.
66    ram_start: u64,
67    /// Total size of the tracked range in bytes.
68    ram_size: u64,
69    /// Page size in bytes (16 KiB or 2 MiB).
70    page_size: u64,
71    /// Number of bits in the bitmap (= number of pages).
72    page_count: u64,
73    /// Storage: each `AtomicU64` carries 64 bits.
74    words: Box<[AtomicU64]>,
75}
76
77impl DirtyBitmap {
78    /// Build a bitmap covering `[ram_start, ram_start + ram_size)` at `page_size`.
79    ///
80    /// # Errors
81    /// [`SnapshotError::InvalidPath`] (re-used for "rejected configuration") if
82    /// `page_size` is not a power of two, the range is empty, or the range overflows.
83    pub fn new(ram_start: u64, ram_size: u64, page_size: u64) -> Result<Self, SnapshotError> {
84        if !page_size.is_power_of_two() {
85            return Err(SnapshotError::InvalidPath(format!(
86                "page_size must be a power of two: {page_size}"
87            )));
88        }
89        if ram_size == 0 {
90            return Err(SnapshotError::InvalidPath(
91                "dirty bitmap requires a non-empty RAM range".into(),
92            ));
93        }
94        if ram_start.checked_add(ram_size).is_none() {
95            return Err(SnapshotError::InvalidPath(format!(
96                "RAM range overflows: start={ram_start:#x}, size={ram_size:#x}"
97            )));
98        }
99        // Round up the page count, so a partial trailing page is still tracked.
100        let page_count = ram_size.div_ceil(page_size);
101        let word_count = usize::try_from(page_count.div_ceil(64)).map_err(|_| {
102            SnapshotError::InvalidPath(format!(
103                "bitmap word count exceeds usize::MAX (page_count={page_count})"
104            ))
105        })?;
106        let words: Vec<AtomicU64> = (0..word_count).map(|_| AtomicU64::new(0)).collect();
107        Ok(Self {
108            ram_start,
109            ram_size,
110            page_size,
111            page_count,
112            words: words.into_boxed_slice(),
113        })
114    }
115
116    /// Total tracked range start.
117    #[must_use]
118    pub const fn ram_start(&self) -> u64 {
119        self.ram_start
120    }
121
122    /// Total tracked range size in bytes.
123    #[must_use]
124    pub const fn ram_size(&self) -> u64 {
125        self.ram_size
126    }
127
128    /// Page size (in bytes).
129    #[must_use]
130    pub const fn page_size(&self) -> u64 {
131        self.page_size
132    }
133
134    /// Number of tracked pages.
135    #[must_use]
136    pub const fn page_count(&self) -> u64 {
137        self.page_count
138    }
139
140    /// `log2(page_size)` — used by callers that compute the FAR-to-bit-index shift.
141    #[must_use]
142    pub fn page_shift(&self) -> u32 {
143        self.page_size.trailing_zeros()
144    }
145
146    /// Compute the bit index of the page containing `addr`.
147    ///
148    /// Returns `None` if `addr` is outside the tracked range.
149    #[must_use]
150    pub fn page_index_of(&self, addr: u64) -> Option<u64> {
151        if addr < self.ram_start {
152            return None;
153        }
154        let offset = addr - self.ram_start;
155        if offset >= self.ram_size {
156            return None;
157        }
158        Some(offset >> self.page_shift())
159    }
160
161    /// Set the dirty bit for the page containing `addr`. Idempotent.
162    ///
163    /// Returns `true` if the bit transitioned from clean to dirty.
164    pub fn set_dirty(&self, addr: u64) -> bool {
165        let Some(page) = self.page_index_of(addr) else {
166            return false;
167        };
168        self.set_dirty_by_index(page)
169    }
170
171    /// Set a specific page's dirty bit. Same return semantics as [`Self::set_dirty`].
172    pub fn set_dirty_by_index(&self, page: u64) -> bool {
173        if page >= self.page_count {
174            return false;
175        }
176        let word_idx = (page / 64) as usize;
177        let bit = page % 64;
178        let mask = 1u64 << bit;
179        let prev = self.words[word_idx].fetch_or(mask, Ordering::Relaxed);
180        prev & mask == 0
181    }
182
183    /// Read whether `addr` is dirty (without clearing).
184    #[must_use]
185    pub fn is_dirty(&self, addr: u64) -> bool {
186        let Some(page) = self.page_index_of(addr) else {
187            return false;
188        };
189        self.is_dirty_by_index(page)
190    }
191
192    /// Same as [`Self::is_dirty`] but accepts a pre-computed page index.
193    #[must_use]
194    pub fn is_dirty_by_index(&self, page: u64) -> bool {
195        if page >= self.page_count {
196            return false;
197        }
198        let word_idx = (page / 64) as usize;
199        let bit = page % 64;
200        let word = self.words[word_idx].load(Ordering::Acquire);
201        word & (1u64 << bit) != 0
202    }
203
204    /// Atomically swap each word to zero, returning a list of dirty page indices.
205    ///
206    /// The acquire ordering on the swap pairs with the release on `set_dirty`'s
207    /// `fetch_or` (Relaxed is sufficient there because the only consumer is this
208    /// drain on the snapshot path, and the drain seq-cst-like ordering is owed by
209    /// the surrounding pause/resume vCPU dance, not by the bitmap itself; we use
210    /// Acquire here to synchronize-with any future code that relaxes the surrounding
211    /// vCPU coordination).
212    pub fn drain(&self) -> Vec<u64> {
213        let mut out = Vec::new();
214        for (word_idx, word) in self.words.iter().enumerate() {
215            let mut bits = word.swap(0, Ordering::Acquire);
216            while bits != 0 {
217                let bit = u64::from(bits.trailing_zeros());
218                bits &= bits - 1;
219                let page = (word_idx as u64) * 64 + bit;
220                if page < self.page_count {
221                    out.push(page);
222                }
223            }
224        }
225        out
226    }
227
228    /// Drain into a callback, avoiding the intermediate Vec allocation. Used by the
229    /// memory-file writer's hot path.
230    pub fn drain_into<F: FnMut(u64)>(&self, mut callback: F) {
231        for (word_idx, word) in self.words.iter().enumerate() {
232            let mut bits = word.swap(0, Ordering::Acquire);
233            while bits != 0 {
234                let bit = u64::from(bits.trailing_zeros());
235                bits &= bits - 1;
236                let page = (word_idx as u64) * 64 + bit;
237                if page < self.page_count {
238                    callback(page);
239                }
240            }
241        }
242    }
243
244    /// Fold all coarse-block dirty bits into a finer-grained bitmap.
245    ///
246    /// When the heuristic steps down a region from 2 MiB to 16 KiB, the coarse
247    /// bitmap's set bits each correspond to a 2 MiB block: every fine page in that
248    /// block becomes dirty. This preserves snapshot coverage across the transition.
249    ///
250    /// `fine` must cover the same `[ram_start, ram_start + ram_size)` and have a
251    /// strictly smaller `page_size`.
252    ///
253    /// # Errors
254    /// [`SnapshotError::InvalidPath`] if the geometry doesn't match.
255    pub fn fold_into_finer(&self, fine: &DirtyBitmap) -> Result<(), SnapshotError> {
256        if fine.ram_start != self.ram_start || fine.ram_size != self.ram_size {
257            return Err(SnapshotError::InvalidPath(
258                "fold_into_finer requires identical ranges".into(),
259            ));
260        }
261        if fine.page_size >= self.page_size {
262            return Err(SnapshotError::InvalidPath(
263                "fold_into_finer expects a strictly smaller page size".into(),
264            ));
265        }
266        if !self.page_size.is_multiple_of(fine.page_size) {
267            return Err(SnapshotError::InvalidPath(
268                "coarse page must be a multiple of fine page".into(),
269            ));
270        }
271        let ratio = self.page_size / fine.page_size;
272        for (word_idx, word) in self.words.iter().enumerate() {
273            let bits = word.load(Ordering::Acquire);
274            if bits == 0 {
275                continue;
276            }
277            let mut bits = bits;
278            while bits != 0 {
279                let bit = u64::from(bits.trailing_zeros());
280                bits &= bits - 1;
281                let coarse_page = (word_idx as u64) * 64 + bit;
282                if coarse_page >= self.page_count {
283                    break;
284                }
285                let fine_start = coarse_page * ratio;
286                let fine_end = (fine_start + ratio).min(fine.page_count);
287                for fine_page in fine_start..fine_end {
288                    fine.set_dirty_by_index(fine_page);
289                }
290            }
291        }
292        Ok(())
293    }
294}
295
296/// Per-region adaptive controller: tracks **per-2-MiB-block** fault rate and
297/// signals when a region should step down from 2 MiB to 16 KiB tracking.
298///
299/// Per [16-snapshots.md §
300/// 4.2](../../../specs/16-snapshots.md#42-bitmap-sizing-and-adaptive-heuristic) the threshold is
301/// "32 faults / 100 ms / 2 MiB block" — per-block, not aggregate across the region. A
302/// uniform-low-rate workload that touches many blocks must not trigger step-down; only a workload
303/// concentrated in *some* block must.
304#[derive(Debug)]
305pub struct AdaptiveController {
306    threshold: u32,
307    window: Duration,
308    /// One counter per coarse (2 MiB) block in the region. The hot-block detector
309    /// walks the slice on each tick.
310    block_faults: Box<[AtomicU32]>,
311    /// Sequence-of-window guard. The window itself is reset by the supervising
312    /// thread that polls [`Self::tick`].
313    last_reset: parking_lot::Mutex<Instant>,
314    granule: parking_lot::RwLock<TrackingGranule>,
315}
316
317impl AdaptiveController {
318    /// Build a controller with the spec-default threshold (32 faults / 100 ms /
319    /// 2 MiB block) for `block_count` 2-MiB blocks.
320    #[must_use]
321    pub fn new(block_count: u64) -> Self {
322        Self::with_threshold(block_count, DEFAULT_STEP_DOWN_THRESHOLD)
323    }
324
325    /// Build with a custom threshold (used by `[squib].snapshot.dirty_step_down_threshold`
326    /// in user config — § 4.2).
327    #[must_use]
328    pub fn with_threshold(block_count: u64, threshold: u32) -> Self {
329        let len = usize::try_from(block_count).unwrap_or(usize::MAX);
330        let blocks: Vec<AtomicU32> = (0..len).map(|_| AtomicU32::new(0)).collect();
331        Self {
332            threshold,
333            window: ADAPTIVE_WINDOW,
334            block_faults: blocks.into_boxed_slice(),
335            last_reset: parking_lot::Mutex::new(Instant::now()),
336            granule: parking_lot::RwLock::new(TrackingGranule::Coarse),
337        }
338    }
339
340    /// Active granule.
341    pub fn granule(&self) -> TrackingGranule {
342        *self.granule.read()
343    }
344
345    /// Increment the fault counter for the given coarse block. Called by the
346    /// dirty-tracking exit handler on every page fault re-grant.
347    ///
348    /// Out-of-range `block_idx` is silently ignored (defence-in-depth — the bitmap
349    /// rejects out-of-range pages too).
350    pub fn record_fault(&self, block_idx: u64) {
351        let Ok(idx) = usize::try_from(block_idx) else {
352            return;
353        };
354        let Some(slot) = self.block_faults.get(idx) else {
355            return;
356        };
357        slot.fetch_add(1, Ordering::Relaxed);
358    }
359
360    /// Check and (if we've crossed a window boundary) decide whether the granule
361    /// should step down. Returns the (possibly-changed) active granule.
362    ///
363    /// Step-down fires when **any** block's fault counter exceeded the threshold
364    /// in the last window. Step-down is one-way for the lifetime of one tracking
365    /// session; ramping back up to 2 MiB requires a clean checkpoint.
366    pub fn tick(&self) -> TrackingGranule {
367        let mut last = self.last_reset.lock();
368        let now = Instant::now();
369        if now.duration_since(*last) < self.window {
370            return self.granule();
371        }
372        // Atomically swap each block's counter to 0 and find the max in this
373        // window. The swap pairs with `fetch_add`'s Relaxed ordering — Acquire
374        // here is overkill but cheap and futureproof.
375        let mut max_in_window: u32 = 0;
376        for slot in &self.block_faults {
377            let count = slot.swap(0, Ordering::Acquire);
378            if count > max_in_window {
379                max_in_window = count;
380            }
381        }
382        *last = now;
383        drop(last);
384
385        // Promote to fine if any block was hot, never demote within a session.
386        let mut current = self.granule.write();
387        if max_in_window > self.threshold && *current == TrackingGranule::Coarse {
388            info!(
389                hottest_block_faults = max_in_window,
390                threshold = self.threshold,
391                "dirty tracker stepping down to fine granule (16 KiB)"
392            );
393            *current = TrackingGranule::Fine;
394        } else {
395            debug!(
396                hottest_block_faults = max_in_window,
397                granule = ?*current,
398                "dirty tracker tick"
399            );
400        }
401        *current
402    }
403}
404
405impl Default for AdaptiveController {
406    /// One-block default — useful only as a placeholder for tests that exercise
407    /// the granule API without driving real fault traffic. Production code calls
408    /// [`Self::new`] with the per-region block count.
409    fn default() -> Self {
410        Self::new(1)
411    }
412}
413
414/// One RAM region under dirty-tracking. Combines a bitmap and an adaptive
415/// controller; the bitmap is rebuilt at finer granularity if the controller signals
416/// step-down.
417#[derive(Debug)]
418pub struct TrackedRegion {
419    /// Active bitmap.
420    bitmap: parking_lot::RwLock<std::sync::Arc<DirtyBitmap>>,
421    /// Adaptive controller.
422    controller: AdaptiveController,
423    /// Page geometry (carries 16 KiB / 2 MiB constants).
424    geometry: PageGeometry,
425}
426
427impl TrackedRegion {
428    /// Build a region under coarse (2 MiB) tracking by default.
429    ///
430    /// # Errors
431    /// As [`DirtyBitmap::new`].
432    pub fn new(
433        ram_start: u64,
434        ram_size: u64,
435        geometry: PageGeometry,
436    ) -> Result<Self, SnapshotError> {
437        let bitmap = DirtyBitmap::new(ram_start, ram_size, geometry.tracking_page_default)?;
438        let block_count = bitmap.page_count();
439        Ok(Self {
440            bitmap: parking_lot::RwLock::new(std::sync::Arc::new(bitmap)),
441            controller: AdaptiveController::new(block_count),
442            geometry,
443        })
444    }
445
446    /// Construct with a custom step-down threshold.
447    ///
448    /// # Errors
449    /// As [`DirtyBitmap::new`].
450    pub fn with_threshold(
451        ram_start: u64,
452        ram_size: u64,
453        geometry: PageGeometry,
454        threshold: u32,
455    ) -> Result<Self, SnapshotError> {
456        let bitmap = DirtyBitmap::new(ram_start, ram_size, geometry.tracking_page_default)?;
457        let block_count = bitmap.page_count();
458        Ok(Self {
459            bitmap: parking_lot::RwLock::new(std::sync::Arc::new(bitmap)),
460            controller: AdaptiveController::with_threshold(block_count, threshold),
461            geometry,
462        })
463    }
464
465    /// Borrow the active bitmap (cheap `Arc` clone).
466    pub fn bitmap(&self) -> std::sync::Arc<DirtyBitmap> {
467        self.bitmap.read().clone()
468    }
469
470    /// Active granule.
471    pub fn granule(&self) -> TrackingGranule {
472        self.controller.granule()
473    }
474
475    /// Mark `addr` dirty (called from the vCPU exit handler) and record a fault
476    /// for the adaptive controller.
477    ///
478    /// The fault is attributed to the **coarse 2-MiB block** containing `addr`,
479    /// even when the active bitmap has stepped down to fine granularity — that
480    /// way the per-block counter has the same semantics as § 4.2 specifies it
481    /// across the granule transition.
482    ///
483    /// Returns `true` if the bit transitioned from clean to dirty.
484    pub fn mark_dirty(&self, addr: u64) -> bool {
485        let coarse_block_idx = addr
486            .checked_sub(self.bitmap.read().ram_start())
487            .map(|off| off / self.geometry.tracking_page_default);
488        if let Some(block_idx) = coarse_block_idx {
489            self.controller.record_fault(block_idx);
490        }
491        let bitmap = self.bitmap.read().clone();
492        bitmap.set_dirty(addr)
493    }
494
495    /// Tick the adaptive controller; promote to a finer-grained bitmap if needed.
496    ///
497    /// Called periodically (e.g. once per WFI loop or once per second by a
498    /// dedicated tick thread). Idempotent.
499    pub fn tick_adaptive(&self) -> TrackingGranule {
500        let new_granule = self.controller.tick();
501        let mut current = self.bitmap.write();
502        let active_size = current.page_size();
503        let target_size = new_granule.page_size(self.geometry);
504        if target_size != active_size {
505            let Ok(fine) = DirtyBitmap::new(current.ram_start(), current.ram_size(), target_size)
506            else {
507                return self.controller.granule();
508            };
509            // Best-effort fold; if it errors (it shouldn't for valid geometry) we
510            // keep the existing bitmap.
511            if current.fold_into_finer(&fine).is_ok() {
512                *current = std::sync::Arc::new(fine);
513            }
514        }
515        new_granule
516    }
517}
518
519#[cfg(test)]
520mod tests {
521    use super::*;
522
523    const REGION_BASE: u64 = 0x8000_0000;
524    const REGION_SIZE: u64 = 0x1000_0000; // 256 MiB
525    const PAGE_2M: u64 = 2 * 1024 * 1024;
526    const PAGE_16K: u64 = 16 * 1024;
527
528    #[test]
529    fn test_should_count_pages_with_2mib_default() {
530        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
531        assert_eq!(bm.page_count(), 128); // 256 MiB / 2 MiB
532    }
533
534    #[test]
535    fn test_should_track_a_dirty_page_at_an_aligned_address() {
536        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
537        let target = REGION_BASE + 5 * PAGE_2M;
538        assert!(!bm.is_dirty(target));
539        let transitioned = bm.set_dirty(target);
540        assert!(transitioned);
541        assert!(bm.is_dirty(target));
542
543        let again = bm.set_dirty(target);
544        assert!(!again, "set_dirty must report transition only once");
545    }
546
547    #[test]
548    fn test_should_round_an_unaligned_address_into_its_page() {
549        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
550        let aligned = REGION_BASE + 3 * PAGE_2M;
551        let unaligned = aligned + 0x1234;
552        bm.set_dirty(unaligned);
553        assert!(bm.is_dirty(aligned));
554    }
555
556    #[test]
557    fn test_should_reject_addresses_outside_the_tracked_range() {
558        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
559        assert!(!bm.set_dirty(REGION_BASE - 1));
560        assert!(!bm.set_dirty(REGION_BASE + REGION_SIZE));
561    }
562
563    #[test]
564    fn test_should_drain_dirty_pages_in_index_order() {
565        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
566        for &page in &[3u64, 100, 0, 64] {
567            bm.set_dirty_by_index(page);
568        }
569        let mut drained = bm.drain();
570        drained.sort_unstable();
571        assert_eq!(drained, vec![0, 3, 64, 100]);
572
573        // After drain the bitmap is clean.
574        assert!(!bm.is_dirty(REGION_BASE));
575        assert_eq!(bm.drain(), Vec::<u64>::new());
576    }
577
578    #[test]
579    fn test_should_drain_into_callback_without_allocation() {
580        let bm = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
581        bm.set_dirty_by_index(0);
582        bm.set_dirty_by_index(127);
583        let mut count = 0;
584        bm.drain_into(|_p| count += 1);
585        assert_eq!(count, 2);
586    }
587
588    #[test]
589    fn test_should_reject_non_power_of_two_page_size() {
590        let res = DirtyBitmap::new(REGION_BASE, REGION_SIZE, 3000);
591        assert!(matches!(res, Err(SnapshotError::InvalidPath(_))));
592    }
593
594    #[test]
595    fn test_should_reject_zero_size_range() {
596        let res = DirtyBitmap::new(REGION_BASE, 0, PAGE_2M);
597        assert!(matches!(res, Err(SnapshotError::InvalidPath(_))));
598    }
599
600    #[test]
601    fn test_should_fold_coarse_bits_into_fine_bitmap() {
602        let coarse = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
603        let fine = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_16K).unwrap();
604        coarse.set_dirty_by_index(2); // 2 MiB block 2 → fine pages 256..384
605        coarse.fold_into_finer(&fine).unwrap();
606        assert!(fine.is_dirty(REGION_BASE + 2 * PAGE_2M));
607        assert!(fine.is_dirty(REGION_BASE + 2 * PAGE_2M + 100 * PAGE_16K));
608        assert!(!fine.is_dirty(REGION_BASE + PAGE_2M));
609    }
610
611    #[test]
612    fn test_should_reject_fold_with_mismatched_ranges() {
613        let coarse = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
614        let fine = DirtyBitmap::new(REGION_BASE + PAGE_16K, REGION_SIZE, PAGE_16K).unwrap();
615        let res = coarse.fold_into_finer(&fine);
616        assert!(matches!(res, Err(SnapshotError::InvalidPath(_))));
617    }
618
619    #[test]
620    fn test_should_reject_fold_when_target_is_not_finer() {
621        let a = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
622        let b = DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_2M).unwrap();
623        let res = a.fold_into_finer(&b);
624        assert!(matches!(res, Err(SnapshotError::InvalidPath(_))));
625    }
626
627    #[test]
628    fn test_should_promote_to_fine_when_one_block_exceeds_threshold() {
629        let region =
630            TrackedRegion::with_threshold(REGION_BASE, REGION_SIZE, PageGeometry::APPLE_SILICON, 2)
631                .unwrap();
632        assert_eq!(region.granule(), TrackingGranule::Coarse);
633        // 10 faults all in one 2 MiB block — exceeds threshold (2) for that
634        // block.
635        for _ in 0..10 {
636            region.mark_dirty(REGION_BASE);
637        }
638        // Fast-forward: pretend the window has elapsed.
639        {
640            let mut last = region.controller.last_reset.lock();
641            *last = Instant::now()
642                .checked_sub(ADAPTIVE_WINDOW * 2)
643                .expect("test fixture: monotonic clock starts above the window length");
644        }
645        let g = region.tick_adaptive();
646        assert_eq!(g, TrackingGranule::Fine);
647        let bm = region.bitmap();
648        assert_eq!(bm.page_size(), PAGE_16K);
649    }
650
651    #[test]
652    fn test_should_not_promote_when_faults_are_spread_uniformly_across_blocks() {
653        // 64 faults across 64 different 2 MiB blocks — each block sees one
654        // fault. Threshold is 4 (per block), so step-down must NOT fire even
655        // though aggregate fault count (64) is far above threshold. This is
656        // the per-block-vs-aggregate regression the Phase 5 review caught.
657        let region =
658            TrackedRegion::with_threshold(REGION_BASE, REGION_SIZE, PageGeometry::APPLE_SILICON, 4)
659                .unwrap();
660        assert_eq!(region.granule(), TrackingGranule::Coarse);
661        for block in 0..64u64 {
662            region.mark_dirty(REGION_BASE + block * PAGE_2M);
663        }
664        {
665            let mut last = region.controller.last_reset.lock();
666            *last = Instant::now()
667                .checked_sub(ADAPTIVE_WINDOW * 2)
668                .expect("test fixture: monotonic clock starts above the window length");
669        }
670        let g = region.tick_adaptive();
671        assert_eq!(
672            g,
673            TrackingGranule::Coarse,
674            "step-down must use per-block counts, not aggregate fault count"
675        );
676    }
677
678    #[test]
679    fn test_should_be_thread_safe_under_concurrent_set_dirty() {
680        // 8 threads × 1000 sets at scattered addresses.
681        let bm = std::sync::Arc::new(DirtyBitmap::new(REGION_BASE, REGION_SIZE, PAGE_16K).unwrap());
682        let mut handles = vec![];
683        for t in 0..8u64 {
684            let bm = bm.clone();
685            handles.push(std::thread::spawn(move || {
686                for i in 0..1000u64 {
687                    let page_idx = t * 1000 + i;
688                    let addr = REGION_BASE + page_idx * PAGE_16K;
689                    if addr < REGION_BASE + REGION_SIZE {
690                        bm.set_dirty(addr);
691                    }
692                }
693            }));
694        }
695        for h in handles {
696            h.join().unwrap();
697        }
698        let drained = bm.drain();
699        assert_eq!(drained.len(), 8000);
700    }
701}