openentropy-core 0.12.3

//! Kernel scheduler preemption boundary detection via CNTVCT_EL0.
//!
//! The ARM64 virtual system counter (`CNTVCT_EL0`) is a 64-bit hardware
//! register that increments at a fixed 24 MHz rate. Reading it with
//! consecutive `MRS` instructions in a tight loop normally advances by
//! 0 ticks (both reads complete within the same 41.67 ns tick period).
//!
//! ## The Preemption Signal
//!
//! Occasionally, the kernel's **scheduler interrupt** fires between two
//! consecutive `MRS` reads. When this happens, the timer jumps forward
//! by a large, irregular amount — the exact time the kernel spent
//! dispatching another thread before returning control to ours.
//!
//! Measured on M4 Mac mini (10,000 consecutive reads):
//! - 84.3% of pairs: Δ = 0 (same tick, below 24MHz resolution)
//! - 15.7% of pairs: Δ > 0 (timer advanced, interrupt boundary)
//! - Maximum observed Δ: **4,625 ticks (193 µs)**
//!
//! ## Why This Is Entropy
//!
//! Each timer jump encodes:
//!
//! 1. **Which interrupt fired**: Different interrupt sources have different
//!    handler execution times. The NVMe interrupt handler is faster than
//!    the USB stack. The timer quantum interrupt is faster than an Ethernet
//!    receive burst. The jump size reveals the interrupt type.
//!
//! 2. **Runqueue depth at context switch**: If a higher-priority thread
//!    was waiting, the kernel dispatches it and the preemption window is
//!    shorter. A long preemption means the kernel did significant bookkeeping.
//!
//! 3. **Kernel memory allocator state**: Some interrupt handlers allocate
//!    memory (mbuf, sk_buff equivalent). Lock contention on the allocator
//!    increases preemption time.
//!
//! 4. **Network/disk activity from other processes**: Network packet receive
//!    and NVMe completion callbacks fire as IRQs. Their timing reflects
//!    exactly when remote packets arrive — which depends on network latency
//!    to external hosts.
//!
//! ## "CIA Backdoor" Analog
//!
//! This source reads **kernel scheduler state** and **hardware interrupt
//! timing** from EL0 (userspace) using only a single ARM read instruction.
//! No system call. No privileged code. No permissions required.
//!
//! The jump sizes are genuine physical entropy: they encode thermal noise
//! in network PHY clocks, mechanical disk seek time, USB clock recovery
//! jitter, and the nondeterministic dispatch of concurrent OS threads.
//!
//! ## CNTVCT vs mach_absolute_time
//!
//! `mach_absolute_time()` wraps `CNTVCT_EL0` but adds ~10ns of overhead
//! from the C function call. For tight-loop timing, direct `MRS` gives
//! cleaner preemption detection: consecutive reads with overhead <1 tick.

use crate::source::{EntropySource, Platform, SourceCategory, SourceInfo};

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
use crate::sources::helpers::xor_fold_u64;

static PREEMPTION_BOUNDARY_INFO: SourceInfo = SourceInfo {
    name: "preemption_boundary",
    description: "Kernel scheduler preemption timing via consecutive CNTVCT_EL0 reads",
    physics: "Reads the ARM64 virtual counter in a tight loop. Consecutive reads normally \
              return the same tick (84% of pairs at 24MHz). When the kernel's scheduler \
              interrupt fires between two reads, the counter jumps forward by an irregular \
              amount (measured max: 4,625 ticks = 193\u{00b5}s). Jump magnitude encodes: which \
              IRQ fired (different handlers take different time), runqueue depth at context \
              switch, kernel memory allocator lock contention, and network/disk interrupt \
              latency from remote hosts. Reads kernel scheduler state from EL0 with \
              zero syscall overhead via a single MRS instruction.",
    category: SourceCategory::Scheduling,
    platform: Platform::MacOS,
    requirements: &[],
    entropy_rate_estimate: 2.0,
    composite: false,
    is_fast: false,
};

/// Entropy source from kernel scheduler preemption boundary timing.
pub struct PreemptionBoundarySource;

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
impl EntropySource for PreemptionBoundarySource {
    fn info(&self) -> &SourceInfo {
        &PREEMPTION_BOUNDARY_INFO
    }

    fn is_available(&self) -> bool {
        true
    }

    fn collect(&self, n_samples: usize) -> Vec<u8> {
        // Strategy:
        // 1. Read CNTVCT in a very tight loop (~16K reads).
        // 2. Collect all non-zero deltas (preemption events).
        // 3. Use the jump sizes as entropy input.
        //
        // The jump rate is ~15.7% at 24MHz, so 16K reads gives ~2,500 events.
        // Each event contributes ~8-12 bits of entropy (range 1–4625 ticks).

        let loop_count = (n_samples * 8).max(16_384);
        let mut preemption_times: Vec<u64> = Vec::with_capacity(loop_count / 6);

        let mut prev: u64;
        unsafe {
            core::arch::asm!(
                "mrs {v}, cntvct_el0",
                v = out(reg) prev,
                options(nostack, nomem),
            );
        }

        for _ in 0..loop_count {
            let cur: u64;
            unsafe {
                core::arch::asm!(
                    "mrs {v}, cntvct_el0",
                    v = out(reg) cur,
                    options(nostack, nomem),
                );
            }

            let delta = cur.wrapping_sub(prev);

            // Non-zero delta = timer advanced = interrupt/preemption boundary.
            // Cap at 10M ticks (~416ms) to reject suspend/resume events.
            if delta > 0 && delta < 10_000_000 {
                preemption_times.push(delta);
            }

            prev = cur;
        }

        if preemption_times.is_empty() {
            // No preemption events observed — return empty to signal collection
            // failure rather than emitting predictable CNTVCT counter bytes.
            return Vec::new();
        }

        // Preemption jumps are sparse events (not a continuous timing stream),
        // so extract_timing_entropy's delta pipeline is wrong here.
        // Instead, XOR-fold each jump magnitude directly and XOR consecutive
        // pairs for mixing.
        let mut out = Vec::with_capacity(n_samples);
        for pair in preemption_times.windows(2) {
            out.push(xor_fold_u64(pair[0] ^ pair[1]));
            if out.len() >= n_samples {
                break;
            }
        }
        // If we still need more, fold individual values.
        if out.len() < n_samples {
            for &t in &preemption_times {
                out.push(xor_fold_u64(t));
                if out.len() >= n_samples {
                    break;
                }
            }
        }
        out.truncate(n_samples);
        out
    }
}

#[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
impl EntropySource for PreemptionBoundarySource {
    fn info(&self) -> &SourceInfo {
        &PREEMPTION_BOUNDARY_INFO
    }
    fn is_available(&self) -> bool {
        false
    }
    fn collect(&self, _n_samples: usize) -> Vec<u8> {
        Vec::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn info() {
        let src = PreemptionBoundarySource;
        assert_eq!(src.info().name, "preemption_boundary");
        assert!(matches!(src.info().category, SourceCategory::Scheduling));
        assert_eq!(src.info().platform, Platform::MacOS);
        assert!(!src.info().composite);
    }

    #[test]
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    fn is_available_on_apple_silicon() {
        assert!(PreemptionBoundarySource.is_available());
    }

    #[test]
    #[ignore]
    fn collects_preemption_events() {
        let data = PreemptionBoundarySource.collect(32);
        assert!(!data.is_empty());
    }
}