// cjc_runtime/runtime_policy.rs

//! Runtime Policy Layer — deterministic, thermally-bounded execution policy.
//!
//! This module is the "green compute" control surface for CJC-Lang. It lets a
//! run declare *how much machine* it is willing to use — thread caps, batch
//! sizing, audit depth — and exposes a **deterministic** energy estimate so a
//! program can reason about joules-per-result instead of merely wall-clock
//! seconds.
//!
//! The guiding philosophy: do not let CJC-Lang blindly saturate the CPU. Use
//! *deterministic bounded execution*. Thermal/energy limits are made explicit
//! and deterministic rather than left to the OS scheduler.
//!
//! # Builtins (registered in [`crate::builtins`])
//!
//! Policy query / mutate:
//! - `runtime_policy_thermal_mode() -> String`
//! - `runtime_policy_set_thermal_mode(mode: String) -> String`
//! - `runtime_policy_threads() -> Int`           (resolved effective cap)
//! - `runtime_policy_set_threads(n: Int) -> Int`
//! - `runtime_policy_batch_size() -> Int`
//! - `runtime_policy_set_batch_size(n: Int) -> Int`
//! - `runtime_policy_audit_mode() -> String`
//! - `runtime_policy_set_audit_mode(mode: String) -> String`
//! - `runtime_policy_numeric_mode() -> String`
//! - `runtime_policy_set_numeric_mode(mode: String) -> String`
//! - `runtime_policy_reset() -> Int`
//! - `runtime_policy_summary() -> String`
//!
//! Energy model:
//! - `energy_estimate(flops: Int, bytes: Int) -> Float`   (joules)
//! - `energy_per_flop() -> Float`
//! - `energy_per_byte() -> Float`
//!
//! # Determinism story
//!
//! Two invariants make this layer safe under Prime Directive #3
//! (same seed = bit-identical output):
//!
//! 1. **Thread count never changes results.** The parallel kernels in
//!    [`crate::tensor`] reduce with Kahan / [`crate::accumulator`] binned
//!    summation over a *fixed chunk order*, so the numeric output is identical
//!    regardless of how many rayon workers are live. The thermal mode and
//!    thread cap therefore move *only* the performance/heat axis, never the
//!    answer axis. Capping threads is pure "deterministic bounded execution".
//!
//! 2. **Energy is estimated from workload counts, never from wall time.**
//!    [`energy_estimate_joules`] is a pure function of integer FLOP and byte
//!    counts times fixed documented constants. Wall-clock time is explicitly
//!    *not* an input, because it varies run-to-run and would poison
//!    determinism. Same program + same seed → same FLOP count → same joule
//!    estimate, bit-for-bit. The two multiplies are kept in separate `let`
//!    bindings so the compiler cannot contract them into a single FMA (the
//!    same no-FMA discipline the SIMD kernels follow).
//!
//! The policy itself lives in a thread-local `RefCell<RuntimePolicy>`, mirroring
//! the [`crate::profile`] counter sink. The interpreter thread reads and writes
//! it. The global rayon pool is left at its default width; the thermal cap is
//! enforced per operation by [`run_parallel`], which installs sustained work
//! into a cap-sized pool that is built at most once per process
//! ([`apply_thread_cap`] pre-builds it; the CLI calls this at startup). No RNG
//! is touched. No `HashMap` is used.

use std::cell::RefCell;

// ── Mode enums ───────────────────────────────────────────────────────────

/// Determinism guarantee level. `Strict` is the only mode that ships today;
/// `Relaxed` is reserved so the field exists in the policy surface without
/// implying it weakens any current guarantee.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Determinism {
    /// Bit-identical output across runs and platforms (the default).
    Strict,
    /// Reserved — does not currently relax any guarantee.
    Relaxed,
}

impl Determinism {
    /// Canonical lowercase name of the level (`"strict"` / `"relaxed"`).
    /// Round-trips through [`Determinism::from_str`].
    pub fn as_str(self) -> &'static str {
        match self {
            Determinism::Strict => "strict",
            Determinism::Relaxed => "relaxed",
        }
    }

    /// Parse a canonical name. Matching is exact (case-sensitive, no
    /// trimming); any other string yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "strict" => Some(Determinism::Strict),
            "relaxed" => Some(Determinism::Relaxed),
            _ => None,
        }
    }
}

/// Floating-point reduction strategy. Maps to the existing accumulator family;
/// every variant preserves determinism (none enables FMA or random ordering).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumericMode {
    /// Compensated (Kahan) summation — the default scalar strategy.
    Kahan,
    /// Order-invariant binned accumulator — best for parallel reductions.
    Binned,
    /// Fixed pairwise reduction tree — deterministic divide-and-conquer.
    FixedTree,
}

impl NumericMode {
    /// Canonical lowercase name (`"kahan"`, `"binned"`, `"fixed-tree"`).
    /// Round-trips through [`NumericMode::from_str`].
    pub fn as_str(self) -> &'static str {
        match self {
            NumericMode::Kahan => "kahan",
            NumericMode::Binned => "binned",
            NumericMode::FixedTree => "fixed-tree",
        }
    }

    /// Parse a mode name. Besides the canonical `"fixed-tree"`, the spellings
    /// `"fixedtree"` and `"fixed_tree"` are accepted. Matching is exact
    /// (case-sensitive, no trimming); anything else yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "kahan" => Some(NumericMode::Kahan),
            "binned" => Some(NumericMode::Binned),
            "fixed-tree" | "fixedtree" | "fixed_tree" => Some(NumericMode::FixedTree),
            _ => None,
        }
    }
}

/// Audit/forensics depth. Controls how much *cold-path* work (logs, Merkle
/// trees, full lineage) runs alongside the hot numerical path. Deeper modes
/// trade speed and energy for traceability; they never change numeric output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AuditMode {
    /// Cheapest — aggregate summaries only.
    Summary,
    /// Per-operation audit records.
    Full,
    /// Maximum traceability (full lineage + hashes). Most expensive.
    Forensic,
}

impl AuditMode {
    /// Canonical lowercase name (`"summary"`, `"full"`, `"forensic"`).
    /// Round-trips through [`AuditMode::from_str`].
    pub fn as_str(self) -> &'static str {
        match self {
            AuditMode::Summary => "summary",
            AuditMode::Full => "full",
            AuditMode::Forensic => "forensic",
        }
    }

    /// Parse a canonical name. Matching is exact (case-sensitive, no
    /// trimming); any other string yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "summary" => Some(AuditMode::Summary),
            "full" => Some(AuditMode::Full),
            "forensic" => Some(AuditMode::Forensic),
            _ => None,
        }
    }
}

156/// Thermal/energy execution profile. This is the headline "green" knob: it
157/// bounds how aggressively a run uses the CPU so a laptop does not cook itself
158/// sustaining turbo across all cores.
159#[derive(Clone, Copy, Debug, PartialEq, Eq)]
160pub enum ThermalMode {
161    /// Gentle on the machine — quarter of cores, small batches, summary audit.
162    Cool,
163    /// Laptop-safe default — half the cores, normal batches, full audit.
164    Balanced,
165    /// Benchmark mode — all cores, large batches, minimal audit overhead.
166    MaxPerf,
167}
168
169impl ThermalMode {
170    pub fn as_str(self) -> &'static str {
171        match self {
172            ThermalMode::Cool => "cool",
173            ThermalMode::Balanced => "balanced",
174            ThermalMode::MaxPerf => "max-perf",
175        }
176    }
177
178    pub fn from_str(s: &str) -> Option<Self> {
179        match s {
180            "cool" => Some(ThermalMode::Cool),
181            "balanced" => Some(ThermalMode::Balanced),
182            "max-perf" | "maxperf" | "max_perf" => Some(ThermalMode::MaxPerf),
183            _ => None,
184        }
185    }
186
187    /// Preset batch size for this thermal mode. Smaller batches under `Cool`
188    /// keep sustained heat spikes down; larger batches under `MaxPerf` favor
189    /// throughput.
190    pub fn preset_batch_size(self) -> usize {
191        match self {
192            ThermalMode::Cool => 32,
193            ThermalMode::Balanced => 128,
194            ThermalMode::MaxPerf => 512,
195        }
196    }
197
198    /// Preset audit depth for this thermal mode. `Cool` and `MaxPerf` both pick
199    /// `Summary` (minimal cold-path overhead — one to be gentle, the other to
200    /// keep benchmark timing clean); `Balanced` keeps `Full` audit for normal
201    /// operation.
202    pub fn preset_audit_mode(self) -> AuditMode {
203        match self {
204            ThermalMode::Cool => AuditMode::Summary,
205            ThermalMode::Balanced => AuditMode::Full,
206            ThermalMode::MaxPerf => AuditMode::Summary,
207        }
208    }
209}
210
// ── Energy model ──────────────────────────────────────────────────────────

/// Estimated energy cost of a single double-precision FLOP, in joules.
///
/// This is an order-of-magnitude *representative* figure for a modern CPU
/// (~100 pJ per useful FLOP once issue/fetch overhead is amortized), not a
/// measured value for any specific chip. It exists so programs can compute a
/// *relative, deterministic* "joules per result" metric. Treat the absolute
/// number as an estimate; treat ratios between two CJC-Lang runs as meaningful.
pub const ENERGY_PER_FLOP_JOULES: f64 = 1.0e-10;

/// Estimated energy cost of moving one byte through the memory hierarchy, in
/// joules (~100 pJ/byte, representative of DRAM traffic). Memory traffic is a
/// dominant energy consumer, which is why TidyView's sparse/dictionary-encoded
/// layouts matter for the green story. Same caveat as [`ENERGY_PER_FLOP_JOULES`]:
/// an estimate for relative comparison, not a calibrated absolute.
pub const ENERGY_PER_BYTE_JOULES: f64 = 1.0e-10;

/// Deterministic energy estimate in joules for a workload of `flops`
/// floating-point operations and `bytes` of memory traffic.
///
/// Pure function of the (non-negative) integer counts and the two fixed
/// constants above — **no wall-clock time, no RNG, no FMA**. Negative inputs
/// are clamped to zero so the result is always non-negative and finite.
///
/// The counts are converted with `as f64`, so counts above 2^53 are rounded
/// to the nearest representable double before being scaled.
pub fn energy_estimate_joules(flops: i64, bytes: i64) -> f64 {
    let f = flops.max(0) as f64;
    let b = bytes.max(0) as f64;
    // Two separately rounded products followed by one add: the bindings keep
    // the no-FMA intent explicit (no `mul_add`), preserving bit-identical
    // results.
    let flop_energy = f * ENERGY_PER_FLOP_JOULES;
    let byte_energy = b * ENERGY_PER_BYTE_JOULES;
    flop_energy + byte_energy
}

245// ── The policy struct ───────────────────────────────────────────────────────
246
247/// A fully-resolved runtime execution policy.
248///
249/// `max_threads == 0` means "auto" — resolve the effective cap from
250/// [`ThermalMode`] and the detected core count via [`effective_threads`].
251#[derive(Clone, Copy, Debug, PartialEq, Eq)]
252pub struct RuntimePolicy {
253    pub determinism: Determinism,
254    pub numeric_mode: NumericMode,
255    pub thermal_mode: ThermalMode,
256    /// Hard thread cap; `0` = auto (derive from `thermal_mode`).
257    pub max_threads: usize,
258    /// Advisory batch size for chunked workloads (training, ABNG, TidyView).
259    pub batch_size: usize,
260    pub audit_mode: AuditMode,
261    /// Race-to-idle scheduling: when `true`, parallel work runs at *full* width
262    /// for a short burst and only throttles to the thermal cap once load is
263    /// *sustained* (see [`run_parallel`]). Recovers burst performance while
264    /// keeping the sustained thermal bound. When `false`, the cap applies
265    /// uniformly (a fixed, reproducible schedule). Moot when the cap equals the
266    /// core count (`max-perf`). Never affects results — only the schedule.
267    pub adaptive: bool,
268}
269
270impl RuntimePolicy {
271    /// Build the policy implied by a thermal profile: the profile sets the
272    /// thermal mode, its preset batch size, its preset audit depth, and leaves
273    /// the thread count on `auto` (0). Determinism stays `Strict` and the
274    /// numeric mode stays `Kahan` — those are orthogonal to thermal behavior.
275    pub fn for_thermal_mode(mode: ThermalMode) -> Self {
276        Self {
277            determinism: Determinism::Strict,
278            numeric_mode: NumericMode::Kahan,
279            thermal_mode: mode,
280            max_threads: 0,
281            batch_size: mode.preset_batch_size(),
282            audit_mode: mode.preset_audit_mode(),
283            adaptive: true,
284        }
285    }
286
287    /// One-line, deterministic, BTreeMap-free summary for reporting.
288    pub fn summary(&self) -> String {
289        format!(
290            "runtime_policy: thermal={} threads={} batch={} audit={} numeric={} determinism={} adaptive={}",
291            self.thermal_mode.as_str(),
292            effective_threads(self, detect_cores()),
293            self.batch_size,
294            self.audit_mode.as_str(),
295            self.numeric_mode.as_str(),
296            self.determinism.as_str(),
297            self.adaptive,
298        )
299    }
300}
301
302impl Default for RuntimePolicy {
303    /// The laptop-safe default is `Balanced`, not "max all cores forever".
304    fn default() -> Self {
305        Self::for_thermal_mode(ThermalMode::Balanced)
306    }
307}
308
// ── Thread resolution ─────────────────────────────────────────────────────

/// Detected logical core count (`>= 1`). Falls back to 1 if the platform
/// cannot report parallelism. This is the only place we read machine topology.
///
/// Each call queries `std::thread::available_parallelism` afresh; the value is
/// not cached here.
pub fn detect_cores() -> usize {
    std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(1)
}

319/// Resolve the effective thread cap for a policy given a detected core count.
320///
321/// An explicit `max_threads > 0` wins (clamped to the detected cores so we
322/// never over-subscribe). Otherwise the thermal mode derives the cap:
323/// `Cool` ≈ quarter of cores, `Balanced` ≈ half, `MaxPerf` = all. The result
324/// is always in `1..=cores`. Pure function — deterministic given its inputs.
325pub fn effective_threads(policy: &RuntimePolicy, detected_cores: usize) -> usize {
326    let cores = detected_cores.max(1);
327    if policy.max_threads > 0 {
328        policy.max_threads.min(cores)
329    } else {
330        match policy.thermal_mode {
331            ThermalMode::Cool => (cores / 4).max(1),
332            ThermalMode::Balanced => (cores / 2).max(1),
333            ThermalMode::MaxPerf => cores,
334        }
335    }
336}
337
/// Pre-warm the throttle pool for a thread cap. Returns the live worker count.
///
/// **Phase 2 (race-to-idle) changed the model.** The global rayon pool is left
/// at its default (all cores) so that *bursts* can use full parallelism; the
/// thermal cap is enforced per-operation by [`run_parallel`], which `install`s
/// sustained work into a smaller, cached pool. This call just pre-builds that
/// capped pool so the first sustained op doesn't pay the build cost. `n == 0`
/// or `n >= cores` means "no cap" and builds nothing.
///
/// The returned value is `rayon::current_num_threads()` as seen by the caller,
/// not the size of the capped pool.
#[cfg(feature = "parallel")]
pub fn apply_thread_cap(n: usize) -> usize {
    let full = detect_cores();
    if n > 0 && n < full {
        let _ = capped_pool(n); // pre-build; OnceLock fixes the size
    }
    rayon::current_num_threads()
}

/// No-parallel fallback: there is exactly one thread of execution, so the
/// requested cap is ignored and `1` is always returned.
#[cfg(not(feature = "parallel"))]
pub fn apply_thread_cap(_n: usize) -> usize {
    1
}

// ── Race-to-idle adaptive scheduling (Phase 2) ─────────────────────────────

/// Burst budget: parallel work runs at full width for this long before a
/// *sustained* workload is throttled to the thermal cap. Sized to the thermal
/// time constant — sub-second bursts don't heat-soak the package, so they run
/// free; only multi-second sustained load (the kind that actually throttles a
/// laptop) gets capped.
#[cfg(feature = "parallel")]
const SUSTAIN_WINDOW: std::time::Duration = std::time::Duration::from_millis(2000);

/// An idle gap longer than this resets the burst timer, so a fresh burst after
/// a pause again gets full width.
#[cfg(feature = "parallel")]
const IDLE_RESET: std::time::Duration = std::time::Duration::from_millis(500);

/// Per-thread bookkeeping for the race-to-idle decision.
#[cfg(feature = "parallel")]
#[derive(Default)]
struct AdaptiveState {
    /// When the current burst began; `None` until the first op, or after an
    /// idle gap cleared it.
    burst_start: Option<std::time::Instant>,
    /// Timestamp of the most recent op, used to detect idle gaps.
    last_op: Option<std::time::Instant>,
}

#[cfg(feature = "parallel")]
thread_local! {
    /// Burst timer for the calling thread; each thread tracks its own bursts.
    static ADAPTIVE: RefCell<AdaptiveState> = RefCell::new(AdaptiveState::default());
}

/// Pure burst/sustained decision — separated from the clock so it is unit
/// testable with explicit instants. Returns `true` (throttle) once the current
/// burst has been active for at least `window`; an idle gap beyond `idle`
/// starts a fresh burst. Mutates `state` to record the burst start / last op.
#[cfg(feature = "parallel")]
fn decide_sustained(
    state: &mut AdaptiveState,
    now: std::time::Instant,
    window: std::time::Duration,
    idle: std::time::Duration,
) -> bool {
    // A pause strictly longer than `idle` since the previous op ends the old
    // burst, so this op becomes the start of a new one.
    if let Some(last) = state.last_op {
        if now.duration_since(last) > idle {
            state.burst_start = None;
        }
    }
    // First op of a burst stamps the start; later ops reuse the stamp.
    let start = *state.burst_start.get_or_insert(now);
    state.last_op = Some(now);
    now.duration_since(start) >= window
}

/// Clock-driven wrapper around [`decide_sustained`]: samples `Instant::now()`
/// and evaluates it against this thread's burst timer using
/// [`SUSTAIN_WINDOW`] and [`IDLE_RESET`]. Each call counts as an op and
/// updates the timer.
#[cfg(feature = "parallel")]
fn is_sustained_now() -> bool {
    let now = std::time::Instant::now();
    ADAPTIVE.with(|s| decide_sustained(&mut s.borrow_mut(), now, SUSTAIN_WINDOW, IDLE_RESET))
}

/// Reset the burst timer (e.g. between test runs on a reused thread). Only
/// the calling thread's timer is cleared.
#[cfg(feature = "parallel")]
fn reset_adaptive_state() {
    ADAPTIVE.with(|s| *s.borrow_mut() = AdaptiveState::default());
}

/// No-parallel fallback: there is no burst timer, so this is a no-op.
#[cfg(not(feature = "parallel"))]
fn reset_adaptive_state() {}

/// The cap-sized throttle pool. `OnceLock` fixes the size at first use (the cap
/// is stable per process — set once by the CLI at startup). `None` if rayon
/// failed to build it, in which case [`run_parallel`] degrades to no throttle.
#[cfg(feature = "parallel")]
static CAPPED_POOL: std::sync::OnceLock<Option<rayon::ThreadPool>> = std::sync::OnceLock::new();

/// Return the process-wide throttle pool, building it with `cap` threads on
/// the first call.
///
/// `cap` is only honored by the call that initializes the pool; every later
/// call returns that same pool (or the same `None` after a failed build)
/// regardless of the `cap` passed.
///
/// NOTE(review): if the effective cap changes after first use (e.g. through
/// `set_threads` / `set_thermal_mode`), throttled work keeps running at the
/// original width — confirm this is the intended behavior.
#[cfg(feature = "parallel")]
fn capped_pool(cap: usize) -> Option<&'static rayon::ThreadPool> {
    CAPPED_POOL
        .get_or_init(|| rayon::ThreadPoolBuilder::new().num_threads(cap).build().ok())
        .as_ref()
}

/// Run `work` under the active thermal policy, throttling parallelism to the
/// thermal cap only when appropriate.
///
/// Wrap a parallel kernel's body in this. The rules:
/// - cap ≥ cores (`max-perf` / `--threads ≥ N`): run on the full global pool.
/// - already inside a rayon worker (nested call): run inline on the current
///   pool — never nest-`install` (avoids surprising thread fan-out / blocking).
/// - `adaptive` (default): full width during a burst, throttle once load is
///   sustained ([`decide_sustained`]).
/// - `adaptive == false`: always throttle to the cap (fixed, reproducible).
///
/// Throttling means `install`-ing into the [`capped_pool`], inside which
/// `rayon::current_num_threads()` reports the cap — so existing chunkers that
/// size their work to the live thread count auto-scale. **Determinism is
/// preserved:** the choice of pool changes only how many bands/rows run
/// concurrently, never the per-element math (reductions keep their fixed
/// within-row order), so output is bit-identical regardless of this decision.
///
/// The policy is read from the calling thread's thread-local cell, and the
/// burst timer is likewise per-thread.
#[cfg(feature = "parallel")]
pub fn run_parallel<R, F>(work: F) -> R
where
    R: Send,
    F: FnOnce() -> R + Send,
{
    // Nested call from within a pool worker: run inline, don't re-install.
    if rayon::current_thread_index().is_some() {
        return work();
    }
    let policy = get();
    // Query the topology once so the cap and the comparison below are
    // guaranteed to use the same core count.
    let cores = detect_cores();
    let cap = effective_threads(&policy, cores);
    if cap >= cores {
        return work(); // no cap — full global pool
    }
    // Fixed mode always throttles. Adaptive mode consults (and advances) the
    // burst timer; `||` short-circuits so the timer is untouched in fixed mode.
    let throttle = !policy.adaptive || is_sustained_now();
    if !throttle {
        return work(); // burst — full global pool
    }
    match capped_pool(cap) {
        Some(pool) => pool.install(work),
        // Pool could not be built: degrade to running unthrottled.
        None => work(),
    }
}

/// No-parallel fallback: run `work` inline on the calling thread and return
/// its result. The policy is not consulted.
#[cfg(not(feature = "parallel"))]
pub fn run_parallel<R, F>(work: F) -> R
where
    F: FnOnce() -> R,
{
    work()
}

488// ── Thread-local policy state ─────────────────────────────────────────────
489
490thread_local! {
491    /// The active runtime policy for this thread. The interpreter runs on one
492    /// thread and reads/writes this; rayon workers honor the thread cap via the
493    /// global pool, not via this cell.
494    pub(crate) static POLICY: RefCell<RuntimePolicy> = RefCell::new(RuntimePolicy::default());
495}
496
497/// Snapshot the current policy.
498pub fn get() -> RuntimePolicy {
499    POLICY.with(|c| *c.borrow())
500}
501
502/// Reset to the laptop-safe `Balanced` default. Tests and the REPL call this
503/// to avoid cross-run leakage on a reused thread. Also clears the race-to-idle
504/// burst timer so a fresh run starts in the burst regime.
505pub fn reset() {
506    POLICY.with(|c| *c.borrow_mut() = RuntimePolicy::default());
507    reset_adaptive_state();
508}
509
510/// Adopt a thermal profile wholesale: sets the thermal mode plus its preset
511/// batch size and audit depth, and resets the thread cap to `auto`. Explicit
512/// per-field setters called *after* this win (the CLI applies the profile
513/// first, then individual `--threads` / `--batch-size` / `--audit-mode` overrides).
514pub fn set_thermal_mode(mode: ThermalMode) {
515    POLICY.with(|c| {
516        let mut p = c.borrow_mut();
517        p.thermal_mode = mode;
518        p.batch_size = mode.preset_batch_size();
519        p.audit_mode = mode.preset_audit_mode();
520        p.max_threads = 0;
521    });
522}
523
524/// Set an explicit thread cap (`0` = auto).
525pub fn set_threads(n: usize) {
526    POLICY.with(|c| c.borrow_mut().max_threads = n);
527}
528
529/// Set the advisory batch size.
530pub fn set_batch_size(n: usize) {
531    POLICY.with(|c| c.borrow_mut().batch_size = n);
532}
533
534/// Set the audit depth.
535pub fn set_audit_mode(mode: AuditMode) {
536    POLICY.with(|c| c.borrow_mut().audit_mode = mode);
537}
538
539/// Set the numeric reduction mode.
540pub fn set_numeric_mode(mode: NumericMode) {
541    POLICY.with(|c| c.borrow_mut().numeric_mode = mode);
542}
543
544/// Set the determinism level.
545pub fn set_determinism(d: Determinism) {
546    POLICY.with(|c| c.borrow_mut().determinism = d);
547}
548
549/// Enable/disable race-to-idle adaptive scheduling. `false` = fixed cap
550/// (reproducible schedule); `true` = burst-then-throttle (the default).
551pub fn set_adaptive(on: bool) {
552    POLICY.with(|c| c.borrow_mut().adaptive = on);
553}
554
555/// Resolved effective thread cap for the current policy on this machine.
556pub fn current_effective_threads() -> usize {
557    effective_threads(&get(), detect_cores())
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_is_balanced() {
        let p = RuntimePolicy::default();
        assert_eq!(p.thermal_mode, ThermalMode::Balanced);
        assert_eq!(p.determinism, Determinism::Strict);
        assert_eq!(p.numeric_mode, NumericMode::Kahan);
        assert_eq!(p.max_threads, 0);
        assert_eq!(p.batch_size, 128);
        assert_eq!(p.audit_mode, AuditMode::Full);
        assert!(p.adaptive, "adaptive (race-to-idle) is on by default");
    }

    #[test]
    fn set_adaptive_round_trip() {
        reset();
        assert!(get().adaptive);
        set_adaptive(false);
        assert!(!get().adaptive);
        reset();
        assert!(get().adaptive, "reset restores adaptive default");
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn decide_sustained_burst_then_throttle() {
        use std::time::{Duration, Instant};
        let mut st = AdaptiveState::default();
        let t0 = Instant::now();
        let win = Duration::from_millis(2000);
        let idle = Duration::from_millis(500);
        let at = |ms: u64| t0 + Duration::from_millis(ms);
        // Frequent ops (300ms gaps < the 500ms idle threshold) so the burst
        // timer accumulates rather than resetting.
        assert!(!decide_sustained(&mut st, t0, win, idle), "burst starts");
        let mut ms = 300;
        while ms < 2000 {
            assert!(!decide_sustained(&mut st, at(ms), win, idle), "still burst at {ms}ms");
            ms += 300;
        }
        // Past the 2s window with continuous activity → throttle.
        assert!(decide_sustained(&mut st, at(2100), win, idle), "sustained past window");
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn decide_sustained_idle_resets_burst() {
        use std::time::{Duration, Instant};
        let mut st = AdaptiveState::default();
        let t0 = Instant::now();
        let win = Duration::from_millis(2000);
        let idle = Duration::from_millis(500);
        let at = |ms: u64| t0 + Duration::from_millis(ms);
        // Drive to sustained with frequent ops.
        assert!(!decide_sustained(&mut st, t0, win, idle));
        let mut ms = 300;
        while ms <= 2100 {
            decide_sustained(&mut st, at(ms), win, idle);
            ms += 300;
        }
        assert!(decide_sustained(&mut st, at(2400), win, idle), "is sustained");
        // A pause longer than the idle threshold (600ms > 500ms) starts a fresh burst.
        assert!(
            !decide_sustained(&mut st, at(3000), win, idle),
            "idle gap should reset the burst → full speed again"
        );
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn run_parallel_preserves_value_under_throttle() {
        // The throttle decision must never change results: run a tiny reduction
        // both ways and assert byte-identical output. (Concurrency differs;
        // the per-element math does not.)
        reset();
        set_thermal_mode(ThermalMode::Cool);
        set_adaptive(false); // force throttle path
        let throttled: f64 = run_parallel(|| (0..1000).map(|i| i as f64).sum());
        set_adaptive(true);
        reset_adaptive_state(); // burst path
        let burst: f64 = run_parallel(|| (0..1000).map(|i| i as f64).sum());
        assert_eq!(throttled, burst);
        reset();
    }

    #[test]
    fn thermal_presets_distinct() {
        assert_eq!(ThermalMode::Cool.preset_batch_size(), 32);
        assert_eq!(ThermalMode::Balanced.preset_batch_size(), 128);
        assert_eq!(ThermalMode::MaxPerf.preset_batch_size(), 512);
        assert_eq!(ThermalMode::Cool.preset_audit_mode(), AuditMode::Summary);
        assert_eq!(ThermalMode::Balanced.preset_audit_mode(), AuditMode::Full);
        assert_eq!(ThermalMode::MaxPerf.preset_audit_mode(), AuditMode::Summary);
    }

    #[test]
    fn effective_threads_monotonic_in_thermal_mode() {
        let cores = 8;
        let cool = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::Cool), cores);
        let bal = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::Balanced), cores);
        let max = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::MaxPerf), cores);
        assert!(cool <= bal, "cool {cool} should not exceed balanced {bal}");
        assert!(bal <= max, "balanced {bal} should not exceed max-perf {max}");
        assert_eq!(cool, 2);
        assert_eq!(bal, 4);
        assert_eq!(max, 8);
    }

    #[test]
    fn effective_threads_always_in_range() {
        for cores in [1usize, 2, 3, 7, 16, 64] {
            for mode in [ThermalMode::Cool, ThermalMode::Balanced, ThermalMode::MaxPerf] {
                let t = effective_threads(&RuntimePolicy::for_thermal_mode(mode), cores);
                assert!(t >= 1, "threads must be >= 1 (cores={cores}, mode={mode:?})");
                assert!(t <= cores, "threads {t} must be <= cores {cores}");
            }
        }
    }

    #[test]
    fn explicit_thread_cap_wins_and_clamps() {
        let mut p = RuntimePolicy::for_thermal_mode(ThermalMode::MaxPerf);
        p.max_threads = 3;
        assert_eq!(effective_threads(&p, 8), 3, "explicit cap should be honored");
        p.max_threads = 100;
        assert_eq!(effective_threads(&p, 8), 8, "cap clamps to detected cores");
    }

    #[test]
    fn effective_threads_zero_cores_is_one() {
        let p = RuntimePolicy::default();
        assert_eq!(effective_threads(&p, 0), 1);
    }

    #[test]
    fn energy_is_non_negative_and_zero_at_zero() {
        assert_eq!(energy_estimate_joules(0, 0), 0.0);
        assert!(energy_estimate_joules(-5, -7) >= 0.0);
        assert_eq!(energy_estimate_joules(-5, -7), 0.0, "negatives clamp to zero");
    }

    #[test]
    fn energy_is_monotonic() {
        let a = energy_estimate_joules(1000, 1000);
        let b = energy_estimate_joules(2000, 1000);
        let c = energy_estimate_joules(2000, 2000);
        assert!(b > a, "more flops => more energy");
        assert!(c > b, "more bytes => more energy");
    }

    #[test]
    fn energy_is_additive_in_components() {
        let flop_only = energy_estimate_joules(1_000_000, 0);
        let byte_only = energy_estimate_joules(0, 1_000_000);
        let both = energy_estimate_joules(1_000_000, 1_000_000);
        assert_eq!(both, flop_only + byte_only);
    }

    #[test]
    fn energy_is_deterministic_across_calls() {
        let first = energy_estimate_joules(123_456, 789);
        for _ in 0..1000 {
            assert_eq!(energy_estimate_joules(123_456, 789), first);
        }
    }

    #[test]
    fn enum_round_trips() {
        for m in [ThermalMode::Cool, ThermalMode::Balanced, ThermalMode::MaxPerf] {
            assert_eq!(ThermalMode::from_str(m.as_str()), Some(m));
        }
        for m in [NumericMode::Kahan, NumericMode::Binned, NumericMode::FixedTree] {
            assert_eq!(NumericMode::from_str(m.as_str()), Some(m));
        }
        for m in [AuditMode::Summary, AuditMode::Full, AuditMode::Forensic] {
            assert_eq!(AuditMode::from_str(m.as_str()), Some(m));
        }
        for m in [Determinism::Strict, Determinism::Relaxed] {
            assert_eq!(Determinism::from_str(m.as_str()), Some(m));
        }
    }

    #[test]
    fn invalid_mode_strings_return_none() {
        assert_eq!(ThermalMode::from_str("blazing"), None);
        assert_eq!(NumericMode::from_str(""), None);
        assert_eq!(AuditMode::from_str("paranoid"), None);
        assert_eq!(Determinism::from_str("yolo"), None);
    }

    #[test]
    fn set_get_round_trip_and_reset() {
        reset();
        set_thermal_mode(ThermalMode::Cool);
        let p = get();
        assert_eq!(p.thermal_mode, ThermalMode::Cool);
        assert_eq!(p.batch_size, 32);
        assert_eq!(p.audit_mode, AuditMode::Summary);

        set_threads(2);
        set_batch_size(64);
        set_audit_mode(AuditMode::Forensic);
        set_numeric_mode(NumericMode::Binned);
        let p = get();
        assert_eq!(p.max_threads, 2);
        assert_eq!(p.batch_size, 64);
        assert_eq!(p.audit_mode, AuditMode::Forensic);
        assert_eq!(p.numeric_mode, NumericMode::Binned);

        reset();
        assert_eq!(get(), RuntimePolicy::default());
    }

    #[test]
    fn profile_then_override_precedence() {
        reset();
        // CLI order: profile first, then explicit override.
        set_thermal_mode(ThermalMode::MaxPerf);
        assert_eq!(get().batch_size, 512);
        set_batch_size(16);
        assert_eq!(get().batch_size, 16, "explicit override wins over profile preset");
        assert_eq!(get().thermal_mode, ThermalMode::MaxPerf, "mode unchanged by batch override");
        reset();
    }

    #[test]
    fn apply_thread_cap_never_panics_and_is_positive() {
        // Does not assert an exact count: the call only pre-builds the capped
        // pool and reports `rayon::current_num_threads()` (or 1 without the
        // `parallel` feature), which depends on the machine. We only require
        // a sane positive worker count and no panic.
        let n = apply_thread_cap(2);
        assert!(n >= 1);
    }

    #[test]
    fn summary_is_stable() {
        reset();
        let s1 = get().summary();
        let s2 = get().summary();
        assert_eq!(s1, s2);
        assert!(s1.contains("thermal=balanced"));
        assert!(s1.contains("determinism=strict"));
        reset();
    }
}