cjc_runtime/runtime_policy.rs
1//! Runtime Policy Layer — deterministic, thermally-bounded execution policy.
2//!
3//! This module is the "green compute" control surface for CJC-Lang. It lets a
4//! run declare *how much machine* it is willing to use — thread caps, batch
5//! sizing, audit depth — and exposes a **deterministic** energy estimate so a
6//! program can reason about joules-per-result instead of merely wall-clock
7//! seconds.
8//!
9//! The guiding philosophy: do not let CJC-Lang blindly saturate the CPU. Use
10//! *deterministic bounded execution*. Thermal/energy limits are made explicit
11//! and deterministic rather than left to the OS scheduler.
12//!
13//! # Builtins (registered in [`crate::builtins`])
14//!
15//! Policy query / mutate:
16//! - `runtime_policy_thermal_mode() -> String`
17//! - `runtime_policy_set_thermal_mode(mode: String) -> String`
18//! - `runtime_policy_threads() -> Int` (resolved effective cap)
19//! - `runtime_policy_set_threads(n: Int) -> Int`
20//! - `runtime_policy_batch_size() -> Int`
21//! - `runtime_policy_set_batch_size(n: Int) -> Int`
22//! - `runtime_policy_audit_mode() -> String`
23//! - `runtime_policy_set_audit_mode(mode: String) -> String`
24//! - `runtime_policy_numeric_mode() -> String`
25//! - `runtime_policy_set_numeric_mode(mode: String) -> String`
26//! - `runtime_policy_reset() -> Int`
27//! - `runtime_policy_summary() -> String`
28//!
29//! Energy model:
30//! - `energy_estimate(flops: Int, bytes: Int) -> Float` (joules)
31//! - `energy_per_flop() -> Float`
32//! - `energy_per_byte() -> Float`
33//!
34//! # Determinism story
35//!
36//! Two invariants make this layer safe under Prime Directive #3
37//! (same seed = bit-identical output):
38//!
39//! 1. **Thread count never changes results.** The parallel kernels in
40//! [`crate::tensor`] reduce with Kahan / [`crate::accumulator`] binned
41//! summation over a *fixed chunk order*, so the numeric output is identical
42//! regardless of how many rayon workers are live. The thermal mode and
43//! thread cap therefore move *only* the performance/heat axis, never the
44//! answer axis. Capping threads is pure "deterministic bounded execution".
45//!
46//! 2. **Energy is estimated from workload counts, never from wall time.**
47//! [`energy_estimate_joules`] is a pure function of integer FLOP and byte
48//! counts times fixed documented constants. Wall-clock time is explicitly
49//! *not* an input, because it varies run-to-run and would poison
50//! determinism. Same program + same seed → same FLOP count → same joule
51//! estimate, bit-for-bit. The two multiplies are kept in separate `let`
52//! bindings so the compiler cannot contract them into a single FMA (the
53//! same no-FMA discipline the SIMD kernels follow).
54//!
//! The policy itself lives in a thread-local `RefCell<RuntimePolicy>`, mirroring
//! the [`crate::profile`] counter sink. The interpreter thread reads and writes
//! it. The rayon global pool is left at its default width; the thread cap is
//! enforced per operation by [`run_parallel`], which installs sustained work
//! into a smaller cached pool that [`apply_thread_cap`] can pre-build at
//! startup. No RNG is touched. No `HashMap` is used.
60
61use std::cell::RefCell;
62
63// ── Mode enums ───────────────────────────────────────────────────────────
64
/// How strongly a run promises reproducible output. Only `Strict` is shipped
/// today; `Relaxed` is a placeholder so the policy surface already carries the
/// field, and choosing it does not loosen any current guarantee.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Determinism {
    /// Bit-identical output across runs and platforms (the default).
    Strict,
    /// Reserved — does not currently relax any guarantee.
    Relaxed,
}

impl Determinism {
    /// Canonical lowercase spelling of this level.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Strict => "strict",
            Self::Relaxed => "relaxed",
        }
    }

    /// Parse a canonical spelling (exact, case-sensitive match against
    /// [`Determinism::as_str`]); anything else yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        [Self::Strict, Self::Relaxed]
            .iter()
            .copied()
            .find(|level| level.as_str() == s)
    }
}
92
/// Which floating-point reduction strategy the run asks for. Each variant names
/// a member of the existing accumulator family, and all of them are
/// deterministic (none enables FMA or random ordering).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumericMode {
    /// Compensated (Kahan) summation — the default scalar strategy.
    Kahan,
    /// Order-invariant binned accumulator — best for parallel reductions.
    Binned,
    /// Fixed pairwise reduction tree — deterministic divide-and-conquer.
    FixedTree,
}

impl NumericMode {
    /// Canonical lowercase spelling of this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Kahan => "kahan",
            Self::Binned => "binned",
            Self::FixedTree => "fixed-tree",
        }
    }

    /// Parse a mode name (exact, case-sensitive). `FixedTree` also accepts the
    /// `fixedtree` and `fixed_tree` spellings; unknown input yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        // Every accepted spelling, canonical names and aliases alike.
        const SPELLINGS: [(&str, NumericMode); 5] = [
            ("kahan", NumericMode::Kahan),
            ("binned", NumericMode::Binned),
            ("fixed-tree", NumericMode::FixedTree),
            ("fixedtree", NumericMode::FixedTree),
            ("fixed_tree", NumericMode::FixedTree),
        ];
        SPELLINGS
            .iter()
            .find(|(name, _)| *name == s)
            .map(|&(_, mode)| mode)
    }
}
123
/// How much audit/forensics work accompanies a run. This governs the
/// *cold-path* extras (logs, Merkle trees, full lineage) that sit beside the
/// hot numerical path: deeper modes cost speed and energy in exchange for
/// traceability, and they never change numeric output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AuditMode {
    /// Cheapest — aggregate summaries only.
    Summary,
    /// Per-operation audit records.
    Full,
    /// Maximum traceability (full lineage + hashes). Most expensive.
    Forensic,
}

impl AuditMode {
    /// Canonical lowercase spelling of this depth.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Summary => "summary",
            Self::Full => "full",
            Self::Forensic => "forensic",
        }
    }

    /// Parse a canonical spelling (exact, case-sensitive match against
    /// [`AuditMode::as_str`]); anything else yields `None`.
    pub fn from_str(s: &str) -> Option<Self> {
        [Self::Summary, Self::Full, Self::Forensic]
            .iter()
            .copied()
            .find(|depth| depth.as_str() == s)
    }
}
155
156/// Thermal/energy execution profile. This is the headline "green" knob: it
157/// bounds how aggressively a run uses the CPU so a laptop does not cook itself
158/// sustaining turbo across all cores.
159#[derive(Clone, Copy, Debug, PartialEq, Eq)]
160pub enum ThermalMode {
161 /// Gentle on the machine — quarter of cores, small batches, summary audit.
162 Cool,
163 /// Laptop-safe default — half the cores, normal batches, full audit.
164 Balanced,
165 /// Benchmark mode — all cores, large batches, minimal audit overhead.
166 MaxPerf,
167}
168
169impl ThermalMode {
170 pub fn as_str(self) -> &'static str {
171 match self {
172 ThermalMode::Cool => "cool",
173 ThermalMode::Balanced => "balanced",
174 ThermalMode::MaxPerf => "max-perf",
175 }
176 }
177
178 pub fn from_str(s: &str) -> Option<Self> {
179 match s {
180 "cool" => Some(ThermalMode::Cool),
181 "balanced" => Some(ThermalMode::Balanced),
182 "max-perf" | "maxperf" | "max_perf" => Some(ThermalMode::MaxPerf),
183 _ => None,
184 }
185 }
186
187 /// Preset batch size for this thermal mode. Smaller batches under `Cool`
188 /// keep sustained heat spikes down; larger batches under `MaxPerf` favor
189 /// throughput.
190 pub fn preset_batch_size(self) -> usize {
191 match self {
192 ThermalMode::Cool => 32,
193 ThermalMode::Balanced => 128,
194 ThermalMode::MaxPerf => 512,
195 }
196 }
197
198 /// Preset audit depth for this thermal mode. `Cool` and `MaxPerf` both pick
199 /// `Summary` (minimal cold-path overhead — one to be gentle, the other to
200 /// keep benchmark timing clean); `Balanced` keeps `Full` audit for normal
201 /// operation.
202 pub fn preset_audit_mode(self) -> AuditMode {
203 match self {
204 ThermalMode::Cool => AuditMode::Summary,
205 ThermalMode::Balanced => AuditMode::Full,
206 ThermalMode::MaxPerf => AuditMode::Summary,
207 }
208 }
209}
210
211// ── Energy model ──────────────────────────────────────────────────────────
212
/// Representative energy cost, in joules, of one double-precision FLOP.
///
/// An order-of-magnitude figure for a modern CPU (~100 pJ per useful FLOP once
/// issue/fetch overhead is amortized) — not a measurement of any particular
/// chip. Its purpose is a *relative, deterministic* "joules per result" metric:
/// trust ratios between two CJC-Lang runs, not the absolute value.
pub const ENERGY_PER_FLOP_JOULES: f64 = 1.0e-10;

/// Representative energy cost, in joules, of moving one byte through the memory
/// hierarchy (~100 pJ/byte, typical of DRAM traffic). Memory traffic is a
/// dominant energy consumer, which is why TidyView's sparse/dictionary-encoded
/// layouts matter for the green story. As with [`ENERGY_PER_FLOP_JOULES`], this
/// is an estimate for relative comparison, not a calibrated absolute.
pub const ENERGY_PER_BYTE_JOULES: f64 = 1.0e-10;

/// Estimate, in joules, the energy of a workload that performs `flops`
/// floating-point operations and moves `bytes` bytes of memory traffic.
///
/// The result depends only on the two integer counts and the two constants
/// above — no wall-clock time and no RNG are involved. A negative count is
/// treated as zero, so the estimate is always finite and non-negative.
pub fn energy_estimate_joules(flops: i64, bytes: i64) -> f64 {
    let flop_count = if flops > 0 { flops as f64 } else { 0.0 };
    let byte_count = if bytes > 0 { bytes as f64 } else { 0.0 };
    // Each product is rounded on its own before the final add; the named
    // intermediates keep those rounding points explicit (no fused
    // multiply-add), so the result is bit-identical everywhere.
    let compute_joules = ENERGY_PER_FLOP_JOULES * flop_count;
    let traffic_joules = ENERGY_PER_BYTE_JOULES * byte_count;
    compute_joules + traffic_joules
}
244
245// ── The policy struct ───────────────────────────────────────────────────────
246
247/// A fully-resolved runtime execution policy.
248///
249/// `max_threads == 0` means "auto" — resolve the effective cap from
250/// [`ThermalMode`] and the detected core count via [`effective_threads`].
251#[derive(Clone, Copy, Debug, PartialEq, Eq)]
252pub struct RuntimePolicy {
253 pub determinism: Determinism,
254 pub numeric_mode: NumericMode,
255 pub thermal_mode: ThermalMode,
256 /// Hard thread cap; `0` = auto (derive from `thermal_mode`).
257 pub max_threads: usize,
258 /// Advisory batch size for chunked workloads (training, ABNG, TidyView).
259 pub batch_size: usize,
260 pub audit_mode: AuditMode,
261 /// Race-to-idle scheduling: when `true`, parallel work runs at *full* width
262 /// for a short burst and only throttles to the thermal cap once load is
263 /// *sustained* (see [`run_parallel`]). Recovers burst performance while
264 /// keeping the sustained thermal bound. When `false`, the cap applies
265 /// uniformly (a fixed, reproducible schedule). Moot when the cap equals the
266 /// core count (`max-perf`). Never affects results — only the schedule.
267 pub adaptive: bool,
268}
269
270impl RuntimePolicy {
271 /// Build the policy implied by a thermal profile: the profile sets the
272 /// thermal mode, its preset batch size, its preset audit depth, and leaves
273 /// the thread count on `auto` (0). Determinism stays `Strict` and the
274 /// numeric mode stays `Kahan` — those are orthogonal to thermal behavior.
275 pub fn for_thermal_mode(mode: ThermalMode) -> Self {
276 Self {
277 determinism: Determinism::Strict,
278 numeric_mode: NumericMode::Kahan,
279 thermal_mode: mode,
280 max_threads: 0,
281 batch_size: mode.preset_batch_size(),
282 audit_mode: mode.preset_audit_mode(),
283 adaptive: true,
284 }
285 }
286
287 /// One-line, deterministic, BTreeMap-free summary for reporting.
288 pub fn summary(&self) -> String {
289 format!(
290 "runtime_policy: thermal={} threads={} batch={} audit={} numeric={} determinism={} adaptive={}",
291 self.thermal_mode.as_str(),
292 effective_threads(self, detect_cores()),
293 self.batch_size,
294 self.audit_mode.as_str(),
295 self.numeric_mode.as_str(),
296 self.determinism.as_str(),
297 self.adaptive,
298 )
299 }
300}
301
302impl Default for RuntimePolicy {
303 /// The laptop-safe default is `Balanced`, not "max all cores forever".
304 fn default() -> Self {
305 Self::for_thermal_mode(ThermalMode::Balanced)
306 }
307}
308
309// ── Thread resolution ─────────────────────────────────────────────────────
310
/// Detected logical core count (`>= 1`). Falls back to 1 if the platform
/// cannot report parallelism. This is the only place we read machine topology.
///
/// The value is queried once and cached for the life of the process.
/// `std::thread::available_parallelism` recomputes its answer on every call
/// and is documented as unsuitable for hot code, while this function is hit on
/// every parallel operation. Caching also keeps the derived thread cap stable
/// for the whole run, even if affinity or cgroup limits change underneath it.
pub fn detect_cores() -> usize {
    static DETECTED: std::sync::OnceLock<usize> = std::sync::OnceLock::new();
    *DETECTED.get_or_init(|| {
        std::thread::available_parallelism()
            .map(std::num::NonZeroUsize::get)
            .unwrap_or(1)
    })
}
318
319/// Resolve the effective thread cap for a policy given a detected core count.
320///
321/// An explicit `max_threads > 0` wins (clamped to the detected cores so we
322/// never over-subscribe). Otherwise the thermal mode derives the cap:
323/// `Cool` ≈ quarter of cores, `Balanced` ≈ half, `MaxPerf` = all. The result
324/// is always in `1..=cores`. Pure function — deterministic given its inputs.
325pub fn effective_threads(policy: &RuntimePolicy, detected_cores: usize) -> usize {
326 let cores = detected_cores.max(1);
327 if policy.max_threads > 0 {
328 policy.max_threads.min(cores)
329 } else {
330 match policy.thermal_mode {
331 ThermalMode::Cool => (cores / 4).max(1),
332 ThermalMode::Balanced => (cores / 2).max(1),
333 ThermalMode::MaxPerf => cores,
334 }
335 }
336}
337
/// Warm up the throttle pool for a thread cap of `n` and report the worker
/// count rayon currently exposes to the caller.
///
/// **Phase 2 (race-to-idle) model.** The global rayon pool keeps its default
/// width (all cores) so *bursts* can use full parallelism; the thermal cap is
/// enforced per operation by [`run_parallel`], which `install`s sustained work
/// into a smaller, cached pool. This function only builds that capped pool
/// ahead of time so the first sustained operation does not pay the build cost.
/// `n == 0` or `n >= cores` means "no cap", and nothing is built.
#[cfg(feature = "parallel")]
pub fn apply_thread_cap(n: usize) -> usize {
    let all_cores = detect_cores();
    let is_real_cap = n != 0 && n < all_cores;
    if is_real_cap {
        // Pre-build only; the returned pool handle is not needed here.
        let _ = capped_pool(n);
    }
    rayon::current_num_threads()
}

/// Build without the `parallel` feature: nothing to cap, and the worker count
/// is always one.
#[cfg(not(feature = "parallel"))]
pub fn apply_thread_cap(_n: usize) -> usize {
    1
}
360
361// ── Race-to-idle adaptive scheduling (Phase 2) ─────────────────────────────
362
/// Burst budget: how long parallel work may run at full width before a
/// *sustained* workload is throttled to the thermal cap. It is sized to the
/// thermal time constant — sub-second bursts don't heat-soak the package and
/// run free, while multi-second sustained load (the kind that actually
/// throttles a laptop) gets capped.
#[cfg(feature = "parallel")]
const SUSTAIN_WINDOW: std::time::Duration = std::time::Duration::from_secs(2);

/// Idle threshold: a gap longer than this resets the burst timer, so a fresh
/// burst after a pause gets full width again.
#[cfg(feature = "parallel")]
const IDLE_RESET: std::time::Duration = std::time::Duration::from_millis(500);

/// Bookkeeping for the burst/sustained decision on one thread.
#[cfg(feature = "parallel")]
#[derive(Default)]
struct AdaptiveState {
    /// When the current burst began; `None` until an operation is seen, and
    /// again after an idle reset.
    burst_start: Option<std::time::Instant>,
    /// Most recent instant recorded for an operation; `None` before the first.
    last_op: Option<std::time::Instant>,
}

#[cfg(feature = "parallel")]
thread_local! {
    /// Burst timer for the current thread, starting out empty.
    static ADAPTIVE: RefCell<AdaptiveState> = RefCell::default();
}
387
/// Clock-free core of the burst/sustained decision, kept separate from
/// `Instant::now()` so tests can drive it with explicit instants.
///
/// If more than `idle` has elapsed since the last recorded operation, the
/// current burst is discarded and a new one begins at `now`. The call always
/// records `now` as the latest operation, and returns `true` (throttle) when
/// the burst in effect has lasted at least `window`.
#[cfg(feature = "parallel")]
fn decide_sustained(
    state: &mut AdaptiveState,
    now: std::time::Instant,
    window: std::time::Duration,
    idle: std::time::Duration,
) -> bool {
    let went_idle = state
        .last_op
        .map_or(false, |previous| now.duration_since(previous) > idle);
    if went_idle {
        state.burst_start = None;
    }
    state.last_op = Some(now);

    // Start a burst at `now` when none is in progress.
    let burst_began = match state.burst_start {
        Some(began) => began,
        None => {
            state.burst_start = Some(now);
            now
        }
    };
    now.duration_since(burst_began) >= window
}
408
/// Apply [`decide_sustained`] to this thread's burst timer using the real
/// clock and the module's `SUSTAIN_WINDOW` / `IDLE_RESET` thresholds.
#[cfg(feature = "parallel")]
fn is_sustained_now() -> bool {
    let now = std::time::Instant::now();
    ADAPTIVE.with(|cell| {
        let mut state = cell.borrow_mut();
        decide_sustained(&mut state, now, SUSTAIN_WINDOW, IDLE_RESET)
    })
}
414
/// Clear this thread's burst timer back to its empty state (e.g. between test
/// runs on a reused thread).
#[cfg(feature = "parallel")]
fn reset_adaptive_state() {
    ADAPTIVE.with(|cell| {
        cell.replace(AdaptiveState::default());
    });
}

/// Build without the `parallel` feature: there is no burst timer to clear.
#[cfg(not(feature = "parallel"))]
fn reset_adaptive_state() {}
423
/// The throttle pool, cached together with the cap it was built for.
///
/// The policy cap can change while the process runs (`set_threads`,
/// `set_thermal_mode`), so the pool must not be frozen at whatever width was
/// requested first. The slot remembers `(cap, pool)` and [`capped_pool`]
/// rebuilds it when a different cap is requested.
#[cfg(feature = "parallel")]
static CAPPED_POOL: std::sync::Mutex<Option<(usize, std::sync::Arc<rayon::ThreadPool>)>> =
    std::sync::Mutex::new(None);

/// Return a rayon pool with exactly `cap` workers, reusing the cached pool when
/// its width already matches and building a replacement otherwise.
///
/// Returns `None` if rayon fails to build the pool, in which case
/// [`run_parallel`] degrades to no throttle. A failed build is not cached; the
/// next call tries again. A replaced pool stays alive for as long as any caller
/// still holds its `Arc`, and is dropped after that.
///
/// The lock is held only while looking up or building the pool, never while
/// work runs on it.
#[cfg(feature = "parallel")]
fn capped_pool(cap: usize) -> Option<std::sync::Arc<rayon::ThreadPool>> {
    // A poisoned lock only means another thread panicked mid-update; the slot
    // is still a valid `Option`, so keep going with whatever it holds.
    let mut slot = CAPPED_POOL
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    if let Some((width, pool)) = slot.as_ref() {
        if *width == cap {
            return Some(std::sync::Arc::clone(pool));
        }
    }

    let built = rayon::ThreadPoolBuilder::new()
        .num_threads(cap)
        .build()
        .ok()?;
    let pool = std::sync::Arc::new(built);
    *slot = Some((cap, std::sync::Arc::clone(&pool)));
    Some(pool)
}
436
/// Run `work` under the active thermal policy, throttling parallelism to the
/// thermal cap only when appropriate.
///
/// Wrap a parallel kernel's body in this. The rules, checked in order:
/// - already inside a rayon worker (nested call): run inline on the current
///   pool — never nest-`install` (avoids surprising thread fan-out / blocking).
/// - cap ≥ cores (`max-perf` / `--threads ≥ N`): run on the full global pool.
/// - `adaptive` (default): full width during a burst, throttle once load is
///   sustained ([`decide_sustained`]).
/// - `adaptive == false`: always throttle to the cap (fixed, reproducible).
///
/// Throttling means `install`-ing into the [`capped_pool`], inside which
/// `rayon::current_num_threads()` reports that pool's width — so chunkers that
/// size their work to the live thread count auto-scale. If the capped pool
/// cannot be built, the work runs unthrottled.
///
/// Two details matter for the burst accounting:
/// - The core count is read once per call, so the cap and the "is there a cap
///   at all" comparison are computed from the same number.
/// - In adaptive mode the *end* of each operation is stamped into the burst
///   timer. The idle gap seen by the next call is then measured from when the
///   previous operation finished, not from when it started. Without the stamp,
///   back-to-back operations that each run longer than the idle threshold
///   would look like fresh bursts and sustained load would never be throttled.
///
/// The pool choice is intended to affect only the schedule, not results: per
/// this module's determinism contract, kernels keep a fixed reduction order
/// regardless of how many workers are live.
#[cfg(feature = "parallel")]
pub fn run_parallel<R, F>(work: F) -> R
where
    R: Send,
    F: FnOnce() -> R + Send,
{
    // Nested call from within a pool worker: run inline, don't re-install.
    if rayon::current_thread_index().is_some() {
        return work();
    }

    let policy = get();
    let cores = detect_cores();
    let cap = effective_threads(&policy, cores);
    if cap >= cores {
        return work(); // no cap — full global pool
    }

    let throttle = !policy.adaptive || is_sustained_now();
    let result = if throttle {
        match capped_pool(cap) {
            Some(pool) => pool.install(work),
            None => work(),
        }
    } else {
        work() // burst — full global pool
    };

    if policy.adaptive {
        // Mark when this operation finished so a long-running op is not
        // mistaken for an idle gap by the next decision.
        ADAPTIVE.with(|cell| cell.borrow_mut().last_op = Some(std::time::Instant::now()));
    }
    result
}

/// Build without the `parallel` feature: run `work` inline and return its
/// result.
#[cfg(not(feature = "parallel"))]
pub fn run_parallel<R, F>(work: F) -> R
where
    F: FnOnce() -> R,
{
    work()
}
487
488// ── Thread-local policy state ─────────────────────────────────────────────
489
490thread_local! {
491 /// The active runtime policy for this thread. The interpreter runs on one
492 /// thread and reads/writes this; rayon workers honor the thread cap via the
493 /// global pool, not via this cell.
494 pub(crate) static POLICY: RefCell<RuntimePolicy> = RefCell::new(RuntimePolicy::default());
495}
496
497/// Snapshot the current policy.
498pub fn get() -> RuntimePolicy {
499 POLICY.with(|c| *c.borrow())
500}
501
502/// Reset to the laptop-safe `Balanced` default. Tests and the REPL call this
503/// to avoid cross-run leakage on a reused thread. Also clears the race-to-idle
504/// burst timer so a fresh run starts in the burst regime.
505pub fn reset() {
506 POLICY.with(|c| *c.borrow_mut() = RuntimePolicy::default());
507 reset_adaptive_state();
508}
509
510/// Adopt a thermal profile wholesale: sets the thermal mode plus its preset
511/// batch size and audit depth, and resets the thread cap to `auto`. Explicit
512/// per-field setters called *after* this win (the CLI applies the profile
513/// first, then individual `--threads` / `--batch-size` / `--audit-mode` overrides).
514pub fn set_thermal_mode(mode: ThermalMode) {
515 POLICY.with(|c| {
516 let mut p = c.borrow_mut();
517 p.thermal_mode = mode;
518 p.batch_size = mode.preset_batch_size();
519 p.audit_mode = mode.preset_audit_mode();
520 p.max_threads = 0;
521 });
522}
523
524/// Set an explicit thread cap (`0` = auto).
525pub fn set_threads(n: usize) {
526 POLICY.with(|c| c.borrow_mut().max_threads = n);
527}
528
529/// Set the advisory batch size.
530pub fn set_batch_size(n: usize) {
531 POLICY.with(|c| c.borrow_mut().batch_size = n);
532}
533
534/// Set the audit depth.
535pub fn set_audit_mode(mode: AuditMode) {
536 POLICY.with(|c| c.borrow_mut().audit_mode = mode);
537}
538
539/// Set the numeric reduction mode.
540pub fn set_numeric_mode(mode: NumericMode) {
541 POLICY.with(|c| c.borrow_mut().numeric_mode = mode);
542}
543
544/// Set the determinism level.
545pub fn set_determinism(d: Determinism) {
546 POLICY.with(|c| c.borrow_mut().determinism = d);
547}
548
549/// Enable/disable race-to-idle adaptive scheduling. `false` = fixed cap
550/// (reproducible schedule); `true` = burst-then-throttle (the default).
551pub fn set_adaptive(on: bool) {
552 POLICY.with(|c| c.borrow_mut().adaptive = on);
553}
554
555/// Resolved effective thread cap for the current policy on this machine.
556pub fn current_effective_threads() -> usize {
557 effective_threads(&get(), detect_cores())
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    // The default policy is the Balanced profile with every preset applied.
    #[test]
    fn default_is_balanced() {
        let p = RuntimePolicy::default();
        assert_eq!(p.thermal_mode, ThermalMode::Balanced);
        assert_eq!(p.determinism, Determinism::Strict);
        assert_eq!(p.numeric_mode, NumericMode::Kahan);
        assert_eq!(p.max_threads, 0);
        assert_eq!(p.batch_size, 128);
        assert_eq!(p.audit_mode, AuditMode::Full);
        assert!(p.adaptive, "adaptive (race-to-idle) is on by default");
    }

    // `set_adaptive` is visible through `get`, and `reset` restores the default.
    #[test]
    fn set_adaptive_round_trip() {
        reset();
        assert!(get().adaptive);
        set_adaptive(false);
        assert!(!get().adaptive);
        reset();
        assert!(get().adaptive, "reset restores adaptive default");
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn decide_sustained_burst_then_throttle() {
        use std::time::{Duration, Instant};
        let mut st = AdaptiveState::default();
        let t0 = Instant::now();
        let win = Duration::from_millis(2000);
        let idle = Duration::from_millis(500);
        // Synthetic clock: an instant `ms` milliseconds after `t0`.
        let at = |ms: u64| t0 + Duration::from_millis(ms);
        // Frequent ops (300ms gaps < the 500ms idle threshold) so the burst
        // timer accumulates rather than resetting.
        assert!(!decide_sustained(&mut st, t0, win, idle), "burst starts");
        let mut ms = 300;
        while ms < 2000 {
            assert!(!decide_sustained(&mut st, at(ms), win, idle), "still burst at {ms}ms");
            ms += 300;
        }
        // Past the 2s window with continuous activity → throttle.
        assert!(decide_sustained(&mut st, at(2100), win, idle), "sustained past window");
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn decide_sustained_idle_resets_burst() {
        use std::time::{Duration, Instant};
        let mut st = AdaptiveState::default();
        let t0 = Instant::now();
        let win = Duration::from_millis(2000);
        let idle = Duration::from_millis(500);
        // Synthetic clock: an instant `ms` milliseconds after `t0`.
        let at = |ms: u64| t0 + Duration::from_millis(ms);
        // Drive to sustained with frequent ops.
        assert!(!decide_sustained(&mut st, t0, win, idle));
        let mut ms = 300;
        while ms <= 2100 {
            decide_sustained(&mut st, at(ms), win, idle);
            ms += 300;
        }
        assert!(decide_sustained(&mut st, at(2400), win, idle), "is sustained");
        // A pause longer than the idle threshold (600ms > 500ms) starts a fresh burst.
        assert!(
            !decide_sustained(&mut st, at(3000), win, idle),
            "idle gap should reset the burst → full speed again"
        );
    }

    #[cfg(feature = "parallel")]
    #[test]
    fn run_parallel_preserves_value_under_throttle() {
        // The throttle decision must never change results: run a tiny reduction
        // both ways and assert byte-identical output. (Concurrency differs;
        // the per-element math does not.)
        reset();
        set_thermal_mode(ThermalMode::Cool);
        set_adaptive(false); // force throttle path
        let throttled: f64 = run_parallel(|| (0..1000).map(|i| i as f64).sum());
        set_adaptive(true);
        reset_adaptive_state(); // burst path
        let burst: f64 = run_parallel(|| (0..1000).map(|i| i as f64).sum());
        assert_eq!(throttled, burst);
        reset();
    }

    // Pins the exact preset values of each thermal profile.
    #[test]
    fn thermal_presets_distinct() {
        assert_eq!(ThermalMode::Cool.preset_batch_size(), 32);
        assert_eq!(ThermalMode::Balanced.preset_batch_size(), 128);
        assert_eq!(ThermalMode::MaxPerf.preset_batch_size(), 512);
        assert_eq!(ThermalMode::Cool.preset_audit_mode(), AuditMode::Summary);
        assert_eq!(ThermalMode::Balanced.preset_audit_mode(), AuditMode::Full);
        assert_eq!(ThermalMode::MaxPerf.preset_audit_mode(), AuditMode::Summary);
    }

    // On 8 cores the auto cap is 2 / 4 / 8 for cool / balanced / max-perf.
    #[test]
    fn effective_threads_monotonic_in_thermal_mode() {
        let cores = 8;
        let cool = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::Cool), cores);
        let bal = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::Balanced), cores);
        let max = effective_threads(&RuntimePolicy::for_thermal_mode(ThermalMode::MaxPerf), cores);
        assert!(cool <= bal, "cool {cool} should not exceed balanced {bal}");
        assert!(bal <= max, "balanced {bal} should not exceed max-perf {max}");
        assert_eq!(cool, 2);
        assert_eq!(bal, 4);
        assert_eq!(max, 8);
    }

    // The auto cap stays within 1..=cores for small, odd and large core counts.
    #[test]
    fn effective_threads_always_in_range() {
        for cores in [1usize, 2, 3, 7, 16, 64] {
            for mode in [ThermalMode::Cool, ThermalMode::Balanced, ThermalMode::MaxPerf] {
                let t = effective_threads(&RuntimePolicy::for_thermal_mode(mode), cores);
                assert!(t >= 1, "threads must be >= 1 (cores={cores}, mode={mode:?})");
                assert!(t <= cores, "threads {t} must be <= cores {cores}");
            }
        }
    }

    // A non-zero `max_threads` overrides the thermal mode but never exceeds the cores.
    #[test]
    fn explicit_thread_cap_wins_and_clamps() {
        let mut p = RuntimePolicy::for_thermal_mode(ThermalMode::MaxPerf);
        p.max_threads = 3;
        assert_eq!(effective_threads(&p, 8), 3, "explicit cap should be honored");
        p.max_threads = 100;
        assert_eq!(effective_threads(&p, 8), 8, "cap clamps to detected cores");
    }

    // A reported core count of 0 is treated as 1.
    #[test]
    fn effective_threads_zero_cores_is_one() {
        let p = RuntimePolicy::default();
        assert_eq!(effective_threads(&p, 0), 1);
    }

    #[test]
    fn energy_is_non_negative_and_zero_at_zero() {
        assert_eq!(energy_estimate_joules(0, 0), 0.0);
        assert!(energy_estimate_joules(-5, -7) >= 0.0);
        assert_eq!(energy_estimate_joules(-5, -7), 0.0, "negatives clamp to zero");
    }

    #[test]
    fn energy_is_monotonic() {
        let a = energy_estimate_joules(1000, 1000);
        let b = energy_estimate_joules(2000, 1000);
        let c = energy_estimate_joules(2000, 2000);
        assert!(b > a, "more flops => more energy");
        assert!(c > b, "more bytes => more energy");
    }

    // The combined estimate equals the FLOP-only plus byte-only estimates exactly.
    #[test]
    fn energy_is_additive_in_components() {
        let flop_only = energy_estimate_joules(1_000_000, 0);
        let byte_only = energy_estimate_joules(0, 1_000_000);
        let both = energy_estimate_joules(1_000_000, 1_000_000);
        assert_eq!(both, flop_only + byte_only);
    }

    #[test]
    fn energy_is_deterministic_across_calls() {
        let first = energy_estimate_joules(123_456, 789);
        for _ in 0..1000 {
            assert_eq!(energy_estimate_joules(123_456, 789), first);
        }
    }

    // Every variant's canonical name parses back to the same variant.
    #[test]
    fn enum_round_trips() {
        for m in [ThermalMode::Cool, ThermalMode::Balanced, ThermalMode::MaxPerf] {
            assert_eq!(ThermalMode::from_str(m.as_str()), Some(m));
        }
        for m in [NumericMode::Kahan, NumericMode::Binned, NumericMode::FixedTree] {
            assert_eq!(NumericMode::from_str(m.as_str()), Some(m));
        }
        for m in [AuditMode::Summary, AuditMode::Full, AuditMode::Forensic] {
            assert_eq!(AuditMode::from_str(m.as_str()), Some(m));
        }
        for m in [Determinism::Strict, Determinism::Relaxed] {
            assert_eq!(Determinism::from_str(m.as_str()), Some(m));
        }
    }

    #[test]
    fn invalid_mode_strings_return_none() {
        assert_eq!(ThermalMode::from_str("blazing"), None);
        assert_eq!(NumericMode::from_str(""), None);
        assert_eq!(AuditMode::from_str("paranoid"), None);
        assert_eq!(Determinism::from_str("yolo"), None);
    }

    // Profile presets land in the thread-local policy, setters override them,
    // and `reset` returns to the default.
    #[test]
    fn set_get_round_trip_and_reset() {
        reset();
        set_thermal_mode(ThermalMode::Cool);
        let p = get();
        assert_eq!(p.thermal_mode, ThermalMode::Cool);
        assert_eq!(p.batch_size, 32);
        assert_eq!(p.audit_mode, AuditMode::Summary);

        set_threads(2);
        set_batch_size(64);
        set_audit_mode(AuditMode::Forensic);
        set_numeric_mode(NumericMode::Binned);
        let p = get();
        assert_eq!(p.max_threads, 2);
        assert_eq!(p.batch_size, 64);
        assert_eq!(p.audit_mode, AuditMode::Forensic);
        assert_eq!(p.numeric_mode, NumericMode::Binned);

        reset();
        assert_eq!(get(), RuntimePolicy::default());
    }

    #[test]
    fn profile_then_override_precedence() {
        reset();
        // CLI order: profile first, then explicit override.
        set_thermal_mode(ThermalMode::MaxPerf);
        assert_eq!(get().batch_size, 512);
        set_batch_size(16);
        assert_eq!(get().batch_size, 16, "explicit override wins over profile preset");
        assert_eq!(get().thermal_mode, ThermalMode::MaxPerf, "mode unchanged by batch override");
        reset();
    }

    #[test]
    fn apply_thread_cap_never_panics_and_is_positive() {
        // Does not assert an exact count: with the `parallel` feature the
        // return value is rayon's current worker count, which depends on the
        // machine; without it the value is always 1. We only require a sane
        // positive worker count and no panic.
        let n = apply_thread_cap(2);
        assert!(n >= 1);
    }

    // Two summaries of the same policy are identical and carry the default fields.
    #[test]
    fn summary_is_stable() {
        reset();
        let s1 = get().summary();
        let s2 = get().summary();
        assert_eq!(s1, s2);
        assert!(s1.contains("thermal=balanced"));
        assert!(s1.contains("determinism=strict"));
        reset();
    }
}
807}