#![forbid(unsafe_code)]
use anyhow::Result;
use clap::Parser;
use dsfb_database::grammar::{MotifClass, MotifEngine, MotifGrammar};
use dsfb_database::non_claims;
use dsfb_database::residual::{ResidualClass, ResidualSample, ResidualStream};
use rand::{Rng, SeedableRng};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
// Command-line options for the `null_trace` binary.
//
// NOTE: these are deliberately plain `//` comments. clap's derive macro turns
// `///` doc comments on the struct and its fields into help text, so using
// them here would change the program's `--help` output.
#[derive(Parser)]
#[command(
name = "null_trace",
about = "Phase-A3: false-alarm calibration on a quiet Gaussian null trace.",
version
)]
struct Cli {
// First RNG seed of the sweep; `main` iterates `seed_lo..=seed_hi` inclusive.
#[arg(long, default_value_t = 1)]
seed_lo: u64,
// Last RNG seed of the sweep (inclusive); `main` rejects `seed_lo > seed_hi`.
#[arg(long, default_value_t = 32)]
seed_hi: u64,
// Length of each generated trace in seconds; also used to normalise counts
// to a per-hour rate (`duration_s / 3600`). Must be > 0.
#[arg(long, default_value_t = 3600.0)]
duration_s: f64,
// Samples per second; each class gets `round(duration_s * rate_hz)` samples.
// Must be > 0.
#[arg(long, default_value_t = 1.0)]
rate_hz: f64,
// Output directory; the results are written to `null.csv` inside it.
#[arg(long, default_value = "out")]
out: PathBuf,
// Multiplier applied to every per-class sigma returned by `sigma_for`.
// Must be > 0.
#[arg(long, default_value_t = 1.0)]
sigma_scale: f64,
}
/// Returns the base standard deviation of the null noise for `class`.
///
/// The value is later multiplied by the `--sigma-scale` factor. The match has
/// no wildcard arm, so adding a `ResidualClass` variant fails to compile until
/// a noise level is chosen for it here.
fn sigma_for(class: ResidualClass) -> f64 {
    match class {
        ResidualClass::CacheIo => 0.003,
        ResidualClass::Contention => 0.005,
        ResidualClass::WorkloadPhase => 0.01,
        ResidualClass::PlanRegression | ResidualClass::Cardinality => 0.03,
    }
}
/// Draws two standard-normal deviates from `rng` using the Box-Muller transform.
///
/// Exactly two uniforms are consumed per call: the radial one first, then the
/// angular one. Returns `(r * cos(theta), r * sin(theta))`.
fn gauss_pair<R: Rng>(rng: &mut R) -> (f64, f64) {
    // The radial uniform starts at EPSILON rather than 0 so `ln` never sees zero.
    let radial_u: f64 = rng.gen_range(f64::EPSILON..1.0);
    let angular_u: f64 = rng.gen_range(0.0..1.0);

    let radius = (-2.0 * radial_u.ln()).sqrt();
    let (sin_theta, cos_theta) = (2.0 * std::f64::consts::PI * angular_u).sin_cos();

    (radius * cos_theta, radius * sin_theta)
}
/// Builds a synthetic residual stream of zero-mean Gaussian noise for every
/// residual class.
///
/// * `seed` - seeds the PCG64 generator; one generator is shared by all classes.
/// * `duration_s`, `rate_hz` - each class receives `round(duration_s * rate_hz)`
///   samples at timestamps `k / rate_hz`.
/// * `sigma_scale` - multiplies the per-class sigma from `sigma_for`.
///
/// All three float arguments are expected to be strictly positive (checked in
/// debug builds only). The stream is sorted via `ResidualStream::sort` before
/// it is returned.
fn null_stream(seed: u64, duration_s: f64, rate_hz: f64, sigma_scale: f64) -> ResidualStream {
    debug_assert!(duration_s > 0.0 && rate_hz > 0.0 && sigma_scale > 0.0);

    let label = format!(
        "null-trace-seed{seed}-dur{:.0}s-rate{:.2}hz-sigma{:.3}",
        duration_s, rate_hz, sigma_scale
    );
    let mut stream = ResidualStream::new(label);
    let mut rng = rand_pcg::Pcg64::seed_from_u64(seed);

    let sample_count = (duration_s * rate_hz).round() as u64;
    let period = 1.0 / rate_hz;

    for class in ResidualClass::ALL {
        let sigma = sigma_for(class) * sigma_scale;
        let channel = format!("null_{}", class.name());

        // Each Box-Muller draw yields two deviates, so samples are emitted in
        // pairs; the second deviate is discarded when the count is odd.
        for even in (0..sample_count).step_by(2) {
            let (z_even, z_odd) = gauss_pair(&mut rng);

            let t_even = even as f64 * period;
            stream.push(
                ResidualSample::new(t_even, class, sigma * z_even).with_channel(channel.clone()),
            );

            let odd = even + 1;
            if odd < sample_count {
                let t_odd = odd as f64 * period;
                stream.push(
                    ResidualSample::new(t_odd, class, sigma * z_odd).with_channel(channel.clone()),
                );
            }
        }
    }

    stream.sort();
    stream
}
/// Runs the default motif grammar over `stream` and counts the emitted
/// episodes per motif.
///
/// Every motif in `MotifClass::ALL` is present in the returned map, with a
/// count of zero when the engine emitted nothing for it.
fn false_alarms_per_motif(stream: &ResidualStream) -> HashMap<MotifClass, usize> {
    let episodes = MotifEngine::new(MotifGrammar::default()).run(stream);

    // Seed every motif with zero so callers never see a missing key.
    let mut tally: HashMap<MotifClass, usize> = HashMap::new();
    for motif in MotifClass::ALL.iter() {
        tally.insert(*motif, 0);
    }

    for episode in &episodes {
        *tally.entry(episode.motif).or_default() += 1;
    }
    tally
}
/// Streaming accumulator for count, mean, variance and extrema, updated one
/// sample at a time with Welford's online algorithm.
///
/// An empty accumulator holds `min = +inf` and `max = -inf`, so the first
/// pushed sample always replaces both.
#[derive(Clone)]
struct Welford {
    /// Number of samples pushed so far.
    n: u64,
    /// Running arithmetic mean of the samples.
    mean: f64,
    /// Running sum of squared deviations from the mean.
    m2: f64,
    /// Smallest sample seen (`+inf` while empty).
    min: f64,
    /// Largest sample seen (`-inf` while empty).
    max: f64,
}

impl Default for Welford {
    /// Same as [`Welford::new`].
    ///
    /// A derived `Default` would zero `min` and `max`, which silently pins the
    /// minimum at 0 for positive data and the maximum at 0 for negative data.
    fn default() -> Self {
        Self::new()
    }
}

impl Welford {
    /// Creates an empty accumulator.
    fn new() -> Self {
        Self {
            n: 0,
            mean: 0.0,
            m2: 0.0,
            min: f64::INFINITY,
            max: f64::NEG_INFINITY,
        }
    }

    /// Folds one sample into the running statistics.
    ///
    /// `x` is expected to be finite; this is checked in debug builds only.
    fn push(&mut self, x: f64) {
        debug_assert!(x.is_finite());
        self.n += 1;
        // Welford update: use the deviation from the old mean and from the new
        // mean so that `m2` accumulates without catastrophic cancellation.
        let d = x - self.mean;
        self.mean += d / self.n as f64;
        let d2 = x - self.mean;
        self.m2 += d * d2;
        if x < self.min {
            self.min = x;
        }
        if x > self.max {
            self.max = x;
        }
    }

    /// Sample standard deviation (divisor `n - 1`); `0.0` with fewer than two
    /// samples.
    fn stddev(&self) -> f64 {
        if self.n <= 1 {
            0.0
        } else {
            (self.m2 / (self.n - 1) as f64).sqrt()
        }
    }

    /// Normal-approximation 95% confidence interval for the mean, as
    /// `(lower, upper)`.
    ///
    /// The lower bound is clamped at zero. Returns `(0.0, 0.0)` for an empty
    /// accumulator.
    fn ci95(&self) -> (f64, f64) {
        if self.n == 0 {
            return (0.0, 0.0);
        }
        let se = self.stddev() / (self.n as f64).sqrt();
        let half = 1.96 * se;
        ((self.mean - half).max(0.0), self.mean + half)
    }
}
/// Writes the per-motif false-alarm summary to a CSV file at `path`.
///
/// The parent directory of `path` is created if needed. One row is emitted per
/// entry of `MotifClass::ALL`, in that order, after a header row.
///
/// # Errors
/// Returns an error if the directory cannot be created or the CSV cannot be
/// opened, written or flushed.
///
/// # Panics
/// Panics if `accum` has no entry for some motif in `MotifClass::ALL`.
fn write_null_csv(path: &Path, cli: &Cli, accum: &HashMap<MotifClass, Welford>) -> Result<()> {
    const HEADER: [&str; 14] = [
        "motif",
        "n_seeds",
        "duration_s",
        "rate_hz",
        "sigma_scale",
        "mean_false_alarms_per_hour",
        "stddev_false_alarms_per_hour",
        "min_per_hour",
        "max_per_hour",
        "ci95_lo_per_hour",
        "ci95_hi_per_hour",
        "sigma_quiet",
        "seed_lo",
        "seed_hi",
    ];

    if let Some(dir) = path.parent() {
        fs::create_dir_all(dir)?;
    }
    let mut writer = csv::Writer::from_path(path)?;
    writer.write_record(HEADER)?;

    // Run-level columns are the same on every row, so format them once.
    let duration_col = format!("{:.3}", cli.duration_s);
    let rate_col = format!("{:.3}", cli.rate_hz);
    let scale_col = format!("{:.3}", cli.sigma_scale);
    let seed_lo_col = cli.seed_lo.to_string();
    let seed_hi_col = cli.seed_hi.to_string();

    for m in MotifClass::ALL {
        let stats = accum
            .get(&m)
            .expect("accumulator populated for every motif");
        let (ci_lo, ci_hi) = stats.ci95();
        let sigma_quiet = sigma_for(m.residual_class()) * cli.sigma_scale;

        writer.write_record([
            m.name(),
            &stats.n.to_string(),
            &duration_col,
            &rate_col,
            &scale_col,
            &format!("{:.6}", stats.mean),
            &format!("{:.6}", stats.stddev()),
            &format!("{:.6}", stats.min),
            &format!("{:.6}", stats.max),
            &format!("{:.6}", ci_lo),
            &format!("{:.6}", ci_hi),
            &format!("{:.6}", sigma_quiet),
            &seed_lo_col,
            &seed_hi_col,
        ])?;
    }

    writer.flush()?;
    Ok(())
}
/// Entry point: sweeps the seed range, measures false alarms per motif on each
/// null trace, and writes the aggregated per-hour statistics to
/// `<out>/null.csv`.
///
/// # Errors
/// Returns an error if an argument fails validation or if the output directory
/// or CSV file cannot be written.
fn main() -> Result<()> {
    let cli = Cli::parse();
    non_claims::print();

    anyhow::ensure!(cli.seed_lo <= cli.seed_hi, "--seed-lo must be <= --seed-hi");
    anyhow::ensure!(cli.duration_s > 0.0, "--duration-s must be > 0");
    anyhow::ensure!(cli.rate_hz > 0.0, "--rate-hz must be > 0");
    anyhow::ensure!(cli.sigma_scale > 0.0, "--sigma-scale must be > 0");
    // The `> 0.0` checks reject NaN but let `inf` through. An infinite duration
    // or rate saturates the float-to-integer sample count at `u64::MAX`, so
    // generation would effectively never finish; an infinite sigma scale
    // yields non-finite residuals.
    anyhow::ensure!(cli.duration_s.is_finite(), "--duration-s must be finite");
    anyhow::ensure!(cli.rate_hz.is_finite(), "--rate-hz must be finite");
    anyhow::ensure!(cli.sigma_scale.is_finite(), "--sigma-scale must be finite");

    let mut accum: HashMap<MotifClass, Welford> = MotifClass::ALL
        .iter()
        .map(|m| (*m, Welford::new()))
        .collect();

    // Counts are normalised to a per-hour rate so runs of different lengths
    // are comparable.
    let hours = cli.duration_s / 3600.0;
    for seed in cli.seed_lo..=cli.seed_hi {
        let stream = null_stream(seed, cli.duration_s, cli.rate_hz, cli.sigma_scale);
        let counts = false_alarms_per_motif(&stream);
        for m in MotifClass::ALL {
            let count = counts.get(&m).copied().unwrap_or(0) as f64;
            accum
                .get_mut(&m)
                .expect("accumulator populated for every motif")
                .push(count / hours);
        }
    }

    fs::create_dir_all(&cli.out)?;
    let csv_path = cli.out.join("null.csv");
    write_null_csv(&csv_path, &cli, &accum)?;
    eprintln!(
        "null_trace: seeds {}..={}, duration {:.1} s @ {:.2} Hz, sigma_scale {:.3}, wrote {}",
        cli.seed_lo,
        cli.seed_hi,
        cli.duration_s,
        cli.rate_hz,
        cli.sigma_scale,
        csv_path.display()
    );
    Ok(())
}