use std::collections::HashMap;
use std::fs::{File, OpenOptions, create_dir};
use std::io::Write;
use std::path::PathBuf;
use std::str::FromStr;
use arrrg::CommandLine;
use sig_fig_histogram::Histogram;
use statslicer::{
UnboundParameters, UntypedParameters, compute_difference, experiment_and_parameters, summarize,
};
/// Global options that precede the subcommand on the `statslicer` command line.
///
/// The struct has no fields; `main` parses it with `from_command_line_relaxed` and hands the
/// value to whichever subcommand is selected.
#[derive(Default, Eq, PartialEq, arrrg_derive::CommandLine)]
struct StatSlicerOptions {}
/// Renders `value` in lower-case scientific notation with `sig_figs` significant digits.
///
/// One digit is printed before the decimal point and `sig_figs - 1` after it, so
/// `format_sig_figs(3, 1234.0)` yields `"1.23e3"`.
///
/// # Panics
///
/// Panics when `sig_figs` is outside `1..=4`.
fn format_sig_figs(sig_figs: i32, value: f64) -> String {
    assert!((1..=4).contains(&sig_figs));
    // The assertion bounds the difference to 0..=3, so the cast cannot lose information.
    let fractional_digits = (sig_figs - 1) as usize;
    format!("{value:.fractional_digits$e}")
}
/// Appends a gnuplot `load "<s>"` directive to `includes` when `s` names an existing path.
///
/// Paths that do not exist are skipped without error, so every include file is optional.
fn include(s: String, includes: &mut Vec<String>) {
    if !PathBuf::from(&s).exists() {
        return;
    }
    let directive = format!(r#"load "{s}""#);
    includes.push(directive);
}
/// Command-line options for the `cdf` subcommand.
#[derive(Eq, PartialEq, arrrg_derive::CommandLine)]
struct CdfOptions {
// Precision every input histogram is reduced to; `cdf_write` rejects inputs recorded with
// fewer significant figures than this.
#[arrrg(optional, "Number of significant figures to downsample to.")]
sig_figs: i32,
// Parsed with `UnboundParameters::from_str`; files that agree on these parameters share a plot.
#[arrrg(optional, "Comma-separated list of fixed variables.")]
fix: String,
// Name of the parameter that distinguishes the individual curves within one plot.
#[arrrg(required, "Dependent variable.")]
dependent: String,
}
impl Default for CdfOptions {
    /// Three significant figures, no fixed variables, and an empty dependent variable.
    fn default() -> Self {
        let sig_figs = 3;
        let fix = String::new();
        let dependent = String::new();
        Self {
            sig_figs,
            fix,
            dependent,
        }
    }
}
/// Merges the input histograms by parameter combination and writes one CDF data file per
/// combination.
///
/// Each entry of `files` must be named `<name>.dat`; `<name>` is handed to
/// `experiment_and_parameters` to recover the experiment and its parameters.  Histograms are
/// reduced to `cdf.sig_figs` significant figures and merged with every other histogram whose
/// parameters project to the same values of `cdf.fix` plus `cdf.dependent`.  Each merged
/// histogram is written to `exp/cdf/<experiment>:<parameters>.dat` as a `0 0` line followed by
/// one `<boundary> <cumulative fraction>` line per histogram entry.
///
/// Returns the paths that were written, in unspecified order; the list is empty when `files` is
/// empty.
///
/// # Errors
///
/// Returns a message when an input is not a `.dat` file, when its name cannot be parsed, when
/// the inputs span more than one experiment, when a file carries parameters beyond the fixed and
/// dependent ones, when a histogram has fewer significant figures than requested, or when a file
/// cannot be opened or written.
///
/// # Panics
///
/// Panics when `Histogram::load` fails to decode an input.
fn cdf_write(
    _: &StatSlicerOptions,
    cdf: &CdfOptions,
    files: Vec<String>,
) -> Result<Vec<String>, String> {
    // Histograms are grouped by the fixed parameters plus the dependent one.
    let mut unbound_params = UnboundParameters::from_str(&cdf.fix)?;
    unbound_params.push(cdf.dependent.clone());
    let mut exp = None;
    let mut agg = HashMap::new();
    for file in files.iter() {
        let Some(stem) = file.strip_suffix(".dat") else {
            return Err(format!("cannot parse {file}: expected a .dat file"));
        };
        let (e, params) = experiment_and_parameters(stem)?;
        // The first file decides the experiment; every later file must agree with it.
        let exp = *exp.get_or_insert(e);
        if exp != e {
            return Err(format!(
                "experiments {exp} and {e} provided; provide just one"
            ));
        }
        let fixed = unbound_params.project(&params)?;
        // Every parameter in the file name must be covered by the projection.
        if fixed.len() != params.len() {
            return Err(format!("cannot parse {file}: logic error"));
        }
        let input = File::open(file).map_err(|err| format!("cannot open {file}: {err}"))?;
        // NOTE(review): the error type of `Histogram::load` is not visible from this file, so a
        // histogram that fails to decode still panics rather than returning `Err`.
        let hist = Histogram::load(input).expect("histogram should load");
        if hist.sig_figs() < cdf.sig_figs {
            return Err(format!(
                "cannot upsample histogram in {file} from {} to {} significant figures",
                hist.sig_figs(),
                cdf.sig_figs
            ));
        }
        let hist = if hist.sig_figs() > cdf.sig_figs {
            hist.downsample(cdf.sig_figs)
        } else {
            hist
        };
        // Only build the empty accumulator the first time a combination is seen.
        let entry = agg
            .entry(fixed.to_string())
            .or_insert_with(|| Histogram::new(cdf.sig_figs));
        *entry = Histogram::merge(entry, &hist);
    }
    let Some(exp) = exp else {
        return Ok(vec![]);
    };
    // Best effort: the directories usually exist already, and a real failure surfaces when the
    // output file is opened below.
    let _ = create_dir("exp");
    let _ = create_dir("exp/cdf");
    let mut outputs = Vec::with_capacity(agg.len());
    for (fixed, hist) in agg.into_iter() {
        let path = format!("exp/cdf/{exp}:{fixed}.dat");
        let file = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(&path)
            .map_err(|err| format!("cannot open {path} for writing: {err}"))?;
        // Buffer the output: one line is emitted per histogram entry.
        let mut out = std::io::BufWriter::new(file);
        let write_failed = |err: std::io::Error| format!("cannot write {path}: {err}");
        let sfb = sig_fig_histogram::SigFigBucketizer::new(cdf.sig_figs);
        // NOTE(review): when every count is zero `total` is 0 and the fractions below are NaN.
        let total = hist.iter().map(|x| x.1).fold(0, u64::saturating_add);
        let mut sum = 0;
        writeln!(out, "0 0").map_err(&write_failed)?;
        for (idx, (_, value)) in hist.iter().enumerate() {
            sum += value;
            writeln!(
                out,
                "{} {}",
                format_sig_figs(cdf.sig_figs, sfb.boundary_for(idx as i32)),
                sum as f64 / total as f64
            )
            .map_err(&write_failed)?;
        }
        out.flush().map_err(&write_failed)?;
        drop(out);
        outputs.push(path);
    }
    Ok(outputs)
}
/// Writes one gnuplot script per combination of fixed parameters, plotting every CDF data file
/// that shares that combination as a separate curve.
///
/// Each entry of `files` must be named `<name>.dat`; `<name>` is handed to
/// `experiment_and_parameters`.  Files are grouped by the projection of their parameters onto
/// `cdf.fix`, and within a group the curves are ordered by the value of `cdf.dependent`.  The
/// script for a group is written to `exp/cdf/<experiment>:<fixed>.gnuplot` and directs its
/// output to the matching `.svg` path.  Optional `gnuplot/...include` files are loaded by the
/// script when they exist at generation time.
///
/// # Errors
///
/// Returns a message when an input is not a `.dat` file, when its name cannot be parsed, when
/// the inputs span more than one experiment, when the dependent parameter is missing from a
/// file, or when a script cannot be opened or written.
fn cdf_gnuplot(_: &StatSlicerOptions, cdf: &CdfOptions, files: Vec<String>) -> Result<(), String> {
    let unbound_params = UnboundParameters::from_str(&cdf.fix)?;
    let mut exp = None;
    let mut agg = HashMap::new();
    for file in files.iter() {
        let Some(stem) = file.strip_suffix(".dat") else {
            return Err(format!("cannot parse {file}: expected a .dat file"));
        };
        let (e, params) = experiment_and_parameters(stem)?;
        // The first file decides the experiment; every later file must agree with it.
        let exp = *exp.get_or_insert(e);
        if exp != e {
            return Err(format!(
                "experiments {exp} and {e} provided; provide just one"
            ));
        }
        let fixed = unbound_params.project(&params)?;
        let Some(dependent) = params.get(&cdf.dependent) else {
            return Err(format!(
                "cannot find dependent parameter in {file}: missing {}",
                cdf.dependent
            ));
        };
        let dependent = UntypedParameters::one(cdf.dependent.clone(), dependent);
        let entry = agg.entry(fixed.to_string()).or_insert_with(Vec::new);
        entry.push((fixed, dependent, file.clone()));
    }
    let Some(exp) = exp else {
        return Ok(());
    };
    // Best effort: the directories usually exist already, and a real failure surfaces when the
    // script is opened below.
    let _ = create_dir("exp");
    let _ = create_dir("exp/cdf");
    for (fixed_str, mut series) in agg.into_iter() {
        // Render the group's fixed parameters as `UnboundParameters`; this names one of the
        // optional include files below.
        let unbound_fixed = {
            let (fixed, _, _) = series
                .last()
                .expect("every group is created together with its first entry");
            let unbound: UnboundParameters = fixed.into();
            unbound.to_string()
        };
        let path = format!("exp/cdf/{exp}:{fixed_str}.gnuplot");
        let mut output = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(&path)
            .map_err(|err| format!("cannot open {path} for writing: {err}"))?;
        let write_failed = |err: std::io::Error| format!("cannot write {path}: {err}");
        let exp_display = exp.replace('_', " ");
        let fixed_display = fixed_str.replace('_', " ");
        // Includes run from the most general to the most specific.
        let mut includes = vec![];
        include("gnuplot/gnuplot.include".to_string(), &mut includes);
        include("gnuplot/cdf/gnuplot.include".to_string(), &mut includes);
        include(format!("gnuplot/cdf/{exp}.include"), &mut includes);
        include(
            format!("gnuplot/cdf/{exp}:{unbound_fixed}.include"),
            &mut includes,
        );
        include(
            format!("gnuplot/cdf/{exp}:{fixed_str}.include"),
            &mut includes,
        );
        let includes = includes.join("\n");
        writeln!(
            output,
            r#"set terminal svg size 640,395 mouse standalone dynamic enhanced
set key top left;
set yrange [0:1]
set title "{exp_display}:{fixed_display}"
set xlabel "Time (nanoseconds)"
set ylabel "CDF"
{includes}
set output "exp/cdf/{exp}:{fixed_str}.svg""#
        )
        .map_err(&write_failed)?;
        // The first curve opens the `plot` command; later ones continue it on a new line.
        let mut prefix = "plot ";
        series.sort_by_key(|x| x.1.cast_float());
        for (idx, (_, dependent, file)) in series.into_iter().enumerate() {
            write!(
                output,
                r#"{}"{}" using 1:2 title "{}" with linespoints ls {}"#,
                prefix,
                file,
                dependent.to_string().replace('_', " "),
                idx + 1
            )
            .map_err(&write_failed)?;
            prefix = ", \\\n ";
        }
        writeln!(output, ";").map_err(&write_failed)?;
        output.flush().map_err(&write_failed)?;
        drop(output);
    }
    Ok(())
}
/// Runs the `cdf` subcommand: writes the CDF data files with `cdf_write`, then feeds the paths it
/// returns to `cdf_gnuplot`.  The first error from either step is returned unchanged.
fn cdf_main(options: StatSlicerOptions, cdf: CdfOptions, files: Vec<String>) -> Result<(), String> {
    cdf_write(&options, &cdf, files).and_then(|data_files| cdf_gnuplot(&options, &cdf, data_files))
}
/// Command-line options for the `lines` subcommand.
#[derive(Eq, PartialEq, arrrg_derive::CommandLine)]
struct LinesOptions {
// Precision every input histogram is reduced to; `lines_write` rejects inputs recorded with
// fewer significant figures than this.
#[arrrg(optional, "Number of significant figures to downsample to.")]
sig_figs: i32,
// Parsed with `UnboundParameters::from_str`; files that agree on these parameters share a plot.
#[arrrg(optional, "Comma-separated list of fixed variables.")]
fix: String,
// Name of the parameter whose value becomes the first column of each data line.
#[arrrg(required, "Independent variable.")]
independent: String,
// Parsed with `UnboundParameters::from_str`; each distinct combination becomes one plotted line.
#[arrrg(required, "Comma-separated list of variables to include as series.")]
series: String,
}
impl Default for LinesOptions {
    /// Three significant figures; every variable list and the independent variable start empty.
    fn default() -> Self {
        let sig_figs = 3;
        let fix = String::new();
        let independent = String::new();
        let series = String::new();
        Self {
            sig_figs,
            fix,
            independent,
            series,
        }
    }
}
/// Writes one data file per combination of fixed and series parameters, holding the mean of each
/// input histogram against its independent-variable value.
///
/// Each entry of `files` must be named `<name>.dat`; `<name>` is handed to
/// `experiment_and_parameters`.  Histograms are reduced to `lines.sig_figs` significant figures
/// and grouped by the projection of their parameters onto `lines.fix` plus `lines.series`.  Each
/// group is written to `exp/lines/<experiment>:<parameters>.dat` as `<independent> <mean>` lines
/// ordered by the independent value.
///
/// Returns the paths that were written, in unspecified order; the list is empty when `files` is
/// empty.
///
/// # Errors
///
/// Returns a message when an input is not a `.dat` file, when its name cannot be parsed, when
/// the inputs span more than one experiment, when the independent parameter is missing, when a
/// histogram has fewer significant figures than requested, or when a file cannot be opened or
/// written.
///
/// # Panics
///
/// Panics when `Histogram::load` fails to decode an input.
fn lines_write(
    _: &StatSlicerOptions,
    lines: &LinesOptions,
    files: Vec<String>,
) -> Result<Vec<String>, String> {
    // Group on the fixed parameters plus the series parameters.
    let mut unbound_params = UnboundParameters::from_str(&lines.fix)?;
    for param in UnboundParameters::from_str(&lines.series)?.iter() {
        unbound_params.push(param.to_string())
    }
    let mut exp = None;
    let mut agg = HashMap::new();
    for file in files.iter() {
        let Some(stem) = file.strip_suffix(".dat") else {
            return Err(format!("cannot parse {file}: expected a .dat file"));
        };
        let (e, params) = experiment_and_parameters(stem)?;
        // The first file decides the experiment; every later file must agree with it.
        let exp = *exp.get_or_insert(e);
        if exp != e {
            return Err(format!(
                "experiments {exp} and {e} provided; provide just one"
            ));
        }
        let fixed = unbound_params.project(&params)?;
        let Some(independent) = params.get(&lines.independent) else {
            return Err(format!(
                "cannot parse {file}: independent parameter missing"
            ));
        };
        let input = File::open(file).map_err(|err| format!("cannot open {file}: {err}"))?;
        // NOTE(review): the error type of `Histogram::load` is not visible from this file, so a
        // histogram that fails to decode still panics rather than returning `Err`.
        let hist = Histogram::load(input).expect("histogram should load");
        if hist.sig_figs() < lines.sig_figs {
            return Err(format!(
                "cannot upsample histogram in {file} from {} to {} significant figures",
                hist.sig_figs(),
                lines.sig_figs
            ));
        }
        let hist = if hist.sig_figs() > lines.sig_figs {
            hist.downsample(lines.sig_figs)
        } else {
            hist
        };
        let entry = agg.entry(fixed.to_string()).or_insert_with(Vec::new);
        entry.push((independent, hist));
    }
    let Some(exp) = exp else {
        return Ok(vec![]);
    };
    // Best effort: the directories usually exist already, and a real failure surfaces when the
    // output file is opened below.
    let _ = create_dir("exp");
    let _ = create_dir("exp/lines");
    let mut outputs = Vec::with_capacity(agg.len());
    for (fix, mut series) in agg.into_iter() {
        series.sort_by_key(|x| x.0.cast_float());
        let path = format!("exp/lines/{exp}:{fix}.dat");
        let mut output = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(&path)
            .map_err(|err| format!("cannot open {path} for writing: {err}"))?;
        let write_failed = |err: std::io::Error| format!("cannot write {path}: {err}");
        for (param, hist) in series.into_iter() {
            let summary = summarize(&hist);
            writeln!(output, "{} {}", param, summary.mean()).map_err(&write_failed)?;
        }
        output.flush().map_err(&write_failed)?;
        drop(output);
        outputs.push(path);
    }
    Ok(outputs)
}
/// Writes one gnuplot script per combination of fixed parameters, plotting every data file that
/// shares that combination as a separate line.
///
/// Each entry of `files` must be named `<name>.dat`; `<name>` is handed to
/// `experiment_and_parameters`.  Files are grouped by the projection of their parameters onto
/// the fixed variables, and within a group the lines are sorted by their series parameters and
/// then by path.  The script for a group is written to
/// `exp/lines/<experiment>:<fixed>.gnuplot` and directs its output to the matching `.svg` path.
/// Optional `gnuplot/...include` files are loaded by the script when they exist at generation
/// time.
///
/// The options parameter is named `cdf` for historical reasons; it carries the `lines` options.
///
/// # Errors
///
/// Returns a message when an input is not a `.dat` file, when its name cannot be parsed, when
/// the inputs span more than one experiment, when a file carries parameters beyond the fixed and
/// series ones, or when a script cannot be opened or written.
fn lines_gnuplot(
    _: &StatSlicerOptions,
    cdf: &LinesOptions,
    files: Vec<String>,
) -> Result<(), String> {
    let unbound_params = UnboundParameters::from_str(&cdf.fix)?;
    let series_params = UnboundParameters::from_str(&cdf.series)?;
    let mut exp = None;
    // Maps the rendered fixed parameters to their `UnboundParameters` rendering and the lines
    // to plot for that group.
    let mut agg = HashMap::new();
    for file in files.iter() {
        let Some(stem) = file.strip_suffix(".dat") else {
            return Err(format!("cannot parse {file}: expected a .dat file"));
        };
        let (e, params) = experiment_and_parameters(stem)?;
        // The first file decides the experiment; every later file must agree with it.
        let exp = *exp.get_or_insert(e);
        if exp != e {
            return Err(format!(
                "experiments {exp} and {e} provided; provide just one"
            ));
        }
        let fixed = unbound_params.project(&params)?;
        let series = series_params.project(&params)?;
        // Every parameter in the file name must be either fixed or part of the series.
        if fixed.len() + series.len() != params.len() {
            return Err(format!(
                "cannot parse {file}: parameters do not match fix/series"
            ));
        }
        // The include-file name must come from the *fixed* parameters, so it is derived here
        // while they are still at hand rather than from the stored series parameters.
        let entry = agg.entry(fixed.to_string()).or_insert_with(|| {
            let unbound: UnboundParameters = (&fixed).into();
            (unbound.to_string(), Vec::new())
        });
        entry.1.push((series, file));
    }
    let Some(exp) = exp else {
        return Ok(());
    };
    // Best effort: the directories usually exist already, and a real failure surfaces when the
    // script is opened below.
    let _ = create_dir("exp");
    let _ = create_dir("exp/lines");
    for (fixed_str, (unbound_fixed, mut series)) in agg.into_iter() {
        series.sort();
        let path = format!("exp/lines/{exp}:{fixed_str}.gnuplot");
        let mut output = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(&path)
            .map_err(|err| format!("cannot open {path} for writing: {err}"))?;
        let write_failed = |err: std::io::Error| format!("cannot write {path}: {err}");
        let exp_display = exp.replace('_', " ");
        let fixed_display = fixed_str.replace('_', " ");
        // Includes run from the most general to the most specific.
        let mut includes = vec![];
        include("gnuplot/gnuplot.include".to_string(), &mut includes);
        include("gnuplot/lines/gnuplot.include".to_string(), &mut includes);
        include(format!("gnuplot/lines/{exp}.include"), &mut includes);
        include(
            format!("gnuplot/lines/{exp}:{unbound_fixed}.include"),
            &mut includes,
        );
        include(
            format!("gnuplot/lines/{exp}:{fixed_str}.include"),
            &mut includes,
        );
        let includes = includes.join("\n");
        writeln!(
            output,
            r#"set terminal svg size 640,395 mouse standalone dynamic enhanced
set title "{exp_display}:{fixed_display}"
{includes}
set output "exp/lines/{exp}:{fixed_str}.svg""#
        )
        .map_err(&write_failed)?;
        // The first line opens the `plot` command; later ones continue it on a new line.
        let mut prefix = "plot ";
        for (idx, (series, file)) in series.into_iter().enumerate() {
            write!(
                output,
                r#"{}"{}" using 1:2 title "{}" with linespoints ls {}"#,
                prefix,
                file,
                series.to_string().replace('_', " "),
                idx + 1
            )
            .map_err(&write_failed)?;
            prefix = ", \\\n ";
        }
        writeln!(output, ";").map_err(&write_failed)?;
        output.flush().map_err(&write_failed)?;
        drop(output);
    }
    Ok(())
}
/// Runs the `lines` subcommand: writes the per-series data files with `lines_write`, then feeds
/// the paths it returns to `lines_gnuplot`.  The first error from either step is returned
/// unchanged.
fn lines_main(
    options: StatSlicerOptions,
    lines: LinesOptions,
    files: Vec<String>,
) -> Result<(), String> {
    lines_write(&options, &lines, files)
        .and_then(|data_files| lines_gnuplot(&options, &lines, data_files))
}
/// Command-line options for the `t-test` subcommand.
#[derive(Eq, PartialEq, arrrg_derive::CommandLine)]
struct TtestOptions {
// Kept as text here; `t_test_main` parses it as an `f64` and passes it to `compute_difference`.
#[arrrg(
optional,
"Confidence interval [0, 100], but really one of (80.0, 90.0, 95.0, 98.0, 99.0, 99.5)"
)]
interval: String,
}
impl Default for TtestOptions {
    /// Uses a confidence interval of `99.0` unless one is supplied on the command line.
    fn default() -> Self {
        let interval = String::from("99.0");
        Self { interval }
    }
}
/// Runs the `t-test` subcommand: compares every input histogram against the first one.
///
/// Each file in `files` is loaded and summarized.  For every file, including the first, the
/// result of `compute_difference` against the first file's summary at the requested confidence
/// interval is printed as `<file> <difference>`.  Nothing is printed when `files` is empty.
///
/// The process exits with status 1 after printing a message when the interval is not a
/// floating-point number, when an input file cannot be opened, or when a difference cannot be
/// computed.
///
/// # Panics
///
/// Panics when `Histogram::load` fails to decode an input.
fn t_test_main(_: StatSlicerOptions, options: TtestOptions, files: Vec<String>) {
    let Ok(interval) = f64::from_str(&options.interval) else {
        eprintln!("could not parse interval as a floating-point value");
        std::process::exit(1);
    };
    let mut moments = Vec::with_capacity(files.len());
    for file in files.iter() {
        // An unreadable input is a user error, so report it like the other failures in this
        // function instead of panicking.
        let input = match File::open(file) {
            Ok(input) => input,
            Err(err) => {
                eprintln!("could not open {file}: {err}");
                std::process::exit(1);
            }
        };
        // NOTE(review): the error type of `Histogram::load` is not visible from this file, so a
        // histogram that fails to decode still panics.
        let hist = Histogram::load(input).expect("histogram should load");
        moments.push(summarize(&hist));
    }
    // The first histogram is the baseline every file is compared against.
    let Some(baseline) = moments.first() else {
        return;
    };
    for (f, m) in std::iter::zip(files.iter(), moments.iter()) {
        let Some(diff) = compute_difference(baseline, m, interval) else {
            eprintln!("could not compute difference of {} and {}", files[0], f);
            std::process::exit(1);
        };
        println!("{f} {diff}");
    }
}
/// Entry point: parses the global options, then dispatches to the `cdf`, `lines`, or `t-test`
/// subcommand with the remaining arguments.
///
/// A missing or unknown subcommand, or a subcommand that returns an error, prints a message to
/// standard error and exits with status 1.
fn main() {
    let (options, args) = StatSlicerOptions::from_command_line_relaxed(
        "USAGE: statslicer [global options] <command> [command options]",
    );
    let args = args.iter().map(|a| a.as_str()).collect::<Vec<_>>();
    // The first free argument selects the subcommand; the rest belong to it.
    let Some((subcommand, rest)) = args.split_first() else {
        eprintln!("no command specified on the command line");
        std::process::exit(1);
    };
    let outcome = match *subcommand {
        "cdf" => {
            let (cdf, files) =
                CdfOptions::from_arguments_relaxed("USAGE: statslicer cdf [options]", rest);
            cdf_main(options, cdf, files).map_err(|err| format!("could not create cdf: {err}"))
        }
        "lines" => {
            let (lines, files) =
                LinesOptions::from_arguments_relaxed("USAGE: statslicer lines [options]", rest);
            lines_main(options, lines, files)
                .map_err(|err| format!("could not create lines: {err}"))
        }
        "t-test" => {
            let (t_test, files) =
                TtestOptions::from_arguments_relaxed("USAGE: statslicer t-test [options]", rest);
            t_test_main(options, t_test, files);
            Ok(())
        }
        _ => {
            eprintln!("unknown subcommand: {subcommand}");
            std::process::exit(1)
        }
    };
    // Report failures as a plain message rather than a panic with a Debug-quoted string.
    if let Err(err) = outcome {
        eprintln!("{err}");
        std::process::exit(1);
    }
}