use std::process::Command;
use std::time::Instant;
/// Builds the base command used to invoke the `cgp` binary through Cargo.
///
/// The returned command is `<cargo> run -p cgp --`, where `<cargo>` is the
/// value of the `CARGO` environment variable captured at compile time.
/// Callers append the actual `cgp` arguments after the `--` separator.
fn cgp_cmd() -> Command {
    let mut command = Command::new(env!("CARGO"));
    command.args(["run", "-p", "cgp", "--"]);
    command
}
/// FALSIFY-CGP-030: `cgp diff` must report `REGRESSION` for a 10% slowdown.
///
/// Writes two profile fixtures whose wall-clock times are 23.2 us (baseline)
/// and 25.52 us (current, exactly 10% higher), runs `cgp diff` on them and
/// checks that stdout contains the word `REGRESSION`.
///
/// The fixtures live in the platform temp directory under per-process names
/// and are removed *before* any assertion runs, so a failing test does not
/// leave files behind and concurrent runs do not clobber each other.
#[test]
fn falsify_cgp_030_detect_10pct_regression() {
    let baseline = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 23.2,
            "samples": 50,
            "stddev_us": 0.3,
            "ci_95_low_us": 22.9,
            "ci_95_high_us": 23.5
        },
        "throughput": {"tflops": 11.6, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });
    let current = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 25.52,
            "samples": 50,
            "stddev_us": 0.3,
            "ci_95_low_us": 25.2,
            "ci_95_high_us": 25.8
        },
        "throughput": {"tflops": 10.5, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });

    // Per-process names avoid collisions between concurrent test runs.
    let tmp_dir = std::env::temp_dir();
    let pid = std::process::id();
    let baseline_path = tmp_dir.join(format!("cgp-falsify-030-b-{pid}.json"));
    let current_path = tmp_dir.join(format!("cgp-falsify-030-c-{pid}.json"));
    std::fs::write(&baseline_path, baseline.to_string()).expect("Failed to write baseline fixture");
    std::fs::write(&current_path, current.to_string()).expect("Failed to write current fixture");

    let result = cgp_cmd()
        .arg("diff")
        .arg("--baseline")
        .arg(&baseline_path)
        .arg("--current")
        .arg(&current_path)
        .output();

    // Best-effort cleanup, done before anything below can panic.
    let _ = std::fs::remove_file(&baseline_path);
    let _ = std::fs::remove_file(&current_path);

    let output = result.expect("Failed to run cgp diff");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(
        stdout.contains("REGRESSION"),
        "FALSIFY-CGP-030 FAILED: 10% regression not detected.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-031: `cgp diff` must not flag run-to-run noise as a regression.
///
/// The two fixtures differ by 0.2 us in wall-clock time (23.2 us vs 23.4 us,
/// under 1%). Every stdout line that mentions `wall_clock_time_us` must be
/// free of the word `REGRESSION`. If no such line is printed, the check
/// passes vacuously.
///
/// The fixtures live in the platform temp directory under per-process names
/// and are removed *before* any assertion runs, so a failing test does not
/// leave files behind and concurrent runs do not clobber each other.
#[test]
fn falsify_cgp_031_no_false_positive_on_noise() {
    let run1 = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 23.2,
            "samples": 50,
            "stddev_us": 0.5,
            "ci_95_low_us": 22.7,
            "ci_95_high_us": 23.7
        },
        "throughput": {"tflops": 11.6, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });
    let run2 = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 23.4,
            "samples": 50,
            "stddev_us": 0.5,
            "ci_95_low_us": 22.9,
            "ci_95_high_us": 23.9
        },
        "throughput": {"tflops": 11.5, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });

    // Per-process names avoid collisions between concurrent test runs.
    let tmp_dir = std::env::temp_dir();
    let pid = std::process::id();
    let run1_path = tmp_dir.join(format!("cgp-falsify-031-1-{pid}.json"));
    let run2_path = tmp_dir.join(format!("cgp-falsify-031-2-{pid}.json"));
    std::fs::write(&run1_path, run1.to_string()).expect("Failed to write first fixture");
    std::fs::write(&run2_path, run2.to_string()).expect("Failed to write second fixture");

    let result = cgp_cmd()
        .arg("diff")
        .arg("--baseline")
        .arg(&run1_path)
        .arg("--current")
        .arg(&run2_path)
        .output();

    // Best-effort cleanup, done before anything below can panic.
    let _ = std::fs::remove_file(&run1_path);
    let _ = std::fs::remove_file(&run2_path);

    let output = result.expect("Failed to run cgp diff");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);
    // Only the timing rows are inspected; other metrics may legitimately differ.
    for line in stdout.lines().filter(|l| l.contains("wall_clock_time_us")) {
        assert!(
            !line.contains("REGRESSION"),
            "FALSIFY-CGP-031 FAILED: false positive on <2% noise.\nLine: {line}\nOutput:\n{stdout}"
        );
    }
}
/// FALSIFY-CGP-041: the `avx2` GEMM backend must be at least 3x faster than `scalar`.
///
/// Runs `cgp --json profile compare` for a size-1024 GEMM on both backends,
/// reads each backend's `wall_time_us` from the JSON array and asserts that
/// `scalar / avx2 >= 3.0`.
#[test]
fn falsify_cgp_041_simd_faster_than_scalar() {
    let output = cgp_cmd()
        .arg("--json")
        .args(["profile", "compare"])
        .args(["--kernel", "gemm"])
        .args(["--size", "1024"])
        .args(["--backends", "scalar,avx2"])
        .output()
        .expect("Failed to run cgp profile compare");
    assert!(output.status.success());

    let raw_json = String::from_utf8_lossy(&output.stdout);
    let report: serde_json::Value = serde_json::from_str(&raw_json).expect("Compare JSON invalid");
    let backends = report.as_array().unwrap();

    // Each array element is looked up by its "name" field.
    let scalar_entry = backends.iter().find(|b| b["name"] == "scalar").unwrap();
    let avx2_entry = backends.iter().find(|b| b["name"] == "avx2").unwrap();

    let scalar_time = scalar_entry["wall_time_us"].as_f64().unwrap();
    let avx2_time = avx2_entry["wall_time_us"].as_f64().unwrap();
    let speedup = scalar_time / avx2_time;
    assert!(
        speedup >= 3.0,
        "FALSIFY-CGP-041 FAILED: AVX2 speedup {speedup:.1}x < 3x (scalar={scalar_time:.0}us, avx2={avx2_time:.0}us)"
    );
}
/// FALSIFY-CGP-043: `cgp profile binary nvidia-smi` must succeed and mention
/// binary profiling.
///
/// Stdout is accepted if it contains any of `Binary Profile`, `nsys` or
/// `nvidia-smi`.
#[test]
fn falsify_cgp_043_profile_binary() {
    let output = cgp_cmd()
        .arg("profile")
        .arg("binary")
        .arg("nvidia-smi")
        .output()
        .expect("Failed to run cgp profile binary");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mentions_profiling = ["Binary Profile", "nsys", "nvidia-smi"]
        .iter()
        .any(|marker| stdout.contains(*marker));
    assert!(
        mentions_profiling,
        "FALSIFY-CGP-043: Should mention binary profiling.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-045: `cgp compete timing` must print a head-to-head report.
///
/// Compares `sleep 0.01` against `sleep 0.02` with the labels `fast,slow` and
/// checks that stdout contains the `Head-to-Head` header, a `Winner` line and
/// both labels.
#[test]
fn falsify_cgp_045_compete_normalized() {
    let output = cgp_cmd()
        .args(["compete", "timing"])
        .args(["--ours", "sleep 0.01"])
        .args(["--theirs", "sleep 0.02"])
        .args(["--label", "fast,slow"])
        .output()
        .expect("Failed to run cgp compete");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(stdout.contains("Head-to-Head"), "Missing header");
    assert!(stdout.contains("Winner"), "Missing winner declaration");

    let has_both_labels = stdout.contains("fast") && stdout.contains("slow");
    assert!(
        has_both_labels,
        "FALSIFY-CGP-045: Labels not in output.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-047: profiling a failing binary must still produce some output.
///
/// Runs `cgp profile binary false`. The exit status is deliberately not
/// checked; the only requirement is that stdout and stderr together are not
/// blank.
#[test]
fn falsify_cgp_047_crash_handling() {
    let output = cgp_cmd()
        .arg("profile")
        .arg("binary")
        .arg("false")
        .output()
        .expect("Failed to run cgp profile binary");

    // Concatenate stdout followed by stderr.
    let mut combined = String::from_utf8_lossy(&output.stdout).into_owned();
    combined.push_str(&String::from_utf8_lossy(&output.stderr));

    assert!(
        !combined.trim().is_empty(),
        "FALSIFY-CGP-047: cgp produced no output for failing binary"
    );
}
/// FALSIFY-CGP-062: a warm `cgp diff` invocation must finish in under 500 ms.
///
/// The command is run twice on the same pair of fixtures. The first run is an
/// untimed warm-up whose result is ignored; only the second run is timed and
/// checked. The measured time includes the `cargo run` subprocess overhead.
///
/// The fixtures live in the platform temp directory under per-process names
/// and are removed *before* any assertion runs, so a failing test does not
/// leave files behind and concurrent runs do not clobber each other.
#[test]
fn falsify_cgp_062_diff_speed() {
    let baseline = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {"wall_clock_time_us": 35.7, "samples": 1, "stddev_us": 0.0, "ci_95_low_us": 0.0, "ci_95_high_us": 0.0},
        "throughput": {"tflops": 7.5, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });
    let current = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {"wall_clock_time_us": 23.2, "samples": 1, "stddev_us": 0.0, "ci_95_low_us": 0.0, "ci_95_high_us": 0.0},
        "throughput": {"tflops": 11.6, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });

    // Per-process names avoid collisions between concurrent test runs.
    let tmp_dir = std::env::temp_dir();
    let pid = std::process::id();
    let baseline_path = tmp_dir.join(format!("cgp-falsify-062-b-{pid}.json"));
    let current_path = tmp_dir.join(format!("cgp-falsify-062-c-{pid}.json"));
    std::fs::write(&baseline_path, baseline.to_string()).expect("Failed to write baseline fixture");
    std::fs::write(&current_path, current.to_string()).expect("Failed to write current fixture");

    // Runs `cgp diff` on the two fixtures and returns the raw process result.
    let run_diff = || {
        cgp_cmd()
            .arg("diff")
            .arg("--baseline")
            .arg(&baseline_path)
            .arg("--current")
            .arg(&current_path)
            .output()
    };

    // Warm-up run: result intentionally ignored.
    let _ = run_diff();

    let start = Instant::now();
    let result = run_diff();
    let elapsed = start.elapsed();

    // Best-effort cleanup, done before anything below can panic.
    let _ = std::fs::remove_file(&baseline_path);
    let _ = std::fs::remove_file(&current_path);

    let output = result.expect("Failed to run cgp diff");
    assert!(output.status.success());
    assert!(
        elapsed.as_millis() < 500,
        "FALSIFY-CGP-062 FAILED: diff took {}ms (limit: 500ms with subprocess overhead)",
        elapsed.as_millis()
    );
}
/// FALSIFY-CGP-075: the Q4K quant profile must report the compressed size.
///
/// Runs `cgp profile quant` for `q4k_gemv` at `4096x1x4096`. Stdout must
/// contain `9.44 MB` or `9.4`. If the text `67` appears anywhere in stdout,
/// the word `equivalent` must appear as well.
#[test]
fn falsify_cgp_075_q4k_effective_bandwidth() {
    let output = cgp_cmd()
        .args(["profile", "quant"])
        .args(["--kernel", "q4k_gemv"])
        .args(["--size", "4096x1x4096"])
        .output()
        .expect("Failed to run cgp profile quant");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let shows_compressed_size = stdout.contains("9.44 MB") || stdout.contains("9.4");
    assert!(
        shows_compressed_size,
        "FALSIFY-CGP-075 FAILED: Q4K compressed size should be ~9.44 MB.\nOutput:\n{stdout}"
    );

    // The labelling requirement applies only when a "67" figure is printed.
    if !stdout.contains("67") {
        return;
    }
    let labelled_as_equivalent = stdout.contains("FP32 equivalent") || stdout.contains("equivalent");
    assert!(
        labelled_as_equivalent,
        "FALSIFY-CGP-075: If 67MB shown, must be labeled as FP32 equivalent"
    );
}
/// FALSIFY-CGP-061: a warm `cgp doctor` run must finish in under 2500 ms.
///
/// The first invocation is an untimed warm-up whose result is ignored. The
/// second one is timed, must exit successfully, and its stdout must mention
/// either `RTX 4090` or `GPU`.
#[test]
fn falsify_cgp_061_doctor_speed_real() {
    // Warm-up run: result intentionally ignored.
    let _ = cgp_cmd().arg("doctor").output();

    let start = Instant::now();
    let output = cgp_cmd()
        .arg("doctor")
        .output()
        .expect("Failed to run cgp doctor");
    let elapsed = start.elapsed();

    assert!(output.status.success());
    let elapsed_ms = elapsed.as_millis();
    assert!(
        elapsed_ms < 2500,
        "FALSIFY-CGP-061 FAILED: doctor took {}ms",
        elapsed_ms
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(stdout.contains("RTX 4090") || stdout.contains("GPU"));
}
/// FALSIFY-CGP-021: the FP16 ridge point for the `cuda` target must be ~327.4.
///
/// Two checks are made:
/// 1. In the JSON output, if `ridge_points` is an array with an entry whose
///    `precision` contains `FP16` or `Fp16`, its `ridge_flop_per_byte` must be
///    within 0.5 of `330_000 / 1008`. When there is no such entry this check
///    is skipped.
/// 2. The plain-text output must contain the substring `327`. The exit status
///    of this second run is not checked.
#[test]
fn falsify_cgp_021_ridge_point_math() {
    let output = cgp_cmd()
        .arg("--json")
        .args(["roofline", "--target", "cuda"])
        .output()
        .expect("Failed to run cgp roofline");
    assert!(output.status.success());

    let json_text = String::from_utf8_lossy(&output.stdout);
    let parsed: serde_json::Value = serde_json::from_str(&json_text).unwrap();

    let fp16_entry = parsed["ridge_points"].as_array().and_then(|entries| {
        entries.iter().find(|entry| {
            entry["precision"]
                .as_str()
                .map_or(false, |label| label.contains("FP16") || label.contains("Fp16"))
        })
    });
    if let Some(fp16_ridge) = fp16_entry {
        // A missing or non-numeric field becomes 0.0, which fails the check below.
        let ridge = fp16_ridge["ridge_flop_per_byte"].as_f64().unwrap_or(0.0);
        let expected = 330_000.0_f64 / 1008.0;
        assert!(
            (ridge - expected).abs() < 0.5,
            "FALSIFY-CGP-021 FAILED: FP16 ridge={ridge:.1}, expected={expected:.1}"
        );
    }

    let text_output = cgp_cmd()
        .arg("roofline")
        .args(["--target", "cuda"])
        .output()
        .expect("Failed");
    let text = String::from_utf8_lossy(&text_output.stdout);
    assert!(
        text.contains("327"),
        "FALSIFY-CGP-021: Ridge point 327.x not in output.\n{text}"
    );
}
/// FALSIFY-CGP-060: a warm `cgp profile compare` run must stay within the
/// time limit.
///
/// The same size-512 GEMM comparison (`scalar,avx2`) is run twice. The first
/// run is an untimed warm-up whose result is ignored. The second is timed and
/// must exit successfully with fewer than 31 whole elapsed seconds.
#[test]
fn falsify_cgp_060_profile_speed() {
    // Both invocations use exactly the same argument list.
    const COMPARE_ARGS: [&str; 8] = [
        "profile",
        "compare",
        "--kernel",
        "gemm",
        "--size",
        "512",
        "--backends",
        "scalar,avx2",
    ];

    // Warm-up run: result intentionally ignored.
    let _ = cgp_cmd().args(COMPARE_ARGS).output();

    let start = Instant::now();
    let output = cgp_cmd()
        .args(COMPARE_ARGS)
        .output()
        .expect("Failed to run cgp profile compare");
    let elapsed = start.elapsed();

    assert!(output.status.success());
    let elapsed_secs = elapsed.as_secs();
    assert!(
        elapsed_secs < 31,
        "FALSIFY-CGP-060 FAILED: profile took {}s (limit: 30s)",
        elapsed_secs
    );
}
/// FALSIFY-CGP-020: the reported DRAM bandwidth for the `cuda` target must be
/// within 5% of 1.008e12.
///
/// Reads `peak_bandwidth.Dram` from the JSON roofline output. A missing or
/// non-numeric value is treated as 0.0, which fails the tolerance check. The
/// failure message prints both values divided by 1e9, labelled `GB/s`.
#[test]
fn falsify_cgp_020_bandwidth_spec() {
    let output = cgp_cmd()
        .arg("--json")
        .args(["roofline", "--target", "cuda"])
        .output()
        .expect("Failed to run cgp roofline");
    assert!(output.status.success());

    let json_text = String::from_utf8_lossy(&output.stdout);
    let parsed: serde_json::Value = serde_json::from_str(&json_text).unwrap();
    let dram_bw = parsed["peak_bandwidth"]["Dram"].as_f64().unwrap_or(0.0);

    let expected_bw = 1_008_000_000_000.0_f64;
    let tolerance = expected_bw * 0.05;
    let deviation = (dram_bw - expected_bw).abs();
    assert!(
        deviation < tolerance,
        "FALSIFY-CGP-020 FAILED: DRAM bandwidth {:.0} GB/s vs expected {:.0} GB/s",
        dram_bw / 1e9,
        expected_bw / 1e9
    );
}
/// FALSIFY-CGP-032: `cgp diff` must report `IMPROVED` for a large speed-up.
///
/// The baseline fixture has a wall-clock time of 35.7 us and the current one
/// 23.2 us. After running `cgp diff` on them, stdout must contain the word
/// `IMPROVED`.
///
/// The fixtures live in the platform temp directory under per-process names
/// and are removed *before* any assertion runs, so a failing test does not
/// leave files behind and concurrent runs do not clobber each other.
#[test]
fn falsify_cgp_032_detect_improvement() {
    let baseline = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 35.7,
            "samples": 50, "stddev_us": 0.5,
            "ci_95_low_us": 35.2, "ci_95_high_us": 36.2
        },
        "throughput": {"tflops": 7.5, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });
    let current = serde_json::json!({
        "version": "2.0", "timestamp": "", "hardware": {"cpu_features": []},
        "timing": {
            "wall_clock_time_us": 23.2,
            "samples": 50, "stddev_us": 0.3,
            "ci_95_low_us": 22.9, "ci_95_high_us": 23.5
        },
        "throughput": {"tflops": 11.6, "gflops": 0.0, "bandwidth_gbps": 0.0, "arithmetic_intensity": 0.0},
        "muda": []
    });

    // Per-process names avoid collisions between concurrent test runs.
    let tmp_dir = std::env::temp_dir();
    let pid = std::process::id();
    let baseline_path = tmp_dir.join(format!("cgp-falsify-032-b-{pid}.json"));
    let current_path = tmp_dir.join(format!("cgp-falsify-032-c-{pid}.json"));
    std::fs::write(&baseline_path, baseline.to_string()).expect("Failed to write baseline fixture");
    std::fs::write(&current_path, current.to_string()).expect("Failed to write current fixture");

    let result = cgp_cmd()
        .arg("diff")
        .arg("--baseline")
        .arg(&baseline_path)
        .arg("--current")
        .arg(&current_path)
        .output();

    // Best-effort cleanup, done before anything below can panic.
    let _ = std::fs::remove_file(&baseline_path);
    let _ = std::fs::remove_file(&current_path);

    let output = result.expect("Failed to run cgp diff");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(
        stdout.contains("IMPROVED"),
        "FALSIFY-CGP-032 FAILED: 35.7→23.2us should be IMPROVED.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-042: at GEMM size 4096 the `cublas` backend must report more
/// TFLOP/s than the `cuda` backend.
///
/// Runs `cgp --json profile compare` for both backends and compares the
/// `tflops` field of the two matching entries in the JSON array.
#[test]
fn falsify_cgp_042_cublas_faster_than_ptx() {
    let output = cgp_cmd()
        .arg("--json")
        .args(["profile", "compare"])
        .args(["--kernel", "gemm"])
        .args(["--size", "4096"])
        .args(["--backends", "cuda,cublas"])
        .output()
        .expect("Failed to run cgp profile compare");
    assert!(output.status.success());

    let raw_json = String::from_utf8_lossy(&output.stdout);
    let report: serde_json::Value = serde_json::from_str(&raw_json).expect("Compare JSON invalid");
    let backends = report.as_array().unwrap();

    // Each array element is looked up by its "name" field.
    let cuda_entry = backends.iter().find(|b| b["name"] == "cuda").unwrap();
    let cublas_entry = backends.iter().find(|b| b["name"] == "cublas").unwrap();

    let cuda_tflops = cuda_entry["tflops"].as_f64().unwrap();
    let cublas_tflops = cublas_entry["tflops"].as_f64().unwrap();
    assert!(
        cublas_tflops > cuda_tflops,
        "FALSIFY-CGP-042 FAILED: cuBLAS {cublas_tflops:.1} TFLOP/s should exceed pure PTX {cuda_tflops:.1} TFLOP/s at 4096"
    );
}
/// FALSIFY-CGP-046: `cgp compete cpu_timing` must print both labels and a winner.
///
/// Compares `sleep 0.01` against `sleep 0.015` with the labels `fast,slow`.
/// Stdout must contain both labels and the word `Winner`.
#[test]
fn falsify_cgp_046_cpu_only_competitor() {
    let output = cgp_cmd()
        .args(["compete", "cpu_timing"])
        .args(["--ours", "sleep 0.01"])
        .args(["--theirs", "sleep 0.015"])
        .args(["--label", "fast,slow"])
        .output()
        .expect("Failed to run cgp compete");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let has_both_labels = stdout.contains("fast") && stdout.contains("slow");
    assert!(
        has_both_labels,
        "FALSIFY-CGP-046 FAILED: Labels missing.\nOutput:\n{stdout}"
    );

    let declares_winner = stdout.contains("Winner");
    assert!(
        declares_winner,
        "FALSIFY-CGP-046: Should declare a winner.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-SCALING-001: every scaling data point must carry `threads`,
/// `gflops` and `scaling` fields.
///
/// Runs `cgp --json profile scaling` with size 256, at most 2 threads and one
/// run. If the command exits unsuccessfully the test returns without checking
/// anything. Otherwise the output must be a non-empty JSON array.
#[test]
fn falsify_cgp_scaling_001_json_fields() {
    // Field name paired with the message reported when it is absent.
    const REQUIRED_FIELDS: [(&str, &str); 3] = [
        ("threads", "FALSIFY-CGP-SCALING-001: missing 'threads' field"),
        ("gflops", "FALSIFY-CGP-SCALING-001: missing 'gflops' field"),
        ("scaling", "FALSIFY-CGP-SCALING-001: missing 'scaling' field"),
    ];

    let output = cgp_cmd()
        .arg("--json")
        .args(["profile", "scaling"])
        .args(["--size", "256"])
        .args(["--max-threads", "2"])
        .args(["--runs", "1"])
        .output()
        .expect("Failed to run cgp profile scaling");
    // A failing command is treated as "not applicable", not as a test failure.
    if !output.status.success() {
        return;
    }

    let raw_json = String::from_utf8_lossy(&output.stdout);
    let parsed: serde_json::Value = serde_json::from_str(&raw_json).expect("Scaling JSON invalid");
    let points = parsed.as_array().expect("Should be array");
    assert!(!points.is_empty(), "Should have at least 1 data point");

    for point in points {
        for (field, message) in REQUIRED_FIELDS {
            assert!(point.get(field).is_some(), "{message}");
        }
    }
}
/// FALSIFY-CGP-SCALING-002: the first scaling data point must have a
/// `scaling` factor within 0.15 of 1.0.
///
/// Runs `cgp --json profile scaling` with size 256, at most 1 thread and one
/// run. If the command exits unsuccessfully, or the JSON array is empty, the
/// test returns without asserting. A missing `scaling` value is read as 0.0.
#[test]
fn falsify_cgp_scaling_002_baseline_is_1x() {
    let output = cgp_cmd()
        .arg("--json")
        .args(["profile", "scaling"])
        .args(["--size", "256"])
        .args(["--max-threads", "1"])
        .args(["--runs", "1"])
        .output()
        .expect("Failed to run cgp profile scaling");
    // A failing command is treated as "not applicable", not as a test failure.
    if !output.status.success() {
        return;
    }

    let raw_json = String::from_utf8_lossy(&output.stdout);
    let parsed: serde_json::Value = serde_json::from_str(&raw_json).expect("Scaling JSON invalid");
    let points = parsed.as_array().unwrap();
    let Some(first) = points.first() else {
        return;
    };

    let scaling = first["scaling"].as_f64().unwrap_or(0.0);
    assert!(
        (scaling - 1.0).abs() < 0.15,
        "FALSIFY-CGP-SCALING-002: 1T scaling should be ~1.0, got {scaling}"
    );
}
/// FALSIFY-CGP-EMPIRICAL-010: `cgp roofline --empirical` must print all four
/// empirical sections.
///
/// For the `avx512` target, stdout must contain `Empirical Measurement`,
/// `DRAM Bandwidth`, `Peak FP32 FLOPS` and `Empirical Ridge`.
#[test]
fn falsify_cgp_empirical_010_roofline_output() {
    let output = cgp_cmd()
        .arg("roofline")
        .args(["--target", "avx512"])
        .arg("--empirical")
        .output()
        .expect("Failed to run cgp roofline --empirical");
    assert!(
        output.status.success(),
        "cgp roofline --empirical must succeed"
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    let has_section = stdout.contains("Empirical Measurement");
    let has_bandwidth = stdout.contains("DRAM Bandwidth");
    let has_flops = stdout.contains("Peak FP32 FLOPS");
    let has_ridge = stdout.contains("Empirical Ridge");

    assert!(
        has_section,
        "FALSIFY-CGP-EMPIRICAL-010: Must show empirical section.\nOutput:\n{stdout}"
    );
    assert!(
        has_bandwidth,
        "FALSIFY-CGP-EMPIRICAL-010: Must show measured bandwidth.\nOutput:\n{stdout}"
    );
    assert!(
        has_flops,
        "FALSIFY-CGP-EMPIRICAL-010: Must show measured FLOPS.\nOutput:\n{stdout}"
    );
    assert!(
        has_ridge,
        "FALSIFY-CGP-EMPIRICAL-010: Must show empirical ridge point.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-EMPIRICAL-013: the JSON form of `cgp roofline --empirical`
/// must expose both theoretical and empirical data.
///
/// The top-level object must have `theoretical` and `empirical` keys, and
/// `empirical` must contain `measured_bandwidth_bps`, `measured_peak_flops`
/// and `measured_ridge_point`.
#[test]
fn falsify_cgp_empirical_013_json_output() {
    // Key expected under "empirical" paired with its failure message.
    const EMPIRICAL_FIELDS: [(&str, &str); 3] = [
        ("measured_bandwidth_bps", "Must have measured_bandwidth_bps"),
        ("measured_peak_flops", "Must have measured_peak_flops"),
        ("measured_ridge_point", "Must have measured_ridge_point"),
    ];

    let output = cgp_cmd()
        .arg("--json")
        .arg("roofline")
        .args(["--target", "avx512"])
        .arg("--empirical")
        .output()
        .expect("Failed to run cgp --json roofline --empirical");
    assert!(output.status.success(), "Must succeed");

    let stdout = String::from_utf8_lossy(&output.stdout);
    let parsed: serde_json::Value =
        serde_json::from_str(&stdout).expect("Output must be valid JSON");

    let has_theoretical = parsed.get("theoretical").is_some();
    assert!(
        has_theoretical,
        "FALSIFY-CGP-EMPIRICAL-013: JSON must have 'theoretical' field.\nGot:\n{stdout}"
    );
    let has_empirical = parsed.get("empirical").is_some();
    assert!(
        has_empirical,
        "FALSIFY-CGP-EMPIRICAL-013: JSON must have 'empirical' field.\nGot:\n{stdout}"
    );

    let empirical = &parsed["empirical"];
    for (field, message) in EMPIRICAL_FIELDS {
        assert!(empirical.get(field).is_some(), "{message}");
    }
}
/// FALSIFY-CGP-EMPIRICAL-011: the measured DRAM bandwidth must exceed 0.1 GB/s.
///
/// Scans stdout for the first line of the form
/// `... DRAM Bandwidth: <number> GB/s ...` whose number parses as `f64`, and
/// asserts on that value. The test panics if no such line can be parsed. The
/// command's exit status is not checked.
#[test]
fn falsify_cgp_empirical_011_bandwidth_sanity() {
    let output = cgp_cmd()
        .arg("roofline")
        .args(["--target", "avx512"])
        .arg("--empirical")
        .output()
        .expect("Failed to run cgp roofline --empirical");
    let stdout = String::from_utf8_lossy(&output.stdout);

    // The first line that yields a parseable number wins; unparseable lines are skipped.
    let measured = stdout
        .lines()
        .filter(|line| line.contains("DRAM Bandwidth:"))
        .find_map(|line| {
            let after_label = line.split("DRAM Bandwidth:").nth(1)?;
            let before_unit = after_label.split("GB/s").next()?;
            before_unit.trim().parse::<f64>().ok()
        });

    match measured {
        Some(bw_val) => assert!(
            bw_val > 0.1,
            "FALSIFY-CGP-EMPIRICAL-011: Bandwidth {bw_val} GB/s must be > 0.1 GB/s"
        ),
        None => panic!("FALSIFY-CGP-EMPIRICAL-011: Could not parse bandwidth from output"),
    }
}
/// FALSIFY-CGP-EMPIRICAL-012: the measured peak FP32 rate must exceed 10 GFLOP/s.
///
/// Scans stdout for the first line of the form
/// `... Peak FP32 FLOPS: <number> GFLOP/s ...` whose number parses as `f64`,
/// and asserts on that value. If nothing can be parsed, the test passes
/// silently when stdout does not mention `AVX-512` and panics otherwise. The
/// command's exit status is not checked.
#[test]
fn falsify_cgp_empirical_012_flops_sanity() {
    let output = cgp_cmd()
        .arg("roofline")
        .args(["--target", "avx512"])
        .arg("--empirical")
        .output()
        .expect("Failed to run cgp roofline --empirical");
    let stdout = String::from_utf8_lossy(&output.stdout);

    // The first line that yields a parseable number wins; unparseable lines are skipped.
    let measured = stdout
        .lines()
        .filter(|line| line.contains("Peak FP32 FLOPS:"))
        .find_map(|line| {
            let after_label = line.split("Peak FP32 FLOPS:").nth(1)?;
            let before_unit = after_label.split("GFLOP/s").next()?;
            before_unit.trim().parse::<f64>().ok()
        });

    match measured {
        Some(flops_val) => assert!(
            flops_val > 10.0,
            "FALSIFY-CGP-EMPIRICAL-012: FLOPS {flops_val} GFLOP/s must be > 10"
        ),
        // Without any AVX-512 mention in the output the check is skipped.
        None if !stdout.contains("AVX-512") => {}
        None => panic!("FALSIFY-CGP-EMPIRICAL-012: Could not parse FLOPS from output"),
    }
}
/// FALSIFY-CGP-COMPARE-050: when the benchmark binary exists, the compare
/// output must be marked as measured.
///
/// Runs `cgp profile compare` for a size-1024 GEMM on `avx512`. The command
/// must always succeed. The stdout check (contains `M`) is applied only if
/// the `benchmark_matrix_suite` binary exists at the hard-coded path.
#[test]
fn falsify_cgp_compare_050_measured_data() {
    let output = cgp_cmd()
        .args(["profile", "compare"])
        .args(["--kernel", "gemm"])
        .args(["--size", "1024"])
        .args(["--backends", "avx512"])
        .output()
        .expect("Failed to run cgp profile compare");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);

    let bench_binary =
        std::path::Path::new("/mnt/nvme-raid0/targets/trueno/release/examples/benchmark_matrix_suite");
    if !bench_binary.exists() {
        return;
    }
    assert!(
        stdout.contains("M"),
        "FALSIFY-CGP-COMPARE-050: With benchmark binary, should show M=measured.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-QUANT-076: the quant profile must show format details and,
/// when the benchmark binary exists, a roofline analysis.
///
/// `Super-block:` and `Compression ratio:` are always required in stdout.
/// `Roofline Analysis` and `Bottleneck:` are required only if the
/// `benchmark_matrix_suite` binary exists at the hard-coded path.
#[test]
fn falsify_cgp_quant_076_roofline_analysis() {
    let output = cgp_cmd()
        .args(["profile", "quant"])
        .args(["--kernel", "q4k_gemv"])
        .args(["--size", "4096x1x4096"])
        .output()
        .expect("Failed to run cgp profile quant");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let shows_super_block = stdout.contains("Super-block:");
    assert!(shows_super_block, "Must show super-block info");
    let shows_ratio = stdout.contains("Compression ratio:");
    assert!(shows_ratio, "Must show compression ratio");

    let bench_binary =
        std::path::Path::new("/mnt/nvme-raid0/targets/trueno/release/examples/benchmark_matrix_suite");
    if !bench_binary.exists() {
        return;
    }
    assert!(
        stdout.contains("Roofline Analysis"),
        "FALSIFY-CGP-QUANT-076: Must show roofline analysis when timing available.\nOutput:\n{stdout}"
    );
    assert!(
        stdout.contains("Bottleneck:"),
        "FALSIFY-CGP-QUANT-076: Must classify bottleneck.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-QUANT-077: when the benchmark binary exists, the quant profile
/// must include a token estimate.
///
/// The command must always succeed. `Token Estimation` and `tokens/sec` are
/// required in stdout only if the `benchmark_matrix_suite` binary exists at
/// the hard-coded path.
#[test]
fn falsify_cgp_quant_077_token_estimation() {
    let output = cgp_cmd()
        .args(["profile", "quant"])
        .args(["--kernel", "q4k_gemv"])
        .args(["--size", "4096x1x4096"])
        .output()
        .expect("Failed to run cgp profile quant");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);

    let bench_binary =
        std::path::Path::new("/mnt/nvme-raid0/targets/trueno/release/examples/benchmark_matrix_suite");
    if !bench_binary.exists() {
        return;
    }
    assert!(
        stdout.contains("Token Estimation"),
        "FALSIFY-CGP-QUANT-077: Must show LLM token estimation.\nOutput:\n{stdout}"
    );
    assert!(
        stdout.contains("tokens/sec"),
        "FALSIFY-CGP-QUANT-077: Must show tokens/sec.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-091: trueno's 512 GEMM must reach at least 0.9x of ndarray's speed.
///
/// Runs the `gemm_comparison` Criterion bench and parses the reported time of
/// `gemm/trueno/512` and `gemm/ndarray/512` from its stdout. The ratio
/// `ndarray / trueno` must be >= 0.9.
///
/// The benchmark filter is a regular expression that selects *both*
/// benchmarks. A filter of just `gemm/trueno/512` cannot match the ndarray
/// benchmark, which would leave its time unparsed and make this test skip on
/// every run.
///
/// The test is best-effort: it skips (with a note on stderr) when the bench
/// cannot be launched or when either time cannot be parsed.
#[test]
fn falsify_cgp_091_trueno_vs_ndarray_gemm() {
    let output = Command::new(env!("CARGO"))
        .args([
            "bench",
            "--bench",
            "gemm_comparison",
            "--",
            // Criterion treats this positional filter as a regex.
            "gemm/(trueno|ndarray)/512",
            "--quick",
            "--sample-size",
            "10",
        ])
        .output();
    let Ok(out) = output else {
        eprintln!("FALSIFY-CGP-091: criterion bench not available, skipping");
        return;
    };

    let stdout = String::from_utf8_lossy(&out.stdout);
    let trueno_time = parse_criterion_time(&stdout, "gemm/trueno/512");
    let ndarray_time = parse_criterion_time(&stdout, "gemm/ndarray/512");
    let (Some(trueno_ms), Some(ndarray_ms)) = (trueno_time, ndarray_time) else {
        eprintln!(
            "FALSIFY-CGP-091: Could not parse both times. trueno={:?} ndarray={:?}",
            trueno_time, ndarray_time
        );
        return;
    };

    // ratio > 1 means trueno is faster than ndarray.
    let ratio = ndarray_ms / trueno_ms;
    eprintln!(
        "FALSIFY-CGP-091: trueno={:.3}ms ndarray={:.3}ms ratio={:.2}x",
        trueno_ms, ndarray_ms, ratio
    );
    assert!(
        ratio >= 0.9,
        "FALSIFY-CGP-091 FAILED: trueno {trueno_ms:.3}ms vs ndarray {ndarray_ms:.3}ms = {ratio:.2}x (need >= 0.9x)"
    );
}
/// FALSIFY-CGP-090: a measured size-1024 `avx512` GEMM must exceed 100 GFLOPS.
///
/// Looks through stdout for lines containing both `avx512` and `M`. On such a
/// line, the first whitespace-separated token at index 2 or later that parses
/// as a number greater than 0.01 is taken as the TFLOP/s figure and scaled by
/// 1000 before being checked. If no line yields a value, the test only prints
/// an "inconclusive" note and passes.
#[test]
fn falsify_cgp_090_trueno_gemm_at_peak() {
    let output = cgp_cmd()
        .args(["profile", "compare"])
        .args(["--kernel", "gemm"])
        .args(["--size", "1024"])
        .args(["--backends", "avx512"])
        .output()
        .expect("Failed to run cgp profile compare");
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);

    let measured_tflops = stdout
        .lines()
        .filter(|line| line.contains("avx512") && line.contains("M"))
        .find_map(|line| {
            line.split_whitespace()
                // Only tokens at index 2 and beyond are candidates.
                .skip(2)
                .filter_map(|token| token.parse::<f64>().ok())
                .find(|&tflops| tflops > 0.01)
        });

    match measured_tflops {
        Some(tflops) => {
            let gflops = tflops * 1000.0;
            eprintln!("FALSIFY-CGP-090: Measured GEMM 1024 = {:.0} GFLOPS", gflops);
            assert!(
                gflops > 100.0,
                "FALSIFY-CGP-090: GEMM 1024 {gflops:.0} GFLOPS must be > 100"
            );
        }
        None => eprintln!("FALSIFY-CGP-090: No measured data available, test inconclusive"),
    }
}
/// FALSIFY-CGP-QUANT-ALL-001: `cgp profile quant --all` must print a sweep
/// with a summary.
///
/// Stdout must contain `Quant Sweep`, `Summary` and `ffn_up`.
#[test]
fn falsify_cgp_quant_all_001_summary() {
    let output = cgp_cmd()
        .arg("profile")
        .arg("quant")
        .arg("--all")
        .output()
        .expect("Failed to run cgp profile quant --all");
    assert!(output.status.success());

    let stdout = String::from_utf8_lossy(&output.stdout);
    let has_header = stdout.contains("Quant Sweep");
    let has_summary = stdout.contains("Summary");
    let has_ffn_up = stdout.contains("ffn_up");

    assert!(
        has_header,
        "FALSIFY-CGP-QUANT-ALL-001: Must show sweep header.\nOutput:\n{stdout}"
    );
    assert!(
        has_summary,
        "FALSIFY-CGP-QUANT-ALL-001: Must show summary.\nOutput:\n{stdout}"
    );
    assert!(
        has_ffn_up,
        "FALSIFY-CGP-QUANT-ALL-001: Must show ffn_up layer.\nOutput:\n{stdout}"
    );
}
/// Extracts a benchmark's middle time estimate from Criterion text output,
/// converted to milliseconds.
///
/// The first line containing both `bench_name` and `time:` is expected to
/// look like `<name> time: [<low> <unit> <mid> <unit> <high> <unit>]`. The
/// third and fourth tokens inside the brackets are taken as the value and
/// its unit.
///
/// Recognised units are `ps`, `ns`, `µs` (also spelled `μs` or `us`), `ms`
/// and `s`. Previously only microseconds were converted, so nanosecond or
/// second readings were silently reported as milliseconds.
///
/// Returns `None` when no matching line exists, when the matching line has no
/// bracketed section, when the value is not a number, or when the unit is not
/// recognised. A matching line with fewer than four bracket tokens is skipped
/// and the search continues.
fn parse_criterion_time(output: &str, bench_name: &str) -> Option<f64> {
    for line in output.lines() {
        if !(line.contains(bench_name) && line.contains("time:")) {
            continue;
        }
        let bracket_content = line.split('[').nth(1)?.split(']').next()?;
        // Tokens are laid out as: low, unit, mid, unit, high, unit.
        let mut tokens = bracket_content.split_whitespace().skip(2);
        let (Some(mid_str), Some(unit)) = (tokens.next(), tokens.next()) else {
            continue;
        };
        let value: f64 = mid_str.parse().ok()?;
        let millis = match unit {
            "ps" => value / 1_000_000_000.0,
            "ns" => value / 1_000_000.0,
            // U+00B5 (micro sign), U+03BC (Greek mu), and the ASCII fallback.
            "\u{b5}s" | "\u{3bc}s" | "us" => value / 1000.0,
            "ms" => value,
            "s" => value * 1000.0,
            // An unknown unit cannot be converted safely.
            _ => return None,
        };
        return Some(millis);
    }
    None
}
/// FALSIFY-CGP-CONTRACT-001: `cgp contract verify --self-verify` must succeed
/// without reporting any failure.
///
/// The command must exit successfully and stdout must not contain the
/// substring `FAIL`.
#[test]
fn falsify_cgp_contract_001_self_verify() {
    let output = cgp_cmd()
        .arg("contract")
        .arg("verify")
        .arg("--self-verify")
        .output()
        .expect("Failed to run cgp contract verify --self");
    assert!(
        output.status.success(),
        "cgp contract verify --self must succeed"
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    let reports_failure = stdout.contains("FAIL");
    assert!(
        !reports_failure,
        "FALSIFY-CGP-CONTRACT-001: Self-verify must not have FAILures.\nOutput:\n{stdout}"
    );
}
/// FALSIFY-CGP-CONTRACT-002: verifying the contracts directory must succeed
/// and print a summary.
///
/// Runs `cgp contract verify --contracts-dir ../../contracts/cgp/`. Stdout
/// must contain `Total:` and at least one `PASS`.
#[test]
fn falsify_cgp_contract_002_contracts_dir() {
    let output = cgp_cmd()
        .args(["contract", "verify"])
        .args(["--contracts-dir", "../../contracts/cgp/"])
        .output()
        .expect("Failed to run cgp contract verify");
    assert!(
        output.status.success(),
        "cgp contract verify --contracts-dir must succeed"
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    let has_total = stdout.contains("Total:");
    assert!(
        has_total,
        "FALSIFY-CGP-CONTRACT-002: Must show Total verification summary.\nOutput:\n{stdout}"
    );
    let has_pass = stdout.contains("PASS");
    assert!(
        has_pass,
        "FALSIFY-CGP-CONTRACT-002: Must have at least 1 PASS.\nOutput:\n{stdout}"
    );
}