/// F001: every brick type can be built through its constructor.
///
/// Only construction is exercised; each value is discarded as soon as it is
/// created.
#[test]
#[cfg(feature = "cuda")]
fn f001_brick_trait_implemented() {
    // Evaluates each expression in turn and throws the result away on the spot.
    // A macro is used because the bricks are all different concrete types.
    macro_rules! construct_and_discard {
        ($($brick:expr),+ $(,)?) => {
            $( let _ = $brick; )+
        };
    }

    construct_and_discard!(
        RmsNormBrick::new(vec![1.0; 64], 1e-5),
        QkvBrick::new(64, 64, 64, 64),
        AttentionBrick::new(8, 2, 64),
        FlashAttentionBrick::new(8, 2, 64),
        FfnBrick::new(64, 256),
        FusedFfnBrick::new(64, 256),
        RopeBrick::new(64, 8, 10000.0, 0),
        OProjBrick::new(512, 64),
        ActivationQuantBrick::new(64),
    );
}
/// F002: every brick reports a non-empty list from `assertions()`.
#[test]
#[cfg(feature = "cuda")]
fn f002_brick_assertions_nonempty() {
    // The bricks are distinct concrete types, so the shared check is expressed
    // as a local macro rather than a loop over a homogeneous collection.
    macro_rules! assert_declares_assertions {
        ($brick:expr) => {
            assert!(!$brick.assertions().is_empty())
        };
    }

    assert_declares_assertions!(RmsNormBrick::new(vec![1.0; 64], 1e-5));
    assert_declares_assertions!(QkvBrick::new(64, 64, 64, 64));
    assert_declares_assertions!(AttentionBrick::new(8, 2, 64));
    assert_declares_assertions!(FlashAttentionBrick::new(8, 2, 64));
    assert_declares_assertions!(FfnBrick::new(64, 256));
    assert_declares_assertions!(FusedFfnBrick::new(64, 256));
    assert_declares_assertions!(RopeBrick::new(64, 8, 10000.0, 0));
    assert_declares_assertions!(OProjBrick::new(512, 64));
    assert_declares_assertions!(ActivationQuantBrick::new(64));
}
/// F004: the default budget of each listed brick has a strictly positive
/// `us_per_token`.
///
/// NOTE(review): `RopeBrick` and `OProjBrick` are constructed by the other
/// brick-wide tests in this file but are not checked here — confirm whether
/// that omission is intentional.
#[test]
#[cfg(feature = "cuda")]
fn f004_budget_nonzero() {
    // Shared check for bricks of different concrete types.
    macro_rules! assert_budget_positive {
        ($brick:expr) => {
            assert!($brick.budget().us_per_token > 0.0)
        };
    }

    assert_budget_positive!(RmsNormBrick::new(vec![1.0; 64], 1e-5));
    assert_budget_positive!(QkvBrick::new(64, 64, 64, 64));
    assert_budget_positive!(AttentionBrick::new(8, 2, 64));
    assert_budget_positive!(FlashAttentionBrick::new(8, 2, 64));
    assert_budget_positive!(FfnBrick::new(64, 256));
    assert_budget_positive!(FusedFfnBrick::new(64, 256));
    assert_budget_positive!(ActivationQuantBrick::new(64));
}
/// F005: no two brick types report the same `name()`.
#[test]
#[cfg(feature = "cuda")]
fn f005_brick_names_unique() {
    use std::collections::HashSet;

    let brick_names = [
        RmsNormBrick::new(vec![1.0; 64], 1e-5).name(),
        QkvBrick::new(64, 64, 64, 64).name(),
        AttentionBrick::new(8, 2, 64).name(),
        FlashAttentionBrick::new(8, 2, 64).name(),
        FfnBrick::new(64, 256).name(),
        FusedFfnBrick::new(64, 256).name(),
        RopeBrick::new(64, 8, 10000.0, 0).name(),
        OProjBrick::new(512, 64).name(),
        ActivationQuantBrick::new(64).name(),
    ];

    // A set drops duplicates, so equal sizes mean every name is distinct.
    let distinct = brick_names.iter().collect::<HashSet<_>>();
    assert_eq!(brick_names.len(), distinct.len());
}
/// F008: the derived fields of a `TokenResult` agree with its inputs.
///
/// The result is built from `10` and `500.0` against a budget created with
/// `from_latency(100.0)`; the test expects 50 µs/token, 20000 tokens/s and the
/// budget to be reported as met.
#[test]
fn f008_token_result_consistent() {
    let limit = TokenBudget::from_latency(100.0);
    let outcome: TokenResult<Vec<f32>> = TokenResult::new(vec![], 10, 500.0, &limit);

    assert_eq!(outcome.tokens_processed, 10);

    let latency_error = (outcome.us_per_token - 50.0).abs();
    assert!(latency_error < 0.001);

    let throughput_error = (outcome.tokens_per_sec - 20000.0).abs();
    assert!(throughput_error < 1.0);

    assert!(outcome.budget_met);
}
/// F010: `LayerTiming::bottleneck` reports the slowest component.
///
/// `ffn_us` (15.8) is the largest per-component value in the fixture, so the
/// test expects the pair `("ffn", 15.8)`.
#[test]
fn f010_bottleneck_identification() {
    let layer_timing = LayerTiming {
        attn_norm_us: 1.2,
        qkv_us: 8.5,
        rope_us: 0.8,
        attention_us: 12.3,
        o_proj_us: 4.1,
        ffn_norm_us: 1.2,
        ffn_us: 15.8,
        total_us: 43.9,
    };

    let slowest = layer_timing.bottleneck();
    assert_eq!(slowest.0, "ffn");
    assert!((slowest.1 - 15.8).abs() < 0.001);
}
/// F021: the two `TokenBudget` constructors are inverses of each other.
///
/// A budget built from 50 µs/token is expected to report 20000 tokens/s, and a
/// budget built from 20000 tokens/s is expected to report 50 µs/token.
#[test]
fn f021_budget_math_consistent() {
    let latency_based = TokenBudget::from_latency(50.0);
    let throughput_based = TokenBudget::from_throughput(20000.0);

    let throughput_error = (latency_based.tokens_per_sec - 20000.0).abs();
    let latency_error = (throughput_based.us_per_token - 50.0).abs();

    assert!(throughput_error < 1.0);
    assert!(latency_error < 0.001);
}
/// F022: `is_met` and `gap_factor` behave consistently around the budget
/// boundary of a 10 µs/token budget.
#[test]
fn f022_budget_enforcement() {
    let limit = TokenBudget::from_latency(10.0);

    // Below and exactly at the limit count as met; above it does not.
    for &(observed_us, expect_met) in [(5.0, true), (10.0, true), (15.0, false)].iter() {
        assert_eq!(limit.is_met(observed_us), expect_met);
    }

    // The gap factor is expected to cross 1.0 exactly at the limit.
    let under = limit.gap_factor(5.0);
    let exact = limit.gap_factor(10.0);
    let over = limit.gap_factor(15.0);
    assert!(under < 1.0);
    assert!((exact - 1.0).abs() < 0.001);
    assert!(over > 1.0);
}
/// F049: the `no_nan` assertion accepts NaN-free data and rejects data that
/// contains a NaN.
#[test]
fn f049_nan_assertion() {
    let no_nan = BrickAssertion::no_nan();

    let clean = [1.0_f32, 2.0, 3.0];
    let poisoned = [1.0_f32, f32::NAN, 3.0];

    assert!(no_nan.check_f32(&clean, true).is_ok());
    assert!(no_nan.check_f32(&poisoned, true).is_err());
}
/// Smoke test: an `RmsNormBrick` with four weights runs on a four-element
/// input and yields four NaN-free outputs.
#[test]
fn rmsnorm_brick_runs() {
    let rms_norm =
        RmsNormBrick::new(vec![1.0; 4], 1e-5).with_budget(TokenBudget::from_latency(1000.0));
    let activations = vec![1.0, 2.0, 3.0, 4.0];

    let outcome = rms_norm.run(&activations).expect("should run");

    assert_eq!(outcome.output.len(), 4);
    assert!(outcome.output.iter().all(|value| !value.is_nan()));
}
/// F003: the brick's inspection methods can all be called.
///
/// Nothing is asserted about the returned values; the test passes as long as
/// every call compiles and returns without panicking.
#[test]
fn f003_verify_methods_callable() {
    let rms_norm = RmsNormBrick::new(vec![1.0; 4], 1e-5);

    // Results are bound to underscore-prefixed names: only callability matters.
    let _label = rms_norm.name();
    let _limit = rms_norm.budget();
    let _checks = rms_norm.assertions();
    let _verdict = rms_norm.verify();
    let _runnable = rms_norm.can_run();
}
/// F006: default brick budgets fall inside the open interval (0, 1000) µs per
/// token.
#[test]
fn f006_budget_values_realistic() {
    let bricks: Vec<Box<dyn ComputeBrick<Output = Vec<f32>>>> =
        vec![Box::new(RmsNormBrick::new(vec![1.0; 896], 1e-5))];

    bricks.iter().for_each(|brick| {
        let per_token_us = brick.budget().us_per_token;
        assert!(
            per_token_us > 0.0,
            "Budget must be > 0, got {}",
            per_token_us
        );
        assert!(
            per_token_us < 1000.0,
            "Budget must be < 1000µs, got {}",
            per_token_us
        );
    });
}
/// F007: a layer's `total_budget_us()` equals the sum of its seven component
/// budgets, within 0.1 µs.
#[test]
fn f007_total_layer_budget_is_sum() {
    let layer = TransformerLayerBrick::from_config(0, 896, 14, 2, 4864, 1e-5, 1000000.0, 2);
    let total_budget_us = layer.total_budget_us();

    let component_budgets_us = [
        layer.attn_norm.budget().us_per_token,
        layer.qkv.budget().us_per_token,
        layer.rope.budget().us_per_token,
        layer.attention.budget().us_per_token,
        layer.o_proj.budget().us_per_token,
        layer.ffn_norm.budget().us_per_token,
        layer.ffn.budget().us_per_token,
    ];

    // Accumulate left to right, in the order the components are listed.
    let mut sum = 0.0;
    for component_us in component_budgets_us.iter().copied() {
        sum += component_us;
    }

    assert!(
        (total_budget_us - sum).abs() < 0.1,
        "Total {} should equal sum {}",
        total_budget_us,
        sum
    );
}
/// F011: whenever a positive per-token time is reported, the reported
/// throughput must be positive too.
///
/// The throughput check is skipped when `us_per_token` is not greater than
/// zero.
#[test]
fn f011_timing_strictly_positive() {
    const DIM: usize = 16384;

    let rms_norm = RmsNormBrick::new(vec![1.0; DIM], 1e-5)
        .with_budget(TokenBudget::from_latency(100_000.0));
    let activations: Vec<f32> = (0..DIM).map(|i| i as f32).collect();

    let outcome = rms_norm.run(&activations).expect("should run");

    let timing_observed = outcome.us_per_token > 0.0;
    if timing_observed {
        assert!(
            outcome.tokens_per_sec > 0.0,
            "Throughput must be positive when timing is positive"
        );
    }
}
/// F012: `LayerTiming` exposes one timing field per brick in a layer.
///
/// The real check happens at compile time: the test only builds if all seven
/// named fields exist. The runtime assertion restates the expected count.
#[test]
fn f012_layer_timing_fields_match() {
    let defaults = LayerTiming::default();

    let per_brick_us = [
        defaults.attn_norm_us,
        defaults.qkv_us,
        defaults.rope_us,
        defaults.attention_us,
        defaults.o_proj_us,
        defaults.ffn_norm_us,
        defaults.ffn_us,
    ];

    assert_eq!(per_brick_us.len(), 7, "Must have 7 brick timing fields");
}
/// F013: sanity-checks the coefficient-of-variation formula on two known
/// sample sets.
///
/// CV is computed as `stddev / mean * 100`, using the population variance
/// (divide by `n`).
#[test]
fn f013_cv_calculation_correct() {
    /// Coefficient of variation of `samples`, in percent.
    fn cv_percent(samples: &[f64]) -> f64 {
        let count = samples.len() as f64;
        let mean = samples.iter().sum::<f64>() / count;
        let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / count;
        variance.sqrt() / mean * 100.0
    }

    // Identical samples have no spread at all.
    let flat_cv = cv_percent(&[10.0, 10.0, 10.0]);
    assert!(flat_cv.abs() < 0.001, "CV of identical values should be 0");

    // mean = 10, population stddev = sqrt(50 / 3) ≈ 4.082 → CV ≈ 40.82 %.
    let spread_cv = cv_percent(&[5.0, 10.0, 15.0]);
    assert!(
        (spread_cv - 40.82).abs() < 1.0,
        "CV should be ~40.8%, got {}",
        spread_cv
    );
}
/// F014: the default benchmark configuration takes at least 100 samples.
#[test]
fn f014_statistical_sample_size() {
    let default_samples = BenchmarkConfig::default().samples;

    assert!(
        default_samples >= 100,
        "Default samples should be >= 100, got {}",
        default_samples
    );
}
/// F015: the default benchmark configuration has a non-zero warmup count that
/// is smaller than its sample count.
#[test]
fn f015_warmup_samples_discarded() {
    let defaults = BenchmarkConfig::default();
    let warmup = defaults.warmup;
    let samples = defaults.samples;

    assert!(warmup > 0, "Warmup should be > 0, got {}", warmup);
    assert!(
        warmup < samples,
        "Warmup {} should be < samples {}",
        warmup,
        samples
    );
}
/// F017: each built-in assertion kind accepts a small, well-behaved input.
///
/// The sample data `[1.0, 2.0, 3.0]` contains no NaN or infinity and lies
/// inside the `[-100, 100]` bounds used below.
#[test]
fn f017_assertions_checkable() {
    let checks = [
        BrickAssertion::equiv_scalar(0.001),
        BrickAssertion::no_nan(),
        BrickAssertion::no_inf(),
        BrickAssertion::bounds(-100.0, 100.0),
    ];
    let samples = [1.0_f32, 2.0, 3.0];

    checks.iter().for_each(|check| {
        assert!(
            check.check_f32(&samples, true).is_ok(),
            "Assertion {} should pass",
            check.name
        );
    });
}
/// F018: a layer composed via `from_config` is runnable, declares assertions,
/// and none of its seven component budgets is NaN.
#[test]
fn f018_brick_composition_valid() {
    let layer = TransformerLayerBrick::from_config(0, 896, 14, 2, 4864, 1e-5, 1000000.0, 2);

    assert!(layer.can_run());
    assert!(!layer.assertions().is_empty());

    let component_budgets_us = [
        layer.attn_norm.budget().us_per_token,
        layer.qkv.budget().us_per_token,
        layer.rope.budget().us_per_token,
        layer.attention.budget().us_per_token,
        layer.o_proj.budget().us_per_token,
        layer.ffn_norm.budget().us_per_token,
        layer.ffn.budget().us_per_token,
    ];
    for budget_us in component_budgets_us.iter() {
        assert!(!budget_us.is_nan());
    }
}
/// F019: a benchmark report produced for a real brick contains well-formed
/// statistics.
///
/// Checked: mean, stddev, p50 and p99 are not NaN; the CV is finite whenever
/// the mean is positive; and p50 does not exceed p99.
#[test]
fn f019_benchmark_report_valid() {
    use std::time::Instant;

    let rms_norm = RmsNormBrick::new(vec![1.0; 1024], 1e-5)
        .with_budget(TokenBudget::from_latency(100_000.0));
    let activations: Vec<f32> = (0..1024).map(|i| i as f32).collect();
    let config = BenchmarkConfig {
        warmup: 5,
        samples: 50,
        max_cv: 1.0,
    };

    // One timed run of the brick; returns the elapsed time in microseconds.
    // The run's own result is deliberately ignored.
    let timed_run = || {
        let started = Instant::now();
        let _ = rms_norm.run(&activations);
        started.elapsed().as_nanos() as f64 / 1000.0
    };
    let report = benchmark_brick(&rms_norm, timed_run, &config);

    assert!(!report.mean_us.is_nan(), "mean must not be NaN");
    assert!(!report.std_us.is_nan(), "stddev must not be NaN");
    if report.mean_us > 0.0 {
        assert!(report.cv.is_finite(), "CV must be finite if mean > 0");
    }
    assert!(!report.p50_us.is_nan(), "p50 must not be NaN");
    assert!(!report.p99_us.is_nan(), "p99 must not be NaN");
    assert!(report.p50_us <= report.p99_us, "p50 <= p99");
}
/// F050: `FlashAttentionBrick::flops` matches `4 * 8 * 64 * seq_len` for a
/// brick built with `new(8, 2, 64)`.
///
/// NOTE(review): the factors 8 and 64 presumably correspond to the first and
/// third constructor arguments — confirm against the brick's definition.
#[test]
fn f050_flash_attention_flops() {
    let flash = FlashAttentionBrick::new(8, 2, 64);
    let seq_len = 512;

    let expected_flops = 4 * 8 * 64 * seq_len;

    assert_eq!(flash.flops(seq_len) as usize, expected_flops);
}
/// F051: for a 512-token sequence, the second value returned by
/// `memory_bytes` (flash) is smaller than the first (naive).
#[test]
fn f051_flash_attention_memory() {
    let flash_brick = FlashAttentionBrick::new(8, 2, 64);
    let seq_len = 512;

    let (naive_bytes, flash_bytes) = flash_brick.memory_bytes(seq_len);
    assert!(
        naive_bytes > flash_bytes,
        "Flash attention should use less memory"
    );

    let reduction_factor = naive_bytes as f64 / flash_bytes as f64;
    assert!(reduction_factor > 1.0, "Memory reduction should be > 1x");
}
/// F052: `num_tiles` rounds partial tiles up.
///
/// The expected counts equal `ceil(seq_len / 128)`; the final `with_tile_size`
/// argument (128) is presumably the tile size — confirm against the brick's
/// definition.
#[test]
fn f052_flash_attention_tiles() {
    let flash = FlashAttentionBrick::with_tile_size(8, 2, 64, 128);

    // (sequence length, expected tile count)
    let cases = [(512, 4), (500, 4), (129, 2)];
    for &(seq_len, expected_tiles) in cases.iter() {
        assert_eq!(flash.num_tiles(seq_len), expected_tiles);
    }
}
/// F053: the flash-attention budget is at least twice as tight as the naive
/// attention budget for the same `new(8, 2, 64)` arguments.
#[test]
fn f053_flash_attention_budget() {
    let naive_us = AttentionBrick::new(8, 2, 64).budget().us_per_token;
    let flash_us = FlashAttentionBrick::new(8, 2, 64).budget().us_per_token;

    let speedup = naive_us / flash_us;

    assert!(
        speedup >= 2.0,
        "Flash attention should be >= 2x faster, got {:.1}x",
        speedup
    );
}
/// F054: the flash-attention brick declares assertions named
/// `online_softmax` and `tiled_kv_access`.
#[test]
fn f054_flash_attention_assertions() {
    let flash = FlashAttentionBrick::new(8, 2, 64);
    let declared = flash.assertions();

    // True when some declared assertion carries the given name.
    let declares = |wanted: &str| declared.iter().any(|check| check.name == wanted);

    // Both lookups run before either result is asserted.
    let has_online_softmax = declares("online_softmax");
    let has_tiled_kv = declares("tiled_kv_access");

    assert!(has_online_softmax, "Should have online_softmax assertion");
    assert!(has_tiled_kv, "Should have tiled_kv_access assertion");
}
/// F055: `FusedFfnBrick::flops` matches `6 * 64 * 256` for a brick built with
/// `new(64, 256)`.
#[test]
#[cfg(feature = "cuda")]
fn f055_fused_ffn_flops() {
    let fused_ffn = FusedFfnBrick::new(64, 256);

    let expected_flops = 6 * 64 * 256;

    assert_eq!(fused_ffn.flops() as usize, expected_flops);
}