#![allow(clippy::disallowed_methods)]
#![allow(unused_imports)]
use aprender::autograd::Tensor;
use aprender::demo::Qwen2Config;
use aprender::models::Qwen2Model;
#[test]
fn j2_roofline_analysis() {
use std::time::Instant;
let sizes = [32, 64, 128];
let mut flops_per_byte = Vec::new();
for &n in &sizes {
let a = Tensor::ones(&[n, n]);
let b = Tensor::ones(&[n, n]);
let start = Instant::now();
let _ = a.matmul(&b);
let elapsed = start.elapsed().as_secs_f64();
let flops = 2.0 * (n as f64).powi(3);
let bytes = 3.0 * (n as f64).powi(2) * 4.0;
let intensity = flops / bytes;
flops_per_byte.push(intensity);
assert!(elapsed < 1.0, "J2: Matmul too slow at size {}", n);
}
assert!(flops_per_byte[0] > 0.0, "J2: Compute intensity positive");
}
#[test]
fn j7_operation_timing() {
use std::time::Instant;
let a = Tensor::ones(&[64, 64]);
let b = Tensor::ones(&[64, 64]);
let start = Instant::now();
let _ = a.matmul(&b);
let matmul_time = start.elapsed();
let start = Instant::now();
let _ = a.add(&b);
let add_time = start.elapsed();
assert!(
matmul_time >= add_time || add_time.as_nanos() < 1000,
"J7: Matmul >= Add time (or both very fast)"
);
}
#[test]
fn j8_memory_bandwidth() {
use std::time::Instant;
let sizes = [64, 128, 256];
let mut bandwidths = Vec::new();
for &n in &sizes {
let tensor = Tensor::ones(&[n, n]);
let start = Instant::now();
let data = tensor.data();
let _sum: f32 = data.iter().sum(); let elapsed = start.elapsed().as_secs_f64();
let bytes = (n * n * 4) as f64; let bandwidth = bytes / elapsed / 1e9;
bandwidths.push(bandwidth);
}
assert!(bandwidths[0] > 0.0, "J8: Memory bandwidth measurable");
}
#[test]
fn j9_cache_efficiency() {
use std::time::Instant;
let size = 1024;
let data: Vec<f32> = (0..size).map(|i| i as f32).collect();
let start = Instant::now();
let _sum1: f32 = data.iter().sum();
let seq_time = start.elapsed();
let start = Instant::now();
let stride = 64; let mut sum2 = 0.0f32;
for i in 0..stride {
for j in (i..size).step_by(stride) {
sum2 += data[j];
}
}
let strided_time = start.elapsed();
assert!(seq_time.as_nanos() > 0, "J9: Sequential access measurable");
assert!(strided_time.as_nanos() > 0, "J9: Strided access measurable");
assert!(sum2.is_finite(), "J9: Strided sum is valid");
}
#[test]
fn j10_vectorization() {
use std::time::Instant;
let a = Tensor::ones(&[1024, 1024]);
let b = Tensor::ones(&[1024, 1024]);
let start = Instant::now();
let c = a.add(&b);
let _elapsed = start.elapsed();
assert!(
(c.data()[0] - 2.0).abs() < 1e-5,
"J10: Vectorized add correct"
);
}
#[test]
fn j14_call_graph_structure() {
let config = Qwen2Config {
hidden_size: 64,
num_attention_heads: 4,
num_kv_heads: 2,
num_layers: 2,
vocab_size: 100,
max_seq_len: 32,
intermediate_size: 128,
rope_theta: 10000.0,
};
let model = Qwen2Model::new(&config);
assert_eq!(
config.num_layers, 2,
"J14: Model has 2 layers (call hierarchy)"
);
let param_count = model.num_parameters();
assert!(
param_count > 0,
"J14: Model has parameters (call graph nodes)"
);
}
#[test]
fn j15_ci_fail_on_naive() {
#[derive(Debug)]
struct ProfileFlags {
fail_on_naive: bool,
naive_threshold_gflops: f32,
}
let flags = ProfileFlags {
fail_on_naive: true,
naive_threshold_gflops: 10.0,
};
let simulated_gflops = 5.0f32;
let is_naive = simulated_gflops < flags.naive_threshold_gflops;
assert!(
is_naive,
"J15: Naive detection works (5 GFLOPS < 10 threshold)"
);
let should_fail = flags.fail_on_naive && is_naive;
assert!(should_fail, "J15: CI would fail on naive detection");
}
#[test]
fn j16_energy_measurement() {
#[derive(Debug, Clone)]
struct EnergyReading {
joules: f64,
timestamp_ns: u64,
}
#[derive(Debug)]
struct EnergyProfile {
start: EnergyReading,
end: EnergyReading,
}
impl EnergyProfile {
fn joules_consumed(&self) -> f64 {
self.end.joules - self.start.joules
}
fn duration_secs(&self) -> f64 {
(self.end.timestamp_ns - self.start.timestamp_ns) as f64 / 1e9
}
fn watts(&self) -> f64 {
self.joules_consumed() / self.duration_secs()
}
}
let profile = EnergyProfile {
start: EnergyReading {
joules: 100.0,
timestamp_ns: 0,
},
end: EnergyReading {
joules: 110.0,
timestamp_ns: 1_000_000_000, },
};
assert!(
(profile.joules_consumed() - 10.0).abs() < 0.001,
"J16: Energy calculation works"
);
assert!(
(profile.watts() - 10.0).abs() < 0.001,
"J16: Power calculation works"
);
}
#[test]
fn j17_joules_per_token() {
let total_joules = 10.0f64;
let tokens_generated = 100u64;
let joules_per_token = total_joules / tokens_generated as f64;
assert!(
(joules_per_token - 0.1).abs() < 0.001,
"J17: J/token calculation (10J / 100 tokens = 0.1 J/tok)"
);
assert!(
joules_per_token > 0.01 && joules_per_token < 1.0,
"J17: J/token in reasonable range"
);
}
#[test]
fn j18_energy_graceful_degradation() {
#[derive(Debug)]
#[allow(dead_code)]
enum EnergyResult {
Available(f64),
Unavailable(String),
}
fn check_energy_support() -> EnergyResult {
#[cfg(target_os = "linux")]
{
EnergyResult::Unavailable("RAPL not available".to_string())
}
#[cfg(not(target_os = "linux"))]
{
EnergyResult::Unavailable("Energy profiling only supported on Linux".to_string())
}
}
let result = check_energy_support();
match result {
EnergyResult::Available(j) => assert!(j >= 0.0, "J18: Valid energy reading"),
EnergyResult::Unavailable(msg) => {
assert!(!msg.is_empty(), "J18: Graceful degradation with message")
}
}
}
#[test]
fn j19_json_energy_fields() {
let profile_with_energy = serde_json::json!({
"operation": "inference",
"duration_ms": 100.0,
"energy": {
"joules": 10.0,
"watts_avg": 100.0,
"joules_per_token": 0.1,
"co2_grams": 0.005 }
});
assert!(
profile_with_energy.get("energy").is_some(),
"J19: Energy object present"
);
let energy = profile_with_energy.get("energy").unwrap();
assert!(energy.get("joules").is_some(), "J19: Joules field present");
assert!(
energy.get("joules_per_token").is_some(),
"J19: J/token field present"
);
}
#[test]
fn j20_energy_reproducibility() {
let energy_readings = [10.0f64, 10.5, 9.8, 10.2, 9.9];
let mean: f64 = energy_readings.iter().sum::<f64>() / energy_readings.len() as f64;
let variance: f64 = energy_readings
.iter()
.map(|x| (x - mean).powi(2))
.sum::<f64>()
/ energy_readings.len() as f64;
let std_dev = variance.sqrt();
let cv = std_dev / mean;
assert!(
cv < 0.20,
"J20: Energy CV < 20% (actual: {:.2}%)",
cv * 100.0
);
}
#[test]
fn j21_performance_grade() {
#[derive(Debug, Clone, Copy)]
enum PerfGrade {
A, B, C, D, F, }
fn compute_grade(efficiency_percent: f32) -> PerfGrade {
match efficiency_percent {
e if e >= 80.0 => PerfGrade::A,
e if e >= 60.0 => PerfGrade::B,
e if e >= 40.0 => PerfGrade::C,
e if e >= 20.0 => PerfGrade::D,
_ => PerfGrade::F,
}
}
assert!(matches!(compute_grade(85.0), PerfGrade::A), "J21: A grade");
assert!(matches!(compute_grade(70.0), PerfGrade::B), "J21: B grade");
assert!(matches!(compute_grade(50.0), PerfGrade::C), "J21: C grade");
assert!(matches!(compute_grade(30.0), PerfGrade::D), "J21: D grade");
assert!(matches!(compute_grade(10.0), PerfGrade::F), "J21: F grade");
}
#[test]
fn j22_preallocation_detection() {
fn has_preallocation(code: &str) -> bool {
code.contains("with_capacity") || code.contains("reserve")
}
let good_code = "let mut v = Vec::with_capacity(1000);";
assert!(has_preallocation(good_code), "J22: Pre-allocation detected");
let bad_code = "let mut v = Vec::new(); for i in 0..1000 { v.push(i); }";
assert!(
!has_preallocation(bad_code),
"J22: Missing pre-allocation detected"
);
let sample_tensor_code = "Vec::with_capacity(hidden_size)";
assert!(
has_preallocation(sample_tensor_code),
"J22: Tensor code uses pre-allocation"
);
}
include!("includes/spec_checklist_j_profiling_include_01.rs");