1use crate::error::{CliError, Result};
63use crate::output;
64use colored::Colorize;
65use serde::{Deserialize, Serialize};
66use std::path::Path;
67use std::time::{Duration, Instant};
68
/// Tunable configuration for the QA gate pipeline: pass/fail thresholds,
/// per-gate skip switches, generation/benchmark parameters, and output
/// options. Built from CLI flag values in [`run`].
#[derive(Debug, Clone)]
pub struct QaConfig {
    /// Minimum generation throughput (tokens/sec) for the throughput gate.
    pub min_tps: f64,
    /// Minimum speedup ratio vs Ollama for the parity gate.
    pub min_speedup: f64,
    /// Minimum GPU-over-CPU speedup for the GPU speedup gate.
    pub min_gpu_speedup: f64,
    /// Skip the golden-output gate.
    pub skip_golden: bool,
    /// Skip the throughput gate.
    pub skip_throughput: bool,
    /// Skip the Ollama parity gate.
    pub skip_ollama: bool,
    /// Skip the GPU speedup gate.
    pub skip_gpu_speedup: bool,
    /// Skip the tensor-contract gate.
    pub skip_contract: bool,
    /// Skip the format-parity gate.
    pub skip_format_parity: bool,
    /// Skip the PTX parity gate.
    pub skip_ptx_parity: bool,
    /// SafeTensors counterpart used by the format-parity gate; that gate
    /// is skipped when this is `None`.
    pub safetensors_path: Option<std::path::PathBuf>,
    /// Measured benchmark iterations.
    pub iterations: usize,
    /// Unmeasured warmup iterations before timing starts.
    pub warmup: usize,
    /// Maximum tokens to generate per test prompt.
    pub max_tokens: usize,
    /// Emit the report as JSON instead of human-readable output.
    pub json: bool,
    /// Verbose progress logging (human-readable mode only).
    pub verbose: bool,
    /// Minimum number of gates that must actually execute (not skip)
    /// for the overall run to count as passing.
    pub min_executed: Option<usize>,
    /// Baseline report for the performance-regression gate; that gate is
    /// skipped when this is `None`.
    pub previous_report: Option<std::path::PathBuf>,
    /// Allowed fractional performance regression vs the baseline
    /// (e.g. 0.10 = 10%). NOTE(review): consumed by
    /// `run_performance_regression_gate`, defined outside this chunk.
    pub regression_threshold: f64,
    /// Skip the GPU state-isolation gate.
    pub skip_gpu_state: bool,
    /// Skip the metadata-plausibility gate.
    pub skip_metadata: bool,
}
115
116impl Default for QaConfig {
117 fn default() -> Self {
118 Self {
119 min_tps: 100.0, min_speedup: 0.2, min_gpu_speedup: 2.0, skip_golden: false,
123 skip_throughput: false,
124 skip_ollama: false,
125 skip_gpu_speedup: false,
126 skip_contract: false,
127 skip_format_parity: false,
128 skip_ptx_parity: false,
129 safetensors_path: None,
130 iterations: 10,
131 warmup: 3,
132 max_tokens: 32,
133 json: false,
134 verbose: false,
135 min_executed: None,
136 previous_report: None,
137 regression_threshold: 0.10,
138 skip_gpu_state: false,
139 skip_metadata: false,
140 }
141 }
142}
143
/// Outcome of a single QA gate, serialized into [`QaReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Machine-readable gate id (see `gate_display_name` for labels).
    pub name: String,
    /// Whether the gate passed. Skipped gates are recorded as passing.
    pub passed: bool,
    /// Human-readable explanation of the outcome.
    pub message: String,
    /// Measured value, when the gate produces one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<f64>,
    /// Threshold the measured value was compared against, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f64>,
    /// Wall-clock time the gate took, in milliseconds (0 when skipped).
    pub duration_ms: u64,
    /// True when the gate did not actually run.
    pub skipped: bool,
}
164
165impl GateResult {
166 fn passed(
167 name: &str,
168 message: &str,
169 value: Option<f64>,
170 threshold: Option<f64>,
171 duration: Duration,
172 ) -> Self {
173 Self {
174 name: name.to_string(),
175 passed: true,
176 message: message.to_string(),
177 value,
178 threshold,
179 duration_ms: duration.as_millis() as u64,
180 skipped: false,
181 }
182 }
183
184 fn failed(
185 name: &str,
186 message: &str,
187 value: Option<f64>,
188 threshold: Option<f64>,
189 duration: Duration,
190 ) -> Self {
191 Self {
192 name: name.to_string(),
193 passed: false,
194 message: message.to_string(),
195 value,
196 threshold,
197 duration_ms: duration.as_millis() as u64,
198 skipped: false,
199 }
200 }
201
202 fn skipped(name: &str, reason: &str) -> Self {
203 Self {
204 name: name.to_string(),
205 passed: true, message: format!("Skipped: {reason}"),
207 value: None,
208 threshold: None,
209 duration_ms: 0,
210 skipped: true,
211 }
212 }
213}
214
/// Host hardware snapshot embedded in the QA report for reproducibility.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU model string from /proc/cpuinfo ("unknown" when unreadable).
    pub cpu_model: String,
    /// GPU name reported by `nvidia-smi`, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_model: Option<String>,
    /// NVIDIA driver version reported by `nvidia-smi`, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_driver: Option<String>,
}
227
228impl SystemInfo {
229 fn capture() -> Self {
230 let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
231 .ok()
232 .and_then(|s| {
233 s.lines()
234 .find(|l| l.starts_with("model name"))
235 .and_then(|l| l.split(':').nth(1))
236 .map(|s| s.trim().to_string())
237 })
238 .unwrap_or_else(|| "unknown".to_string());
239
240 let (gpu_model, gpu_driver) = Self::detect_gpu();
241
242 Self {
243 cpu_model,
244 gpu_model,
245 gpu_driver,
246 }
247 }
248
249 fn detect_gpu() -> (Option<String>, Option<String>) {
250 let output = std::process::Command::new("nvidia-smi")
251 .args(["--query-gpu=name,driver_version", "--format=csv,noheader"])
252 .output()
253 .ok();
254 if let Some(out) = output {
255 if out.status.success() {
256 let text = String::from_utf8_lossy(&out.stdout);
257 let parts: Vec<&str> = text.trim().splitn(2, ',').collect();
258 return (
259 parts.first().map(|s| s.trim().to_string()),
260 parts.get(1).map(|s| s.trim().to_string()),
261 );
262 }
263 }
264 (None, None)
265 }
266}
267
/// Full QA run report: per-gate outcomes plus the aggregate verdict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QaReport {
    /// Path of the model under test.
    pub model: String,
    /// Overall verdict: all gates passed and `min_executed` satisfied.
    pub passed: bool,
    /// Individual gate results, in execution order.
    pub gates: Vec<GateResult>,
    /// Number of gates that actually ran (`default` keeps older
    /// serialized reports deserializable).
    #[serde(default)]
    pub gates_executed: usize,
    /// Number of gates that were skipped.
    #[serde(default)]
    pub gates_skipped: usize,
    /// Total pipeline wall-clock time in milliseconds.
    pub total_duration_ms: u64,
    /// RFC 3339 UTC timestamp of report creation.
    pub timestamp: String,
    /// One-line human-readable verdict.
    pub summary: String,
    /// Host hardware snapshot, when captured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system_info: Option<SystemInfo>,
}
293
294#[allow(clippy::too_many_arguments)]
296pub fn run(
297 path: &Path,
298 min_tps: Option<f64>,
299 min_speedup: Option<f64>,
300 min_gpu_speedup: Option<f64>,
301 skip_golden: bool,
302 skip_throughput: bool,
303 skip_ollama: bool,
304 skip_gpu_speedup: bool,
305 skip_contract: bool,
306 skip_format_parity: bool,
307 skip_ptx_parity: bool,
308 safetensors_path: Option<std::path::PathBuf>,
309 iterations: usize,
310 warmup: usize,
311 max_tokens: usize,
312 json: bool,
313 verbose: bool,
314 min_executed: Option<usize>,
315 previous_report: Option<std::path::PathBuf>,
316 regression_threshold: Option<f64>,
317 skip_gpu_state: bool,
318 skip_metadata: bool,
319) -> Result<()> {
320 let config = QaConfig {
321 min_tps: min_tps.unwrap_or(100.0),
322 min_speedup: min_speedup.unwrap_or(0.2), min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0), skip_golden,
325 skip_throughput,
326 skip_ollama,
327 skip_gpu_speedup,
328 skip_contract,
329 skip_format_parity,
330 skip_ptx_parity,
331 safetensors_path,
332 iterations,
333 warmup,
334 max_tokens,
335 json,
336 verbose,
337 min_executed,
338 previous_report,
339 regression_threshold: regression_threshold.unwrap_or(0.10),
340 skip_gpu_state,
341 skip_metadata,
342 };
343
344 let report = run_qa(path, &config)?;
345
346 if json {
347 println!(
348 "{}",
349 serde_json::to_string_pretty(&report).unwrap_or_default()
350 );
351 }
352
353 if !report.passed {
354 return Err(CliError::ValidationFailed(report.summary));
355 }
356
357 Ok(())
358}
359
360fn dispatch_gate(
362 gates: &mut Vec<GateResult>,
363 json: bool,
364 skip: bool,
365 name: &str,
366 skip_reason: &str,
367 runner: impl FnOnce() -> Result<GateResult>,
368) -> Result<()> {
369 let result = if skip {
370 GateResult::skipped(name, skip_reason)
371 } else {
372 runner()?
373 };
374 if !json {
375 print_gate_result(&result);
376 }
377 gates.push(result);
378 Ok(())
379}
380
/// Map a machine-readable gate id to its human-readable table label;
/// unknown ids are passed through unchanged.
fn gate_display_name(name: &str) -> &str {
    const LABELS: [(&str, &str); 10] = [
        ("tensor_contract", "Tensor Contract"),
        ("golden_output", "Golden Output"),
        ("throughput", "Throughput"),
        ("ollama_parity", "Ollama Parity"),
        ("gpu_speedup", "GPU Speedup"),
        ("format_parity", "Format Parity"),
        ("ptx_parity", "PTX Parity"),
        ("gpu_state_isolation", "GPU State Isolation"),
        ("performance_regression", "Perf Regression"),
        ("metadata_plausibility", "Metadata Plausibility"),
    ];
    LABELS
        .iter()
        .find(|(id, _)| *id == name)
        .map_or(name, |(_, label)| *label)
}
398
399fn print_qa_summary(gates: &[GateResult], passed: bool, total_duration: Duration) {
401 output::header("QA Summary");
402
403 let gate_rows: Vec<Vec<String>> = gates
404 .iter()
405 .map(|g| {
406 let badge = if g.skipped {
407 output::badge_skip("SKIP")
408 } else if g.passed {
409 output::badge_pass("PASS")
410 } else {
411 output::badge_fail("FAIL")
412 };
413 let measured = g.value.map_or("—".to_string(), |v| format!("{v:.2}"));
414 let threshold = g.threshold.map_or("—".to_string(), |v| format!("{v:.2}"));
415 vec![
416 gate_display_name(&g.name).to_string(),
417 badge,
418 measured,
419 threshold,
420 output::duration_fmt(g.duration_ms),
421 ]
422 })
423 .collect();
424 println!(
425 "{}",
426 output::table(
427 &["Gate", "Status", "Measured", "Threshold", "Duration"],
428 &gate_rows,
429 )
430 );
431
432 println!();
433 if passed {
434 println!(" {}", output::badge_pass("ALL GATES PASSED"));
435 } else {
436 println!(" {}", output::badge_fail("GATES FAILED"));
437 for gate in gates.iter().filter(|g| !g.passed && !g.skipped) {
438 println!(" {} {}", "✗".red(), gate.name);
439 }
440 }
441 output::metric(
442 "Total Duration",
443 output::duration_fmt(total_duration.as_millis() as u64),
444 "",
445 );
446}
447
/// Best-effort check that `path` is a GGUF file: read it and run format
/// detection on the first 8 bytes. Returns `false` on any read or
/// detection error, and always `false` without the `inference` feature.
fn is_gguf_format(path: &Path) -> bool {
    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        // Detection needs at least 8 magic bytes; shorter files map to None.
        // NOTE(review): this reads the whole file just to get 8 bytes.
        let magic = std::fs::read(path).ok().and_then(|b| {
            if b.len() >= 8 {
                Some(b[..8].to_vec())
            } else {
                None
            }
        });
        magic.and_then(|m| detect_format(&m).ok()) == Some(ModelFormat::Gguf)
    }
    #[cfg(not(feature = "inference"))]
    {
        let _ = path;
        false
    }
}
468
469fn run_qa(path: &Path, config: &QaConfig) -> Result<QaReport> {
470 let start = Instant::now();
471 let mut gates = Vec::new();
472
473 if !config.json {
474 output::header("APR Quality Assurance");
475 let config_pairs = vec![
476 ("Model", path.display().to_string()),
477 ("Min TPS", format!("{:.0} tok/s", config.min_tps)),
478 ("Min Speedup", format!("{:.1}x Ollama", config.min_speedup)),
479 ];
480 println!("{}", output::kv_table(&config_pairs));
481 }
482
483 dispatch_gate(
484 &mut gates,
485 config.json,
486 config.skip_contract,
487 "tensor_contract",
488 "Skipped by --skip-contract",
489 || run_tensor_contract_gate(path, config),
490 )?;
491 dispatch_gate(
492 &mut gates,
493 config.json,
494 config.skip_metadata,
495 "metadata_plausibility",
496 "Skipped by --skip-metadata",
497 || run_metadata_plausibility_gate(path, config),
498 )?;
499 dispatch_gate(
500 &mut gates,
501 config.json,
502 config.skip_golden,
503 "golden_output",
504 "Skipped by --skip-golden",
505 || run_golden_output_gate(path, config),
506 )?;
507 dispatch_gate(
508 &mut gates,
509 config.json,
510 config.skip_throughput,
511 "throughput",
512 "Skipped by --skip-throughput",
513 || run_throughput_gate(path, config),
514 )?;
515 dispatch_gate(
516 &mut gates,
517 config.json,
518 config.skip_ollama,
519 "ollama_parity",
520 "Skipped by --skip-ollama",
521 || {
522 if is_gguf_format(path) {
523 run_ollama_parity_gate(path, config)
524 } else {
525 Ok(GateResult::skipped(
526 "ollama_parity",
527 "Non-GGUF format (F32/F16 lacks fused kernels for Ollama parity)",
528 ))
529 }
530 },
531 )?;
532 dispatch_gate(
533 &mut gates,
534 config.json,
535 config.skip_gpu_speedup,
536 "gpu_speedup",
537 "Skipped by --skip-gpu-speedup",
538 || run_gpu_speedup_gate(path, config),
539 )?;
540
541 let skip_format = config.skip_format_parity || config.safetensors_path.is_none();
542 let format_skip_reason = if config.skip_format_parity {
543 "Skipped by --skip-format-parity"
544 } else {
545 "No --safetensors-path provided"
546 };
547 dispatch_gate(
548 &mut gates,
549 config.json,
550 skip_format,
551 "format_parity",
552 format_skip_reason,
553 || run_format_parity_gate(path, config),
554 )?;
555 dispatch_gate(
556 &mut gates,
557 config.json,
558 config.skip_ptx_parity,
559 "ptx_parity",
560 "Skipped by --skip-ptx-parity",
561 || run_ptx_parity_gate(path, config),
562 )?;
563 dispatch_gate(
564 &mut gates,
565 config.json,
566 config.skip_gpu_state,
567 "gpu_state_isolation",
568 "Skipped by --skip-gpu-state",
569 || run_gpu_state_isolation_gate(path, config),
570 )?;
571
572 let regression_result = if config.previous_report.is_none() {
576 GateResult::skipped("performance_regression", "No --previous-report provided")
577 } else {
578 run_performance_regression_gate(&gates, config)?
579 };
580 if !config.json {
581 print_gate_result(®ression_result);
582 }
583 gates.push(regression_result);
584
585 let total_duration = start.elapsed();
586 let gates_executed = gates.iter().filter(|g| !g.skipped).count();
587 let gates_skipped = gates.iter().filter(|g| g.skipped).count();
588
589 if !config.json && gates_skipped > gates_executed {
591 println!(
592 " {} {} of {} gates SKIPPED — QA not rigorous",
593 "WARN".yellow().bold(),
594 gates_skipped,
595 gates_skipped + gates_executed
596 );
597 }
598
599 let mut passed = gates.iter().all(|g| g.passed);
600
601 if let Some(min) = config.min_executed {
603 if gates_executed < min {
604 if !config.json {
605 println!(
606 " {} Only {} gates executed, minimum required: {}",
607 "FAIL".red().bold(),
608 gates_executed,
609 min,
610 );
611 }
612 passed = false;
613 }
614 }
615
616 let summary = if passed {
617 format!(
618 "All QA gates passed ({} executed, {} skipped)",
619 gates_executed, gates_skipped
620 )
621 } else {
622 let names: Vec<_> = gates
623 .iter()
624 .filter(|g| !g.passed && !g.skipped)
625 .map(|g| g.name.as_str())
626 .collect();
627 if names.is_empty() && !passed {
628 format!(
629 "Insufficient gate execution: {} < {} minimum",
630 gates_executed,
631 config.min_executed.unwrap_or(0)
632 )
633 } else {
634 format!("Failed gates: {}", names.join(", "))
635 }
636 };
637
638 if !config.json {
639 print_qa_summary(&gates, passed, total_duration);
640 }
641
642 Ok(QaReport {
643 model: path.display().to_string(),
644 passed,
645 gates,
646 gates_executed,
647 gates_skipped,
648 total_duration_ms: total_duration.as_millis() as u64,
649 timestamp: chrono::Utc::now().to_rfc3339(),
650 summary,
651 system_info: Some(SystemInfo::capture()),
652 })
653}
654
/// Gate: validate every tensor in the model against the PMAT-235
/// contract using aprender's RosettaStone validator. Passes only when
/// no tensor reports a contract failure.
fn run_tensor_contract_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!(
            "{}",
            "Running tensor contract validation (PMAT-235)...".yellow()
        );
    }

    let rosetta = aprender::format::rosetta::RosettaStone::new();
    let report = match rosetta.validate(path) {
        Ok(r) => r,
        Err(e) => {
            // A validator error (unreadable/unparseable model) fails this
            // gate rather than aborting the whole pipeline.
            let duration = start.elapsed();
            return Ok(GateResult::failed(
                "tensor_contract",
                &format!("Failed to validate: {e}"),
                None,
                None,
                duration,
            ));
        }
    };

    let duration = start.elapsed();

    // Flatten per-tensor failures into "tensor_name: failure" strings.
    let contract_failures: Vec<String> = report
        .tensors
        .iter()
        .flat_map(|t| t.failures.iter().map(|f| format!("{}: {}", t.name, f)))
        .collect();

    if contract_failures.is_empty() {
        Ok(GateResult::passed(
            "tensor_contract",
            &format!(
                "{} tensors passed all PMAT-235 contract gates",
                report.tensor_count
            ),
            Some(report.tensor_count as f64),
            Some(0.0),
            duration,
        ))
    } else {
        // Cap the message at three failures to keep reports readable.
        let summary = if contract_failures.len() <= 3 {
            contract_failures.join("; ")
        } else {
            format!(
                "{}; ... and {} more",
                contract_failures[..3].join("; "),
                contract_failures.len() - 3
            )
        };
        Ok(GateResult::failed(
            "tensor_contract",
            &format!(
                "{} contract violations in {} tensors: {}",
                contract_failures.len(),
                report.failed_tensor_count,
                summary
            ),
            Some(contract_failures.len() as f64),
            Some(0.0),
            duration,
        ))
    }
}
732
/// Gate: sanity-check model hyperparameter metadata (Bug 210 family) —
/// rope_theta, max_position_embeddings, rms_norm_eps, and a cross-check
/// for qwen2 models carrying the llama-default theta. Fails on any
/// violation; the message aggregates all of them.
fn run_metadata_plausibility_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!(
            "{}",
            "Running metadata plausibility validation (Bug 210)...".yellow()
        );
    }

    let data = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;

    // Guard: metadata extraction reads the 4-byte magic unconditionally.
    if data.len() < 4 {
        let duration = start.elapsed();
        return Ok(GateResult::failed(
            "metadata_plausibility",
            "File too small for metadata extraction",
            None,
            None,
            duration,
        ));
    }

    let (architecture, rope_theta, max_pos, rms_norm_eps) = extract_model_metadata(&data, path)?;

    // Each check either bumps checks_passed or appends a violation.
    let mut violations: Vec<String> = Vec::new();
    let mut checks_passed = 0usize;

    check_rope_theta(architecture.as_deref(), rope_theta, &data, &mut violations, &mut checks_passed);
    check_max_position_embeddings(max_pos, &mut violations, &mut checks_passed);
    check_rms_norm_eps(rms_norm_eps, &mut violations, &mut checks_passed);
    check_arch_theta_cross_validation(architecture.as_ref(), rope_theta, &mut violations, &mut checks_passed);

    let duration = start.elapsed();

    if violations.is_empty() {
        Ok(GateResult::passed(
            "metadata_plausibility",
            &format!(
                "{checks_passed} metadata checks passed (arch={}, rope_theta={}, max_pos={})",
                architecture.as_deref().unwrap_or("unknown"),
                rope_theta.map_or("none".to_string(), |t| format!("{t}")),
                max_pos.map_or("none".to_string(), |p| format!("{p}")),
            ),
            Some(checks_passed as f64),
            Some(0.0),
            duration,
        ))
    } else {
        Ok(GateResult::failed(
            "metadata_plausibility",
            &format!(
                "{} metadata violation(s): {}",
                violations.len(),
                violations.join("; ")
            ),
            Some(violations.len() as f64),
            Some(0.0),
            duration,
        ))
    }
}
804
/// Validate `rope_theta` plausibility per architecture (Bug 210 class:
/// a qwen2 model exported with the llama default of 10000 produces
/// garbage output).
///
/// On success increments `checks_passed`; otherwise pushes a
/// human-readable violation. A missing `rope_theta` is acceptable only
/// for GGUF inputs (identified by magic in `data`); for other formats
/// the absence itself is a violation.
fn check_rope_theta(
    arch: Option<&str>,
    rope_theta: Option<f32>,
    data: &[u8],
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    if let Some(theta) = rope_theta {
        let theta_f64 = f64::from(theta);
        match arch {
            Some("qwen2" | "qwen2.5" | "qwen") => {
                if theta_f64 < 100_000.0 {
                    violations.push(format!(
                        "rope_theta={theta} for qwen2 — expected ~1000000.0 \
                         (100x too low, will produce garbage)"
                    ));
                } else {
                    *checks_passed += 1;
                }
            }
            Some("llama" | "llama2" | "llama3") => {
                if (1000.0..=10_000_000.0).contains(&theta_f64) {
                    *checks_passed += 1;
                } else {
                    // BUGFIX: the message previously claimed
                    // "expected 10000-500000", which contradicted the
                    // enforced range 1000..=10000000 above.
                    violations.push(format!(
                        "rope_theta={theta} for llama — expected 1000-10000000"
                    ));
                }
            }
            _ => {
                if (100.0..=100_000_000.0).contains(&theta_f64) {
                    *checks_passed += 1;
                } else {
                    violations.push(format!(
                        "rope_theta={theta} outside plausible range [100, 100M]"
                    ));
                }
            }
        }
    } else {
        // BUGFIX(robustness): checked `get` instead of `data[0..4]` so a
        // sub-4-byte slice cannot panic here (callers currently
        // pre-check the length, but this helper should not rely on it).
        if data.get(0..4).map_or(false, |magic| magic == b"GGUF") {
            *checks_passed += 1;
        } else {
            violations.push("rope_theta missing from APR metadata".to_string());
        }
    }
}
854
/// Plausibility check: `max_position_embeddings` must lie in
/// [128, 1M] when present. Absence is treated as passing.
fn check_max_position_embeddings(
    max_pos: Option<usize>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let Some(val) = max_pos else {
        // Field absent — nothing to validate.
        *checks_passed += 1;
        return;
    };
    match val {
        128..=1_048_576 => *checks_passed += 1,
        _ => violations.push(format!(
            "max_position_embeddings={val} outside plausible range [128, 1M]"
        )),
    }
}
873
/// Plausibility check: `rms_norm_eps` must lie in (0, 0.01] when
/// present. Absence is treated as passing.
fn check_rms_norm_eps(
    rms_norm_eps: Option<f32>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let Some(eps) = rms_norm_eps else {
        // Field absent — nothing to validate.
        *checks_passed += 1;
        return;
    };
    let as_f64 = f64::from(eps);
    // Condition kept in this exact form to preserve NaN handling.
    if as_f64 <= 0.0 || as_f64 > 0.01 {
        violations.push(format!(
            "rms_norm_eps={eps} outside plausible range (0, 0.01]"
        ));
    } else {
        *checks_passed += 1;
    }
}
893
/// Cross-check architecture against rope_theta: a qwen-family model
/// with theta ≈ 10000 almost certainly lost its config.json during
/// conversion (Bug 210). Missing either field counts as passing.
fn check_arch_theta_cross_validation(
    architecture: Option<&String>,
    rope_theta: Option<f32>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let (Some(arch), Some(theta)) = (architecture, rope_theta) else {
        // Cannot cross-validate without both fields present.
        *checks_passed += 1;
        return;
    };
    let is_qwen = matches!(arch.as_str(), "qwen2" | "qwen2.5" | "qwen");
    let near_llama_default = (f64::from(theta) - 10000.0).abs() < 1.0;
    if is_qwen && near_llama_default {
        violations.push(format!(
            "CRITICAL: {arch} with rope_theta=10000.0 — \
             likely missing config.json (Bug 210)"
        ));
    } else {
        *checks_passed += 1;
    }
}
917
/// Metadata tuple: (architecture, rope_theta, max_position_embeddings,
/// rms_norm_eps); any field may be absent.
type ModelMetadata = (Option<String>, Option<f32>, Option<usize>, Option<f32>);

/// Extract plausibility-relevant metadata from the model bytes.
///
/// Dispatches on the 4-byte magic: GGUF uses the GGUF reader, APR the
/// APR v2 reader; anything else falls back to a naive string scan of a
/// sibling `config.json` (SafeTensors layout), or all-`None` when no
/// config exists. Caller must guarantee `data.len() >= 4` (checked in
/// `run_metadata_plausibility_gate`).
fn extract_model_metadata(
    data: &[u8],
    path: &Path,
) -> Result<ModelMetadata> {
    let magic = &data[0..4];

    if magic == b"GGUF" {
        let reader = aprender::format::gguf::reader::GgufReader::from_bytes(data.to_vec())
            .map_err(|e| CliError::ValidationFailed(format!("GGUF parse failed: {e}")))?;
        let arch = reader.architecture();
        let rope_theta = reader.rope_theta();
        let max_pos = reader.context_length();
        let rms_norm_eps = reader.rms_norm_eps();
        Ok((arch, rope_theta, max_pos, rms_norm_eps))
    } else if &magic[0..3] == b"APR" || magic == b"APRN" {
        use aprender::format::v2::AprV2Reader;
        let reader = AprV2Reader::from_bytes(data)
            .map_err(|e| CliError::ValidationFailed(format!("APR parse failed: {e}")))?;
        let meta = reader.metadata();
        // `path` is unused on this branch; silence the unused warning.
        let _ = path;
        Ok((
            meta.architecture.clone(),
            meta.rope_theta,
            meta.max_position_embeddings,
            meta.rms_norm_eps,
        ))
    } else {
        // SafeTensors (or unknown): metadata lives in a sibling config.json.
        let config_path = path.with_file_name("config.json");
        if config_path.exists() {
            let config_str = std::fs::read_to_string(&config_path)
                .map_err(|e| CliError::ValidationFailed(format!("config.json read failed: {e}")))?;
            let arch = extract_json_string(&config_str, "model_type");
            let rope_theta = extract_json_f32(&config_str, "rope_theta");
            let max_pos = extract_json_usize(&config_str, "max_position_embeddings");
            let rms_norm_eps = extract_json_f32(&config_str, "rms_norm_eps");
            Ok((arch, rope_theta, max_pos, rms_norm_eps))
        } else {
            // No metadata source available — nothing to validate.
            Ok((None, None, None, None))
        }
    }
}
968
/// Naive JSON scan: find `"key"`, skip past the colon, and return the
/// following double-quoted string value. Returns `None` for missing
/// keys or non-string values. (Not a full parser: no escape handling.)
fn extract_json_string(json: &str, key: &str) -> Option<String> {
    let needle = format!("\"{key}\"");
    let key_pos = json.find(&needle)?;
    let rest = &json[key_pos + needle.len()..];
    let after_colon = &rest[rest.find(':')? + 1..];
    let inner = after_colon.trim_start().strip_prefix('"')?;
    let close = inner.find('"')?;
    Some(inner[..close].to_string())
}
985
/// Naive JSON scan: find `"key"` and parse the numeric token after the
/// colon as `f32`. The value must be terminated by `,`, `}` or a
/// newline; otherwise (or on parse failure) returns `None`.
fn extract_json_f32(json: &str, key: &str) -> Option<f32> {
    let needle = format!("\"{key}\"");
    let start = json.find(&needle)? + needle.len();
    let value = json[start..].split_once(':')?.1.trim_start();
    let stop = value.find([',', '}', '\n'])?;
    value[..stop].trim().parse::<f32>().ok()
}
997
/// Naive JSON scan: find `"key"` and parse the numeric token after the
/// colon as `usize`. The value must be terminated by `,`, `}` or a
/// newline; otherwise (or on parse failure) returns `None`.
fn extract_json_usize(json: &str, key: &str) -> Option<usize> {
    let needle = format!("\"{key}\"");
    let start = json.find(&needle)? + needle.len();
    let value = json[start..].split_once(':')?.1.trim_start();
    let stop = value.find([',', '}', '\n'])?;
    value[..stop].trim().parse::<usize>().ok()
}
1008
/// Result of output sanity verification.
#[derive(Debug, Clone)]
pub enum OutputVerification {
    /// Output passed all heuristics.
    Pass,
    /// Output failed; `reason` names the failing heuristic.
    Fail {
        reason: String,
    },
}

/// Verify generated text: non-empty, free of known garbage artifacts and
/// NUL bytes, and (case-insensitively) containing at least one of
/// `expected_patterns` — an empty pattern slice disables that last check.
pub fn verify_output(
    output: &str,
    test_id: &str,
    expected_patterns: &[&str],
) -> OutputVerification {
    if output.trim().is_empty() {
        return OutputVerification::Fail {
            reason: format!("{test_id}: Empty output"),
        };
    }

    // Known tokenizer-failure artifacts: Unicode replacement char,
    // unknown-token marker, and observed garbage fragments.
    let garbage_patterns = ["\u{FFFD}", "[UNK]", "akunji", "olumbia"];
    for pattern in &garbage_patterns {
        if output.contains(pattern) {
            return OutputVerification::Fail {
                reason: format!("{test_id}: Garbage detected: '{pattern}'"),
            };
        }
    }

    // NUL bytes indicate a broken BPE decode path.
    let null_count = output.bytes().filter(|&b| b == 0).count();
    if null_count > 0 {
        return OutputVerification::Fail {
            reason: format!("{test_id}: {null_count} null bytes detected (BPE artifact)"),
        };
    }

    if !expected_patterns.is_empty() {
        // PERF: lowercase the output once, not once per pattern as before.
        let lowered = output.to_lowercase();
        let found = expected_patterns
            .iter()
            .any(|p| lowered.contains(&p.to_lowercase()));
        if !found {
            return OutputVerification::Fail {
                reason: format!(
                    "{test_id}: Expected one of {:?}, got: '{}'",
                    expected_patterns,
                    output.chars().take(100).collect::<String>()
                ),
            };
        }
    }

    OutputVerification::Pass
}
1077
/// Re-run a golden prompt on the GPU-resident CUDA path and verify its
/// output. Returns `Ok(Some(reason))` only when the GPU output fails
/// verification (CPU having already passed); CUDA init or generation
/// errors are logged in verbose mode and treated as non-fatal
/// (`Ok(None)`), since GPU absence should not fail the gate.
#[cfg(all(feature = "inference", feature = "cuda"))]
fn validate_gpu_golden_output(
    mapped: &realizar::gguf::MappedGGUFModel,
    prompt_tokens: &[u32],
    gen_config: &realizar::gguf::QuantizedGenerateConfig,
    gguf: &realizar::gguf::GGUFModel,
    expected_patterns: &[&str],
    config: &QaConfig,
) -> Result<Option<String>> {
    use realizar::gguf::{OwnedQuantizedModel, OwnedQuantizedModelCuda};
    let model = OwnedQuantizedModel::from_mapped(mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    // Device 0 only; multi-GPU selection is not attempted here.
    match OwnedQuantizedModelCuda::new(model, 0) {
        Ok(mut cuda_model) => match cuda_model.generate_gpu_resident(prompt_tokens, gen_config) {
            Ok(gpu_tokens) => {
                let gpu_text = gguf.decode(&gpu_tokens);
                if let OutputVerification::Fail { reason } =
                    verify_output(&gpu_text, "golden_output_gpu", expected_patterns)
                {
                    return Ok(Some(format!("GPU output failed (CPU passed): {reason}")));
                }
            }
            Err(e) => {
                // Generation error on GPU: warn (verbose) but do not fail.
                if !config.json && config.verbose {
                    println!("{}", format!("GPU golden output skipped: {e}").yellow());
                }
            }
        },
        Err(e) => {
            // CUDA init error: warn (verbose) but do not fail.
            if !config.json && config.verbose {
                println!("{}", format!("CUDA init skipped: {e}").yellow());
            }
        }
    }
    Ok(None)
}
1118
/// Generate golden output for an APR model: load the embedded BPE
/// tokenizer and transformer, then run deterministic greedy decoding
/// (temperature 0, top_k 1). Returns the raw tokens and decoded text.
#[cfg(feature = "inference")]
fn golden_output_apr(path: &Path, prompt: &str, max_tokens: usize) -> Result<(Vec<u32>, String)> {
    use realizar::apr::AprV2Model;
    use realizar::apr_transformer::{AprTransformer, GenerateConfig};

    let apr_model = AprV2Model::load(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR: {e}")))?;
    let tokenizer = apr_model
        .load_embedded_bpe_tokenizer()
        .ok_or_else(|| CliError::ValidationFailed("APR missing embedded tokenizer".to_string()))?;
    let transformer = AprTransformer::from_apr_file(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR transformer: {e}")))?;

    let prompt_tokens = tokenizer.encode(prompt);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = GenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let tokens = transformer
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("Generation failed: {e}")))?;
    let text = tokenizer.decode(&tokens);
    Ok((tokens, text))
}
1147
/// Generate golden output for a SafeTensors model via the APR converter.
/// Requires a sibling `tokenizer.json`; returns `Ok(None)` when the
/// tokenizer is missing or unloadable (caller skips the gate).
#[cfg(feature = "inference")]
fn golden_output_safetensors(
    path: &Path,
    prompt: &str,
    max_tokens: usize,
) -> Result<Option<(Vec<u32>, String)>> {
    use aprender::text::bpe::{load_from_json, BpeTokenizer};
    use realizar::safetensors_infer::SafetensorsToAprConverter;

    let tokenizer_path = realizar::safetensors::find_sibling_file(path, "tokenizer.json");
    let tokenizer: Option<BpeTokenizer> = tokenizer_path
        .as_ref()
        .and_then(|p| std::fs::read_to_string(p).ok())
        .and_then(|json| load_from_json(&json).ok());

    let Some(tokenizer) = tokenizer else {
        // No tokenizer — signal the caller to skip rather than fail.
        return Ok(None);
    };

    let transformer = SafetensorsToAprConverter::convert(path)
        .map_err(|e| CliError::ValidationFailed(format!("SafeTensors convert failed: {e}")))?;

    let prompt_tokens = tokenizer.encode(prompt);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = realizar::apr_transformer::GenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let tokens = transformer
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("Generation failed: {e}")))?;
    let text = tokenizer.decode(&tokens);
    Ok(Some((tokens, text)))
}
1185
/// Generate golden output for a GGUF model on the CPU quantized path
/// with deterministic greedy decoding.
/// NOTE(review): the encode-failure fallback tokens [151643, 9707] look
/// Qwen-specific — confirm they are sensible for other vocabularies.
#[cfg(feature = "inference")]
fn golden_output_gguf_cpu(
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf: &realizar::gguf::GGUFModel,
    prompt: &str,
    max_tokens: usize,
) -> Result<(Vec<u32>, String)> {
    use realizar::gguf::{OwnedQuantizedModel, QuantizedGenerateConfig};

    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = QuantizedGenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };
    let model = OwnedQuantizedModel::from_mapped(mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let tokens = model
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("CPU generation failed: {e}")))?;
    let text = gguf.decode(&tokens);
    Ok((tokens, text))
}
1211
/// Golden prompts (ChatML format) paired with acceptable answer
/// substrings — the output must contain any one of them.
fn golden_test_cases() -> Vec<(&'static str, Vec<&'static str>)> {
    let arithmetic = (
        "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n",
        vec!["4"],
    );
    let greeting = (
        "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n",
        vec!["Hello", "Hi", "hey", "hello", "!"],
    );
    vec![arithmetic, greeting]
}
1229
/// Dispatch golden-output generation to the format-specific path.
/// Returns `Ok(None)` only for SafeTensors models lacking a tokenizer
/// (the gate is then skipped). `mapped`/`gguf_model` are used by the
/// GGUF path only.
#[cfg(feature = "inference")]
fn generate_golden_for_format(
    path: &Path,
    prompt: &str,
    max_tokens: usize,
    format: realizar::format::ModelFormat,
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf_model: &realizar::gguf::GGUFModel,
) -> Result<Option<(Vec<u32>, String)>> {
    use realizar::format::ModelFormat;

    match format {
        ModelFormat::Gguf => Ok(Some(golden_output_gguf_cpu(
            mapped, gguf_model, prompt, max_tokens,
        )?)),
        ModelFormat::Apr => Ok(Some(golden_output_apr(path, prompt, max_tokens)?)),
        ModelFormat::SafeTensors => golden_output_safetensors(path, prompt, max_tokens),
    }
}
1250
/// Validate one golden prompt: generate on the format's CPU path,
/// optionally re-check on the GPU path (GGUF + CUDA only), and verify
/// the text. Returns `Ok(None)` when the case passes, or
/// `Ok(Some(GateResult))` carrying the skip/failure to report.
#[cfg(feature = "inference")]
fn validate_golden_test_case(
    path: &Path,
    prompt: &str,
    expected_patterns: &[&str],
    config: &QaConfig,
    format: realizar::format::ModelFormat,
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf_model: &realizar::gguf::GGUFModel,
    cuda_available: bool,
    start: Instant,
) -> Result<Option<GateResult>> {
    use realizar::format::ModelFormat;

    let Some((_, output_text)) =
        generate_golden_for_format(path, prompt, config.max_tokens, format, mapped, gguf_model)?
    else {
        // SafeTensors without a tokenizer: the whole gate is skipped.
        return Ok(Some(GateResult::skipped(
            "golden_output",
            "SafeTensors: tokenizer.json not found",
        )));
    };

    // GPU parity re-check applies only to GGUF models with CUDA present.
    #[cfg(feature = "cuda")]
    if cuda_available && format == ModelFormat::Gguf {
        use realizar::gguf::QuantizedGenerateConfig;
        let prompt_tokens = gguf_model
            .encode(prompt)
            .unwrap_or_else(|| vec![151643, 9707]);
        let gen_config = QuantizedGenerateConfig {
            max_tokens: config.max_tokens,
            temperature: 0.0,
            top_k: 1,
            ..Default::default()
        };
        if let Some(failure) = validate_gpu_golden_output(
            mapped,
            &prompt_tokens,
            &gen_config,
            gguf_model,
            expected_patterns,
            config,
        )? {
            return Ok(Some(GateResult::failed(
                "golden_output",
                &failure,
                None,
                None,
                start.elapsed(),
            )));
        }
    }
    // Without the cuda feature the flag is intentionally unused.
    #[cfg(not(feature = "cuda"))]
    let _ = cuda_available;

    if let OutputVerification::Fail { reason } =
        verify_output(&output_text, "golden_output", expected_patterns)
    {
        return Ok(Some(GateResult::failed(
            "golden_output",
            &reason,
            None,
            None,
            start.elapsed(),
        )));
    }

    Ok(None)
}
1323
/// Gate: run every golden prompt through the model and verify outputs.
/// Passes when all cases pass; the first failing/skipping case decides
/// the result. Skipped entirely without the `inference` feature.
fn run_golden_output_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running golden output test...".yellow());
    }

    let test_cases = golden_test_cases();

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::detect_format;
        use realizar::gguf::{GGUFModel, MappedGGUFModel};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;
        // NOTE(review): the GGUF map/parse below runs for ALL detected
        // formats — presumably tolerant of APR/SafeTensors input since
        // the per-format dispatch happens later; confirm.
        let mapped = MappedGGUFModel::from_path(path)
            .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
        let gguf_model = GGUFModel::from_bytes(&model_bytes)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

        // First failing (or skipping) case short-circuits the gate.
        for (prompt, expected_patterns) in &test_cases {
            if let Some(result) = validate_golden_test_case(
                path,
                prompt,
                expected_patterns,
                config,
                format,
                &mapped,
                &gguf_model,
                cuda_available,
                start,
            )? {
                return Ok(result);
            }
        }

        Ok(GateResult::passed(
            "golden_output",
            &format!("{} golden test cases passed", test_cases.len()),
            Some(test_cases.len() as f64),
            Some(test_cases.len() as f64),
            start.elapsed(),
        ))
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config, test_cases);
        Ok(GateResult::skipped(
            "golden_output",
            "Requires 'inference' feature",
        ))
    }
}
1383
1384#[cfg(feature = "inference")]
fn measure_generate_throughput(
    warmup: usize,
    iterations: usize,
    prompt_len: usize,
    overall_start: Instant,
    mut generate_fn: impl FnMut() -> Vec<u32>,
) -> (f64, Duration) {
    // Untimed warmup passes so caches/JIT state are primed before measuring.
    (0..warmup).for_each(|_| {
        generate_fn();
    });

    // Timed passes: count only newly generated tokens, i.e. output length
    // minus the prompt length (saturating so short outputs count as zero).
    let timed = Instant::now();
    let generated: usize = (0..iterations)
        .map(|_| generate_fn().len().saturating_sub(prompt_len))
        .sum();
    let secs = timed.elapsed().as_secs_f64();

    // Returns (tokens-per-second, total elapsed since the caller's timer).
    (generated as f64 / secs, overall_start.elapsed())
}
1412
#[cfg(feature = "inference")]
/// Measures GGUF decode throughput, preferring the CUDA GPU-resident path
/// when a device is available and falling back to the CPU KV-cache path.
///
/// Returns `(tokens_per_second, elapsed_since_start)` where `start` is the
/// caller's overall gate timer.
fn throughput_gguf(
    path: &Path,
    model_bytes: &[u8],
    config: &QaConfig,
    cuda_available: bool,
    start: Instant,
    prompt: &str,
) -> Result<(f64, Duration)> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let gguf = GGUFModel::from_bytes(model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;
    // Fallback token ids used when the model embeds no tokenizer —
    // presumably Qwen-family ids; TODO confirm against tokenizer tables.
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);
    // temperature 0 + top_k 1 => greedy decoding, keeping runs deterministic.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;

    if cuda_available {
        // The CUDA wrapper consumes the CPU model (device ordinal 0).
        let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
            .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
        Ok(measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            start,
            || {
                cuda_model
                    .generate_gpu_resident(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        ))
    } else {
        Ok(measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            start,
            || {
                model
                    .generate_with_cache(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        ))
    }
}
1471
#[cfg(feature = "inference")]
/// Measures CPU decode throughput for an APR model using its embedded
/// BPE tokenizer. Returns `(tokens_per_second, elapsed_since_start)`.
fn throughput_apr(
    path: &Path,
    config: &QaConfig,
    start: Instant,
    prompt: &str,
) -> Result<(f64, Duration)> {
    use realizar::apr::AprV2Model;
    use realizar::apr_transformer::{AprTransformer, GenerateConfig};

    // The container is opened twice: once for the embedded tokenizer and
    // once to build the transformer itself.
    let container = AprV2Model::load(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR: {e}")))?;
    let tokenizer = container
        .load_embedded_bpe_tokenizer()
        .ok_or_else(|| CliError::ValidationFailed("APR missing embedded tokenizer".to_string()))?;
    let model = AprTransformer::from_apr_file(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR transformer: {e}")))?;

    let tokens = tokenizer.encode(prompt);
    // temperature 0 + top_k 1 keeps the benchmark deterministic.
    let gen_config = GenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    Ok(measure_generate_throughput(
        config.warmup,
        config.iterations,
        tokens.len(),
        start,
        || model.generate_with_cache(&tokens, &gen_config).unwrap_or_default(),
    ))
}
1511
#[cfg(feature = "inference")]
/// Measures CPU decode throughput for a SafeTensors model. Returns
/// `Ok(None)` when no sibling tokenizer.json can be found and parsed —
/// the caller reports that as a skip, not a failure.
fn throughput_safetensors(
    path: &Path,
    config: &QaConfig,
    start: Instant,
    prompt: &str,
) -> Result<Option<(f64, Duration)>> {
    use aprender::text::bpe::{load_from_json, BpeTokenizer};
    use realizar::safetensors_infer::SafetensorsToAprConverter;

    // The tokenizer lives next to the weights; any failure along this chain
    // (missing file, unreadable, bad JSON) means "not benchmarkable".
    let loaded: Option<BpeTokenizer> =
        realizar::safetensors::find_sibling_file(path, "tokenizer.json")
            .as_ref()
            .and_then(|p| std::fs::read_to_string(p).ok())
            .and_then(|json| load_from_json(&json).ok());
    let Some(tokenizer) = loaded else {
        return Ok(None);
    };

    let model = SafetensorsToAprConverter::convert(path)
        .map_err(|e| CliError::ValidationFailed(format!("SafeTensors convert failed: {e}")))?;

    let tokens = tokenizer.encode(prompt);
    // temperature 0 + top_k 1 keeps the benchmark deterministic.
    let gen_config = realizar::apr_transformer::GenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    Ok(Some(measure_generate_throughput(
        config.warmup,
        config.iterations,
        tokens.len(),
        start,
        || model.generate_with_cache(&tokens, &gen_config).unwrap_or_default(),
    )))
}
1556
#[cfg(feature = "inference")]
/// Dispatches throughput measurement to the loader for the detected model
/// format. `Ok(None)` means the format was recognized but cannot be
/// benchmarked (SafeTensors without a sibling tokenizer).
fn throughput_for_format(
    path: &Path,
    model_bytes: &[u8],
    format: realizar::format::ModelFormat,
    prompt: &str,
    config: &QaConfig,
    cuda_available: bool,
    start: Instant,
) -> Result<Option<(f64, Duration)>> {
    use realizar::format::ModelFormat;

    match format {
        ModelFormat::SafeTensors => throughput_safetensors(path, config, start, prompt),
        ModelFormat::Apr => throughput_apr(path, config, start, prompt).map(Some),
        ModelFormat::Gguf => {
            throughput_gguf(path, model_bytes, config, cuda_available, start, prompt).map(Some)
        }
    }
}
1578
/// Gate: end-to-end decode throughput must clear a format-specific floor.
///
/// GGUF runs the optimized (possibly CUDA) path and must reach
/// `max(10, min_tps / 10)` tok/s; APR and SafeTensors paths only need
/// 1 tok/s (a smoke-level floor for the slower CPU converters).
fn run_throughput_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running throughput benchmark...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::{detect_format, ModelFormat};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;

        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;

        // Only the first 8 bytes (magic) are needed for format detection.
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;

        let prompt = "Write a hello world program in Python:";
        // `None` means "recognized but not benchmarkable" — reported as a
        // skip rather than a failure.
        let Some((tps, duration)) = throughput_for_format(
            path,
            &model_bytes,
            format,
            prompt,
            config,
            cuda_available,
            start,
        )?
        else {
            return Ok(GateResult::skipped(
                "throughput",
                "SafeTensors: tokenizer.json not found in model directory",
            ));
        };

        // Per-format thresholds (see doc comment above).
        let threshold = match format {
            ModelFormat::Gguf => 10.0_f64.max(config.min_tps / 10.0),
            ModelFormat::Apr | ModelFormat::SafeTensors => 1.0, };

        if tps >= threshold {
            Ok(GateResult::passed(
                "throughput",
                &format!("{:.1} tok/s >= {:.0} tok/s threshold", tps, threshold),
                Some(tps),
                Some(threshold),
                duration,
            ))
        } else {
            Ok(GateResult::failed(
                "throughput",
                &format!("{:.1} tok/s < {:.0} tok/s threshold", tps, threshold),
                Some(tps),
                Some(threshold),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "throughput",
            "Requires 'inference' feature",
        ))
    }
}
1655
1656#[cfg(feature = "inference")]
/// Maps our-throughput-over-Ollama ratio to a letter grade. Anything below
/// 0.5x — including a NaN ratio, which fails every guard — is an "F".
fn ollama_parity_grade(ratio: f64) -> &'static str {
    match ratio {
        r if r >= 2.0 => "A+",
        r if r >= 1.5 => "A",
        r if r >= 1.0 => "B",
        r if r >= 0.75 => "C",
        r if r >= 0.5 => "D",
        _ => "F",
    }
}
1676
#[cfg(feature = "inference")]
/// Measures our own GGUF decode throughput for the Ollama parity gate,
/// preferring the CUDA GPU-resident path when a device is present.
///
/// Generates at least 128 tokens per run so the measurement window matches
/// what `measure_ollama_throughput` requests from Ollama.
fn measure_our_gguf_tps(path: &Path, config: &QaConfig) -> Result<f64> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let model_bytes = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
    let gguf = GGUFModel::from_bytes(&model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

    let prompt = "Write a function to check if a number is prime:";
    // Fallback token id when the model embeds no tokenizer — presumably a
    // Qwen-family id; TODO confirm.
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643]);
    let parity_max_tokens = config.max_tokens.max(128);
    // Greedy decoding mirrors the temperature-0 request sent to Ollama.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: parity_max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let cuda_available = realizar::cuda::CudaExecutor::is_available()
        && realizar::cuda::CudaExecutor::num_devices() > 0;

    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;

    if cuda_available {
        // The CUDA wrapper consumes the CPU model (device ordinal 0).
        let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
            .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
        let (tps, _) = measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            Instant::now(),
            || {
                cuda_model
                    .generate_gpu_resident(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        );
        Ok(tps)
    } else {
        let (tps, _) = measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            Instant::now(),
            || {
                model
                    .generate_with_cache(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        );
        Ok(tps)
    }
}
1746
/// Gate: our GGUF throughput vs a local Ollama server running the matching
/// qwen2.5-coder model. Skips (never fails) when Ollama is unreachable or
/// its throughput cannot be measured.
fn run_ollama_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running Ollama parity test...".yellow());
    }

    // Probe before any model work — cheap, and also runs in non-inference builds.
    if !check_ollama_available() {
        return Ok(GateResult::skipped(
            "ollama_parity",
            "Ollama not available (start with: ollama serve)",
        ));
    }

    #[cfg(feature = "inference")]
    {
        let ollama_tps = measure_ollama_throughput(path, config)?;

        // 0.0 is the sentinel for "no usable measurement" — skip, don't fail.
        if ollama_tps <= 0.0 {
            return Ok(GateResult::skipped(
                "ollama_parity",
                "Could not measure Ollama throughput",
            ));
        }

        let our_tps = measure_our_gguf_tps(path, config)?;
        let speedup = our_tps / ollama_tps;
        let grade = ollama_parity_grade(speedup);
        let duration = start.elapsed();

        if speedup >= config.min_speedup {
            Ok(GateResult::passed(
                "ollama_parity",
                &format!(
                    "{:.1}x Ollama ({:.0} vs {:.0} tok/s) Grade {grade} >= {:.1}x threshold",
                    speedup, our_tps, ollama_tps, config.min_speedup
                ),
                Some(speedup),
                Some(config.min_speedup),
                duration,
            ))
        } else {
            // NOTE(review): failure message uses {:.2}x vs the pass message's
            // {:.1}x — presumably intentional extra precision; confirm.
            Ok(GateResult::failed(
                "ollama_parity",
                &format!(
                    "{:.2}x Ollama ({:.0} vs {:.0} tok/s) Grade {grade} < {:.1}x threshold",
                    speedup, our_tps, ollama_tps, config.min_speedup
                ),
                Some(speedup),
                Some(config.min_speedup),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "ollama_parity",
            "Requires 'inference' feature",
        ))
    }
}
1811
#[cfg(feature = "inference")]
/// Benchmarks the same GGUF model on CPU and GPU and returns
/// `(cpu_tokens_per_sec, gpu_tokens_per_sec)`.
///
/// The caller is responsible for checking CUDA availability first; CUDA
/// init errors surface as `ValidationFailed`.
fn measure_gpu_cpu_tps(path: &Path, config: &QaConfig) -> Result<(f64, f64)> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let model_bytes = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
    let gguf = GGUFModel::from_bytes(&model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

    let prompt = "Write a function to calculate factorial:";
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643]);
    // Greedy decoding so both passes generate comparable token streams.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    // CPU pass.
    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let (cpu_tps, _) = measure_generate_throughput(
        config.warmup,
        config.iterations,
        prompt_tokens.len(),
        Instant::now(),
        || {
            model
                .generate_with_cache(&prompt_tokens, &gen_config)
                .unwrap_or_default()
        },
    );

    // GPU pass. The model is mapped and built a second time because
    // `OwnedQuantizedModelCuda::new` takes its CPU model by value, and the
    // first instance was used for the CPU measurement above.
    let mapped2 = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model2 = OwnedQuantizedModel::from_mapped(&mapped2)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let mut cuda_model = OwnedQuantizedModelCuda::new(model2, 0)
        .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
    let (gpu_tps, _) = measure_generate_throughput(
        config.warmup,
        config.iterations,
        prompt_tokens.len(),
        Instant::now(),
        || {
            cuda_model
                .generate_gpu_resident(&prompt_tokens, &gen_config)
                .unwrap_or_default()
        },
    );

    Ok((cpu_tps, gpu_tps))
}
1878
/// Gate: GPU decode must be at least `min_gpu_speedup`x faster than CPU for
/// the same GGUF model. Skips when CUDA is unavailable or the model is not
/// GGUF; fails when CPU throughput is zero (speedup undefined).
fn run_gpu_speedup_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running GPU vs CPU speedup test...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::{detect_format, ModelFormat};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
        if !cuda_available {
            return Ok(GateResult::skipped(
                "gpu_speedup",
                "CUDA not available - cannot compare GPU vs CPU",
            ));
        }

        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
        // Magic-byte sniff; 8 bytes is enough for format detection.
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;
        if format != ModelFormat::Gguf {
            return Ok(GateResult::skipped(
                "gpu_speedup",
                "Only GGUF format supported",
            ));
        }

        let (cpu_tps, gpu_tps) = measure_gpu_cpu_tps(path, config)?;
        let duration = start.elapsed();

        // Guard the division below — zero CPU throughput means the benchmark
        // itself produced nothing.
        if cpu_tps <= 0.0 {
            return Ok(GateResult::failed(
                "gpu_speedup",
                "CPU throughput was zero - cannot calculate speedup",
                None,
                None,
                duration,
            ));
        }

        let speedup = gpu_tps / cpu_tps;

        if speedup >= config.min_gpu_speedup {
            Ok(GateResult::passed(
                "gpu_speedup",
                &format!(
                    "GPU {:.1}x faster than CPU ({:.0} vs {:.0} tok/s) >= {:.1}x threshold",
                    speedup, gpu_tps, cpu_tps, config.min_gpu_speedup
                ),
                Some(speedup),
                Some(config.min_gpu_speedup),
                duration,
            ))
        } else {
            Ok(GateResult::failed(
                "gpu_speedup",
                &format!(
                    "GPU {:.2}x faster than CPU ({:.0} vs {:.0} tok/s) < {:.1}x threshold",
                    speedup, gpu_tps, cpu_tps, config.min_gpu_speedup
                ),
                Some(speedup),
                Some(config.min_gpu_speedup),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "gpu_speedup",
            "Requires 'inference' feature",
        ))
    }
}
1959
/// Gate: cross-format parity — a GGUF model and a SafeTensors build of
/// (presumably) the same weights must pick the same argmax token for one
/// forward pass over an identical token sequence.
///
/// Skips when no SafeTensors path is configured, the path does not exist,
/// or the primary model is not GGUF.
fn run_format_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running cross-format parity test...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        use realizar::gguf::{GGUFModel, MappedGGUFModel, OwnedQuantizedModel};
        use realizar::safetensors_infer::SafetensorsToAprConverter;

        let Some(safetensors_path) = &config.safetensors_path else {
            return Ok(GateResult::skipped(
                "format_parity",
                "No SafeTensors path provided (use --safetensors-path)",
            ));
        };

        let gguf_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read GGUF: {e}")))?;

        let gguf_format = detect_format(&gguf_bytes[..8.min(gguf_bytes.len())]).map_err(|e| {
            CliError::ValidationFailed(format!("Failed to detect GGUF format: {e}"))
        })?;

        if gguf_format != ModelFormat::Gguf {
            return Ok(GateResult::skipped(
                "format_parity",
                "Primary model must be GGUF format",
            ));
        }

        if !safetensors_path.exists() {
            return Ok(GateResult::skipped(
                "format_parity",
                &format!("SafeTensors file not found: {}", safetensors_path.display()),
            ));
        }

        let gguf = GGUFModel::from_bytes(&gguf_bytes)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

        // Tokenize once with the GGUF tokenizer and feed the SAME ids to both
        // models so the comparison sees identical input.
        let prompt = "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n";
        let prompt_tokens: Vec<u32> = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);

        let gguf_logits = {
            let mapped = MappedGGUFModel::from_path(path)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF map failed: {e}")))?;
            let model = OwnedQuantizedModel::from_mapped(&mapped)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF model failed: {e}")))?;
            model
                .forward(&prompt_tokens)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF forward failed: {e}")))?
        };

        let st_logits = {
            let transformer =
                SafetensorsToAprConverter::convert(safetensors_path).map_err(|e| {
                    CliError::ValidationFailed(format!("SafeTensors convert failed: {e}"))
                })?;
            transformer.forward(&prompt_tokens).map_err(|e| {
                CliError::ValidationFailed(format!("SafeTensors forward failed: {e}"))
            })?
        };

        let duration = start.elapsed();

        // Greedy next-token pick: index of the maximal logit. `partial_cmp`
        // falls back to `Equal` for NaN, and `max_by` keeps the last maximal
        // element on ties.
        let gguf_argmax = gguf_logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx as u32);

        let st_argmax = st_logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx as u32);

        match (gguf_argmax, st_argmax) {
            (Some(gguf_token), Some(st_token)) => {
                if gguf_token == st_token {
                    Ok(GateResult::passed(
                        "format_parity",
                        &format!(
                            "GGUF argmax={} == SafeTensors argmax={} (Cross-format parity VERIFIED)",
                            gguf_token, st_token
                        ),
                        Some(gguf_token as f64),
                        Some(st_token as f64),
                        duration,
                    ))
                } else {
                    Ok(GateResult::failed(
                        "format_parity",
                        &format!(
                            "GGUF argmax={} != SafeTensors argmax={} (Cross-format parity BROKEN)",
                            gguf_token, st_token
                        ),
                        Some(gguf_token as f64),
                        Some(st_token as f64),
                        duration,
                    ))
                }
            }
            // `None` argmax means a model returned an empty logits vector.
            _ => Ok(GateResult::failed(
                "format_parity",
                "Failed to get argmax from one or both formats",
                None,
                None,
                duration,
            )),
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "format_parity",
            "Requires 'inference' feature",
        ))
    }
}
2100
/// Probes the local Ollama HTTP API via `curl`. Any failure — curl missing,
/// server down, non-200 status — counts as "not available".
fn check_ollama_available() -> bool {
    let probe = std::process::Command::new("curl")
        .args([
            "-s",
            "-o",
            "/dev/null",
            "-w",
            "%{http_code}",
            "http://localhost:11434/api/tags",
        ])
        .output();
    match probe {
        Ok(out) => String::from_utf8_lossy(&out.stdout).trim() == "200",
        Err(_) => false,
    }
}
2117
/// Infers the closest `qwen2.5-coder` Ollama model tag from a model file
/// path: first by size markers in the filename, then by on-disk size.
///
/// NOTE(review): substring matching means an unusual name containing "13b"
/// would hit the "3b" branch — fine for the qwen2.5-coder family
/// (0.5b/1.5b/3b/7b/14b/32b), but confirm if other families are ever passed.
fn detect_ollama_model_from_path(path: &Path) -> String {
    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
    let filename_lower = filename.to_lowercase();

    // The "-0_5b"/"-1_5b" variants cover underscore-style quantized names.
    // The previous `contains("3b") || contains("-3b")` style duplicates were
    // removed: a plain "Nb" marker already subsumes its "-Nb" form.
    let size = if filename_lower.contains("0.5b") || filename_lower.contains("-0_5b") {
        "0.5b"
    } else if filename_lower.contains("1.5b") || filename_lower.contains("-1_5b") {
        "1.5b"
    } else if filename_lower.contains("3b") {
        "3b"
    } else if filename_lower.contains("7b") {
        "7b"
    } else if filename_lower.contains("14b") {
        "14b"
    } else if filename_lower.contains("32b") {
        "32b"
    } else {
        // No marker in the name: bucket by file size in bytes. A missing or
        // unreadable file reads as 0 and maps to the smallest model.
        match std::fs::metadata(path).map(|m| m.len()).unwrap_or(0) {
            0..=800_000_000 => "0.5b",
            800_000_001..=2_000_000_000 => "1.5b",
            2_000_000_001..=4_000_000_000 => "3b",
            _ => "7b",
        }
    };

    format!("qwen2.5-coder:{size}")
}
2159
#[cfg(feature = "inference")]
/// Measures Ollama's decode throughput by POSTing to its local generate API
/// via `curl` and summing `eval_count` / `eval_duration` (nanoseconds) from
/// each response.
///
/// Returns 0.0 — which the caller treats as "skip" — when no iteration
/// yields usable numbers. Capped at 3 requests because each one generates up
/// to `max(max_tokens, 128)` tokens.
#[allow(clippy::disallowed_methods)]
fn measure_ollama_throughput(path: &Path, config: &QaConfig) -> Result<f64> {
    let prompt = "Write a hello world program in Python:";
    // Pick the Ollama model tag that best matches this model file.
    let model = detect_ollama_model_from_path(path);

    // Floor of 128 generated tokens so the measurement window matches
    // `measure_our_gguf_tps`.
    let parity_max_tokens = config.max_tokens.max(128);
    let request_body = serde_json::json!({
        "model": model,
        "prompt": prompt,
        "stream": false,
        "options": {
            "num_predict": parity_max_tokens,
            "temperature": 0.0
        }
    });

    let mut total_tokens = 0usize;
    let mut total_duration_ns = 0u64;

    for _ in 0..config.iterations.min(3) {
        let output = std::process::Command::new("curl")
            .args([
                "-s",
                "-X",
                "POST",
                "http://localhost:11434/api/generate",
                "-H",
                "Content-Type: application/json",
                "-d",
                &request_body.to_string(),
            ])
            .output();

        // Best-effort: a failed request or unparsable response simply
        // contributes nothing to the totals.
        if let Ok(output) = output {
            if let Ok(response) = serde_json::from_slice::<serde_json::Value>(&output.stdout) {
                if let (Some(eval_count), Some(eval_duration)) = (
                    response
                        .get("eval_count")
                        .and_then(serde_json::Value::as_u64),
                    response
                        .get("eval_duration")
                        .and_then(serde_json::Value::as_u64),
                ) {
                    total_tokens += eval_count as usize;
                    total_duration_ns += eval_duration;
                }
            }
        }
    }

    if total_tokens == 0 || total_duration_ns == 0 {
        return Ok(0.0);
    }

    // eval_duration is reported in nanoseconds; convert to tokens/second.
    let duration_s = total_duration_ns as f64 / 1_000_000_000.0;
    Ok(total_tokens as f64 / duration_s)
}
2227
2228fn print_gate_result(result: &GateResult) {
2230 let badge = if result.skipped {
2231 output::badge_skip("SKIP")
2232 } else if result.passed {
2233 output::badge_pass("PASS")
2234 } else {
2235 output::badge_fail("FAIL")
2236 };
2237
2238 let name = gate_display_name(&result.name);
2239
2240 println!(
2241 " {} {} {}",
2242 badge,
2243 name.white().bold(),
2244 result.message.dimmed()
2245 );
2246
2247 if !result.skipped {
2248 println!(
2249 " {}",
2250 output::duration_fmt(result.duration_ms).dimmed()
2251 );
2252 }
2253 println!();
2254}
2255
/// Gate: validates that every registered CPU/PTX kernel pair agrees, using
/// this model's own dimensions (hidden size, heads, RoPE theta, epsilon).
///
/// Skips for non-GGUF models — PTX kernels only serve quantized inference —
/// and in non-inference builds.
fn run_ptx_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running PTX parity validation...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        use realizar::ptx_parity::{validate_all_kernel_pairs, KernelDimensions};

        // Sniff only the 8 magic bytes; any read error falls through to None
        // and is treated as "not GGUF".
        let magic = std::fs::File::open(path).ok().and_then(|mut f| {
            use std::io::Read;
            let mut buf = [0u8; 8];
            f.read_exact(&mut buf).ok()?;
            Some(buf.to_vec())
        });
        let fmt = magic.and_then(|m| detect_format(&m).ok());
        if fmt != Some(ModelFormat::Gguf) {
            return Ok(GateResult::skipped(
                "ptx_parity",
                "Non-GGUF format (PTX kernels only apply to quantized inference)",
            ));
        }

        // NOTE(review): a non-UTF-8 path degrades to "" here and would fail
        // below with a generic load error — confirm that is acceptable.
        let mapped = realizar::gguf::MappedGGUFModel::from_path(path.to_str().unwrap_or_default())
            .map_err(|e| CliError::ValidationFailed(format!("Failed to load GGUF: {e}")))?;

        let model_config = realizar::gguf::GGUFConfig::from_gguf(&mapped.model)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read config: {e}")))?;

        // Kernel dimensions derived from the model's hyperparameters.
        let dims = KernelDimensions {
            hidden_dim: model_config.hidden_dim as u32,
            intermediate_dim: model_config.intermediate_dim as u32,
            num_heads: model_config.num_heads as u32,
            head_dim: (model_config.hidden_dim / model_config.num_heads) as u32,
            rope_theta: model_config.rope_theta,
            epsilon: model_config.eps,
        };

        let report = validate_all_kernel_pairs(&dims);
        let duration = start.elapsed();

        if report.all_passed() {
            Ok(GateResult::passed(
                "ptx_parity",
                &report.summary(),
                Some(report.passed as f64),
                Some(report.total as f64),
                duration,
            ))
        } else {
            // Verbose mode lists each failing kernel with its violations.
            if !config.json && config.verbose {
                for result in &report.results {
                    if !result.passed {
                        println!(
                            " {} {} ({}): {}",
                            "FAIL".red(),
                            result.name,
                            result.dispatch_strategy,
                            result.violations.join("; ")
                        );
                    }
                }
            }
            Ok(GateResult::failed(
                "ptx_parity",
                &report.summary(),
                Some(report.passed as f64),
                Some(report.total as f64),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config, start);
        Ok(GateResult::skipped(
            "ptx_parity",
            "Requires inference feature",
        ))
    }
}
2355
2356fn run_gpu_state_isolation_gate(path: &Path, _config: &QaConfig) -> Result<GateResult> {
2371 let start = Instant::now();
2372
2373 #[cfg(all(feature = "inference", feature = "cuda"))]
2374 {
2375 use realizar::cuda::CudaExecutor;
2376 use realizar::format::{detect_format, ModelFormat};
2377 use realizar::gguf::{
2378 GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
2379 QuantizedGenerateConfig,
2380 };
2381
2382 let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
2383 if !cuda_available {
2384 return Ok(GateResult::skipped(
2385 "gpu_state_isolation",
2386 "CUDA not available",
2387 ));
2388 }
2389
2390 let magic = std::fs::File::open(path).ok().and_then(|mut f| {
2391 use std::io::Read;
2392 let mut buf = [0u8; 8];
2393 f.read_exact(&mut buf).ok()?;
2394 Some(buf.to_vec())
2395 });
2396 let fmt = magic.and_then(|m| detect_format(&m).ok());
2397 if fmt != Some(ModelFormat::Gguf) {
2398 return Ok(GateResult::skipped(
2399 "gpu_state_isolation",
2400 "Only GGUF format supported for GPU state isolation",
2401 ));
2402 }
2403
2404 let model_bytes = std::fs::read(path)
2405 .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
2406 let gguf = GGUFModel::from_bytes(&model_bytes)
2407 .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;
2408
2409 let prompt_a = "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n";
2410 let prompt_b = "<|im_start|>user\nWrite hello world in Python<|im_end|>\n<|im_start|>assistant\n";
2411
2412 let tokens_a = gguf.encode(prompt_a).unwrap_or_else(|| vec![151643, 9707]);
2413 let tokens_b = gguf.encode(prompt_b).unwrap_or_else(|| vec![151643, 1234]);
2414
2415 let gen_config = QuantizedGenerateConfig {
2416 max_tokens: 16,
2417 temperature: 0.0,
2418 top_k: 1,
2419 ..Default::default()
2420 };
2421
2422 let mapped = MappedGGUFModel::from_path(path)
2423 .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
2424 let model = OwnedQuantizedModel::from_mapped(&mapped)
2425 .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
2426 let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
2427 .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
2428
2429 let output_a = cuda_model
2431 .generate_gpu_resident(&tokens_a, &gen_config)
2432 .map_err(|e| CliError::ValidationFailed(format!("Gen 1 failed: {e}")))?;
2433
2434 let output_b = cuda_model
2436 .generate_gpu_resident(&tokens_b, &gen_config)
2437 .map_err(|e| CliError::ValidationFailed(format!("Gen 2 failed: {e}")))?;
2438
2439 let output_a2 = cuda_model
2441 .generate_gpu_resident(&tokens_a, &gen_config)
2442 .map_err(|e| CliError::ValidationFailed(format!("Gen 3 failed: {e}")))?;
2443
2444 let duration = start.elapsed();
2445
2446 if output_a != output_a2 {
2448 let text_a = gguf.decode(&output_a);
2449 let text_a2 = gguf.decode(&output_a2);
2450 return Ok(GateResult::failed(
2451 "gpu_state_isolation",
2452 &format!(
2453 "State leak: prompt A produced different output on retry. \
2454 First: '{}', Retry: '{}'",
2455 text_a.chars().take(50).collect::<String>(),
2456 text_a2.chars().take(50).collect::<String>()
2457 ),
2458 None,
2459 None,
2460 duration,
2461 ));
2462 }
2463
2464 if output_a == output_b {
2466 return Ok(GateResult::failed(
2467 "gpu_state_isolation",
2468 "Model stuck: same output for different prompts (GPU state not functional)",
2469 None,
2470 None,
2471 duration,
2472 ));
2473 }
2474
2475 Ok(GateResult::passed(
2476 "gpu_state_isolation",
2477 "GPU state properly isolated: 3 generations, deterministic replay confirmed",
2478 Some(3.0),
2479 Some(3.0),
2480 duration,
2481 ))
2482 }
2483
2484 #[cfg(not(all(feature = "inference", feature = "cuda")))]
2485 {
2486 let _ = (path, config);
2487 Ok(GateResult::skipped(
2488 "gpu_state_isolation",
2489 "Requires inference+cuda features",
2490 ))
2491 }
2492}
2493
2494fn run_performance_regression_gate(
2500 current_gates: &[GateResult],
2501 config: &QaConfig,
2502) -> Result<GateResult> {
2503 let start = Instant::now();
2504
2505 let Some(prev_path) = &config.previous_report else {
2506 return Ok(GateResult::skipped(
2507 "performance_regression",
2508 "No previous report provided",
2509 ));
2510 };
2511
2512 let prev_json = std::fs::read_to_string(prev_path).map_err(|e| {
2513 CliError::ValidationFailed(format!("Failed to read previous report: {e}"))
2514 })?;
2515
2516 let prev_report: QaReport = serde_json::from_str(&prev_json).map_err(|e| {
2517 CliError::ValidationFailed(format!("Failed to parse previous report: {e}"))
2518 })?;
2519
2520 let threshold = config.regression_threshold;
2521 let mut regressions = Vec::new();
2522
2523 let comparable_gates = ["throughput", "ollama_parity", "gpu_speedup"];
2525 for gate_name in &comparable_gates {
2526 let prev_gate = prev_report.gates.iter().find(|g| g.name == *gate_name);
2527 let curr_gate = current_gates.iter().find(|g| g.name == *gate_name);
2528
2529 if let (Some(prev), Some(curr)) = (prev_gate, curr_gate) {
2530 if let (Some(prev_val), Some(curr_val)) = (prev.value, curr.value) {
2531 if prev_val > 0.0 && !prev.skipped && !curr.skipped {
2532 let regression = (prev_val - curr_val) / prev_val;
2533 if regression > threshold {
2534 regressions.push(format!(
2535 "{}: {:.1} -> {:.1} ({:.0}% regression)",
2536 gate_name,
2537 prev_val,
2538 curr_val,
2539 regression * 100.0
2540 ));
2541 }
2542 }
2543 }
2544 }
2545 }
2546
2547 let duration = start.elapsed();
2548
2549 if regressions.is_empty() {
2550 Ok(GateResult::passed(
2551 "performance_regression",
2552 &format!(
2553 "No regressions >{:.0}% vs {}",
2554 threshold * 100.0,
2555 prev_path.display()
2556 ),
2557 Some(0.0),
2558 Some(threshold),
2559 duration,
2560 ))
2561 } else {
2562 Ok(GateResult::failed(
2563 "performance_regression",
2564 &format!("Regressions detected: {}", regressions.join("; ")),
2565 Some(regressions.len() as f64),
2566 Some(0.0),
2567 duration,
2568 ))
2569 }
2570}
2571
2572#[cfg(test)]
2573mod tests {
2574 use super::*;
2575 use std::io::Write;
2576 use tempfile::NamedTempFile;
2577
2578 #[test]
2583 fn test_qa_config_default() {
2584 let config = QaConfig::default();
2585 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
2586 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
2587 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
2588 assert!(!config.skip_golden);
2589 assert!(!config.skip_throughput);
2590 assert!(!config.skip_ollama);
2591 assert!(!config.skip_gpu_speedup);
2592 assert!(!config.skip_format_parity);
2593 assert!(config.safetensors_path.is_none());
2594 }
2595
2596 #[test]
2597 fn test_qa_config_default_iterations() {
2598 let config = QaConfig::default();
2599 assert_eq!(config.iterations, 10);
2600 assert_eq!(config.warmup, 3);
2601 assert_eq!(config.max_tokens, 32);
2602 }
2603
2604 #[test]
2605 fn test_qa_config_default_output_flags() {
2606 let config = QaConfig::default();
2607 assert!(!config.json);
2608 assert!(!config.verbose);
2609 }
2610
2611 #[test]
2612 fn test_qa_config_clone() {
2613 let config = QaConfig {
2614 min_tps: 50.0,
2615 skip_golden: true,
2616 ..Default::default()
2617 };
2618 let cloned = config.clone();
2619 assert!((cloned.min_tps - 50.0).abs() < f64::EPSILON);
2620 assert!(cloned.skip_golden);
2621 }
2622
2623 #[test]
2624 fn test_qa_config_debug() {
2625 let config = QaConfig::default();
2626 let debug = format!("{config:?}");
2627 assert!(debug.contains("QaConfig"));
2628 assert!(debug.contains("min_tps"));
2629 }
2630
2631 #[test]
2636 fn test_gate_result_passed() {
2637 let result = GateResult::passed(
2638 "test_gate",
2639 "Test passed",
2640 Some(150.0),
2641 Some(100.0),
2642 Duration::from_secs(1),
2643 );
2644 assert!(result.passed);
2645 assert!(!result.skipped);
2646 assert_eq!(result.name, "test_gate");
2647 }
2648
2649 #[test]
2650 fn test_gate_result_passed_duration() {
2651 let result = GateResult::passed(
2652 "test_gate",
2653 "Test passed",
2654 Some(150.0),
2655 Some(100.0),
2656 Duration::from_millis(1500),
2657 );
2658 assert_eq!(result.duration_ms, 1500);
2659 }
2660
2661 #[test]
2662 fn test_gate_result_passed_no_value() {
2663 let result = GateResult::passed(
2664 "test_gate",
2665 "Test passed",
2666 None,
2667 None,
2668 Duration::from_secs(1),
2669 );
2670 assert!(result.value.is_none());
2671 assert!(result.threshold.is_none());
2672 }
2673
2674 #[test]
2675 fn test_gate_result_failed() {
2676 let result = GateResult::failed(
2677 "test_gate",
2678 "Test failed",
2679 Some(50.0),
2680 Some(100.0),
2681 Duration::from_secs(1),
2682 );
2683 assert!(!result.passed);
2684 assert!(!result.skipped);
2685 }
2686
2687 #[test]
2688 fn test_gate_result_failed_message() {
2689 let result = GateResult::failed(
2690 "throughput",
2691 "50 tok/s < 100 tok/s",
2692 Some(50.0),
2693 Some(100.0),
2694 Duration::from_secs(1),
2695 );
2696 assert!(result.message.contains("50"));
2697 assert!(result.message.contains("100"));
2698 }
2699
2700 #[test]
2701 fn test_gate_result_skipped() {
2702 let result = GateResult::skipped("test_gate", "No GPU available");
2703 assert!(result.passed); assert!(result.skipped);
2705 }
2706
2707 #[test]
2708 fn test_gate_result_skipped_message() {
2709 let result = GateResult::skipped("gpu_speedup", "GPU not available");
2710 assert!(result.message.contains("Skipped"));
2711 assert!(result.message.contains("GPU not available"));
2712 }
2713
2714 #[test]
2715 fn test_gate_result_skipped_no_duration() {
2716 let result = GateResult::skipped("test", "reason");
2717 assert_eq!(result.duration_ms, 0);
2718 }
2719
2720 #[test]
2721 fn test_gate_result_clone() {
2722 let result = GateResult::passed("test", "ok", Some(100.0), None, Duration::from_secs(1));
2723 let cloned = result.clone();
2724 assert_eq!(cloned.name, result.name);
2725 assert_eq!(cloned.passed, result.passed);
2726 }
2727
2728 #[test]
2729 fn test_gate_result_debug() {
2730 let result = GateResult::passed("test", "ok", None, None, Duration::from_secs(0));
2731 let debug = format!("{result:?}");
2732 assert!(debug.contains("GateResult"));
2733 }
2734
2735 #[test]
2736 fn test_gate_result_serialize() {
2737 let result = GateResult::passed(
2738 "throughput",
2739 "100 tok/s",
2740 Some(100.0),
2741 Some(60.0),
2742 Duration::from_secs(1),
2743 );
2744 let json = serde_json::to_string(&result).expect("serialize");
2745 assert!(json.contains("throughput"));
2746 assert!(json.contains("100"));
2747 }
2748
2749 #[test]
2750 fn test_gate_result_deserialize() {
2751 let json =
2752 r#"{"name":"test","passed":true,"message":"ok","duration_ms":1000,"skipped":false}"#;
2753 let result: GateResult = serde_json::from_str(json).expect("deserialize");
2754 assert_eq!(result.name, "test");
2755 assert!(result.passed);
2756 }
2757
2758 #[test]
2763 fn test_qa_report_serialization() {
2764 let report = QaReport {
2765 model: "test.gguf".to_string(),
2766 passed: true,
2767 gates: vec![GateResult::passed(
2768 "throughput",
2769 "100 tok/s",
2770 Some(100.0),
2771 Some(60.0),
2772 Duration::from_secs(5),
2773 )],
2774 total_duration_ms: 5000,
2775 timestamp: "2026-01-15T00:00:00Z".to_string(),
2776 summary: "All gates passed".to_string(),
2777 gates_executed: 0,
2778 gates_skipped: 0,
2779 system_info: None,
2780 };
2781
2782 let json = serde_json::to_string(&report).expect("serialization failed");
2783 assert!(json.contains("throughput"));
2784 assert!(json.contains("passed"));
2785 }
2786
2787 #[test]
2788 fn test_qa_report_deserialization() {
2789 let json = r#"{
2790 "model": "test.gguf",
2791 "passed": true,
2792 "gates": [],
2793 "total_duration_ms": 1000,
2794 "timestamp": "2026-01-01T00:00:00Z",
2795 "summary": "All passed"
2796 }"#;
2797 let report: QaReport = serde_json::from_str(json).expect("deserialize");
2798 assert_eq!(report.model, "test.gguf");
2799 assert!(report.passed);
2800 }
2801
2802 #[test]
2803 fn test_qa_report_failed() {
2804 let report = QaReport {
2805 model: "test.gguf".to_string(),
2806 passed: false,
2807 gates: vec![GateResult::failed(
2808 "throughput",
2809 "50 tok/s < 100 tok/s",
2810 Some(50.0),
2811 Some(100.0),
2812 Duration::from_secs(5),
2813 )],
2814 total_duration_ms: 5000,
2815 timestamp: "2026-01-15T00:00:00Z".to_string(),
2816 summary: "1 gate failed".to_string(),
2817 gates_executed: 0,
2818 gates_skipped: 0,
2819 system_info: None,
2820 };
2821 assert!(!report.passed);
2822 assert_eq!(report.gates.len(), 1);
2823 }
2824
2825 #[test]
2826 fn test_qa_report_multiple_gates() {
2827 let report = QaReport {
2828 model: "test.gguf".to_string(),
2829 passed: true,
2830 gates: vec![
2831 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
2832 GateResult::passed(
2833 "throughput",
2834 "ok",
2835 Some(100.0),
2836 Some(60.0),
2837 Duration::from_secs(2),
2838 ),
2839 GateResult::skipped("ollama", "skipped"),
2840 ],
2841 total_duration_ms: 3000,
2842 timestamp: "2026-01-15T00:00:00Z".to_string(),
2843 summary: "All passed".to_string(),
2844 gates_executed: 0,
2845 gates_skipped: 0,
2846 system_info: None,
2847 };
2848 assert_eq!(report.gates.len(), 3);
2849 }
2850
2851 #[test]
2852 fn test_qa_report_clone() {
2853 let report = QaReport {
2854 model: "test.gguf".to_string(),
2855 passed: true,
2856 gates: vec![],
2857 total_duration_ms: 1000,
2858 timestamp: "2026-01-15T00:00:00Z".to_string(),
2859 summary: "ok".to_string(),
2860 gates_executed: 0,
2861 gates_skipped: 0,
2862 system_info: None,
2863 };
2864 let cloned = report.clone();
2865 assert_eq!(cloned.model, report.model);
2866 }
2867
2868 #[test]
2869 fn test_qa_report_debug() {
2870 let report = QaReport {
2871 model: "test.gguf".to_string(),
2872 passed: true,
2873 gates: vec![],
2874 total_duration_ms: 1000,
2875 timestamp: "now".to_string(),
2876 summary: "ok".to_string(),
2877 gates_executed: 0,
2878 gates_skipped: 0,
2879 system_info: None,
2880 };
2881 let debug = format!("{report:?}");
2882 assert!(debug.contains("QaReport"));
2883 }
2884
    /// `run` must return an error (not panic) when the model path does not exist.
    #[test]
    fn test_run_file_not_found() {
        // NOTE(review): `run` takes a long positional argument list; the labels
        // below are inferred from the `QaConfig` field order and the sibling
        // `test_run_with_custom_thresholds` test — TODO confirm against the
        // actual `run` signature.
        let result = run(
            Path::new("/nonexistent/model.gguf"),
            None, // presumably min_tps override
            None, // presumably min_speedup override
            None, // presumably min_gpu_speedup override
            false,
            false,
            false,
            false,
            false,
            false,
            false, // the seven skip_* gate flags, all disabled
            None, // presumably safetensors_path
            10,   // presumably iterations
            3,    // presumably warmup
            32,   // presumably max_tokens
            false, // presumably json output
            false, // presumably verbose
            None,

            None,

            None,

            false,

            false,
        );
        assert!(result.is_err());
    }
2921
2922 #[test]
2923 fn test_run_invalid_model() {
2924 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2925 file.write_all(b"not a valid gguf file").expect("write");
2926
2927 let result = run(
2928 file.path(),
2929 None,
2930 None,
2931 None,
2932 false,
2933 false,
2934 false,
2935 false,
2936 false,
2937 false,
2938 false,
2939 None,
2940 10,
2941 3,
2942 32,
2943 false,
2944 false,
2945 None,
2946
2947 None,
2948
2949 None,
2950
2951 false,
2952
2953 false,
2954 );
2955 assert!(result.is_err());
2957 }
2958
2959 #[test]
2960 fn test_run_with_custom_thresholds() {
2961 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2962 file.write_all(b"not valid").expect("write");
2963
2964 let result = run(
2965 file.path(),
2966 Some(50.0), Some(1.5), Some(3.0), false,
2970 false,
2971 false,
2972 false,
2973 false,
2974 false,
2975 false,
2976 None,
2977 5,
2978 2,
2979 16,
2980 false,
2981 false,
2982 None,
2983
2984 None,
2985
2986 None,
2987
2988 false,
2989
2990 false,
2991 );
2992 assert!(result.is_err());
2994 }
2995
2996 #[test]
2997 fn test_run_with_all_skips() {
2998 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2999 file.write_all(b"not valid").expect("write");
3000
3001 let result = run(
3002 file.path(),
3003 None,
3004 None,
3005 None,
3006 true, true, true, true, true, true, true, None,
3014 10,
3015 3,
3016 32,
3017 false,
3018 false,
3019 None,
3020
3021 None,
3022
3023 None,
3024
3025 true, true, );
3029 assert!(result.is_ok());
3031 }
3032
3033 #[test]
3034 fn test_run_with_json_output() {
3035 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3036 file.write_all(b"not valid").expect("write");
3037
3038 let result = run(
3039 file.path(),
3040 None,
3041 None,
3042 None,
3043 false,
3044 false,
3045 false,
3046 false,
3047 false,
3048 false,
3049 false,
3050 None,
3051 10,
3052 3,
3053 32,
3054 true, false,
3056 None,
3057
3058 None,
3059
3060 None,
3061
3062 false,
3063
3064 false,
3065 );
3066 assert!(result.is_err());
3068 }
3069
3070 #[test]
3071 fn test_run_with_verbose() {
3072 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3073 file.write_all(b"not valid").expect("write");
3074
3075 let result = run(
3076 file.path(),
3077 None,
3078 None,
3079 None,
3080 false,
3081 false,
3082 false,
3083 false,
3084 false,
3085 false,
3086 false,
3087 None,
3088 10,
3089 3,
3090 32,
3091 false,
3092 true, None,
3094
3095 None,
3096
3097 None,
3098
3099 false,
3100
3101 false,
3102 );
3103 assert!(result.is_err());
3105 }
3106
3107 #[test]
3108 fn test_run_with_safetensors_path() {
3109 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3110 file.write_all(b"not valid").expect("write");
3111 let st_file = NamedTempFile::with_suffix(".safetensors").expect("create st file");
3112
3113 let result = run(
3114 file.path(),
3115 None,
3116 None,
3117 None,
3118 false,
3119 false,
3120 false,
3121 false,
3122 false,
3123 false,
3124 false,
3125 Some(st_file.path().to_path_buf()), 10,
3127 3,
3128 32,
3129 false,
3130 false,
3131 None,
3132
3133 None,
3134
3135 None,
3136
3137 false,
3138
3139 false,
3140 );
3141 assert!(result.is_err());
3143 }
3144
3145 #[test]
3146 fn test_run_with_small_iterations() {
3147 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3148 file.write_all(b"not valid").expect("write");
3149
3150 let result = run(
3151 file.path(),
3152 None,
3153 None,
3154 None,
3155 false,
3156 false,
3157 false,
3158 false,
3159 false,
3160 false,
3161 false,
3162 None,
3163 1, 0, 8, false,
3167 false,
3168 None,
3169
3170 None,
3171
3172 None,
3173
3174 false,
3175
3176 false,
3177 );
3178 assert!(result.is_err());
3180 }
3181
3182 #[cfg(feature = "inference")]
3189 mod format_dispatch_tests {
3190 use realizar::format::{detect_format, ModelFormat};
3191
3192 #[test]
3194 fn test_gguf_format_detection() {
3195 let gguf_magic = b"GGUF\x03\x00\x00\x00"; let format = detect_format(gguf_magic).expect("detect GGUF");
3198 assert_eq!(format, ModelFormat::Gguf, "GGUF magic must detect as GGUF");
3199 }
3200
3201 #[test]
3203 fn test_apr_v2_format_detection() {
3204 let apr_magic = b"APR\x00\x02\x00\x00\x00"; let format = detect_format(apr_magic).expect("detect APR");
3207 assert_eq!(format, ModelFormat::Apr, "APR magic must detect as APR");
3208 }
3209
3210 #[test]
3212 fn test_safetensors_format_detection() {
3213 let mut st_magic = Vec::new();
3215 st_magic.extend_from_slice(&100u64.to_le_bytes()); st_magic.extend_from_slice(b"{\""); let format = detect_format(&st_magic).expect("detect SafeTensors");
3218 assert_eq!(
3219 format,
3220 ModelFormat::SafeTensors,
3221 "SafeTensors magic must detect as SafeTensors"
3222 );
3223 }
3224
3225 #[test]
3228 fn test_apr_format_does_not_skip_detection() {
3229 let apr_magic = b"APR\x00\x02\x00\x00\x00"; let format = detect_format(apr_magic).expect("detect APR");
3232
3233 assert_eq!(
3235 format,
3236 ModelFormat::Apr,
3237 "APR format MUST be detected - cannot skip with 'GGUF only' error"
3238 );
3239 }
3240
3241 #[test]
3243 fn test_model_format_enum_completeness() {
3244 let formats = [
3246 ModelFormat::Gguf,
3247 ModelFormat::Apr,
3248 ModelFormat::SafeTensors,
3249 ];
3250 assert_eq!(
3251 formats.len(),
3252 3,
3253 "Must support exactly 3 formats: GGUF, APR, SafeTensors"
3254 );
3255 }
3256 }
3257
3258 #[test]
3264 fn test_gate_result_skipped_flag_semantics() {
3265 let skipped = GateResult::skipped("test", "reason");
3267 assert!(skipped.skipped, "Skipped gate must have skipped=true");
3268 assert!(skipped.passed, "Skipped gates count as passed (don't fail)");
3269
3270 let passed = GateResult::passed("test", "ok", None, None, Duration::from_secs(1));
3272 assert!(!passed.skipped, "Passed gate must have skipped=false");
3273 assert!(passed.passed, "Passed gate must have passed=true");
3274
3275 let failed = GateResult::failed("test", "fail", None, None, Duration::from_secs(1));
3277 assert!(!failed.skipped, "Failed gate must have skipped=false");
3278 assert!(!failed.passed, "Failed gate must have passed=false");
3279 }
3280
3281 #[test]
3283 fn test_skipped_gate_must_have_reason() {
3284 let result = GateResult::skipped("test_gate", "Explicit reason required");
3285 assert!(
3286 result.message.contains("Skipped"),
3287 "Skip message must contain 'Skipped'"
3288 );
3289 assert!(result.message.len() > 10, "Skip reason must be descriptive");
3290 }
3291
3292 #[test]
3300 fn gate_result_value_equals_threshold_is_pass() {
3301 let result = GateResult::passed(
3304 "throughput",
3305 "100.0 tok/s >= 100.0 tok/s threshold",
3306 Some(100.0),
3307 Some(100.0),
3308 Duration::from_secs(1),
3309 );
3310 assert!(result.passed);
3311 assert_eq!(result.value, Some(100.0));
3312 assert_eq!(result.threshold, Some(100.0));
3313 }
3314
3315 #[test]
3318 fn gate_result_value_just_below_threshold_is_fail() {
3319 let result = GateResult::failed(
3320 "throughput",
3321 "99.9 tok/s < 100.0 tok/s",
3322 Some(99.9),
3323 Some(100.0),
3324 Duration::from_secs(1),
3325 );
3326 assert!(!result.passed);
3327 assert!(!result.skipped);
3328 }
3329
3330 #[test]
3333 fn gate_result_zero_duration() {
3334 let result = GateResult::passed(
3335 "fast_gate",
3336 "Sub-millisecond completion",
3337 None,
3338 None,
3339 Duration::from_nanos(0),
3340 );
3341 assert_eq!(result.duration_ms, 0);
3342 assert!(result.passed);
3343 }
3344
3345 #[test]
3348 fn gate_result_large_duration_no_overflow() {
3349 let result = GateResult::passed(
3351 "slow_gate",
3352 "Long-running test",
3353 None,
3354 None,
3355 Duration::from_secs(1_000_000),
3356 );
3357 assert_eq!(result.duration_ms, 1_000_000_000);
3358 }
3359
3360 #[test]
3364 fn gate_result_skipped_has_no_metrics() {
3365 let result = GateResult::skipped("contract", "Model not found");
3366 assert!(result.value.is_none(), "Skipped gate must have no value");
3367 assert!(
3368 result.threshold.is_none(),
3369 "Skipped gate must have no threshold"
3370 );
3371 }
3372
3373 #[test]
3376 fn gate_result_failed_without_value() {
3377 let result = GateResult::failed(
3378 "golden_output",
3379 "Inference engine crashed",
3380 None,
3381 None,
3382 Duration::from_millis(50),
3383 );
3384 assert!(!result.passed);
3385 assert!(result.value.is_none());
3386 }
3387
3388 #[test]
3395 fn gate_result_json_roundtrip_with_values() {
3396 let original = GateResult::passed(
3397 "throughput",
3398 "150.0 tok/s >= 100.0 tok/s",
3399 Some(150.0),
3400 Some(100.0),
3401 Duration::from_millis(2500),
3402 );
3403 let json = serde_json::to_string(&original).expect("serialize");
3404 let restored: GateResult = serde_json::from_str(&json).expect("deserialize");
3405 assert_eq!(restored.name, "throughput");
3406 assert!(restored.passed);
3407 assert!(!restored.skipped);
3408 assert_eq!(restored.value, Some(150.0));
3409 assert_eq!(restored.threshold, Some(100.0));
3410 assert_eq!(restored.duration_ms, 2500);
3411 }
3412
3413 #[test]
3416 fn gate_result_json_roundtrip_skipped() {
3417 let original = GateResult::skipped("gpu_speedup", "No GPU");
3418 let json = serde_json::to_string(&original).expect("serialize");
3419 let restored: GateResult = serde_json::from_str(&json).expect("deserialize");
3420 assert!(restored.skipped, "skipped flag must survive round-trip");
3421 assert!(restored.passed, "skipped gates must still show passed=true");
3422 assert!(
3423 restored.value.is_none(),
3424 "value should be None after round-trip"
3425 );
3426 }
3427
3428 #[test]
3431 fn gate_result_json_omits_none_fields() {
3432 let result = GateResult::passed("test", "ok", None, None, Duration::from_secs(1));
3433 let json = serde_json::to_string(&result).expect("serialize");
3434 assert!(
3435 !json.contains("value"),
3436 "None value should be omitted from JSON, got: {json}"
3437 );
3438 assert!(
3439 !json.contains("threshold"),
3440 "None threshold should be omitted from JSON, got: {json}"
3441 );
3442 }
3443
3444 #[test]
3451 fn qa_report_all_skipped_gates_passes() {
3452 let report = QaReport {
3453 model: "test.gguf".to_string(),
3454 passed: true,
3455 gates: vec![
3456 GateResult::skipped("golden", "no model"),
3457 GateResult::skipped("throughput", "no engine"),
3458 GateResult::skipped("ollama", "not available"),
3459 ],
3460 total_duration_ms: 10,
3461 timestamp: "2026-02-06T00:00:00Z".to_string(),
3462 summary: "All skipped".to_string(),
3463 gates_executed: 0,
3464 gates_skipped: 0,
3465 system_info: None,
3466 };
3467 assert!(report.passed);
3468 assert!(
3469 report.gates.iter().all(|g| g.skipped),
3470 "All gates should be skipped"
3471 );
3472 assert!(
3473 report.gates.iter().all(|g| g.passed),
3474 "All skipped gates should count as passed"
3475 );
3476 }
3477
3478 #[test]
3481 fn qa_report_single_failure_taints_report() {
3482 let gates = [
3483 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
3484 GateResult::failed(
3485 "throughput",
3486 "too slow",
3487 Some(5.0),
3488 Some(100.0),
3489 Duration::from_secs(2),
3490 ),
3491 GateResult::passed(
3492 "contract",
3493 "ok",
3494 Some(100.0),
3495 Some(0.0),
3496 Duration::from_secs(1),
3497 ),
3498 ];
3499 let passed = gates.iter().all(|g| g.passed);
3500 assert!(!passed, "Single failure must taint the entire report");
3501 }
3502
3503 #[test]
3507 fn qa_report_mixed_pass_and_skip_passes() {
3508 let gates = [
3509 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
3510 GateResult::skipped("ollama", "not available"),
3511 GateResult::passed(
3512 "contract",
3513 "ok",
3514 Some(50.0),
3515 Some(0.0),
3516 Duration::from_secs(1),
3517 ),
3518 GateResult::skipped("gpu_speedup", "no GPU"),
3519 ];
3520 let passed = gates.iter().all(|g| g.passed);
3521 assert!(passed, "Mix of passed + skipped should be overall pass");
3522 }
3523
3524 #[test]
3527 fn qa_report_failed_gate_filter_excludes_skipped() {
3528 let gates = [
3529 GateResult::failed(
3530 "throughput",
3531 "too slow",
3532 Some(1.0),
3533 Some(100.0),
3534 Duration::from_secs(1),
3535 ),
3536 GateResult::skipped("ollama", "not running"),
3537 GateResult::passed("contract", "ok", None, None, Duration::from_secs(1)),
3538 ];
3539 let failed_gates: Vec<_> = gates.iter().filter(|g| !g.passed && !g.skipped).collect();
3540 assert_eq!(
3541 failed_gates.len(),
3542 1,
3543 "Only non-skipped failures should appear"
3544 );
3545 assert_eq!(failed_gates[0].name, "throughput");
3546 }
3547
3548 #[test]
3555 fn qa_report_json_roundtrip_complete() {
3556 let original = QaReport {
3557 model: "/path/to/model.gguf".to_string(),
3558 passed: false,
3559 gates: vec![
3560 GateResult::passed(
3561 "contract",
3562 "50 tensors ok",
3563 Some(50.0),
3564 Some(0.0),
3565 Duration::from_millis(100),
3566 ),
3567 GateResult::failed(
3568 "throughput",
3569 "5 < 100",
3570 Some(5.0),
3571 Some(100.0),
3572 Duration::from_millis(5000),
3573 ),
3574 GateResult::skipped("ollama", "not installed"),
3575 ],
3576 total_duration_ms: 5100,
3577 timestamp: "2026-02-06T12:00:00Z".to_string(),
3578 summary: "Failed gates: throughput".to_string(),
3579 gates_executed: 0,
3580 gates_skipped: 0,
3581 system_info: None,
3582 };
3583
3584 let json = serde_json::to_string_pretty(&original).expect("serialize");
3585 let restored: QaReport = serde_json::from_str(&json).expect("deserialize");
3586
3587 assert_eq!(restored.model, original.model);
3588 assert_eq!(restored.passed, original.passed);
3589 assert_eq!(restored.gates.len(), 3);
3590 assert_eq!(restored.total_duration_ms, original.total_duration_ms);
3591 assert_eq!(restored.summary, original.summary);
3592 assert!(restored.gates[0].passed);
3594 assert!(!restored.gates[1].passed);
3595 assert!(restored.gates[2].skipped);
3596 }
3597
3598 #[test]
3605 fn detect_ollama_model_standard_sizes() {
3606 let cases = vec![
3607 ("/tmp/qwen2-0.5b-instruct-q4_0.gguf", "0.5b"),
3608 ("/tmp/qwen2-1.5b-instruct-q4_0.gguf", "1.5b"),
3609 ("/tmp/qwen2-7b-instruct-q4_0.gguf", "7b"),
3610 ("/tmp/qwen2-14b-instruct-q4_0.gguf", "14b"),
3611 ("/tmp/qwen2-32b-instruct-q4_0.gguf", "32b"),
3612 ];
3613 for (path, expected_size) in cases {
3614 let model = detect_ollama_model_from_path(std::path::Path::new(path));
3615 let expected = format!("qwen2.5-coder:{expected_size}");
3616 assert_eq!(
3617 model, expected,
3618 "Path '{path}' should detect size '{expected_size}'"
3619 );
3620 }
3621 }
3622
3623 #[test]
3626 fn detect_ollama_model_underscore_size() {
3627 let model = detect_ollama_model_from_path(std::path::Path::new(
3628 "/cache/qwen2.5-coder-0_5b-instruct-q4_k_m.gguf",
3629 ));
3630 assert!(
3631 model.contains("0.5b"),
3632 "Underscore-separated size should be detected: {model}"
3633 );
3634 }
3635
3636 #[test]
3639 fn detect_ollama_model_3b_not_confused_with_32b() {
3640 let model_3b =
3641 detect_ollama_model_from_path(std::path::Path::new("/tmp/qwen2-3b-instruct.gguf"));
3642 assert!(
3643 model_3b.contains(":3b"),
3644 "Should detect 3b, got: {model_3b}"
3645 );
3646
3647 let model_32b =
3648 detect_ollama_model_from_path(std::path::Path::new("/tmp/qwen2-32b-instruct.gguf"));
3649 assert!(
3650 model_32b.contains(":32b"),
3651 "Should detect 32b, got: {model_32b}"
3652 );
3653 }
3654
3655 #[test]
3658 fn detect_ollama_model_hash_named_file() {
3659 let model = detect_ollama_model_from_path(std::path::Path::new(
3661 "/tmp/e910cab26ae116eb.converted.gguf",
3662 ));
3663 assert!(
3664 model.contains("qwen2.5-coder:"),
3665 "Should produce valid model tag: {model}"
3666 );
3667 }
3668
3669 #[test]
3676 fn qa_config_partial_override_preserves_defaults() {
3677 let config = QaConfig {
3678 min_tps: 500.0,
3679 skip_golden: true,
3680 iterations: 5,
3681 ..Default::default()
3682 };
3683 assert!((config.min_tps - 500.0).abs() < f64::EPSILON);
3685 assert!(config.skip_golden);
3686 assert_eq!(config.iterations, 5);
3687 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
3689 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
3690 assert!(!config.skip_throughput);
3691 assert!(!config.skip_ollama);
3692 assert_eq!(config.warmup, 3);
3693 assert_eq!(config.max_tokens, 32);
3694 assert!(!config.json);
3695 }
3696
3697 #[test]
3700 fn qa_config_skip_flags_are_independent() {
3701 let config = QaConfig {
3702 skip_golden: true,
3703 skip_contract: true,
3704 ..Default::default()
3705 };
3706 assert!(config.skip_golden);
3707 assert!(config.skip_contract);
3708 assert!(!config.skip_throughput);
3709 assert!(!config.skip_ollama);
3710 assert!(!config.skip_gpu_speedup);
3711 assert!(!config.skip_format_parity);
3712 }
3713
3714 #[test]
3722 fn all_gate_names_have_display_mapping() {
3723 let gate_names = [
3725 "tensor_contract",
3726 "golden_output",
3727 "throughput",
3728 "ollama_parity",
3729 "gpu_speedup",
3730 "format_parity",
3731 ];
3732 for name in &gate_names {
3733 let display = match *name {
3736 "tensor_contract" => "Tensor Contract",
3737 "golden_output" => "Golden Output",
3738 "throughput" => "Throughput",
3739 "ollama_parity" => "Ollama Parity",
3740 "gpu_speedup" => "GPU Speedup",
3741 "format_parity" => "Format Parity",
3742 _ => panic!("Unknown gate name without display mapping: {name}"),
3743 };
3744 assert!(
3745 !display.is_empty(),
3746 "Display name for '{name}' must not be empty"
3747 );
3748 }
3749 }
3750
3751 #[test]
3758 fn print_gate_result_unknown_name_uses_raw_name() {
3759 let result = GateResult::passed(
3762 "custom_user_gate",
3763 "User-defined gate passed",
3764 None,
3765 None,
3766 Duration::from_millis(42),
3767 );
3768 print_gate_result(&result);
3770 }
3771
3772 #[test]
3774 fn print_gate_result_skip_branch() {
3775 let result = GateResult::skipped("ollama_parity", "Ollama not available");
3776 print_gate_result(&result);
3778 }
3779
3780 #[test]
3782 fn print_gate_result_fail_branch() {
3783 let result = GateResult::failed(
3784 "throughput",
3785 "5.0 tok/s < 100.0 tok/s threshold",
3786 Some(5.0),
3787 Some(100.0),
3788 Duration::from_millis(3500),
3789 );
3790 print_gate_result(&result);
3792 }
3793
3794 #[test]
3796 fn print_gate_result_pass_branch() {
3797 let result = GateResult::passed(
3798 "tensor_contract",
3799 "50 tensors passed all PMAT-235 contract gates",
3800 Some(50.0),
3801 Some(0.0),
3802 Duration::from_millis(120),
3803 );
3804 print_gate_result(&result);
3805 }
3806
3807 #[test]
3810 fn print_gate_result_all_known_gate_names() {
3811 let known_names = [
3812 "tensor_contract",
3813 "golden_output",
3814 "throughput",
3815 "ollama_parity",
3816 "gpu_speedup",
3817 "format_parity",
3818 ];
3819 for name in &known_names {
3820 let result = GateResult::passed(name, "ok", None, None, Duration::from_millis(1));
3821 print_gate_result(&result);
3823 }
3824 }
3825
3826 #[test]
3833 fn detect_ollama_model_case_insensitive() {
3834 let model = detect_ollama_model_from_path(Path::new("/tmp/Qwen2-0.5B-Instruct.gguf"));
3835 assert_eq!(
3836 model, "qwen2.5-coder:0.5b",
3837 "Uppercase '0.5B' should match via to_lowercase"
3838 );
3839 }
3840
3841 #[test]
3843 fn detect_ollama_model_1_5b_underscore() {
3844 let model =
3845 detect_ollama_model_from_path(Path::new("/cache/model-1_5b-instruct-q4_k.gguf"));
3846 assert_eq!(model, "qwen2.5-coder:1.5b");
3847 }
3848
3849 #[test]
3852 fn detect_ollama_model_root_path_no_panic() {
3853 let model = detect_ollama_model_from_path(Path::new("/"));
3854 assert!(
3856 model.starts_with("qwen2.5-coder:"),
3857 "Root path should produce valid model tag: {model}"
3858 );
3859 }
3860
3861 #[test]
3863 fn detect_ollama_model_no_extension() {
3864 let model = detect_ollama_model_from_path(Path::new("/tmp/qwen2-7b-instruct"));
3865 assert_eq!(model, "qwen2.5-coder:7b");
3866 }
3867
3868 #[test]
3871 fn detect_ollama_model_priority_order() {
3872 let model = detect_ollama_model_from_path(Path::new("/tmp/model-0.5b-vs-7b.gguf"));
3874 assert_eq!(
3875 model, "qwen2.5-coder:0.5b",
3876 "0.5b branch should match before 7b"
3877 );
3878 }
3879
3880 #[test]
3882 fn detect_ollama_model_14b_specificity() {
3883 let model = detect_ollama_model_from_path(Path::new("/tmp/llama-14b-chat.gguf"));
3884 assert_eq!(model, "qwen2.5-coder:14b");
3885 }
3886
3887 #[test]
3889 fn detect_ollama_model_file_size_heuristic_tiny() {
3890 let file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3892 let model = detect_ollama_model_from_path(file.path());
3894 assert_eq!(
3895 model, "qwen2.5-coder:0.5b",
3896 "Empty temp file should map to 0.5b via file size heuristic"
3897 );
3898 }
3899
3900 #[test]
3906 fn qa_report_summary_all_passed_message() {
3907 let gates = vec![
3908 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
3909 GateResult::passed(
3910 "throughput",
3911 "150 tok/s",
3912 Some(150.0),
3913 Some(100.0),
3914 Duration::from_secs(2),
3915 ),
3916 ];
3917 let passed = gates.iter().all(|g| g.passed);
3918 let summary = if passed {
3919 "All QA gates passed".to_string()
3920 } else {
3921 let failed: Vec<_> = gates
3922 .iter()
3923 .filter(|g| !g.passed && !g.skipped)
3924 .map(|g| g.name.as_str())
3925 .collect();
3926 format!("Failed gates: {}", failed.join(", "))
3927 };
3928 assert_eq!(summary, "All QA gates passed");
3929 }
3930
3931 #[test]
3933 fn qa_report_summary_lists_failed_gate_names() {
3934 let gates = vec![
3935 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
3936 GateResult::failed(
3937 "throughput",
3938 "too slow",
3939 Some(5.0),
3940 Some(100.0),
3941 Duration::from_secs(2),
3942 ),
3943 GateResult::failed(
3944 "ollama_parity",
3945 "too slow vs ollama",
3946 Some(0.1),
3947 Some(0.2),
3948 Duration::from_secs(3),
3949 ),
3950 GateResult::skipped("gpu_speedup", "no GPU"),
3951 ];
3952 let passed = gates.iter().all(|g| g.passed);
3953 assert!(!passed);
3954 let failed_names: Vec<_> = gates
3955 .iter()
3956 .filter(|g| !g.passed && !g.skipped)
3957 .map(|g| g.name.as_str())
3958 .collect();
3959 let summary = format!("Failed gates: {}", failed_names.join(", "));
3960 assert_eq!(summary, "Failed gates: throughput, ollama_parity");
3961 }
3962
3963 #[test]
3965 fn qa_report_summary_skipped_only_is_passed() {
3966 let gates = vec![
3967 GateResult::skipped("golden_output", "no model"),
3968 GateResult::skipped("throughput", "no engine"),
3969 ];
3970 let passed = gates.iter().all(|g| g.passed);
3971 assert!(passed, "All-skipped should be passed");
3972 }
3973
3974 #[test]
3980 fn qa_config_with_safetensors_path() {
3981 let config = QaConfig {
3982 safetensors_path: Some(std::path::PathBuf::from("/models/qwen.safetensors")),
3983 ..Default::default()
3984 };
3985 assert_eq!(
3986 config.safetensors_path.as_deref(),
3987 Some(std::path::Path::new("/models/qwen.safetensors"))
3988 );
3989 }
3990
3991 #[test]
3994 fn qa_config_default_skip_contract_is_false() {
3995 let config = QaConfig::default();
3996 assert!(
3997 !config.skip_contract,
3998 "skip_contract must default to false to ensure tensor validation runs"
3999 );
4000 }
4001
4002 #[test]
4005 fn qa_config_all_skips_enabled() {
4006 let config = QaConfig {
4007 skip_golden: true,
4008 skip_throughput: true,
4009 skip_ollama: true,
4010 skip_gpu_speedup: true,
4011 skip_contract: true,
4012 skip_format_parity: true,
4013 ..Default::default()
4014 };
4015 assert!(config.skip_golden);
4016 assert!(config.skip_throughput);
4017 assert!(config.skip_ollama);
4018 assert!(config.skip_gpu_speedup);
4019 assert!(config.skip_contract);
4020 assert!(config.skip_format_parity);
4021 assert_eq!(config.iterations, 10);
4023 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
4024 }
4025
4026 #[test]
4029 fn qa_config_json_and_verbose_independent() {
4030 let config = QaConfig {
4031 json: true,
4032 verbose: true,
4033 ..Default::default()
4034 };
4035 assert!(config.json);
4036 assert!(config.verbose);
4037 }
4038
4039 #[test]
4041 fn qa_config_extreme_thresholds() {
4042 let config = QaConfig {
4043 min_tps: f64::MAX,
4044 min_speedup: 0.0,
4045 min_gpu_speedup: f64::MIN_POSITIVE,
4046 iterations: usize::MAX,
4047 warmup: 0,
4048 max_tokens: 1,
4049 ..Default::default()
4050 };
4051 assert_eq!(config.min_tps, f64::MAX);
4052 assert!((config.min_speedup).abs() < f64::EPSILON);
4053 assert_eq!(config.iterations, usize::MAX);
4054 assert_eq!(config.warmup, 0);
4055 assert_eq!(config.max_tokens, 1);
4056 }
4057
4058 #[test]
4065 fn gate_result_submillisecond_duration_truncates_to_zero() {
4066 let result = GateResult::passed(
4067 "fast",
4068 "blazing fast",
4069 None,
4070 None,
4071 Duration::from_micros(999),
4072 );
4073 assert_eq!(
4074 result.duration_ms, 0,
4075 "999 microseconds should truncate to 0ms"
4076 );
4077 }
4078
4079 #[test]
4081 fn gate_result_exact_one_millisecond() {
4082 let result = GateResult::passed("gate", "msg", None, None, Duration::from_millis(1));
4083 assert_eq!(result.duration_ms, 1);
4084 }
4085
4086 #[test]
4088 fn gate_result_nanos_to_millis_truncation() {
4089 let result = GateResult::failed("gate", "msg", None, None, Duration::from_nanos(1_500_000));
4090 assert_eq!(
4091 result.duration_ms, 1,
4092 "1.5ms in nanos should truncate to 1ms"
4093 );
4094 }
4095
4096 #[test]
4103 fn gate_result_skipped_message_format_contract() {
4104 let reasons = [
4105 "No GPU available",
4106 "Ollama not available (start with: ollama serve)",
4107 "Requires 'inference' feature",
4108 "Non-GGUF format (F32/F16 lacks fused kernels for Ollama parity)",
4109 "No --safetensors-path provided",
4110 "Skipped by --skip-golden",
4111 ];
4112 for reason in &reasons {
4113 let result = GateResult::skipped("test", reason);
4114 assert!(
4115 result.message.starts_with("Skipped: "),
4116 "Skipped message must start with 'Skipped: ', got: '{}'",
4117 result.message
4118 );
4119 assert!(
4120 result.message.ends_with(reason),
4121 "Skipped message must end with reason"
4122 );
4123 }
4124 }
4125
4126 #[test]
4128 fn gate_result_passed_preserves_value_and_threshold() {
4129 let result = GateResult::passed(
4130 "throughput",
4131 "150.0 tok/s >= 100.0 tok/s",
4132 Some(150.5),
4133 Some(100.0),
4134 Duration::from_secs(1),
4135 );
4136 assert_eq!(result.value, Some(150.5));
4137 assert_eq!(result.threshold, Some(100.0));
4138 }
4139
4140 #[test]
4142 fn gate_result_failed_preserves_value_and_threshold() {
4143 let result = GateResult::failed(
4144 "ollama_parity",
4145 "0.15x < 0.2x",
4146 Some(0.15),
4147 Some(0.2),
4148 Duration::from_secs(5),
4149 );
4150 assert_eq!(result.value, Some(0.15));
4151 assert_eq!(result.threshold, Some(0.2));
4152 assert!(!result.passed);
4153 }
4154
4155 #[test]
4162 fn gate_result_deserialize_explicit_null_values() {
4163 let json = r#"{
4164 "name": "throughput",
4165 "passed": true,
4166 "message": "ok",
4167 "value": null,
4168 "threshold": null,
4169 "duration_ms": 100,
4170 "skipped": false
4171 }"#;
4172 let result: GateResult = serde_json::from_str(json).expect("deserialize with nulls");
4173 assert!(result.value.is_none());
4174 assert!(result.threshold.is_none());
4175 }
4176
4177 #[test]
4179 fn gate_result_deserialize_missing_optional_fields() {
4180 let json = r#"{
4181 "name": "contract",
4182 "passed": false,
4183 "message": "validation error",
4184 "duration_ms": 50,
4185 "skipped": false
4186 }"#;
4187 let result: GateResult = serde_json::from_str(json).expect("deserialize missing optionals");
4188 assert_eq!(result.name, "contract");
4189 assert!(!result.passed);
4190 assert!(result.value.is_none());
4191 assert!(result.threshold.is_none());
4192 }
4193
4194 #[test]
4201 fn qa_report_empty_gates_is_valid() {
4202 let report = QaReport {
4203 model: "empty.gguf".to_string(),
4204 passed: true,
4205 gates: vec![],
4206 total_duration_ms: 0,
4207 timestamp: "2026-02-06T00:00:00Z".to_string(),
4208 summary: "No gates run".to_string(),
4209 gates_executed: 0,
4210 gates_skipped: 0,
4211 system_info: None,
4212 };
4213 assert!(report.passed);
4214 assert!(report.gates.is_empty());
4215 let json = serde_json::to_string(&report).expect("serialize empty report");
4216 let restored: QaReport = serde_json::from_str(&json).expect("deserialize empty report");
4217 assert!(restored.gates.is_empty());
4218 }
4219
4220 #[test]
4222 fn qa_report_many_gates_serialization() {
4223 let gates: Vec<GateResult> = (0..100)
4224 .map(|i| {
4225 GateResult::passed(
4226 &format!("gate_{i}"),
4227 &format!("Gate {i} passed"),
4228 Some(i as f64),
4229 Some(0.0),
4230 Duration::from_millis(i as u64),
4231 )
4232 })
4233 .collect();
4234 let report = QaReport {
4235 model: "stress.gguf".to_string(),
4236 passed: true,
4237 gates,
4238 total_duration_ms: 4950,
4239 timestamp: "2026-02-06T00:00:00Z".to_string(),
4240 summary: "All passed".to_string(),
4241 gates_executed: 0,
4242 gates_skipped: 0,
4243 system_info: None,
4244 };
4245 let json = serde_json::to_string(&report).expect("serialize many gates");
4246 let restored: QaReport = serde_json::from_str(&json).expect("deserialize many gates");
4247 assert_eq!(restored.gates.len(), 100);
4248 }
4249
4250 #[test]
4257 fn detect_ollama_model_output_format_contract() {
4258 let test_paths = [
4259 "/tmp/model-0.5b.gguf",
4260 "/tmp/model-1.5b.gguf",
4261 "/tmp/model-3b.gguf",
4262 "/tmp/model-7b.gguf",
4263 "/tmp/model-14b.gguf",
4264 "/tmp/model-32b.gguf",
4265 ];
4266 for path in &test_paths {
4267 let model = detect_ollama_model_from_path(Path::new(path));
4268 assert!(
4269 model.starts_with("qwen2.5-coder:"),
4270 "Model tag must start with 'qwen2.5-coder:', got: {model}"
4271 );
4272 let size = model.strip_prefix("qwen2.5-coder:").expect("strip prefix");
4273 assert!(
4274 ["0.5b", "1.5b", "3b", "7b", "14b", "32b"].contains(&size),
4275 "Size must be one of the known sizes, got: {size}"
4276 );
4277 }
4278 }
4279
4280 #[test]
4282 fn detect_ollama_model_directory_path() {
4283 let model = detect_ollama_model_from_path(Path::new("/tmp/models/"));
4284 assert!(
4286 model.starts_with("qwen2.5-coder:"),
4287 "Directory path should produce valid tag: {model}"
4288 );
4289 }
4290
4291 #[test]
4297 fn failed_gates_summary_multiple_failures() {
4298 let gates = vec![
4299 GateResult::failed("golden_output", "wrong", None, None, Duration::from_secs(1)),
4300 GateResult::failed(
4301 "throughput",
4302 "slow",
4303 Some(1.0),
4304 Some(100.0),
4305 Duration::from_secs(2),
4306 ),
4307 GateResult::failed(
4308 "tensor_contract",
4309 "violations",
4310 Some(5.0),
4311 Some(0.0),
4312 Duration::from_secs(1),
4313 ),
4314 GateResult::skipped("ollama_parity", "not available"),
4315 GateResult::passed(
4316 "gpu_speedup",
4317 "ok",
4318 Some(3.0),
4319 Some(2.0),
4320 Duration::from_secs(4),
4321 ),
4322 ];
4323 let failed_names: Vec<&str> = gates
4324 .iter()
4325 .filter(|g| !g.passed && !g.skipped)
4326 .map(|g| g.name.as_str())
4327 .collect();
4328 assert_eq!(failed_names.len(), 3);
4329 let summary = format!("Failed gates: {}", failed_names.join(", "));
4330 assert!(summary.contains("golden_output"));
4331 assert!(summary.contains("throughput"));
4332 assert!(summary.contains("tensor_contract"));
4333 assert!(
4334 !summary.contains("ollama_parity"),
4335 "Skipped gate should not appear in failures"
4336 );
4337 assert!(
4338 !summary.contains("gpu_speedup"),
4339 "Passed gate should not appear in failures"
4340 );
4341 }
4342
4343 #[test]
4345 fn failed_gates_summary_no_failures() {
4346 let gates = vec![
4347 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
4348 GateResult::skipped("ollama_parity", "not available"),
4349 ];
4350 let passed = gates.iter().all(|g| g.passed);
4351 assert!(passed);
4352 let summary = if passed {
4353 "All QA gates passed".to_string()
4354 } else {
4355 unreachable!()
4356 };
4357 assert_eq!(summary, "All QA gates passed");
4358 }
4359
4360 #[test]
4368 fn gate_result_nan_value_is_nan() {
4369 let result = GateResult::passed(
4370 "test",
4371 "NaN test",
4372 Some(f64::NAN),
4373 Some(100.0),
4374 Duration::from_secs(1),
4375 );
4376 assert!(
4377 result.value.expect("should have value").is_nan(),
4378 "NaN value must be preserved in GateResult"
4379 );
4380 assert!(
4381 !result.value.expect("should have value").is_finite(),
4382 "NaN is not finite"
4383 );
4384 }
4385
4386 #[test]
4389 fn gate_result_infinity_value_is_infinite() {
4390 let result = GateResult::failed(
4391 "test",
4392 "Inf test",
4393 Some(f64::INFINITY),
4394 Some(100.0),
4395 Duration::from_secs(1),
4396 );
4397 assert!(
4398 result.value.expect("should have value").is_infinite(),
4399 "Infinity must be preserved in GateResult"
4400 );
4401 }
4402
4403 #[test]
4405 fn gate_result_neg_infinity_threshold() {
4406 let result = GateResult::passed(
4407 "test",
4408 "neg inf threshold",
4409 Some(0.0),
4410 Some(f64::NEG_INFINITY),
4411 Duration::from_secs(1),
4412 );
4413 assert!(result
4414 .threshold
4415 .expect("should have threshold")
4416 .is_infinite());
4417 }
4418
4419 #[test]
4425 fn qa_config_clone_with_safetensors_path() {
4426 let config = QaConfig {
4427 safetensors_path: Some(std::path::PathBuf::from("/deep/clone/test.safetensors")),
4428 min_tps: 42.0,
4429 json: true,
4430 verbose: true,
4431 ..Default::default()
4432 };
4433 let cloned = config.clone();
4434 assert_eq!(cloned.safetensors_path, config.safetensors_path);
4435 assert!((cloned.min_tps - 42.0).abs() < f64::EPSILON);
4436 assert!(cloned.json);
4437 assert!(cloned.verbose);
4438 }
4439
4440 #[test]
4449 fn contract_failure_summary_single_failure() {
4450 let failures = vec!["embed_tokens.weight: density below threshold".to_string()];
4451 let summary = if failures.len() <= 3 {
4452 failures.join("; ")
4453 } else {
4454 format!(
4455 "{}; ... and {} more",
4456 failures[..3].join("; "),
4457 failures.len() - 3
4458 )
4459 };
4460 assert_eq!(summary, "embed_tokens.weight: density below threshold");
4461 assert!(!summary.contains("more"));
4462 }
4463
4464 #[test]
4466 fn contract_failure_summary_three_failures_no_truncation() {
4467 let failures = vec![
4468 "layer.0: NaN detected".to_string(),
4469 "layer.1: Inf detected".to_string(),
4470 "layer.2: zero density".to_string(),
4471 ];
4472 let summary = if failures.len() <= 3 {
4473 failures.join("; ")
4474 } else {
4475 format!(
4476 "{}; ... and {} more",
4477 failures[..3].join("; "),
4478 failures.len() - 3
4479 )
4480 };
4481 assert_eq!(
4482 summary,
4483 "layer.0: NaN detected; layer.1: Inf detected; layer.2: zero density"
4484 );
4485 assert!(!summary.contains("more"));
4486 }
4487
4488 #[test]
4490 fn contract_failure_summary_four_failures_truncates() {
4491 let failures = vec![
4492 "a: fail".to_string(),
4493 "b: fail".to_string(),
4494 "c: fail".to_string(),
4495 "d: fail".to_string(),
4496 ];
4497 let summary = if failures.len() <= 3 {
4498 failures.join("; ")
4499 } else {
4500 format!(
4501 "{}; ... and {} more",
4502 failures[..3].join("; "),
4503 failures.len() - 3
4504 )
4505 };
4506 assert!(summary.contains("a: fail; b: fail; c: fail"));
4507 assert!(summary.ends_with("; ... and 1 more"));
4508 }
4509
4510 #[test]
4512 fn contract_failure_summary_ten_failures_truncates() {
4513 let failures: Vec<String> = (0..10).map(|i| format!("tensor_{i}: violation")).collect();
4514 let summary = if failures.len() <= 3 {
4515 failures.join("; ")
4516 } else {
4517 format!(
4518 "{}; ... and {} more",
4519 failures[..3].join("; "),
4520 failures.len() - 3
4521 )
4522 };
4523 assert!(summary.contains("tensor_0: violation"));
4524 assert!(summary.contains("tensor_1: violation"));
4525 assert!(summary.contains("tensor_2: violation"));
4526 assert!(summary.ends_with("; ... and 7 more"));
4527 assert!(!summary.contains("tensor_3"));
4528 }
4529
4530 #[test]
4532 fn contract_failure_summary_zero_failures() {
4533 let failures: Vec<String> = vec![];
4534 let summary = if failures.len() <= 3 {
4535 failures.join("; ")
4536 } else {
4537 format!(
4538 "{}; ... and {} more",
4539 failures[..3].join("; "),
4540 failures.len() - 3
4541 )
4542 };
4543 assert!(summary.is_empty());
4544 }
4545
4546 #[test]
4552 fn detect_ollama_model_3b_standalone() {
4553 let model = detect_ollama_model_from_path(Path::new("/tmp/model3b.gguf"));
4554 assert_eq!(model, "qwen2.5-coder:3b");
4555 }
4556
4557 #[test]
4559 fn detect_ollama_model_dash_3b() {
4560 let model = detect_ollama_model_from_path(Path::new("/tmp/model-3b-chat.gguf"));
4561 assert_eq!(model, "qwen2.5-coder:3b");
4562 }
4563
4564 #[test]
4566 fn detect_ollama_model_dash_7b() {
4567 let model = detect_ollama_model_from_path(Path::new("/tmp/llama-7b-q4_k_m.gguf"));
4568 assert_eq!(model, "qwen2.5-coder:7b");
4569 }
4570
4571 #[test]
4573 fn detect_ollama_model_mixed_case_0_5b() {
4574 let model = detect_ollama_model_from_path(Path::new("/tmp/Qwen2.5-Coder-0.5B-Q4.gguf"));
4575 assert_eq!(model, "qwen2.5-coder:0.5b");
4576 }
4577
4578 #[test]
4580 fn detect_ollama_model_dash_32b() {
4581 let model = detect_ollama_model_from_path(Path::new("/tmp/qwen-32b-instruct.gguf"));
4582 assert_eq!(model, "qwen2.5-coder:32b");
4583 }
4584
4585 #[test]
4587 fn detect_ollama_model_dash_14b() {
4588 let model = detect_ollama_model_from_path(Path::new("/tmp/model-14b.gguf"));
4589 assert_eq!(model, "qwen2.5-coder:14b");
4590 }
4591
4592 #[test]
4594 fn detect_ollama_model_empty_string_path() {
4595 let model = detect_ollama_model_from_path(Path::new(""));
4596 assert!(
4598 model.starts_with("qwen2.5-coder:"),
4599 "Empty path should produce valid tag: {model}"
4600 );
4601 }
4602
4603 #[test]
4605 fn detect_ollama_model_1_5b_before_3b() {
4606 let model = detect_ollama_model_from_path(Path::new("/tmp/model-1.5b-3b.gguf"));
4607 assert_eq!(
4608 model, "qwen2.5-coder:1.5b",
4609 "1.5b should be matched before 3b in priority order"
4610 );
4611 }
4612
4613 #[test]
4615 fn detect_ollama_model_underscore_1_5b_variant() {
4616 let model = detect_ollama_model_from_path(Path::new("/cache/qwen2-1_5b-q4_k.gguf"));
4617 assert_eq!(model, "qwen2.5-coder:1.5b");
4618 }
4619
4620 #[test]
4622 fn detect_ollama_model_underscore_0_5b() {
4623 let model = detect_ollama_model_from_path(Path::new("/tmp/model-0_5b-instruct.gguf"));
4624 assert_eq!(model, "qwen2.5-coder:0.5b");
4625 }
4626
4627 #[test]
4633 fn gate_result_json_value_present_threshold_missing() {
4634 let json = r#"{
4635 "name": "contract",
4636 "passed": true,
4637 "message": "50 tensors ok",
4638 "value": 50.0,
4639 "duration_ms": 100,
4640 "skipped": false
4641 }"#;
4642 let result: GateResult = serde_json::from_str(json).expect("deserialize");
4643 assert_eq!(result.value, Some(50.0));
4644 assert!(result.threshold.is_none());
4645 }
4646
4647 #[test]
4649 fn gate_result_json_threshold_present_value_missing() {
4650 let json = r#"{
4651 "name": "throughput",
4652 "passed": false,
4653 "message": "too slow",
4654 "threshold": 100.0,
4655 "duration_ms": 5000,
4656 "skipped": false
4657 }"#;
4658 let result: GateResult = serde_json::from_str(json).expect("deserialize");
4659 assert!(result.value.is_none());
4660 assert_eq!(result.threshold, Some(100.0));
4661 }
4662
4663 #[test]
4665 fn gate_result_json_includes_value_when_some() {
4666 let result = GateResult::passed(
4667 "throughput",
4668 "150 tok/s",
4669 Some(150.0),
4670 None,
4671 Duration::from_secs(1),
4672 );
4673 let json = serde_json::to_string(&result).expect("serialize");
4674 assert!(
4675 json.contains("\"value\""),
4676 "value should be present: {json}"
4677 );
4678 assert!(
4679 !json.contains("\"threshold\""),
4680 "threshold should be omitted when None: {json}"
4681 );
4682 }
4683
4684 #[test]
4686 fn gate_result_json_includes_both_value_and_threshold() {
4687 let result = GateResult::failed(
4688 "ollama_parity",
4689 "0.1x < 0.2x",
4690 Some(0.1),
4691 Some(0.2),
4692 Duration::from_secs(10),
4693 );
4694 let json = serde_json::to_string(&result).expect("serialize");
4695 assert!(json.contains("\"value\""));
4696 assert!(json.contains("\"threshold\""));
4697 assert!(json.contains("0.1"));
4698 assert!(json.contains("0.2"));
4699 }
4700
4701 #[test]
4707 fn qa_report_json_pretty_print_format() {
4708 let report = QaReport {
4709 model: "test.gguf".to_string(),
4710 passed: true,
4711 gates: vec![GateResult::passed(
4712 "contract",
4713 "ok",
4714 Some(10.0),
4715 Some(0.0),
4716 Duration::from_millis(50),
4717 )],
4718 total_duration_ms: 50,
4719 timestamp: "2026-02-07T00:00:00Z".to_string(),
4720 summary: "All passed".to_string(),
4721 gates_executed: 0,
4722 gates_skipped: 0,
4723 system_info: None,
4724 };
4725 let json = serde_json::to_string_pretty(&report).expect("pretty serialize");
4726 assert!(json.contains('\n'), "Pretty JSON should contain newlines");
4727 assert!(
4728 json.contains(" "),
4729 "Pretty JSON should contain indentation"
4730 );
4731 assert!(json.contains("\"model\""));
4732 assert!(json.contains("\"gates\""));
4733 assert!(json.contains("\"summary\""));
4734 }
4735
4736 #[test]
4738 fn qa_report_json_to_string_pretty_never_panics() {
4739 let report = QaReport {
4740 model: String::new(),
4741 passed: false,
4742 gates: vec![
4743 GateResult::skipped("a", "skip"),
4744 GateResult::failed("b", "fail", Some(f64::NAN), None, Duration::from_secs(0)),
4745 ],
4746 total_duration_ms: 0,
4747 timestamp: String::new(),
4748 summary: String::new(),
4749 gates_executed: 0,
4750 gates_skipped: 0,
4751 system_info: None,
4752 };
4753 let json = serde_json::to_string_pretty(&report).unwrap_or_default();
4755 assert!(!json.is_empty());
4757 }
4758
4759 #[test]
4766 fn run_config_building_none_uses_defaults() {
4767 let min_tps: Option<f64> = None;
4768 let min_speedup: Option<f64> = None;
4769 let min_gpu_speedup: Option<f64> = None;
4770 let config = QaConfig {
4771 min_tps: min_tps.unwrap_or(100.0),
4772 min_speedup: min_speedup.unwrap_or(0.2),
4773 min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0),
4774 ..Default::default()
4775 };
4776 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
4777 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
4778 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
4779 }
4780
4781 #[test]
4783 fn run_config_building_some_overrides_defaults() {
4784 let min_tps: Option<f64> = Some(50.0);
4785 let min_speedup: Option<f64> = Some(1.5);
4786 let min_gpu_speedup: Option<f64> = Some(3.0);
4787 let config = QaConfig {
4788 min_tps: min_tps.unwrap_or(100.0),
4789 min_speedup: min_speedup.unwrap_or(0.2),
4790 min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0),
4791 ..Default::default()
4792 };
4793 assert!((config.min_tps - 50.0).abs() < f64::EPSILON);
4794 assert!((config.min_speedup - 1.5).abs() < f64::EPSILON);
4795 assert!((config.min_gpu_speedup - 3.0).abs() < f64::EPSILON);
4796 }
4797
4798 #[test]
4804 fn print_gate_result_zero_duration_formatting() {
4805 let result = GateResult::passed(
4806 "tensor_contract",
4807 "0 tensors",
4808 Some(0.0),
4809 Some(0.0),
4810 Duration::from_millis(0),
4811 );
4812 print_gate_result(&result);
4814 }
4815
4816 #[test]
4818 fn print_gate_result_large_duration_formatting() {
4819 let result = GateResult::passed(
4820 "throughput",
4821 "ok",
4822 Some(100.0),
4823 Some(50.0),
4824 Duration::from_secs(3600),
4825 );
4826 assert_eq!(result.duration_ms, 3_600_000);
4828 print_gate_result(&result);
4829 }
4830
4831 #[test]
4833 fn print_gate_result_subsecond_duration_formatting() {
4834 let result = GateResult::passed(
4835 "golden_output",
4836 "2 cases passed",
4837 Some(2.0),
4838 Some(2.0),
4839 Duration::from_millis(250),
4840 );
4841 assert_eq!(result.duration_ms, 250);
4842 print_gate_result(&result);
4844 }
4845
4846 #[test]
4852 fn qa_report_all_six_canonical_gates_roundtrip() {
4853 let report = QaReport {
4854 model: "/models/qwen2-0.5b-q4_k.gguf".to_string(),
4855 passed: false,
4856 gates: vec![
4857 GateResult::passed(
4858 "tensor_contract",
4859 "50 tensors ok",
4860 Some(50.0),
4861 Some(0.0),
4862 Duration::from_millis(100),
4863 ),
4864 GateResult::passed(
4865 "golden_output",
4866 "2 test cases passed",
4867 Some(2.0),
4868 Some(2.0),
4869 Duration::from_millis(5000),
4870 ),
4871 GateResult::failed(
4872 "throughput",
4873 "5 tok/s < 100 tok/s",
4874 Some(5.0),
4875 Some(100.0),
4876 Duration::from_millis(10000),
4877 ),
4878 GateResult::skipped("ollama_parity", "Ollama not available"),
4879 GateResult::skipped("gpu_speedup", "CUDA not available"),
4880 GateResult::skipped("format_parity", "No --safetensors-path provided"),
4881 ],
4882 total_duration_ms: 15100,
4883 timestamp: "2026-02-07T12:00:00Z".to_string(),
4884 summary: "Failed gates: throughput".to_string(),
4885 gates_executed: 0,
4886 gates_skipped: 0,
4887 system_info: None,
4888 };
4889 let json = serde_json::to_string_pretty(&report).expect("serialize");
4890 let restored: QaReport = serde_json::from_str(&json).expect("deserialize");
4891 assert_eq!(restored.gates.len(), 6);
4892 assert!(!restored.passed);
4893 assert!(restored.gates[0].passed && !restored.gates[0].skipped);
4895 assert!(restored.gates[1].passed && !restored.gates[1].skipped);
4896 assert!(!restored.gates[2].passed && !restored.gates[2].skipped);
4897 assert!(restored.gates[3].skipped);
4898 assert!(restored.gates[4].skipped);
4899 assert!(restored.gates[5].skipped);
4900 }
4901
4902 #[test]
4908 fn gate_result_passed_message_stored_verbatim() {
4909 let msg = "150.0 tok/s >= 100.0 tok/s threshold";
4910 let result = GateResult::passed(
4911 "throughput",
4912 msg,
4913 Some(150.0),
4914 Some(100.0),
4915 Duration::from_secs(1),
4916 );
4917 assert_eq!(result.message, msg);
4918 }
4919
4920 #[test]
4922 fn gate_result_failed_message_stored_verbatim() {
4923 let msg = "5.0 tok/s < 100.0 tok/s threshold";
4924 let result = GateResult::failed(
4925 "throughput",
4926 msg,
4927 Some(5.0),
4928 Some(100.0),
4929 Duration::from_secs(1),
4930 );
4931 assert_eq!(result.message, msg);
4932 }
4933
4934 #[test]
4936 fn gate_result_skipped_message_exact_format() {
4937 let result = GateResult::skipped("gpu_speedup", "CUDA not available");
4938 assert_eq!(result.message, "Skipped: CUDA not available");
4939 }
4940
4941 #[test]
4943 fn gate_result_skipped_empty_reason() {
4944 let result = GateResult::skipped("test", "");
4945 assert_eq!(result.message, "Skipped: ");
4946 assert!(result.skipped);
4947 }
4948
4949 #[test]
4951 fn gate_result_empty_name() {
4952 let result = GateResult::passed("", "ok", None, None, Duration::from_secs(0));
4953 assert_eq!(result.name, "");
4954 assert!(result.passed);
4955 }
4956
4957 #[test]
4963 fn gate_result_negative_value() {
4964 let result = GateResult::failed(
4965 "gpu_speedup",
4966 "-0.5x slower",
4967 Some(-0.5),
4968 Some(2.0),
4969 Duration::from_secs(1),
4970 );
4971 assert_eq!(result.value, Some(-0.5));
4972 assert!(!result.passed);
4973 }
4974
4975 #[test]
4977 fn gate_result_zero_value() {
4978 let result = GateResult::failed(
4979 "throughput",
4980 "0 tok/s",
4981 Some(0.0),
4982 Some(100.0),
4983 Duration::from_secs(1),
4984 );
4985 assert_eq!(result.value, Some(0.0));
4986 assert_eq!(result.threshold, Some(100.0));
4987 }
4988
4989 #[test]
4991 fn gate_result_epsilon_value() {
4992 let result = GateResult::passed(
4993 "throughput",
4994 "barely passing",
4995 Some(f64::MIN_POSITIVE),
4996 Some(0.0),
4997 Duration::from_secs(1),
4998 );
4999 assert_eq!(result.value, Some(f64::MIN_POSITIVE));
5000 assert!(result.passed);
5001 }
5002
5003 #[test]
5009 fn qa_report_unicode_model_path() {
5010 let report = QaReport {
5011 model: "/modelos/modelo_espa\u{00f1}ol.gguf".to_string(),
5012 passed: true,
5013 gates: vec![],
5014 total_duration_ms: 0,
5015 timestamp: "2026-02-07T00:00:00Z".to_string(),
5016 summary: "ok".to_string(),
5017 gates_executed: 0,
5018 gates_skipped: 0,
5019 system_info: None,
5020 };
5021 let json = serde_json::to_string(&report).expect("serialize unicode path");
5022 let restored: QaReport = serde_json::from_str(&json).expect("deserialize unicode path");
5023 assert!(restored.model.contains("espa\u{00f1}ol"));
5024 }
5025
5026 #[test]
5028 fn qa_report_long_model_path() {
5029 let long_path = format!("/very/{}/model.gguf", "deep/".repeat(100));
5030 let report = QaReport {
5031 model: long_path.clone(),
5032 passed: true,
5033 gates: vec![],
5034 total_duration_ms: 0,
5035 timestamp: "2026-02-07T00:00:00Z".to_string(),
5036 summary: "ok".to_string(),
5037 gates_executed: 0,
5038 gates_skipped: 0,
5039 system_info: None,
5040 };
5041 let json = serde_json::to_string(&report).expect("serialize long path");
5042 let restored: QaReport = serde_json::from_str(&json).expect("deserialize long path");
5043 assert_eq!(restored.model, long_path);
5044 }
5045
5046 #[test]
5048 fn qa_report_empty_model_path() {
5049 let report = QaReport {
5050 model: String::new(),
5051 passed: true,
5052 gates: vec![],
5053 total_duration_ms: 0,
5054 timestamp: "2026-02-07T00:00:00Z".to_string(),
5055 summary: "ok".to_string(),
5056 gates_executed: 0,
5057 gates_skipped: 0,
5058 system_info: None,
5059 };
5060 let json = serde_json::to_string(&report).expect("serialize empty model");
5061 let restored: QaReport = serde_json::from_str(&json).expect("deserialize empty model");
5062 assert!(restored.model.is_empty());
5063 }
5064
5065 #[test]
5071 fn qa_report_all_gates_failed() {
5072 let gates = vec![
5073 GateResult::failed(
5074 "tensor_contract",
5075 "violations",
5076 Some(5.0),
5077 Some(0.0),
5078 Duration::from_secs(1),
5079 ),
5080 GateResult::failed(
5081 "golden_output",
5082 "wrong output",
5083 None,
5084 None,
5085 Duration::from_secs(2),
5086 ),
5087 GateResult::failed(
5088 "throughput",
5089 "too slow",
5090 Some(1.0),
5091 Some(100.0),
5092 Duration::from_secs(3),
5093 ),
5094 ];
5095 let passed = gates.iter().all(|g| g.passed);
5096 assert!(!passed);
5097 let failed_names: Vec<&str> = gates
5098 .iter()
5099 .filter(|g| !g.passed && !g.skipped)
5100 .map(|g| g.name.as_str())
5101 .collect();
5102 assert_eq!(failed_names.len(), 3);
5103 let summary = format!("Failed gates: {}", failed_names.join(", "));
5104 assert_eq!(
5105 summary,
5106 "Failed gates: tensor_contract, golden_output, throughput"
5107 );
5108 }
5109
5110 #[test]
5112 fn qa_report_single_pass_rest_skipped() {
5113 let gates = vec![
5114 GateResult::passed(
5115 "tensor_contract",
5116 "ok",
5117 Some(10.0),
5118 Some(0.0),
5119 Duration::from_secs(1),
5120 ),
5121 GateResult::skipped("golden_output", "no engine"),
5122 GateResult::skipped("throughput", "no engine"),
5123 GateResult::skipped("ollama_parity", "not available"),
5124 GateResult::skipped("gpu_speedup", "no GPU"),
5125 GateResult::skipped("format_parity", "no path"),
5126 ];
5127 let passed = gates.iter().all(|g| g.passed);
5128 assert!(passed);
5129 }
5130
5131 #[test]
5137 fn print_gate_result_format_parity_display_name() {
5138 let result = GateResult::passed(
5139 "format_parity",
5140 "GGUF argmax=42 == SafeTensors argmax=42",
5141 Some(42.0),
5142 Some(42.0),
5143 Duration::from_millis(8000),
5144 );
5145 print_gate_result(&result);
5146 }
5147
5148 #[test]
5150 fn print_gate_result_gpu_speedup_failed() {
5151 let result = GateResult::failed(
5152 "gpu_speedup",
5153 "GPU 1.2x faster than CPU < 2.0x threshold",
5154 Some(1.2),
5155 Some(2.0),
5156 Duration::from_millis(15000),
5157 );
5158 print_gate_result(&result);
5159 }
5160
5161 #[test]
5167 fn qa_config_zero_iterations_and_warmup() {
5168 let config = QaConfig {
5169 iterations: 0,
5170 warmup: 0,
5171 max_tokens: 0,
5172 ..Default::default()
5173 };
5174 assert_eq!(config.iterations, 0);
5175 assert_eq!(config.warmup, 0);
5176 assert_eq!(config.max_tokens, 0);
5177 }
5178
5179 #[test]
5181 fn qa_config_large_max_tokens() {
5182 let config = QaConfig {
5183 max_tokens: 1_000_000,
5184 ..Default::default()
5185 };
5186 assert_eq!(config.max_tokens, 1_000_000);
5187 }
5188
5189 #[test]
5195 fn gate_result_serialize_large_value() {
5196 let result = GateResult::passed(
5197 "throughput",
5198 "very fast",
5199 Some(999_999.99),
5200 Some(100.0),
5201 Duration::from_secs(1),
5202 );
5203 let json = serde_json::to_string(&result).expect("serialize large value");
5204 assert!(json.contains("999999.99"));
5205 }
5206
5207 #[test]
5209 fn gate_result_serialize_tiny_value() {
5210 let result = GateResult::failed(
5211 "throughput",
5212 "basically zero",
5213 Some(0.000_001),
5214 Some(100.0),
5215 Duration::from_secs(1),
5216 );
5217 let json = serde_json::to_string(&result).expect("serialize tiny value");
5218 let restored: GateResult = serde_json::from_str(&json).expect("deserialize tiny value");
5220 assert!((restored.value.expect("has value") - 0.000_001).abs() < 1e-10);
5221 }
5222
5223 #[test]
5229 fn qa_report_deserialize_ignores_unknown_fields() {
5230 let json = r#"{
5231 "model": "test.gguf",
5232 "passed": true,
5233 "gates": [],
5234 "total_duration_ms": 100,
5235 "timestamp": "2026-02-07T00:00:00Z",
5236 "summary": "ok",
5237 "extra_field": "should be ignored",
5238 "another_extra": 42
5239 }"#;
5240 let report: QaReport = serde_json::from_str(json).expect("deserialize with extras");
5241 assert_eq!(report.model, "test.gguf");
5242 assert!(report.passed);
5243 }
5244
5245 #[test]
5247 fn gate_result_deserialize_ignores_unknown_fields() {
5248 let json = r#"{
5249 "name": "test",
5250 "passed": true,
5251 "message": "ok",
5252 "duration_ms": 100,
5253 "skipped": false,
5254 "future_field": "v2"
5255 }"#;
5256 let result: GateResult = serde_json::from_str(json).expect("deserialize with extras");
5257 assert_eq!(result.name, "test");
5258 assert!(result.passed);
5259 }
5260
5261 #[test]
5266 fn verify_output_rejects_empty() {
5267 let result = verify_output("", "test-001", &["4"]);
5268 assert!(matches!(result, OutputVerification::Fail { .. }));
5269 if let OutputVerification::Fail { reason } = result {
5270 assert!(reason.contains("Empty"), "Expected 'Empty', got: {reason}");
5271 }
5272 }
5273
5274 #[test]
5275 fn verify_output_rejects_whitespace_only() {
5276 let result = verify_output(" \n\t ", "test-002", &["4"]);
5277 assert!(matches!(result, OutputVerification::Fail { .. }));
5278 }
5279
5280 #[test]
5281 fn verify_output_rejects_garbage_fffd() {
5282 let result = verify_output("The answer is \u{FFFD}\u{FFFD}", "test-003", &["4"]);
5283 assert!(matches!(result, OutputVerification::Fail { .. }));
5284 if let OutputVerification::Fail { reason } = result {
5285 assert!(
5286 reason.contains("Garbage"),
5287 "Expected 'Garbage', got: {reason}"
5288 );
5289 }
5290 }
5291
5292 #[test]
5293 fn verify_output_rejects_garbage_unk() {
5294 let result = verify_output("Hello [UNK] world", "test-004", &["Hello"]);
5295 assert!(matches!(result, OutputVerification::Fail { .. }));
5296 if let OutputVerification::Fail { reason } = result {
5297 assert!(
5298 reason.contains("Garbage"),
5299 "Expected 'Garbage', got: {reason}"
5300 );
5301 }
5302 }
5303
5304 #[test]
5305 fn verify_output_rejects_null_bytes() {
5306 let result = verify_output("Hello\0World", "test-005", &["Hello"]);
5307 assert!(matches!(result, OutputVerification::Fail { .. }));
5308 if let OutputVerification::Fail { reason } = result {
5309 assert!(
5310 reason.contains("null"),
5311 "Expected 'null bytes', got: {reason}"
5312 );
5313 }
5314 }
5315
5316 #[test]
5317 fn verify_output_rejects_missing_expected() {
5318 let result = verify_output("The answer is five", "test-006", &["4"]);
5319 assert!(matches!(result, OutputVerification::Fail { .. }));
5320 if let OutputVerification::Fail { reason } = result {
5321 assert!(
5322 reason.contains("Expected"),
5323 "Expected mention of pattern, got: {reason}"
5324 );
5325 }
5326 }
5327
5328 #[test]
5329 fn verify_output_accepts_correct() {
5330 let result = verify_output("The answer is 4.", "test-007", &["4"]);
5331 assert!(matches!(result, OutputVerification::Pass));
5332 }
5333
5334 #[test]
5335 fn verify_output_accepts_any_expected_pattern() {
5336 let result = verify_output("Hi there!", "test-008", &["Hello", "Hi", "Hey"]);
5337 assert!(matches!(result, OutputVerification::Pass));
5338 }
5339
5340 #[test]
5341 fn verify_output_case_insensitive() {
5342 let result = verify_output("HELLO WORLD", "test-009", &["hello"]);
5343 assert!(matches!(result, OutputVerification::Pass));
5344 }
5345
5346 #[test]
5347 fn verify_output_garbage_check_before_answer_check() {
5348 let result = verify_output("4 [UNK] answer", "test-010", &["4"]);
5350 assert!(matches!(result, OutputVerification::Fail { .. }));
5351 if let OutputVerification::Fail { reason } = result {
5352 assert!(
5353 reason.contains("Garbage"),
5354 "Garbage check must happen BEFORE answer check, got: {reason}"
5355 );
5356 }
5357 }
5358
5359 #[test]
5360 fn verify_output_no_expected_patterns_passes() {
5361 let result = verify_output("Some valid output", "test-011", &[]);
5363 assert!(matches!(result, OutputVerification::Pass));
5364 }
5365
5366 #[cfg(feature = "inference")]
5371 #[test]
5372 fn ollama_parity_grade_boundaries() {
5373 assert_eq!(ollama_parity_grade(0.0), "F");
5375 assert_eq!(ollama_parity_grade(0.3), "F");
5376 assert_eq!(ollama_parity_grade(0.49), "F");
5377 assert_eq!(ollama_parity_grade(0.5), "D");
5379 assert_eq!(ollama_parity_grade(0.64), "D");
5380 assert_eq!(ollama_parity_grade(0.74), "D");
5381 assert_eq!(ollama_parity_grade(0.75), "C");
5383 assert_eq!(ollama_parity_grade(0.99), "C");
5384 assert_eq!(ollama_parity_grade(1.0), "B");
5386 assert_eq!(ollama_parity_grade(1.49), "B");
5387 assert_eq!(ollama_parity_grade(1.5), "A");
5389 assert_eq!(ollama_parity_grade(1.99), "A");
5390 assert_eq!(ollama_parity_grade(2.0), "A+");
5392 assert_eq!(ollama_parity_grade(3.5), "A+");
5393 }
5394}