1#![allow(clippy::cast_possible_truncation)]
6
7use crate::command::{CommandRunner, RealCommandRunner};
8use crate::conversion::{ConversionConfig, ConversionExecutor, resolve_model_path};
9use crate::diagnostics::FailFastReporter;
10use crate::error::Result;
11use crate::evidence::{Evidence, EvidenceCollector, Outcome, PerformanceMetrics};
12use crate::integrity;
13use crate::layout_contract::{DEFAULT_CONTRACT_PATH, load_contract_from, validate_model};
14use crate::playbook::{OllamaParityConfig, Playbook};
15use apr_qa_gen::{Backend, Format, HfParityOracle, Modality, ModelId, QaScenario, Tolerance};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::Instant;
19
/// Extract a wall-clock duration in milliseconds from CLI output.
///
/// Scans each line (case-insensitively) for the marker `"completed in "`,
/// then parses the number terminated by the next `'s'` as seconds and
/// converts it to milliseconds.
///
/// Returns `None` when no line carries the marker or the value fails to
/// parse (e.g. `"12ms"` yields `"12m"`, which is rejected).
fn parse_timing_ms(output: &str) -> Option<f64> {
    // Named marker replaces the former hard-coded `pos + 13` offset, which
    // silently depended on the marker's length.
    const MARKER: &str = "completed in ";
    for line in output.lines() {
        let lower = line.to_lowercase();
        if let Some(pos) = lower.find(MARKER) {
            let after = &lower[pos + MARKER.len()..];
            if let Some(s_pos) = after.find('s') {
                if let Ok(secs) = after[..s_pos].trim().parse::<f64>() {
                    return Some(secs * 1000.0);
                }
            }
        }
    }
    None
}
36
/// Extract the `"throughput_tps"` value (tokens/sec) from JSON-ish output.
///
/// Robustness fixes over the previous version:
/// - tolerates whitespace between the key's colon and the number
///   (`"throughput_tps": 42.5`), which previously parsed as `""` → `None`;
/// - accepts a value that runs to the end of the string (previously the
///   failed terminator search aborted via `?`).
///
/// Returns `None` when the key is absent or the value does not parse.
fn parse_throughput(output: &str) -> Option<f64> {
    const KEY: &str = "\"throughput_tps\":";
    let pos = output.find(KEY)?;
    let after = output[pos + KEY.len()..].trim_start();
    // Value ends at the first char that is neither a digit nor '.'; if no
    // such char exists, the value extends to the end of the string.
    let end = after
        .find(|c: char| !c.is_ascii_digit() && c != '.')
        .unwrap_or(after.len());
    after[..end].parse::<f64>().ok()
}
48
/// Policy deciding how a run reacts when a scenario is falsified.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FailurePolicy {
    /// Stop at the first failure of any severity.
    StopOnFirst,
    /// Stop only when a P0 gate fails (gate id contains `-P0-`). Default.
    #[default]
    StopOnP0,
    /// Never stop early: run everything and report all failures.
    CollectAll,
    /// Stop at the first failure and emit a detailed diagnostic report.
    FailFast,
}
65
66impl FailurePolicy {
67 #[must_use]
69 pub fn emit_diagnostic(&self) -> bool {
70 matches!(self, Self::FailFast)
71 }
72
73 #[must_use]
75 pub fn stops_on_any_failure(&self) -> bool {
76 matches!(self, Self::StopOnFirst | Self::FailFast)
77 }
78}
79
/// Knobs controlling which test suites an `Executor` runs and how it reacts
/// to failures.
#[derive(Debug, Clone)]
#[allow(clippy::struct_excessive_bools)]
pub struct ExecutionConfig {
    /// How the run reacts to falsified gates (stop, collect, diagnose).
    pub failure_policy: FailurePolicy,
    /// Per-scenario timeout in milliseconds (60_000 by default).
    pub default_timeout_ms: u64,
    /// Maximum number of parallel workers (4 by default).
    pub max_workers: usize,
    /// Print each scenario's command instead of executing it.
    pub dry_run: bool,
    /// Explicit model path; when `None`, the model is acquired via the
    /// G0 pull check from the playbook's HF repo.
    pub model_path: Option<String>,
    /// Restrict conversion work to CPU-only configurations.
    pub no_gpu: bool,
    /// Run the format-conversion test suite.
    pub run_conversion_tests: bool,
    /// Run differential tests (not exercised in this module's visible code).
    pub run_differential_tests: bool,
    /// Run CPU/GPU throughput and memory-profiling gates.
    pub run_profile_ci: bool,
    /// Run trace-payload tests (not exercised in this module's visible code).
    pub run_trace_payload: bool,
    /// Run the Golden Rule (convert → re-infer → diff) test.
    pub run_golden_rule_test: bool,
    /// Optional golden reference output path — presumably consumed by the
    /// differential suite; not used in this module's visible code.
    pub golden_reference_path: Option<String>,
    /// Path to the playbook lock file used by the integrity gate.
    pub lock_file_path: Option<String>,
    /// Verify playbook integrity against the lock file before running.
    pub check_integrity: bool,
    /// Warn about formats implicitly skipped via skip files.
    pub warn_implicit_skips: bool,
    /// Run HuggingFace logit-parity tests against a golden corpus.
    pub run_hf_parity: bool,
    /// Root directory of the HF parity golden corpus.
    pub hf_parity_corpus_path: Option<String>,
    /// Model-family subdirectory inside the parity corpus.
    pub hf_parity_model_family: Option<String>,
    /// Directory for reports and conversion artifacts (`"output"` default).
    pub output_dir: Option<String>,
    /// Run contract invariant tests.
    pub run_contract_tests: bool,
    /// Run Ollama parity and ecosystem gates.
    pub run_ollama_parity: bool,
}
130
131impl Default for ExecutionConfig {
132 fn default() -> Self {
133 Self {
134 failure_policy: FailurePolicy::default(),
135 default_timeout_ms: 60_000,
136 max_workers: 4,
137 dry_run: false,
138 model_path: None,
139 no_gpu: false,
140 run_conversion_tests: true, run_differential_tests: true, run_profile_ci: false, run_trace_payload: true, run_golden_rule_test: true, golden_reference_path: None,
146 lock_file_path: None,
147 check_integrity: false,
148 warn_implicit_skips: false,
149 run_hf_parity: false,
150 hf_parity_corpus_path: None,
151 hf_parity_model_family: None,
152 output_dir: Some("output".to_string()), run_contract_tests: true, run_ollama_parity: false, }
156 }
157}
158
/// Drives a playbook end-to-end: gateway checks, G0 model checks, generated
/// scenarios, and the optional test suites, accumulating evidence as it goes.
pub struct Executor {
    // Suite selection and failure-handling configuration.
    config: ExecutionConfig,
    // Accumulates `Evidence` for every gate outcome.
    collector: EvidenceCollector,
    // Abstraction over external commands (inference, conversion, ollama, …);
    // injectable via `with_runner` so a mock can stand in for subprocesses.
    command_runner: Arc<dyn CommandRunner>,
}
165
166impl std::fmt::Debug for Executor {
167 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168 f.debug_struct("Executor")
169 .field("config", &self.config)
170 .field("collector", &self.collector)
171 .field("command_runner", &"<dyn CommandRunner>")
172 .finish()
173 }
174}
175
176impl Executor {
177 #[must_use]
179 pub fn new() -> Self {
180 Self {
181 config: ExecutionConfig::default(),
182 collector: EvidenceCollector::new(),
183 command_runner: Arc::new(RealCommandRunner::new()),
184 }
185 }
186
187 #[must_use]
189 pub fn with_config(config: ExecutionConfig) -> Self {
190 Self {
191 config,
192 collector: EvidenceCollector::new(),
193 command_runner: Arc::new(RealCommandRunner::new()),
194 }
195 }
196
197 #[must_use]
199 pub fn with_runner(config: ExecutionConfig, runner: Arc<dyn CommandRunner>) -> Self {
200 Self {
201 config,
202 collector: EvidenceCollector::new(),
203 command_runner: runner,
204 }
205 }
206
    #[allow(clippy::too_many_lines)]
    /// Run the full playbook: lock-file integrity, implicit-skip warnings,
    /// gateway checks, G0 model gates (pull / format / validate / tensor /
    /// integrity / layout), the generated scenarios, then the optional
    /// suites (conversion, golden rule, contract, HF parity, perf, ollama).
    ///
    /// Gate failures are reported through the returned [`ExecutionResult`]
    /// (some G0 failures abort early with `gateway_failed` set); `Err` is
    /// reserved for infrastructure errors.
    pub fn execute(&mut self, playbook: &Playbook) -> Result<ExecutionResult> {
        let scenarios = playbook.generate_scenarios();
        let total = scenarios.len();
        let start = Instant::now();

        // Lock-file integrity gate: a verification failure fails the whole
        // run; a missing/unreadable lock file only warns.
        if self.config.check_integrity {
            if let Some(ref lock_path) = self.config.lock_file_path {
                match crate::playbook::load_lock_file(lock_path) {
                    Ok(lock_file) => {
                        if let Err(e) = crate::playbook::verify_playbook_integrity(
                            lock_path,
                            &lock_file,
                            &playbook.name,
                        ) {
                            return Ok(ExecutionResult {
                                playbook_name: playbook.name.clone(),
                                total_scenarios: total,
                                passed: 0,
                                failed: total,
                                skipped: 0,
                                duration_ms: start.elapsed().as_millis() as u64,
                                gateway_failed: Some(format!("Integrity check failed: {e}")),
                                evidence: self.collector.clone(),
                            });
                        }
                    }
                    Err(e) => {
                        eprintln!("[WARN] Could not load lock file '{lock_path}': {e}");
                    }
                }
            }
        }

        // Warn (without failing) about formats implicitly skipped via skip
        // files for this playbook.
        if self.config.warn_implicit_skips {
            let all_formats = vec![Format::Gguf, Format::SafeTensors, Format::Apr];
            let skip_files = crate::playbook::find_skip_files(Path::new("."), &playbook.name);
            let implicit =
                crate::playbook::detect_implicit_skips(playbook, &all_formats, &skip_files);
            for skip in &implicit {
                eprintln!("[WARN] Implicit skip detected: {skip}");
            }
        }

        // Gateway checks: any failure marks every scenario as failed.
        if let Err(e) = self.check_gateways(playbook) {
            return Ok(ExecutionResult {
                playbook_name: playbook.name.clone(),
                total_scenarios: total,
                passed: 0,
                failed: total,
                skipped: 0,
                duration_ms: start.elapsed().as_millis() as u64,
                gateway_failed: Some(e.to_string()),
                evidence: self.collector.clone(),
            });
        }

        // G0 pull: acquire the model from the playbook's HF repo when no
        // explicit model path was configured. A pull failure aborts the run.
        let (pull_passed, pull_failed) = if self.config.model_path.is_none() {
            let model_id = playbook.model_id();
            let (pp, pf, pulled_path) =
                self.run_g0_pull_check(&playbook.model.hf_repo, &model_id);

            if pf > 0 {
                return Ok(ExecutionResult {
                    playbook_name: playbook.name.clone(),
                    total_scenarios: total + pp + pf,
                    passed: pp,
                    failed: total + pf,
                    skipped: 0,
                    duration_ms: start.elapsed().as_millis() as u64,
                    gateway_failed: Some("G0-PULL-001: Model acquisition failed".to_string()),
                    evidence: self.collector.clone(),
                });
            }

            // Remember the pulled location for the remaining gates/suites.
            if let Some(ref path) = pulled_path {
                self.config.model_path = Some(path.clone());
            }
            (pp, pf)
        } else {
            (0, 0) };

        // G0 format: when the configured path is a bare .safetensors file or
        // a sharded index, materialize a proper workspace and point
        // `model_path` at it.
        let (format_passed, format_failed) =
            if let Some(ref model_path_str) = self.config.model_path.clone() {
                let path = Path::new(&model_path_str);
                let is_single_safetensors =
                    path.is_file() && path.extension().is_some_and(|e| e == "safetensors");
                let is_sharded_index = path.is_file()
                    && path
                        .file_name()
                        .is_some_and(|n| n.to_string_lossy().ends_with(".safetensors.index.json"));

                if is_single_safetensors || is_sharded_index {
                    let model_id = playbook.model_id();
                    let (workspace, fp, ff) =
                        self.prepare_model_workspace(path, &model_id, &playbook.model.formats);
                    self.config.model_path = Some(workspace);
                    (fp, ff)
                } else {
                    (0, 0)
                }
            } else {
                (0, 0)
            };

        // G0 validate: physics validation of the model; failure means the
        // model is corrupt and the run aborts below.
        let (validate_passed, validate_failed) =
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                let model_id = playbook.model_id();
                self.run_g0_validate_check(Path::new(&model_path), &model_id)
            });

        if validate_failed > 0 {
            return Ok(ExecutionResult {
                playbook_name: playbook.name.clone(),
                total_scenarios: total + pull_passed + validate_passed + validate_failed,
                passed: pull_passed + validate_passed,
                failed: total + validate_failed,
                skipped: 0,
                duration_ms: start.elapsed().as_millis() as u64,
                gateway_failed: Some(
                    "G0-VALIDATE-001: Model physics validation failed (corrupt model)".to_string(),
                ),
                evidence: self.collector.clone(),
            });
        }

        // G0 tensor-template: only runs when the playbook declares both a
        // model family and a size variant.
        let (tensor_passed, tensor_failed) =
            if let (Some(ref model_path_str), Some(ref family), Some(ref size_variant)) = (
                self.config.model_path.clone(),
                playbook.model.family.clone(),
                playbook.model.size_variant.clone(),
            ) {
                let model_id = playbook.model_id();
                self.run_g0_tensor_template_check(
                    Path::new(model_path_str),
                    &model_id,
                    family,
                    size_variant,
                    None, )
            } else {
                (0, 0) };

        // G0 integrity: config.json vs tensor metadata.
        let (integrity_passed, integrity_failed) =
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                let model_id = playbook.model_id();
                self.run_g0_integrity_check(Path::new(&model_path), &model_id)
            });

        // G0 layout: layout-contract validation.
        let (layout_passed, layout_failed) =
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                let model_id = playbook.model_id();
                self.run_g0_layout_check(Path::new(&model_path), &model_id)
            });

        let mut passed = 0;
        let mut failed = 0;
        let mut skipped = 0;

        // Main scenario loop. Dry-run prints commands instead of executing.
        for scenario in scenarios {
            if self.config.dry_run {
                let cmd = scenario.to_command("model.gguf");
                println!("[DRY RUN] {cmd}");
                skipped += 1;
                continue;
            }

            let evidence = self.execute_scenario(&scenario);
            if evidence.outcome == Outcome::Skipped {
                skipped += 1;
                self.collector.add(evidence);
                continue;
            }
            if evidence.outcome.is_pass() {
                passed += 1;
            } else {
                failed += 1;

                // Failure-policy handling. Arms that `break` must add the
                // evidence themselves since they skip the add at loop end.
                match self.config.failure_policy {
                    FailurePolicy::StopOnFirst => {
                        self.collector.add(evidence);
                        break;
                    }
                    FailurePolicy::FailFast => {
                        eprintln!("\n[FAIL-FAST] Gate {} FALSIFIED", evidence.gate_id);
                        eprintln!("[FAIL-FAST] Model: {}", evidence.scenario.model.hf_repo());
                        eprintln!("[FAIL-FAST] Format: {:?}", evidence.scenario.format);
                        eprintln!("[FAIL-FAST] Backend: {:?}", evidence.scenario.backend);
                        eprintln!("[FAIL-FAST] Outcome: {:?}", evidence.outcome);
                        eprintln!("[FAIL-FAST] Reason: {}", evidence.reason);

                        // Full diagnostic report requires a model path; fall
                        // back to dumping stderr/exit code when absent.
                        if let Some(ref model_path) = self.config.model_path {
                            let output_dir = self.config.output_dir.as_deref().unwrap_or("output");
                            let reporter = FailFastReporter::new(Path::new(output_dir));
                            if let Err(e) = reporter.generate_report(
                                &evidence,
                                Path::new(model_path),
                                Some(&playbook.name),
                            ) {
                                eprintln!("[FAIL-FAST] Warning: Failed to generate report: {e}");
                            }
                        } else {
                            if let Some(ref stderr) = evidence.stderr {
                                eprintln!("[FAIL-FAST] Stderr:\n{stderr}");
                            }
                            if let Some(exit_code) = evidence.exit_code {
                                eprintln!("[FAIL-FAST] Exit code: {exit_code}");
                            }
                            eprintln!("[FAIL-FAST] No model path - full report not generated\n");
                        }

                        self.collector.add(evidence);
                        break;
                    }
                    FailurePolicy::StopOnP0 => {
                        if evidence.gate_id.contains("-P0-") {
                            self.collector.add(evidence);
                            break;
                        }
                    }
                    FailurePolicy::CollectAll => {}
                }
            }
            self.collector.add(evidence);
        }

        // Optional suites below; each returns (passed, failed) and records
        // its own evidence on the collector.
        let mut conversion_passed = 0;
        let mut conversion_failed = 0;
        if self.config.run_conversion_tests {
            if let Some(model_path) = self.config.model_path.clone() {
                let model_id = playbook.model_id();
                let (cp, cf) = self.run_conversion_tests(Path::new(&model_path), &model_id);
                conversion_passed = cp;
                conversion_failed = cf;
            }
        }

        let mut golden_passed = 0;
        let mut golden_failed = 0;
        if self.config.run_golden_rule_test {
            if let Some(model_path) = self.config.model_path.clone() {
                let model_id = playbook.model_id();
                let (gp, gf) = self.run_golden_rule_test(Path::new(&model_path), &model_id);
                golden_passed = gp;
                golden_failed = gf;
            }
        }

        let (contract_passed, contract_failed) = if self.config.run_contract_tests {
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                let model_id = playbook.model_id();
                self.run_contract_invariants(Path::new(&model_path), &model_id, playbook)
            })
        } else {
            (0, 0)
        };

        let (hf_parity_passed, hf_parity_failed) = if self.config.run_hf_parity {
            let model_id = playbook.model_id();
            self.run_hf_parity_tests(&model_id)
        } else {
            (0, 0)
        };

        let (perf_passed, perf_failed) = if self.config.run_profile_ci {
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                let model_id = playbook.model_id();
                self.run_perf_gates(Path::new(&model_path), &model_id, playbook)
            })
        } else {
            (0, 0)
        };

        let (ollama_passed, ollama_failed) = if self.config.run_ollama_parity {
            self.config.model_path.clone().map_or((0, 0), |model_path| {
                self.run_ollama_parity_tests(Path::new(&model_path), playbook)
            })
        } else {
            (0, 0)
        };

        // Aggregate every suite's counts into the final result. The total
        // includes gate/suite checks on top of the generated scenarios.
        let total_passed = passed
            + conversion_passed
            + golden_passed
            + integrity_passed
            + hf_parity_passed
            + contract_passed
            + validate_passed
            + pull_passed
            + format_passed
            + tensor_passed
            + layout_passed
            + ollama_passed
            + perf_passed;
        let total_failed = failed
            + conversion_failed
            + golden_failed
            + integrity_failed
            + hf_parity_failed
            + contract_failed
            + validate_failed
            + pull_failed
            + format_failed
            + tensor_failed
            + layout_failed
            + ollama_failed
            + perf_failed;

        Ok(ExecutionResult {
            playbook_name: playbook.name.clone(),
            total_scenarios: total
                + conversion_passed
                + conversion_failed
                + golden_passed
                + golden_failed
                + integrity_passed
                + integrity_failed
                + hf_parity_passed
                + hf_parity_failed
                + contract_passed
                + contract_failed
                + validate_passed
                + validate_failed
                + pull_passed
                + pull_failed
                + format_passed
                + format_failed
                + tensor_passed
                + tensor_failed
                + layout_passed
                + layout_failed
                + ollama_passed
                + ollama_failed
                + perf_passed
                + perf_failed,
            passed: total_passed,
            failed: total_failed,
            skipped,
            duration_ms: start.elapsed().as_millis() as u64,
            gateway_failed: None,
            evidence: self.collector.clone(),
        })
    }
596
    /// Run the format-conversion test suite against `model_path`.
    ///
    /// Returns `(passed, failed)`; all evidence is recorded on the
    /// collector. Skipped (`(0, 0)`) when `model_path` is a single file.
    fn run_conversion_tests(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
        if model_path.is_file() {
            return (0, 0); }

        // Honor --no-gpu by restricting conversion targets to CPU.
        let config = if self.config.no_gpu {
            ConversionConfig::cpu_only()
        } else {
            ConversionConfig::default()
        };

        let executor = if let Some(ref output_dir) = self.config.output_dir {
            ConversionExecutor::new(config).with_output_dir(std::path::PathBuf::from(output_dir))
        } else {
            ConversionExecutor::new(config)
        };

        match executor.execute_all(model_path, model_id) {
            Ok(result) => {
                for ev in result.evidence {
                    self.collector.add(ev);
                }
                (result.passed, result.failed)
            }
            Err(e) => {
                // Infrastructure failure (not a model defect): surface it as
                // a single falsified gate.
                let ev = Evidence::falsified(
                    "F-CONV-INFRA-001",
                    apr_qa_gen::QaScenario::new(
                        model_id.clone(),
                        apr_qa_gen::Modality::Run,
                        apr_qa_gen::Backend::Cpu,
                        apr_qa_gen::Format::Gguf,
                        "Conversion infrastructure".to_string(),
                        0,
                    ),
                    format!("Conversion infrastructure failure: {e}"),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                (0, 1)
            }
        }
    }
645
    /// Golden Rule entry point: pick a usable model file, then run the
    /// convert → re-infer → diff check via `run_golden_rule_with_path`.
    ///
    /// Returns `(passed, failed)`; `(0, 0)` when the test is skipped.
    fn run_golden_rule_test(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
        // NOTE(review): this early return skips *all* regular files, which
        // makes the extension check below reachable only for paths that are
        // not files (e.g. directories named "*.gguf"). Confirm whether
        // single model files were meant to run the test directly.
        if model_path.is_file() {
            return (0, 0);
        }

        let has_model_extension = model_path
            .extension()
            .is_some_and(|e| ["gguf", "safetensors", "apr"].contains(&e.to_str().unwrap_or("")));
        if has_model_extension {
            return self.run_golden_rule_with_path(model_path, model_id);
        }

        // Resolve a concrete safetensors file from the model directory;
        // a resolution failure is itself a falsified gate.
        let resolved_path = match resolve_model_path(model_path, apr_qa_gen::Format::SafeTensors) {
            Ok(p) => p,
            Err(e) => {
                let ev = Evidence::falsified(
                    "F-GOLDEN-RULE-001",
                    Self::golden_scenario(model_id),
                    format!("Golden Rule: failed to resolve model path: {e}"),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                return (0, 1);
            }
        };

        self.run_golden_rule_with_path(&resolved_path, model_id)
    }
686
    /// Golden Rule core: infer on the original model, convert it to APR,
    /// re-run the same prompt on the converted model, and require the two
    /// extracted outputs to be identical.
    ///
    /// Returns `(1, 0)` on parity, `(0, 1)` on any failure (original
    /// inference, conversion, converted inference, or differing output).
    fn run_golden_rule_with_path(
        &mut self,
        model_path: &Path,
        model_id: &ModelId,
    ) -> (usize, usize) {
        let prompt = "What is 2+2?";
        let max_tokens = 10;

        // Step 1: baseline inference on the original model.
        let original_result =
            self.command_runner
                .run_inference(model_path, prompt, max_tokens, false, &[]);

        if !original_result.success {
            let ev = Evidence::falsified(
                "F-GOLDEN-RULE-001",
                Self::golden_scenario(model_id),
                format!(
                    "Golden Rule: original inference failed: {}",
                    original_result.stderr
                ),
                "N/A",
                0,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        // Step 2: convert to APR at a temp path keyed by model name.
        let apr_path =
            std::path::PathBuf::from(format!("/tmp/golden-rule-test-{}.apr", model_id.name));
        let convert_result = self.command_runner.convert_model(model_path, &apr_path);

        if !convert_result.success {
            let ev = Evidence::falsified(
                "F-GOLDEN-RULE-002",
                Self::golden_scenario(model_id),
                format!("Golden Rule: conversion failed: {}", convert_result.stderr),
                "N/A",
                0,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        // Step 3: same prompt against the converted model.
        let converted_result =
            self.command_runner
                .run_inference(&apr_path, prompt, max_tokens, false, &[]);

        if !converted_result.success {
            let ev = Evidence::falsified(
                "F-GOLDEN-RULE-003",
                Self::golden_scenario(model_id),
                format!(
                    "Golden Rule: converted inference failed: {}",
                    converted_result.stderr
                ),
                "N/A",
                0,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        // Step 4: compare the extracted output text for exact equality.
        let orig_text = Self::extract_output_text(&original_result.stdout);
        let conv_text = Self::extract_output_text(&converted_result.stdout);

        if orig_text == conv_text {
            let ev = Evidence::corroborated(
                "F-GOLDEN-RULE-001",
                Self::golden_scenario(model_id),
                &format!("Golden Rule PASS: identical output: {orig_text}"),
                0,
            );
            self.collector.add(ev);

            // Best-effort cleanup of the temporary APR artifact.
            let _ = std::fs::remove_file(&apr_path);
            (1, 0)
        } else {
            let ev = Evidence::falsified(
                "F-GOLDEN-RULE-001",
                Self::golden_scenario(model_id),
                format!(
                    "Golden Rule FAIL: output differs after conversion.\n\
                     Original: {orig_text}\n\
                     Converted: {conv_text}"
                ),
                &converted_result.stdout,
                0,
            );
            self.collector.add(ev);

            (0, 1)
        }
    }
788
789 fn extract_output_text(raw: &str) -> String {
791 let mut capture = false;
792 let mut lines = Vec::new();
793 for line in raw.lines() {
794 if line.starts_with("Output:") {
795 capture = true;
796 continue;
797 }
798 if capture {
799 if line.starts_with("Completed in") || line.is_empty() {
800 break;
801 }
802 lines.push(line.trim());
803 }
804 }
805 lines.join(" ").trim().to_string()
806 }
807
    /// Canonical scenario used for all Golden Rule evidence: APR format,
    /// CPU backend, `Run` modality.
    fn golden_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
        apr_qa_gen::QaScenario::new(
            model_id.clone(),
            apr_qa_gen::Modality::Run,
            apr_qa_gen::Backend::Cpu,
            apr_qa_gen::Format::Apr,
            "Golden Rule: convert → inference → diff".to_string(),
            0,
        )
    }
819
820 fn truncate_str(s: &str, max_len: usize) -> &str {
822 if s.len() <= max_len {
823 s
824 } else {
825 let mut end = max_len;
826 while end > 0 && !s.is_char_boundary(end) {
827 end -= 1;
828 }
829 &s[..end]
830 }
831 }
832
833 fn run_contract_invariants(
852 &mut self,
853 model_path: &Path,
854 model_id: &ModelId,
855 playbook: &Playbook,
856 ) -> (usize, usize) {
857 if model_path.is_file() {
859 return (0, 0);
860 }
861
862 let config = playbook.contract_tests.clone().unwrap_or_default();
863
864 let evidence = crate::contract::run_contract_tests(
865 &self.command_runner,
866 model_path,
867 model_id,
868 &config,
869 );
870
871 let mut passed = 0;
872 let mut failed = 0;
873 for ev in evidence {
874 if ev.outcome.is_pass() {
875 passed += 1;
876 } else {
877 failed += 1;
878 }
879 self.collector.add(ev);
880 }
881
882 (passed, failed)
883 }
884
    /// Ollama parity suite: pull the counterpart Ollama model, then run the
    /// prompt-level parity gates and the ecosystem (GGUF load / API) gates.
    ///
    /// Returns `(0, 0)` when the playbook has no enabled `ollama_parity`
    /// config, and `(0, 1)` when `ollama pull` itself fails.
    fn run_ollama_parity_tests(
        &mut self,
        model_path: &Path,
        playbook: &Playbook,
    ) -> (usize, usize) {
        let config = match &playbook.ollama_parity {
            Some(c) if c.enabled => c.clone(),
            _ => return (0, 0),
        };

        let model_id = playbook.model_id();
        let mut passed = 0;
        let mut failed = 0;

        // Default tag follows ollama's "<name>:latest" convention.
        let model_tag = config
            .model_tag
            .clone()
            .unwrap_or_else(|| format!("{}:latest", model_id.name));
        let pull_output = self.command_runner.pull_ollama_model(&model_tag);
        if !pull_output.success {
            let ev = Evidence::falsified(
                "F-OLLAMA-PULL-001",
                QaScenario::new(
                    model_id,
                    Modality::Run,
                    Backend::Cpu,
                    Format::SafeTensors,
                    format!("ollama pull {model_tag}"),
                    0,
                ),
                format!("Ollama pull failed: {}", pull_output.stderr),
                &pull_output.stdout,
                0,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        let (p, f) = self.run_ollama_prompt_gates(model_path, &model_id, &model_tag, &config);
        passed += p;
        failed += f;

        let (p, f) = self.run_ollama_ecosystem_gates(model_path, &model_id);
        passed += p;
        failed += f;

        (passed, failed)
    }
938
    /// Per-prompt parity gates against a pulled Ollama model.
    ///
    /// For each configured prompt: F-OLLAMA-001 checks that both APR and
    /// ollama produce output; when both runs report a "completed in" timing,
    /// F-OLLAMA-003 additionally requires the APR/ollama TTFT ratio to stay
    /// at or below 3.0x. Returns `(passed, failed)` across both gates.
    fn run_ollama_prompt_gates(
        &mut self,
        model_path: &Path,
        model_id: &ModelId,
        model_tag: &str,
        config: &OllamaParityConfig,
    ) -> (usize, usize) {
        let mut passed = 0;
        let mut failed = 0;

        for prompt in &config.prompts {
            let apr_output = self
                .command_runner
                .run_inference(model_path, prompt, 32, false, &[]);
            let ollama_output =
                self.command_runner
                    .run_ollama_inference(model_tag, prompt, config.temperature);

            let scenario = QaScenario::new(
                model_id.clone(),
                Modality::Run,
                Backend::Cpu,
                Format::SafeTensors,
                format!("ollama parity: {prompt}"),
                0,
            );

            // Either side failing to produce output falsifies the gate.
            if !apr_output.success || !ollama_output.success {
                let reason = if apr_output.success {
                    format!("Ollama inference failed: {}", ollama_output.stderr)
                } else {
                    format!("APR inference failed: {}", apr_output.stderr)
                };
                let ev =
                    Evidence::falsified("F-OLLAMA-001", scenario, &reason, &apr_output.stdout, 0);
                self.collector.add(ev);
                failed += 1;
                continue;
            }

            let ev = Evidence::corroborated(
                "F-OLLAMA-001",
                scenario.clone(),
                &format!("APR and ollama both produced output for prompt: {prompt}"),
                0,
            );
            self.collector.add(ev);
            passed += 1;

            // Timing gate runs only when both outputs expose a timing line.
            let apr_ttft = crate::executor::parse_timing_ms(&apr_output.stdout);
            let ollama_ttft = crate::executor::parse_timing_ms(&ollama_output.stdout);
            if let (Some(apr_ms), Some(ollama_ms)) = (apr_ttft, ollama_ttft) {
                // Clamp the denominator to 1ms to avoid division by ~zero.
                let ratio = apr_ms / ollama_ms.max(1.0);
                #[allow(clippy::cast_sign_loss)]
                let duration = apr_ms.round() as u64;
                if ratio <= 3.0 {
                    let ev = Evidence::corroborated(
                        "F-OLLAMA-003",
                        scenario.clone(),
                        &format!(
                            "TTFT ratio APR/Ollama: {ratio:.2} (APR={apr_ms:.0}ms, Ollama={ollama_ms:.0}ms)"
                        ),
                        duration,
                    );
                    self.collector.add(ev);
                    passed += 1;
                } else {
                    let ev = Evidence::falsified(
                        "F-OLLAMA-003",
                        scenario.clone(),
                        format!("TTFT ratio {ratio:.2} exceeds 3.0x threshold"),
                        &format!("APR={apr_ms:.0}ms, Ollama={ollama_ms:.0}ms"),
                        duration,
                    );
                    self.collector.add(ev);
                    failed += 1;
                }
            }
        }

        (passed, failed)
    }
1023
    /// Ecosystem gates: F-OLLAMA-005 checks that ollama can load our GGUF
    /// via `ollama create`; F-OLLAMA-004 checks that the local ollama API
    /// (`/api/tags`) is reachable. Returns `(passed, failed)`.
    fn run_ollama_ecosystem_gates(
        &mut self,
        model_path: &Path,
        model_id: &ModelId,
    ) -> (usize, usize) {
        let mut passed = 0;
        let mut failed = 0;

        // Gate 1: GGUF loadability via `ollama create`.
        let gguf_scenario = QaScenario::new(
            model_id.clone(),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "ollama GGUF loadability".to_string(),
            0,
        );
        let create_output = self
            .command_runner
            .create_ollama_model(&format!("apr-test-{}", model_id.name), model_path);
        if create_output.success {
            let ev = Evidence::corroborated(
                "F-OLLAMA-005",
                gguf_scenario,
                "Ollama successfully loaded our GGUF via `ollama create`",
                0,
            );
            self.collector.add(ev);
            passed += 1;
        } else {
            let ev = Evidence::falsified(
                "F-OLLAMA-005",
                gguf_scenario,
                format!("Ollama failed to load GGUF: {}", create_output.stderr),
                &create_output.stdout,
                0,
            );
            self.collector.add(ev);
            failed += 1;
        }

        // Gate 2: local ollama HTTP API reachability.
        let api_scenario = QaScenario::new(
            model_id.clone(),
            Modality::Serve,
            Backend::Cpu,
            Format::SafeTensors,
            "ollama API parity".to_string(),
            0,
        );
        let ollama_api = self
            .command_runner
            .http_get("http://localhost:11434/api/tags");
        if ollama_api.success {
            let ev = Evidence::corroborated(
                "F-OLLAMA-004",
                api_scenario,
                "Ollama API endpoint /api/tags is accessible",
                0,
            );
            self.collector.add(ev);
            passed += 1;
        } else {
            let ev = Evidence::falsified(
                "F-OLLAMA-004",
                api_scenario,
                format!("Ollama API not accessible: {}", ollama_api.stderr),
                &ollama_api.stdout,
                0,
            );
            self.collector.add(ev);
            failed += 1;
        }

        (passed, failed)
    }
1101
    /// Performance gates driven by the playbook's `profile_ci` config.
    ///
    /// F-PERF-003 requires GPU throughput >= CPU throughput when both
    /// backends are configured; F-PERF-005 requires memory profiling to
    /// succeed. Returns `(passed, failed)`; `(0, 0)` when `profile_ci` is
    /// absent or disabled.
    fn run_perf_gates(
        &mut self,
        model_path: &Path,
        model_id: &ModelId,
        playbook: &Playbook,
    ) -> (usize, usize) {
        let mut passed = 0;
        let mut failed = 0;

        let profile_config = match &playbook.profile_ci {
            Some(c) if c.enabled => c,
            _ => return (0, 0),
        };

        let has_cpu = profile_config
            .backends
            .iter()
            .any(|b| b.eq_ignore_ascii_case("cpu"));
        let includes_gpu = profile_config
            .backends
            .iter()
            .any(|b| b.eq_ignore_ascii_case("gpu"));

        if has_cpu && includes_gpu {
            let warmup = profile_config.warmup as u32;
            let measure = profile_config.measure as u32;
            // NOTE(review): both invocations below pass identical arguments —
            // nothing selects CPU vs GPU, so the F-PERF-003 ratio compares
            // two runs of the same configuration. Confirm how the backend is
            // meant to be selected (one of the `None` parameters?) and wire
            // it through.
            let cpu_output = self
                .command_runner
                .profile_ci(model_path, None, None, warmup, measure);
            let gpu_output = self
                .command_runner
                .profile_ci(model_path, None, None, warmup, measure);

            let cpu_tps = crate::executor::parse_throughput(&cpu_output.stdout);
            let gpu_tps = crate::executor::parse_throughput(&gpu_output.stdout);

            let scenario = QaScenario::new(
                model_id.clone(),
                Modality::Run,
                Backend::Gpu,
                Format::SafeTensors,
                "GPU vs CPU throughput ratio".to_string(),
                0,
            );

            // Gate only fires when both outputs expose a throughput figure.
            if let (Some(cpu), Some(gpu)) = (cpu_tps, gpu_tps) {
                // Clamp the denominator to avoid division by ~zero.
                let ratio = gpu / cpu.max(0.01);
                if ratio >= 1.0 {
                    let ev = Evidence::corroborated(
                        "F-PERF-003",
                        scenario,
                        &format!(
                            "GPU/CPU ratio: {ratio:.1}x (GPU={gpu:.1} tok/s, CPU={cpu:.1} tok/s)"
                        ),
                        0,
                    );
                    self.collector.add(ev);
                    passed += 1;
                } else {
                    let ev = Evidence::falsified(
                        "F-PERF-003",
                        scenario,
                        format!("GPU slower than CPU: ratio {ratio:.2}x"),
                        &format!("GPU={gpu:.1} tok/s, CPU={cpu:.1} tok/s"),
                        0,
                    );
                    self.collector.add(ev);
                    failed += 1;
                }
            }
        }

        // Memory profiling gate (F-PERF-005).
        let mem_output = self.command_runner.profile_memory(model_path);
        let mem_scenario = QaScenario::new(
            model_id.clone(),
            Modality::Run,
            Backend::Cpu,
            Format::SafeTensors,
            "memory profiling".to_string(),
            0,
        );

        if mem_output.success {
            let ev = Evidence::corroborated(
                "F-PERF-005",
                mem_scenario,
                &format!("Memory profile collected: {}", mem_output.stdout.trim()),
                0,
            );
            self.collector.add(ev);
            passed += 1;
        } else {
            let ev = Evidence::falsified(
                "F-PERF-005",
                mem_scenario,
                format!("Memory profiling failed: {}", mem_output.stderr),
                &mem_output.stdout,
                0,
            );
            self.collector.add(ev);
            failed += 1;
        }

        (passed, failed)
    }
1210
    #[allow(clippy::too_many_lines)]
    /// HuggingFace logit-parity suite against a golden corpus.
    ///
    /// Reads `<corpus>/<family>/manifest.json` for prompt hashes, loads each
    /// prompt's golden record, and compares logits under fp16 tolerance.
    /// Missing configuration or an empty manifest is recorded as a skip
    /// (corroborated `F-HF-PARITY-SKIP`) rather than a failure. Returns
    /// `(passed, failed)`.
    fn run_hf_parity_tests(&mut self, model_id: &ModelId) -> (usize, usize) {
        let (corpus_path, model_family) = if let (Some(cp), Some(mf)) = (
            &self.config.hf_parity_corpus_path,
            &self.config.hf_parity_model_family,
        ) {
            (cp.clone(), mf.clone())
        } else {
            // Not configured: record an explicit skip so the run is auditable.
            let ev = Evidence::corroborated(
                "F-HF-PARITY-SKIP",
                Self::hf_parity_scenario(model_id, "config"),
                "HF parity skipped: corpus_path or model_family not configured",
                0,
            );
            self.collector.add(ev);
            return (0, 0);
        };

        let manifest_path = Path::new(&corpus_path)
            .join(&model_family)
            .join("manifest.json");

        if !manifest_path.exists() {
            let ev = Evidence::falsified(
                "F-HF-PARITY-001",
                Self::hf_parity_scenario(model_id, "manifest"),
                format!("HF parity manifest not found: {}", manifest_path.display()),
                "N/A",
                0,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        let manifest_data = match std::fs::read_to_string(&manifest_path) {
            Ok(d) => d,
            Err(e) => {
                let ev = Evidence::falsified(
                    "F-HF-PARITY-002",
                    Self::hf_parity_scenario(model_id, "manifest"),
                    format!("Failed to read manifest: {e}"),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                return (0, 1);
            }
        };

        // Minimal local view of the manifest: just the prompt-hash list.
        #[allow(clippy::items_after_statements)]
        #[derive(serde::Deserialize)]
        struct Manifest {
            prompts: Vec<String>,
        }

        let manifest: Manifest = match serde_json::from_str(&manifest_data) {
            Ok(m) => m,
            Err(e) => {
                let ev = Evidence::falsified(
                    "F-HF-PARITY-003",
                    Self::hf_parity_scenario(model_id, "manifest"),
                    format!("Failed to parse manifest: {e}"),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                return (0, 1);
            }
        };

        if manifest.prompts.is_empty() {
            let ev = Evidence::corroborated(
                "F-HF-PARITY-SKIP",
                Self::hf_parity_scenario(model_id, "manifest"),
                "HF parity skipped: no prompts in manifest",
                0,
            );
            self.collector.add(ev);
            return (0, 0);
        }

        let oracle =
            HfParityOracle::new(&corpus_path, &model_family).with_tolerance(Tolerance::fp16());

        let mut passed = 0;
        let mut failed = 0;

        for prompt_hash in &manifest.prompts {
            let golden_path = Path::new(&corpus_path)
                .join(&model_family)
                .join(format!("{prompt_hash}.json"));

            // Recover the original prompt text from the golden record;
            // unreadable or malformed records are silently skipped here.
            let prompt = match std::fs::read_to_string(&golden_path) {
                Ok(data) => {
                    #[allow(clippy::items_after_statements)]
                    #[derive(serde::Deserialize)]
                    struct GoldenMeta {
                        prompt: String,
                    }
                    match serde_json::from_str::<GoldenMeta>(&data) {
                        Ok(meta) => meta.prompt,
                        Err(_) => continue, }
                }
                Err(_) => continue, };

            let golden = match oracle.load_golden(&prompt) {
                Ok(g) => g,
                Err(e) => {
                    let ev = Evidence::falsified(
                        "F-HF-PARITY-004",
                        Self::hf_parity_scenario(model_id, &prompt),
                        format!("Failed to load golden for prompt '{prompt}': {e}"),
                        "N/A",
                        0,
                    );
                    self.collector.add(ev);
                    failed += 1;
                    continue;
                }
            };

            // NOTE(review): this compares the golden logits against
            // themselves, so the tolerance check cannot detect a real
            // parity regression. Presumably the first argument should be
            // logits produced by our own inference — confirm and wire in
            // the actual model output.
            let result = oracle.tensors_close(&golden.logits, &golden.logits);

            match result {
                Ok(()) => {
                    let ev = Evidence::corroborated(
                        "F-HF-PARITY-001",
                        Self::hf_parity_scenario(model_id, &prompt),
                        &format!(
                            "HF parity PASS: {} elements within tolerance (atol={}, rtol={})",
                            golden.logits.len(),
                            oracle.tolerance().atol_fp32,
                            oracle.tolerance().rtol_fp32
                        ),
                        0,
                    );
                    self.collector.add(ev);
                    passed += 1;
                }
                Err(diff) => {
                    let ev = Evidence::falsified(
                        "F-HF-PARITY-001",
                        Self::hf_parity_scenario(model_id, &prompt),
                        format!("HF parity FAIL: {diff}"),
                        "N/A",
                        0,
                    );
                    self.collector.add(ev);
                    failed += 1;
                }
            }
        }

        (passed, failed)
    }
1382
1383 fn hf_parity_scenario(model_id: &ModelId, prompt: &str) -> QaScenario {
1385 QaScenario::new(
1386 model_id.clone(),
1387 Modality::Run,
1388 Backend::Cpu,
1389 Format::Apr,
1390 format!("HF Parity: {}", Self::truncate_str(prompt, 40)),
1391 0,
1392 )
1393 }
1394
    /// G0 gate: verify that `config.json` agrees with the safetensors
    /// tensor metadata.
    ///
    /// Accepts either a single `.safetensors` file or a directory that
    /// (directly, or via a `safetensors/` subdirectory) holds shard files.
    /// Returns `(passed, failed)` evidence counts; `(0, 0)` means the check
    /// was skipped because no safetensors data was found.
    fn run_g0_integrity_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
        // A single-file model is checked directly; otherwise locate the
        // directory containing the shards.
        let result =
            if model_path.is_file() && model_path.extension().is_some_and(|e| e == "safetensors") {
                integrity::check_safetensors_file_integrity(model_path)
            } else {
                let safetensors_dir = Self::find_safetensors_dir(model_path);
                let Some(st_dir) = safetensors_dir else {
                    // Nothing to validate -- skip rather than fail.
                    return (0, 0);
                };
                integrity::check_safetensors_integrity(&st_dir)
            };

        if result.passed {
            let ev = Evidence::corroborated(
                integrity::gate_ids::CONFIG,
                Self::integrity_scenario(model_id),
                "G0 PASS: config.json matches tensor metadata",
                0,
            );
            self.collector.add(ev);
            (1, 0)
        } else {
            // One falsified evidence entry per error, routed to the most
            // specific gate id the error message's keyword allows.
            let mut failed = 0;
            for error in &result.errors {
                let gate_id = if error.contains("LAYERS") {
                    integrity::gate_ids::LAYERS
                } else if error.contains("HIDDEN") {
                    integrity::gate_ids::HIDDEN
                } else if error.contains("VOCAB") {
                    integrity::gate_ids::VOCAB
                } else {
                    integrity::gate_ids::CONFIG
                };

                let ev = Evidence::falsified(
                    gate_id,
                    Self::integrity_scenario(model_id),
                    error,
                    &format!(
                        "Config: {:?}, Tensors: {:?}",
                        result.config_values, result.tensor_values
                    ),
                    0,
                );
                self.collector.add(ev);
                failed += 1;
            }
            (0, failed)
        }
    }
1465
1466 fn find_safetensors_dir(model_path: &Path) -> Option<std::path::PathBuf> {
1472 if model_path.is_file() {
1474 if model_path.extension().is_some_and(|e| e == "safetensors") {
1475 return model_path.parent().map(Path::to_path_buf);
1476 }
1477 return None;
1478 }
1479
1480 let st_subdir = model_path.join("safetensors");
1482 if st_subdir.exists() && Self::has_safetensors_files(&st_subdir) {
1483 return Some(st_subdir);
1484 }
1485
1486 if Self::has_safetensors_files(model_path) {
1488 return Some(model_path.to_path_buf());
1489 }
1490
1491 None
1493 }
1494
1495 fn has_safetensors_files(dir: &Path) -> bool {
1497 dir.read_dir()
1498 .map(|entries| {
1499 entries
1500 .flatten()
1501 .any(|e| e.path().extension().is_some_and(|ext| ext == "safetensors"))
1502 })
1503 .unwrap_or(false)
1504 }
1505
1506 fn integrity_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
1508 apr_qa_gen::QaScenario::new(
1509 model_id.clone(),
1510 apr_qa_gen::Modality::Run,
1511 apr_qa_gen::Backend::Cpu,
1512 apr_qa_gen::Format::SafeTensors,
1513 "G0 Integrity: config.json vs tensor metadata".to_string(),
1514 0,
1515 )
1516 }
1517
    /// G0 gate: validate tensor layouts against the layout contract.
    ///
    /// Returns `(passed, failed)` evidence counts. Skips (returns `(0, 0)`)
    /// when the contract file cannot be loaded.
    fn run_g0_layout_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
        let Ok(contract) = load_contract_from(DEFAULT_CONTRACT_PATH) else {
            // No contract available -- treat as not-applicable, not a failure.
            return (0, 0);
        };

        let start = Instant::now();
        let result = match validate_model(model_path, &contract) {
            Ok(r) => r,
            Err(e) => {
                // Validation itself errored out: count it as one failure.
                let ev = Evidence::falsified(
                    "G0-LAYOUT-001",
                    Self::layout_scenario(model_id),
                    &format!("Tensor layout validation error: {e}"),
                    "",
                    start.elapsed().as_millis() as u64,
                );
                self.collector.add(ev);
                return (0, 1);
            }
        };

        let duration = start.elapsed().as_millis() as u64;

        if result.passed {
            let ev = Evidence::corroborated(
                "G0-LAYOUT-001",
                Self::layout_scenario(model_id),
                &format!(
                    "G0 PASS: Tensor layouts conform to contract\n  Rules checked: {}\n  Rules passed: {}",
                    result.rules_checked, result.rules_passed
                ),
                duration,
            );
            self.collector.add(ev);
            (1, 0)
        } else {
            // One falsified evidence entry per failing per-tensor rule...
            let mut failed = 0;
            for tensor_result in &result.tensor_results {
                if !tensor_result.passed {
                    let details = Self::format_tensor_failure(tensor_result);
                    let ev = Evidence::falsified(
                        &tensor_result.rule_id,
                        Self::layout_scenario(model_id),
                        &details,
                        "",
                        duration,
                    );
                    self.collector.add(ev);
                    failed += 1;
                }
            }

            // ...plus one per critical failure reported by the validator.
            for critical in &result.critical_failures {
                let ev = Evidence::falsified(
                    "G0-LAYOUT-CRITICAL",
                    Self::layout_scenario(model_id),
                    critical,
                    "",
                    duration,
                );
                self.collector.add(ev);
                failed += 1;
            }

            // `result.passed` was false, so report at least one failure even
            // when no per-rule or critical detail was captured.
            (0, failed.max(1))
        }
    }
1606
1607 fn layout_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
1609 apr_qa_gen::QaScenario::new(
1610 model_id.clone(),
1611 apr_qa_gen::Modality::Run,
1612 apr_qa_gen::Backend::Cpu,
1613 apr_qa_gen::Format::SafeTensors,
1614 "G0 Layout: tensor shape contract validation".to_string(),
1615 0,
1616 )
1617 }
1618
1619 fn format_tensor_failure(
1621 tensor_result: &crate::layout_contract::TensorValidationResult,
1622 ) -> String {
1623 match (&tensor_result.expected, &tensor_result.actual) {
1624 (Some(expected), Some(actual)) => {
1625 format!(
1626 "{}: {}\n Expected: {}\n Actual: {}",
1627 tensor_result.rule_id, tensor_result.details, expected, actual
1628 )
1629 }
1630 _ => format!("{}: {}", tensor_result.rule_id, tensor_result.details),
1631 }
1632 }
1633
1634 fn validate_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
1636 apr_qa_gen::QaScenario::new(
1637 model_id.clone(),
1638 apr_qa_gen::Modality::Run,
1639 apr_qa_gen::Backend::Cpu,
1640 apr_qa_gen::Format::SafeTensors,
1641 "G0 Validate: NaN/Inf/all-zeros tensor check".to_string(),
1642 0,
1643 )
1644 }
1645
1646 fn pull_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
1648 apr_qa_gen::QaScenario::new(
1649 model_id.clone(),
1650 apr_qa_gen::Modality::Run,
1651 apr_qa_gen::Backend::Cpu,
1652 apr_qa_gen::Format::SafeTensors,
1653 "G0 Pull: acquire model via apr pull".to_string(),
1654 0,
1655 )
1656 }
1657
    /// G0 gate: acquire the model from `hf_repo` via `apr pull`.
    ///
    /// Returns `(passed, failed, pulled_path)` where `pulled_path` is the
    /// local path parsed from the pull output's "Path: ..." line, if any.
    fn run_g0_pull_check(
        &mut self,
        hf_repo: &str,
        model_id: &ModelId,
    ) -> (usize, usize, Option<String>) {
        let start = Instant::now();
        let output = self.command_runner.pull_model(hf_repo);
        let duration = start.elapsed().as_millis() as u64;

        if output.success {
            // The CLI prints the download location as "Path: <p>"; strip
            // ANSI color codes so the path is usable verbatim.
            let pulled_path = output.stdout.lines().find_map(|line| {
                line.trim()
                    .strip_prefix("Path: ")
                    .map(|p| Self::strip_ansi(p.trim()))
            });

            let ev = Evidence::corroborated(
                "G0-PULL-001",
                Self::pull_scenario(model_id),
                &format!("G0 PASS: model acquired via apr pull\n{}", output.stdout),
                duration,
            );
            self.collector.add(ev);
            (1, 0, pulled_path)
        } else {
            let reason = format!("G0 FAIL: apr pull failed for {hf_repo}: {}", output.stderr);
            let ev = Evidence::falsified(
                "G0-PULL-001",
                Self::pull_scenario(model_id),
                &reason,
                &output.stdout,
                duration,
            );
            self.collector.add(ev);
            (0, 1, None)
        }
    }
1706
    /// G0 gate: run strict physics validation (NaN/Inf/all-zeros) on every
    /// safetensors file belonging to the model.
    ///
    /// Returns `(passed, failed)` counts; `(0, 0)` when no safetensors
    /// files were found (check skipped).
    fn run_g0_validate_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
        let files = Self::find_safetensors_files(model_path);
        if files.is_empty() {
            return (0, 0);
        }

        let mut passed = 0;
        let mut failed = 0;

        // Each shard is validated (and recorded) independently.
        for file in &files {
            let start = Instant::now();
            let output = self.command_runner.validate_model_strict(file);
            let duration = start.elapsed().as_millis() as u64;
            let file_name = file
                .file_name()
                .map_or("unknown", |f| f.to_str().unwrap_or("unknown"));

            if output.success {
                let ev = Evidence::corroborated(
                    "G0-VALIDATE-001",
                    Self::validate_scenario(model_id),
                    &format!("G0 PASS: {file_name} physics validated\n{}", output.stdout),
                    duration,
                );
                self.collector.add(ev);
                passed += 1;
            } else {
                // An empty stdout suggests the tool itself failed to run;
                // otherwise stdout carries the corruption report.
                let reason = if output.stdout.is_empty() {
                    format!(
                        "G0 FAIL: {file_name} physics validation failed: {}",
                        output.stderr
                    )
                } else {
                    format!(
                        "G0 FAIL: {file_name} corrupt (NaN/Inf/all-zeros)\n{}",
                        output.stdout
                    )
                };
                let ev = Evidence::falsified(
                    "G0-VALIDATE-001",
                    Self::validate_scenario(model_id),
                    &reason,
                    &output.stdout,
                    duration,
                );
                self.collector.add(ev);
                failed += 1;
            }
        }

        (passed, failed)
    }
1773
1774 fn find_safetensors_files(model_path: &Path) -> Vec<std::path::PathBuf> {
1781 if model_path.is_file() {
1782 return if model_path.extension().is_some_and(|e| e == "safetensors") {
1783 vec![model_path.to_path_buf()]
1784 } else {
1785 Vec::new()
1786 };
1787 }
1788
1789 let Some(st_dir) = Self::find_safetensors_dir(model_path) else {
1791 return Vec::new();
1792 };
1793
1794 let Ok(entries) = st_dir.read_dir() else {
1796 return Vec::new();
1797 };
1798
1799 let mut files: Vec<_> = entries
1800 .flatten()
1801 .filter(|e| e.path().extension().is_some_and(|ext| ext == "safetensors"))
1802 .map(|e| e.path())
1803 .collect();
1804 files.sort();
1805 files
1806 }
1807
    /// G0 gate: check that the model contains every tensor required by the
    /// family/size template from the family registry.
    ///
    /// Returns `(passed, failed)`. Emits a corroborated "SKIP" entry and
    /// returns `(0, 0)` whenever a precondition is missing (no family
    /// contract, no template for the size, no safetensors files, or an
    /// inspect result without tensor names).
    #[allow(clippy::too_many_arguments, clippy::too_many_lines)]
    fn run_g0_tensor_template_check(
        &mut self,
        model_path: &Path,
        model_id: &ModelId,
        family: &str,
        size_variant: &str,
        aprender_path: Option<&str>,
    ) -> (usize, usize) {
        let start = Instant::now();

        let registry_path = aprender_path.unwrap_or(crate::family_contract::DEFAULT_APRENDER_PATH);
        let mut registry = crate::family_contract::FamilyRegistry::with_path(registry_path);

        // Precondition 1: a family contract must exist.
        let contract = match registry.load_family(family) {
            Ok(c) => c.clone(),
            Err(e) => {
                let duration = start.elapsed().as_millis() as u64;
                let ev = Evidence::corroborated(
                    "G0-TENSOR-001",
                    Self::validate_scenario(model_id),
                    &format!("G0 SKIP: Family contract not found for '{family}': {e}"),
                    duration,
                );
                self.collector.add(ev);
                return (0, 0);
            }
        };

        // Precondition 2: the contract must define tensors for this size.
        let expected_tensors = contract.required_tensors_for_size(size_variant);
        if expected_tensors.is_empty() {
            let duration = start.elapsed().as_millis() as u64;
            let ev = Evidence::corroborated(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                &format!("G0 SKIP: No tensor template for {family}/{size_variant}"),
                duration,
            );
            self.collector.add(ev);
            return (0, 0);
        }

        // Precondition 3: there must be safetensors files to inspect.
        let files = Self::find_safetensors_files(model_path);
        if files.is_empty() {
            let duration = start.elapsed().as_millis() as u64;
            let ev = Evidence::corroborated(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                "G0 SKIP: No safetensors files found for tensor template validation",
                duration,
            );
            self.collector.add(ev);
            return (0, 0);
        }

        // Only the first shard is inspected for tensor names.
        let inspect_output = self.command_runner.inspect_model_json(&files[0]);
        let duration = start.elapsed().as_millis() as u64;

        if !inspect_output.success {
            let ev = Evidence::falsified(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                &format!(
                    "G0 FAIL: Could not inspect model: {}",
                    inspect_output.stderr
                ),
                &inspect_output.stdout,
                duration,
            );
            self.collector.add(ev);
            return (0, 1);
        }

        // Extract the "tensor_names" array from the inspect JSON; any parse
        // failure collapses to an empty list and is treated as a skip below.
        let actual_tensors: Vec<String> =
            serde_json::from_str::<serde_json::Value>(&inspect_output.stdout)
                .ok()
                .and_then(|v| v.get("tensor_names").cloned())
                .and_then(|v| serde_json::from_value(v).ok())
                .unwrap_or_default();

        if actual_tensors.is_empty() {
            let ev = Evidence::corroborated(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                "G0 SKIP: Model inspect did not return tensor names",
                duration,
            );
            self.collector.add(ev);
            return (0, 0);
        }

        // Template tensors that the model does not expose.
        let missing: Vec<_> = expected_tensors
            .iter()
            .filter(|t| !actual_tensors.contains(t))
            .collect();

        if missing.is_empty() {
            let ev = Evidence::corroborated(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                &format!(
                    "G0 PASS: All {} expected tensors from {}/{} template present",
                    expected_tensors.len(),
                    family,
                    size_variant
                ),
                duration,
            );
            self.collector.add(ev);
            (1, 0)
        } else {
            // Cap the listed names at 5 to keep the evidence readable.
            let missing_list = missing
                .iter()
                .take(5)
                .map(|s| s.as_str())
                .collect::<Vec<_>>()
                .join(", ");
            let more = if missing.len() > 5 {
                format!(" ... and {} more", missing.len() - 5)
            } else {
                String::new()
            };
            let ev = Evidence::falsified(
                "G0-TENSOR-001",
                Self::validate_scenario(model_id),
                &format!(
                    "G0 FAIL: Missing {} tensors from {}/{} template: {}{}",
                    missing.len(),
                    family,
                    size_variant,
                    missing_list,
                    more
                ),
                &inspect_output.stdout,
                duration,
            );
            self.collector.add(ev);
            (0, 1)
        }
    }
1974
    /// Run a single QA scenario end-to-end and convert the outcome to
    /// `Evidence`.
    ///
    /// Exit-code interpretation: negative => crash (killed by signal),
    /// positive => command failure, zero => the scenario's oracle decides
    /// from the generated output.
    fn execute_scenario(&self, scenario: &QaScenario) -> Evidence {
        let start = Instant::now();

        let (output, stderr, exit_code, tps, skipped) = self.subprocess_execution(scenario);

        // No model file for the requested format: record a skip, not a fail.
        if skipped {
            let gate_id = format!("F-{}-001", scenario.mqs_category());
            return Evidence::skipped(
                &gate_id,
                scenario.clone(),
                format!("Format {:?} not available for model file", scenario.format),
            );
        }

        let duration = start.elapsed().as_millis() as u64;

        // Negative exit code: the process did not terminate normally.
        if exit_code < 0 {
            return Evidence::crashed(
                "G3-STABLE",
                scenario.clone(),
                stderr.as_deref().unwrap_or("Process crashed"),
                exit_code,
                duration,
            );
        }

        // Positive exit code: the command itself failed.
        if exit_code > 0 {
            let error_msg = stderr
                .as_deref()
                .unwrap_or("Command failed with non-zero exit code");
            let mut evidence = Evidence::falsified(
                "G2-BASIC",
                scenario.clone(),
                format!("Command failed (exit {exit_code}): {error_msg}"),
                &output,
                duration,
            );
            evidence.exit_code = Some(exit_code);
            evidence.stderr = stderr;
            return evidence;
        }

        // Exit 0: let the scenario's oracle judge the generated output.
        let oracle_result = scenario.evaluate(&output);

        let gate_id = format!("F-{}-001", scenario.mqs_category());

        match oracle_result {
            apr_qa_gen::OracleResult::Corroborated { evidence: _reason } => {
                let mut evidence =
                    Evidence::corroborated(&gate_id, scenario.clone(), &output, duration);
                // total_tokens matches the max_tokens=32 requested in
                // `subprocess_execution`.
                evidence.metrics = PerformanceMetrics {
                    duration_ms: duration,
                    tokens_per_second: tps,
                    total_tokens: Some(32),
                    time_to_first_token_ms: None,
                    memory_peak_mb: None,
                };
                if let Some(ref err) = stderr {
                    evidence.stderr = Some(err.clone());
                }
                evidence
            }
            apr_qa_gen::OracleResult::Falsified {
                reason,
                evidence: _,
            } => {
                let mut evidence =
                    Evidence::falsified(&gate_id, scenario.clone(), reason, &output, duration);
                if let Some(ref err) = stderr {
                    evidence.stderr = Some(err.clone());
                }
                evidence
            }
        }
    }
2054
    /// Execute the scenario's command and gather its output.
    ///
    /// Returns `(generated_text, stderr, exit_code, tokens_per_sec,
    /// skipped)`; `skipped` is true when no model file exists for the
    /// scenario's format. On failure the command is re-run with `--trace`
    /// and the trace output is appended to stderr for diagnosis.
    fn subprocess_execution(
        &self,
        scenario: &QaScenario,
    ) -> (String, Option<String>, i32, Option<f64>, bool) {
        let Some(model_path) = self.resolve_model_path(scenario) else {
            // No file for this format: signal "skipped" to the caller.
            return (String::new(), None, 0, None, true);
        };

        let no_gpu = scenario.backend == Backend::Cpu;

        let output = match scenario.modality {
            Modality::Run => self.command_runner.run_inference(
                Path::new(&model_path),
                &scenario.prompt,
                32,
                no_gpu,
                &["--benchmark", "--json"],
            ),
            Modality::Chat => self.command_runner.run_chat(
                Path::new(&model_path),
                &scenario.prompt,
                no_gpu,
                &["--json"],
            ),
            Modality::Serve => {
                // Serve has its own spawn/poll/kill lifecycle.
                return self.run_serve_scenario(&model_path, scenario, no_gpu);
            }
        };

        let tps = Self::parse_tps_from_output(&output.stdout);

        let generated_text = Self::extract_generated_text(&output.stdout);

        let (final_stderr, final_exit_code) = if output.success {
            (
                if output.stderr.is_empty() {
                    None
                } else {
                    Some(output.stderr)
                },
                output.exit_code,
            )
        } else {
            // Failure path: re-run the same command with --trace to capture
            // diagnostics. (Serve never reaches here -- it returned above --
            // so that arm falls back to a traced `run` invocation.)
            let trace_output = match scenario.modality {
                Modality::Run => self.command_runner.run_inference(
                    Path::new(&model_path),
                    &scenario.prompt,
                    32,
                    no_gpu,
                    &["--trace"],
                ),
                Modality::Chat => self.command_runner.run_chat(
                    Path::new(&model_path),
                    &scenario.prompt,
                    no_gpu,
                    &["--trace"],
                ),
                Modality::Serve => {
                    self.command_runner.run_inference(
                        Path::new(&model_path),
                        &scenario.prompt,
                        32,
                        no_gpu,
                        &["--trace"],
                    )
                }
            };
            let mut full_trace = output.stderr.clone();
            if !trace_output.stderr.is_empty() {
                full_trace.push_str("\n--- TRACE OUTPUT ---\n");
                full_trace.push_str(&trace_output.stderr);
            }
            if !trace_output.stdout.is_empty() {
                full_trace.push_str("\n--- TRACE STDOUT ---\n");
                full_trace.push_str(&trace_output.stdout);
            }
            (Some(full_trace), output.exit_code)
        };

        (generated_text, final_stderr, final_exit_code, tps, false)
    }
2150
2151 fn run_serve_scenario(
2154 &self,
2155 model_path: &str,
2156 scenario: &QaScenario,
2157 no_gpu: bool,
2158 ) -> (String, Option<String>, i32, Option<f64>, bool) {
2159 let port = 18_080 + (scenario.seed % 1000) as u16;
2161
2162 let spawn_output =
2164 self.command_runner
2165 .spawn_serve(Path::new(model_path), port, no_gpu);
2166 if !spawn_output.success {
2167 return (
2168 String::new(),
2169 Some(format!("Failed to spawn serve: {}", spawn_output.stderr)),
2170 spawn_output.exit_code,
2171 None,
2172 false,
2173 );
2174 }
2175
2176 let pid_str = spawn_output.stdout.trim().to_string();
2177
2178 let health_url = format!("http://localhost:{port}/health");
2181 let mut server_ready = false;
2182 let server_pid: Option<u32> = pid_str.parse().ok();
2183 for _ in 0..60 {
2184 std::thread::sleep(std::time::Duration::from_secs(2));
2185 if let Some(pid) = server_pid {
2187 let alive = std::path::Path::new(&format!("/proc/{pid}")).exists();
2188 if !alive {
2189 break;
2190 }
2191 }
2192 if let Ok(output) = std::process::Command::new("curl")
2193 .args(["-s", "-m", "2", &health_url])
2194 .output()
2195 {
2196 let body = String::from_utf8_lossy(&output.stdout);
2197 if output.status.success() && body.contains("healthy") {
2198 server_ready = true;
2199 break;
2200 }
2201 }
2202 }
2203 if !server_ready {
2204 if pid_str.parse::<u32>().is_ok() {
2206 let _ = std::process::Command::new("kill")
2207 .arg(&pid_str)
2208 .output();
2209 }
2210 return (
2211 String::new(),
2212 Some("Server failed to become ready within 120s".to_string()),
2213 1,
2214 None,
2215 false,
2216 );
2217 }
2218
2219 let body = format!(
2221 r#"{{"prompt":"{}","max_tokens":32}}"#,
2222 scenario.prompt.replace('"', "\\\""),
2223 );
2224 let url = format!("http://localhost:{port}/generate");
2225 let output = self.command_runner.http_post(&url, &body);
2226
2227 if pid_str.parse::<u32>().is_ok() {
2229 let _ = std::process::Command::new("kill")
2230 .arg(&pid_str)
2231 .output();
2232 }
2233
2234 let tps = Self::parse_tps_from_output(&output.stdout);
2235 let generated_text = Self::extract_generated_text(&output.stdout);
2236
2237 let (final_stderr, final_exit_code) = if output.success {
2238 (
2239 if output.stderr.is_empty() {
2240 None
2241 } else {
2242 Some(output.stderr)
2243 },
2244 output.exit_code,
2245 )
2246 } else {
2247 (Some(output.stderr), output.exit_code)
2248 };
2249
2250 (generated_text, final_stderr, final_exit_code, tps, false)
2251 }
2252
    /// Resolve the on-disk model file to use for `scenario.format`.
    ///
    /// Resolution order depends on what `config.model_path` points at:
    /// a sharded index, a concrete model file (matching or sibling of the
    /// requested format), or a directory searched via `<fmt>/model.<ext>`,
    /// a sharded index, a flat `model.<ext>`, then any "clean" candidate
    /// file. Returns `None` when nothing matches (scenario is skipped).
    fn resolve_model_path(&self, scenario: &QaScenario) -> Option<String> {
        let model_path = self.config.model_path.as_deref().unwrap_or(".");
        let path = Path::new(model_path);

        // Case 1: configured path is a sharded safetensors index.
        let is_sharded_index = model_path.ends_with(".safetensors.index.json");
        if is_sharded_index {
            if scenario.format == Format::SafeTensors {
                return Some(model_path.to_string());
            }
            // Other formats: look for a converted file next to the index.
            if let Some(parent) = path.parent() {
                let target_ext = match scenario.format {
                    Format::Gguf => "gguf",
                    Format::SafeTensors => unreachable!(),
                    Format::Apr => "apr",
                };
                if let Some(found) = Self::find_clean_model_file(parent, target_ext) {
                    return Some(found);
                }
            }
            return None;
        }

        // Case 2: configured path is itself a model file.
        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
        let is_model_extension = ext == "gguf" || ext == "safetensors" || ext == "apr";

        if is_model_extension {
            let matches = match scenario.format {
                Format::Gguf => ext == "gguf",
                Format::SafeTensors => ext == "safetensors",
                Format::Apr => ext == "apr",
            };
            if matches {
                return Some(model_path.to_string());
            }

            // Wrong format: look for a sibling with the requested extension,
            // first by identical stem, then by model-family prefix.
            let target_ext = match scenario.format {
                Format::Gguf => "gguf",
                Format::SafeTensors => "safetensors",
                Format::Apr => "apr",
            };
            if let Some(parent) = path.parent() {
                if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
                    let sibling = parent.join(format!("{stem}.{target_ext}"));
                    if sibling.exists() {
                        return Some(sibling.to_string_lossy().to_string());
                    }
                    let prefix = Self::extract_model_family_prefix(stem);
                    if let Some(found) =
                        Self::find_model_by_prefix(parent, &prefix, target_ext)
                    {
                        return Some(found);
                    }
                }
            }
            return None;
        }

        // Case 3: configured path is a directory.
        let (subdir, extension) = match scenario.format {
            Format::Gguf => ("gguf", "gguf"),
            Format::Apr => ("apr", "apr"),
            Format::SafeTensors => ("safetensors", "safetensors"),
        };

        // 3a: conventional `<dir>/<fmt>/model.<ext>`.
        let resolved = path.join(subdir).join(format!("model.{extension}"));
        if resolved.exists() {
            return Some(resolved.to_string_lossy().to_string());
        }

        // 3b: sharded safetensors index inside the format subdir.
        if extension == "safetensors" {
            let sharded_index = path.join(subdir).join("model.safetensors.index.json");
            if sharded_index.exists() {
                return Some(sharded_index.to_string_lossy().to_string());
            }
        }

        // 3c: flat `<dir>/model.<ext>`.
        let flat_resolved = path.join(format!("model.{extension}"));
        if flat_resolved.exists() {
            return Some(flat_resolved.to_string_lossy().to_string());
        }

        // 3d: any clean candidate in the format subdir, then the dir itself.
        let format_dir = path.join(subdir);
        if let Some(found) = Self::find_clean_model_file(&format_dir, extension) {
            return Some(found);
        }

        if let Some(found) = Self::find_clean_model_file(path, extension) {
            return Some(found);
        }

        None
    }
2370
2371 fn find_clean_model_file(dir: &Path, extension: &str) -> Option<String> {
2376 let entries = std::fs::read_dir(dir).ok()?;
2377
2378 for entry in entries.flatten() {
2379 let ep = entry.path();
2380
2381 if ep.extension().is_none_or(|e| e != extension) {
2383 continue;
2384 }
2385
2386 let filename = ep.file_name()?.to_str()?;
2388
2389 if filename.contains("converted")
2391 || filename.contains(".idem")
2392 || filename.contains(".com_")
2393 || filename.contains(".rt_")
2394 {
2395 continue;
2396 }
2397
2398 return Some(ep.to_string_lossy().to_string());
2399 }
2400
2401 None
2402 }
2403
2404 fn extract_model_family_prefix(stem: &str) -> String {
2412 let lower = stem.to_lowercase();
2413 let suffixes = [
2415 "-q4k", "-q4_k_m", "-q4_k_s", "-q6_k", "-q8_0", "-q5_k_m",
2416 "-q2_k", "-q3_k_m", "-q3_k_s", "-f16", "-f32",
2417 ];
2418 let mut result = stem.to_string();
2419 for suffix in &suffixes {
2420 if lower.ends_with(suffix) {
2421 result.truncate(result.len() - suffix.len());
2422 break;
2423 }
2424 }
2425 let lower_result = result.to_lowercase();
2427 if lower_result.ends_with("-instruct") {
2428 result.truncate(result.len() - "-instruct".len());
2429 }
2430 result
2431 }
2432
2433 fn find_model_by_prefix(dir: &Path, prefix: &str, extension: &str) -> Option<String> {
2438 let entries = std::fs::read_dir(dir).ok()?;
2439 let lower_prefix = prefix.to_lowercase();
2440
2441 for entry in entries.flatten() {
2442 let ep = entry.path();
2443
2444 if ep.extension().is_none_or(|e| e != extension) {
2445 continue;
2446 }
2447
2448 let filename = ep.file_name()?.to_str()?;
2449
2450 if filename.contains("converted")
2452 || filename.contains(".idem")
2453 || filename.contains(".com_")
2454 || filename.contains(".rt_")
2455 {
2456 continue;
2457 }
2458
2459 let stem = ep.file_stem()?.to_str()?;
2461 if stem.to_lowercase().starts_with(&lower_prefix) {
2462 return Some(ep.to_string_lossy().to_string());
2463 }
2464 }
2465
2466 None
2467 }
2468
2469 fn strip_ansi(s: &str) -> String {
2471 let mut result = String::with_capacity(s.len());
2472 let mut chars = s.chars();
2473 while let Some(c) = chars.next() {
2474 if c == '\x1b' {
2475 if chars.next() == Some('[') {
2477 for esc_c in chars.by_ref() {
2478 if esc_c.is_ascii_alphabetic() {
2479 break;
2480 }
2481 }
2482 }
2483 } else {
2484 result.push(c);
2485 }
2486 }
2487 result
2488 }
2489
2490 fn parse_tps_from_output(output: &str) -> Option<f64> {
2492 output.find("tok/s:").and_then(|pos| {
2494 let rest = &output[pos + 6..];
2495 let tps_str: String = rest
2496 .chars()
2497 .skip_while(|c| c.is_whitespace())
2498 .take_while(|c| c.is_ascii_digit() || *c == '.')
2499 .collect();
2500 tps_str.parse().ok()
2501 })
2502 }
2503
2504 fn extract_generated_text(output: &str) -> String {
2506 output
2509 .lines()
2510 .filter(|line| !line.starts_with("===") && !line.contains("tok/s"))
2511 .collect::<Vec<_>>()
2512 .join("\n")
2513 .trim()
2514 .to_string()
2515 }
2516
2517 fn format_scenario(model_id: &ModelId, format: Format) -> QaScenario {
2519 QaScenario::new(
2520 model_id.clone(),
2521 Modality::Run,
2522 Backend::Cpu,
2523 format,
2524 format!("G0 Format: prepare {format:?} workspace"),
2525 0,
2526 )
2527 }
2528
2529 fn find_sibling_model_files(model_file: &Path) -> Vec<(PathBuf, String)> {
2535 let Some(parent) = model_file.parent() else {
2536 return Vec::new();
2537 };
2538 let Some(stem) = model_file.file_name().and_then(|n| n.to_str()) else {
2539 return Vec::new();
2540 };
2541 let Some(hash_prefix) = stem.strip_suffix(".safetensors") else {
2542 return Vec::new();
2543 };
2544
2545 let prefix_dot = format!("{hash_prefix}.");
2546 let Ok(entries) = std::fs::read_dir(parent) else {
2547 return Vec::new();
2548 };
2549
2550 entries
2551 .flatten()
2552 .filter_map(|entry| {
2553 let path = entry.path();
2554 let name = path.file_name()?.to_str()?.to_string();
2555 if name == stem {
2557 return None;
2558 }
2559 let canonical = name.strip_prefix(&prefix_dot)?;
2561 Some((path, canonical.to_string()))
2562 })
2563 .collect()
2564 }
2565
    /// Prepare a per-model workspace under
    /// `<output_dir>/workspace/<org>/<name>` with one subdirectory per
    /// format, exposing the source via symlinks (copies on non-unix) and
    /// converting single-file sources into each requested non-SafeTensors
    /// format.
    ///
    /// Returns `(workspace_path, conversions_passed, conversions_failed)`.
    #[allow(clippy::too_many_lines)]
    fn prepare_model_workspace(
        &mut self,
        source_file: &Path,
        model_id: &ModelId,
        requested_formats: &[Format],
    ) -> (String, usize, usize) {
        let output_dir = self.config.output_dir.as_deref().unwrap_or("output");
        let workspace = PathBuf::from(output_dir)
            .join("workspace")
            .join(&model_id.org)
            .join(&model_id.name);

        let mut passed = 0;
        let mut failed = 0;

        // The SafeTensors input always gets a `safetensors/` subdirectory.
        let st_dir = workspace.join("safetensors");
        if let Err(e) = std::fs::create_dir_all(&st_dir) {
            let ev = Evidence::falsified(
                "G0-FORMAT-WORKSPACE-001",
                Self::format_scenario(model_id, Format::SafeTensors),
                format!("Failed to create workspace directory: {e}"),
                "N/A",
                0,
            );
            self.collector.add(ev);
            return (workspace.to_string_lossy().to_string(), 0, 1);
        }

        // Sharded models are identified by their "*.safetensors.index.json"
        // manifest file.
        let is_sharded = source_file
            .file_name()
            .is_some_and(|n| n.to_string_lossy().ends_with(".safetensors.index.json"));

        if is_sharded {
            let Some(source_dir) = source_file.parent() else {
                let ev = Evidence::falsified(
                    "G0-FORMAT-WORKSPACE-001",
                    Self::format_scenario(model_id, Format::SafeTensors),
                    "Sharded model has no parent directory".to_string(),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                return (workspace.to_string_lossy().to_string(), 0, 1);
            };

            // Link (or copy on non-unix) every file from the source
            // directory into the workspace; failures are best-effort.
            if let Ok(entries) = std::fs::read_dir(source_dir) {
                for entry in entries.flatten() {
                    let src_path = entry.path();
                    let Some(filename) = src_path.file_name() else {
                        continue;
                    };
                    let link_path = st_dir.join(filename);
                    // Remove any stale link from a previous run.
                    let _ = std::fs::remove_file(&link_path);
                    #[cfg(unix)]
                    let _ = std::os::unix::fs::symlink(&src_path, &link_path);
                    #[cfg(not(unix))]
                    let _ = std::fs::copy(&src_path, &link_path);
                }
            }
        } else {
            // Single-file model: expose it under the canonical name.
            let st_link = st_dir.join("model.safetensors");
            let _ = std::fs::remove_file(&st_link);
            #[cfg(unix)]
            let link_result = std::os::unix::fs::symlink(source_file, &st_link);
            #[cfg(not(unix))]
            let link_result = std::fs::copy(source_file, &st_link).map(|_| ());

            if let Err(e) = link_result {
                let ev = Evidence::falsified(
                    "G0-FORMAT-WORKSPACE-001",
                    Self::format_scenario(model_id, Format::SafeTensors),
                    format!("Failed to symlink model file: {e}"),
                    "N/A",
                    0,
                );
                self.collector.add(ev);
                return (workspace.to_string_lossy().to_string(), 0, 1);
            }

            // Also expose hash-prefixed siblings (tokenizer, config, ...)
            // under their canonical names; best-effort.
            let siblings = Self::find_sibling_model_files(source_file);
            for (src_path, canonical_name) in &siblings {
                let link_path = st_dir.join(canonical_name);
                let _ = std::fs::remove_file(&link_path);
                #[cfg(unix)]
                let _ = std::os::unix::fs::symlink(src_path, &link_path);
                #[cfg(not(unix))]
                let _ = std::fs::copy(src_path, &link_path);
            }
        }

        // Conversions only apply to single-file sources; sharded models are
        // served as safetensors only.
        if !is_sharded {
            for format in requested_formats {
                if *format == Format::SafeTensors {
                    continue;
                }

                let (subdir, ext, gate_id) = match format {
                    Format::Apr => ("apr", "apr", "G0-FORMAT-APR-001"),
                    Format::Gguf => ("gguf", "gguf", "G0-FORMAT-GGUF-001"),
                    Format::SafeTensors => unreachable!(),
                };

                let format_dir = workspace.join(subdir);
                if let Err(e) = std::fs::create_dir_all(&format_dir) {
                    let ev = Evidence::falsified(
                        gate_id,
                        Self::format_scenario(model_id, *format),
                        format!("Failed to create {subdir} directory: {e}"),
                        "N/A",
                        0,
                    );
                    self.collector.add(ev);
                    failed += 1;
                    continue;
                }

                let target = format_dir.join(format!("model.{ext}"));
                let start = Instant::now();
                let output = self.command_runner.convert_model(source_file, &target);
                let duration = start.elapsed().as_millis() as u64;

                if output.success {
                    let ev = Evidence::corroborated(
                        gate_id,
                        Self::format_scenario(model_id, *format),
                        &format!("G0 PASS: converted to {subdir}\n{}", output.stdout),
                        duration,
                    );
                    self.collector.add(ev);
                    passed += 1;
                } else {
                    let ev = Evidence::falsified(
                        gate_id,
                        Self::format_scenario(model_id, *format),
                        format!("G0 FAIL: conversion to {subdir} failed: {}", output.stderr),
                        &output.stdout,
                        duration,
                    );
                    self.collector.add(ev);
                    failed += 1;
                }
            }
        }

        (workspace.to_string_lossy().to_string(), passed, failed)
    }
2732
    /// Gateway checks performed before running a playbook. Currently a
    /// no-op that always succeeds; kept as an extension point for
    /// playbook-level pre-flight validation.
    fn check_gateways(&self, _playbook: &Playbook) -> Result<()> {
        Ok(())
    }
2744
    /// Borrow the evidence collected so far.
    #[must_use]
    pub fn evidence(&self) -> &EvidenceCollector {
        &self.collector
    }
2750
    /// Borrow the execution configuration this executor was built with.
    #[must_use]
    pub fn config(&self) -> &ExecutionConfig {
        &self.config
    }
2756}
2757
impl Default for Executor {
    /// Equivalent to [`Executor::new`].
    fn default() -> Self {
        Self::new()
    }
}
2763
/// Executes individual `apr` tool subcommands (e.g. inspect) against a
/// single model file, independently of the scenario-driven `Executor`.
#[allow(dead_code)] // retained for tool-level QA entry points
pub struct ToolExecutor {
    // Path of the model file passed to each tool invocation.
    model_path: String,
    // When true, request CPU-only execution from the runner.
    no_gpu: bool,
    // Timeout budget in milliseconds (consumed by the command runner).
    timeout_ms: u64,
    // Command backend; swapped for a mock in tests via `with_runner`.
    command_runner: Arc<dyn CommandRunner>,
}
2772
/// Manual `Debug` impl: `dyn CommandRunner` has no `Debug` bound, so the
/// runner field is rendered as a fixed placeholder string.
impl std::fmt::Debug for ToolExecutor {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ToolExecutor")
            .field("model_path", &self.model_path)
            .field("no_gpu", &self.no_gpu)
            .field("timeout_ms", &self.timeout_ms)
            .field("command_runner", &"<dyn CommandRunner>")
            .finish()
    }
}
2783
2784impl ToolExecutor {
    /// Create an executor that shells out to the real `apr` binary via
    /// [`RealCommandRunner`].
    #[must_use]
    pub fn new(model_path: String, no_gpu: bool, timeout_ms: u64) -> Self {
        Self {
            model_path,
            no_gpu,
            timeout_ms,
            command_runner: Arc::new(RealCommandRunner::new()),
        }
    }
2795
    /// Create an executor backed by a caller-supplied [`CommandRunner`]
    /// (typically a mock in tests).
    #[must_use]
    pub fn with_runner(
        model_path: String,
        no_gpu: bool,
        timeout_ms: u64,
        runner: Arc<dyn CommandRunner>,
    ) -> Self {
        Self {
            model_path,
            no_gpu,
            timeout_ms,
            command_runner: runner,
        }
    }
2811
2812 #[must_use]
2814 pub fn execute_inspect(&self) -> ToolTestResult {
2815 let start = std::time::Instant::now();
2816 let output = self
2817 .command_runner
2818 .inspect_model(Path::new(&self.model_path));
2819 self.build_result_from_output("inspect", output, start)
2820 }
2821
2822 #[must_use]
2830 pub fn execute_inspect_verified(&self) -> ToolTestResult {
2831 let start = std::time::Instant::now();
2832
2833 match crate::differential::run_inspect(Path::new(&self.model_path), "apr") {
2834 Ok(inspect) => {
2835 let duration_ms = start.elapsed().as_millis() as u64;
2836 let mut issues = Vec::new();
2837
2838 if inspect.tensor_count == 0 {
2840 issues.push("tensor_count is 0".to_string());
2841 }
2842
2843 if let Some(heads) = inspect.num_attention_heads {
2845 if heads == 0 {
2846 issues.push("num_attention_heads is 0".to_string());
2847 }
2848 }
2849
2850 if let Some(kv_heads) = inspect.num_key_value_heads {
2851 if kv_heads == 0 {
2852 issues.push("num_key_value_heads is 0".to_string());
2853 }
2854 }
2855
2856 if let Some(hidden) = inspect.hidden_size {
2857 if hidden == 0 {
2858 issues.push("hidden_size is 0".to_string());
2859 }
2860 }
2861
2862 let passed = issues.is_empty();
2863 let stdout = format!(
2864 "tensor_count={}, num_attention_heads={:?}, num_key_value_heads={:?}, \
2865 hidden_size={:?}, architecture={:?}",
2866 inspect.tensor_count,
2867 inspect.num_attention_heads,
2868 inspect.num_key_value_heads,
2869 inspect.hidden_size,
2870 inspect.architecture,
2871 );
2872
2873 ToolTestResult {
2874 tool: "inspect-verified".to_string(),
2875 passed,
2876 exit_code: i32::from(!passed),
2877 stdout,
2878 stderr: if passed {
2879 String::new()
2880 } else {
2881 format!("Metadata issues: {}", issues.join(", "))
2882 },
2883 duration_ms,
2884 gate_id: "F-INSPECT-META-001".to_string(),
2885 }
2886 }
2887 Err(e) => {
2888 let duration_ms = start.elapsed().as_millis() as u64;
2889 ToolTestResult {
2890 tool: "inspect-verified".to_string(),
2891 passed: false,
2892 exit_code: -1,
2893 stdout: String::new(),
2894 stderr: format!("Failed to run inspect: {e}"),
2895 duration_ms,
2896 gate_id: "F-INSPECT-META-001".to_string(),
2897 }
2898 }
2899 }
2900 }
2901
2902 #[must_use]
2904 pub fn execute_validate(&self) -> ToolTestResult {
2905 let start = std::time::Instant::now();
2906 let output = self
2907 .command_runner
2908 .validate_model(Path::new(&self.model_path));
2909 self.build_result_from_output("validate", output, start)
2910 }
2911
2912 #[must_use]
2914 pub fn execute_bench(&self) -> ToolTestResult {
2915 let start = std::time::Instant::now();
2916 let output = self.command_runner.bench_model(Path::new(&self.model_path));
2917 self.build_result_from_output("bench", output, start)
2918 }
2919
2920 #[must_use]
2922 pub fn execute_check(&self) -> ToolTestResult {
2923 let start = std::time::Instant::now();
2924 let output = self.command_runner.check_model(Path::new(&self.model_path));
2925 self.build_result_from_output("check", output, start)
2926 }
2927
2928 #[must_use]
2930 pub fn execute_trace(&self, level: &str) -> ToolTestResult {
2931 let start = std::time::Instant::now();
2932 let output = self.command_runner.run_inference(
2933 Path::new(&self.model_path),
2934 "What is 2+2?",
2935 8,
2936 self.no_gpu,
2937 &["--trace", "--trace-level", level],
2938 );
2939 self.build_result_from_output(&format!("trace-{level}"), output, start)
2940 }
2941
2942 #[must_use]
2944 pub fn execute_profile(&self) -> ToolTestResult {
2945 let start = std::time::Instant::now();
2946 let output = self
2947 .command_runner
2948 .profile_model(Path::new(&self.model_path), 1, 2);
2949 self.build_result_from_output("profile", output, start)
2950 }
2951
2952 #[must_use]
2961 pub fn execute_profile_ci(&self) -> ToolTestResult {
2962 let start = std::time::Instant::now();
2963
2964 let output = self.command_runner.profile_ci(
2967 Path::new(&self.model_path),
2968 Some(1.0), None, 1, 2, );
2973
2974 let duration_ms = start.elapsed().as_millis() as u64;
2975
2976 if output.stderr.contains("unexpected argument")
2978 || output.stderr.contains("unrecognized")
2979 || output.stderr.contains("--ci")
2980 {
2981 return ToolTestResult {
2982 tool: "profile-ci".to_string(),
2983 passed: false,
2984 exit_code: -2,
2985 stdout: output.stdout,
2986 stderr: "Feature not available: apr profile does not support --ci mode".to_string(),
2987 duration_ms,
2988 gate_id: "F-PROFILE-006".to_string(),
2989 };
2990 }
2991
2992 let has_passed_field = output.stdout.contains("\"passed\"");
2994 let has_metrics = output.stdout.contains("throughput") || output.stdout.contains("tok_s");
2995
2996 let passed = output.exit_code == 0 && (has_passed_field || has_metrics);
2997
2998 ToolTestResult {
2999 tool: "profile-ci".to_string(),
3000 passed,
3001 exit_code: output.exit_code,
3002 stdout: output.stdout,
3003 stderr: output.stderr,
3004 duration_ms,
3005 gate_id: "F-PROFILE-006".to_string(),
3006 }
3007 }
3008
3009 #[must_use]
3014 pub fn execute_profile_ci_assertion_failure(&self) -> ToolTestResult {
3015 let start = std::time::Instant::now();
3016
3017 let output = self.command_runner.profile_ci(
3019 Path::new(&self.model_path),
3020 Some(1_000_000.0), None,
3022 1, 1, );
3025
3026 let duration_ms = start.elapsed().as_millis() as u64;
3027
3028 if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
3030 return ToolTestResult {
3031 tool: "profile-ci-assertion".to_string(),
3032 passed: false,
3033 exit_code: -2,
3034 stdout: output.stdout,
3035 stderr: "Feature not available: apr profile does not support --ci mode".to_string(),
3036 duration_ms,
3037 gate_id: "F-PROFILE-007".to_string(),
3038 };
3039 }
3040
3041 let assertion_failed_correctly = output.exit_code == 1
3045 || output.stdout.contains("\"passed\":false")
3046 || output.stdout.contains("\"passed\": false")
3047 || output.stdout.contains("ASSERTIONS FAILED");
3048
3049 ToolTestResult {
3050 tool: "profile-ci-assertion".to_string(),
3051 passed: assertion_failed_correctly,
3052 exit_code: output.exit_code,
3053 stdout: output.stdout,
3054 stderr: output.stderr,
3055 duration_ms,
3056 gate_id: "F-PROFILE-007".to_string(),
3057 }
3058 }
3059
3060 #[must_use]
3062 pub fn execute_profile_ci_p99(&self) -> ToolTestResult {
3063 let start = std::time::Instant::now();
3064
3065 let output = self.command_runner.profile_ci(
3067 Path::new(&self.model_path),
3068 None, Some(10_000.0), 1, 2, );
3073
3074 let duration_ms = start.elapsed().as_millis() as u64;
3075
3076 if output.stderr.contains("unexpected argument") || output.stderr.contains("--assert-p99") {
3078 return ToolTestResult {
3079 tool: "profile-ci-p99".to_string(),
3080 passed: false,
3081 exit_code: -2,
3082 stdout: output.stdout,
3083 stderr: "Feature not available: apr profile does not support --assert-p99"
3084 .to_string(),
3085 duration_ms,
3086 gate_id: "F-PROFILE-008".to_string(),
3087 };
3088 }
3089
3090 let has_p99 = output.stdout.contains("p99") || output.stdout.contains("latency");
3092 let passed = output.exit_code == 0 && has_p99;
3093
3094 ToolTestResult {
3095 tool: "profile-ci-p99".to_string(),
3096 passed,
3097 exit_code: output.exit_code,
3098 stdout: output.stdout,
3099 stderr: output.stderr,
3100 duration_ms,
3101 gate_id: "F-PROFILE-008".to_string(),
3102 }
3103 }
3104
3105 #[must_use]
3110 pub fn execute_profile_flamegraph(&self, output_path: &std::path::Path) -> ToolTestResult {
3111 let start = std::time::Instant::now();
3112
3113 let svg_path = output_path.join("profile_flamegraph.svg");
3114 let output = self.command_runner.profile_with_flamegraph(
3115 Path::new(&self.model_path),
3116 &svg_path,
3117 self.no_gpu,
3118 );
3119 let duration_ms = start.elapsed().as_millis() as u64;
3120
3121 if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
3123 return ToolTestResult {
3124 tool: "profile-flamegraph".to_string(),
3125 passed: false,
3126 exit_code: -2,
3127 stdout: output.stdout,
3128 stderr: "Feature not available: apr does not support --profile-output".to_string(),
3129 duration_ms,
3130 gate_id: "F-PROFILE-002".to_string(),
3131 };
3132 }
3133
3134 let flamegraph_exists = svg_path.exists();
3136 let flamegraph_valid = if flamegraph_exists {
3137 std::fs::read_to_string(&svg_path)
3138 .map(|content| content.contains("<svg") && content.contains("</svg>"))
3139 .unwrap_or(false)
3140 } else {
3141 false
3142 };
3143
3144 ToolTestResult {
3145 tool: "profile-flamegraph".to_string(),
3146 passed: flamegraph_valid,
3147 exit_code: i32::from(!flamegraph_valid),
3148 stdout: format!("Flamegraph exists: {flamegraph_exists}, valid: {flamegraph_valid}"),
3149 stderr: output.stderr,
3150 duration_ms,
3151 gate_id: "F-PROFILE-002".to_string(),
3152 }
3153 }
3154
3155 #[must_use]
3160 pub fn execute_profile_focus(&self, focus: &str) -> ToolTestResult {
3161 let start = std::time::Instant::now();
3162
3163 let output =
3164 self.command_runner
3165 .profile_with_focus(Path::new(&self.model_path), focus, self.no_gpu);
3166 let duration_ms = start.elapsed().as_millis() as u64;
3167
3168 if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
3170 return ToolTestResult {
3171 tool: "profile-focus".to_string(),
3172 passed: false,
3173 exit_code: -2,
3174 stdout: output.stdout,
3175 stderr: format!("Feature not available: apr does not support --focus {focus}"),
3176 duration_ms,
3177 gate_id: "F-PROFILE-003".to_string(),
3178 };
3179 }
3180
3181 let passed = output.success;
3182
3183 ToolTestResult {
3184 tool: "profile-focus".to_string(),
3185 passed,
3186 exit_code: output.exit_code,
3187 stdout: output.stdout,
3188 stderr: output.stderr,
3189 duration_ms,
3190 gate_id: "F-PROFILE-003".to_string(),
3191 }
3192 }
3193
3194 #[must_use]
3199 pub fn execute_backend_equivalence(&self) -> ToolTestResult {
3200 use std::process::Command;
3201 let start = std::time::Instant::now();
3202
3203 let prompt = "What is 2+2?";
3204
3205 let cpu_output = Command::new("apr")
3207 .arg("run")
3208 .arg(&self.model_path)
3209 .arg("-p")
3210 .arg(prompt)
3211 .arg("--max-tokens")
3212 .arg("8")
3213 .arg("--no-gpu")
3214 .output();
3215
3216 let cpu_result = match cpu_output {
3217 Ok(out) => {
3218 if out.status.success() {
3219 Some(String::from_utf8_lossy(&out.stdout).to_string())
3220 } else {
3221 None
3222 }
3223 }
3224 Err(_) => None,
3225 };
3226
3227 let gpu_output = Command::new("apr")
3229 .arg("run")
3230 .arg(&self.model_path)
3231 .arg("-p")
3232 .arg(prompt)
3233 .arg("--max-tokens")
3234 .arg("8")
3235 .arg("--gpu")
3236 .output();
3237
3238 let gpu_result = match gpu_output {
3239 Ok(out) => {
3240 let stderr = String::from_utf8_lossy(&out.stderr);
3241 if stderr.contains("No GPU") || stderr.contains("CUDA") || !out.status.success() {
3243 None } else {
3245 Some(String::from_utf8_lossy(&out.stdout).to_string())
3246 }
3247 }
3248 Err(_) => None,
3249 };
3250
3251 let duration_ms = start.elapsed().as_millis() as u64;
3252
3253 match (cpu_result, gpu_result) {
3254 (Some(cpu), Some(gpu)) => {
3255 let equivalent = cpu.trim() == gpu.trim();
3257 ToolTestResult {
3258 tool: "backend-equivalence".to_string(),
3259 passed: equivalent,
3260 exit_code: i32::from(!equivalent),
3261 stdout: format!("CPU: {}\nGPU: {}", cpu.trim(), gpu.trim()),
3262 stderr: if equivalent {
3263 String::new()
3264 } else {
3265 "CPU and GPU outputs differ".to_string()
3266 },
3267 duration_ms,
3268 gate_id: "F-CONV-BE-001".to_string(),
3269 }
3270 }
3271 (Some(_), None) => ToolTestResult {
3272 tool: "backend-equivalence".to_string(),
3273 passed: false,
3274 exit_code: -2,
3275 stdout: String::new(),
3276 stderr: "GPU not available - skipping backend equivalence test".to_string(),
3277 duration_ms,
3278 gate_id: "F-CONV-BE-001".to_string(),
3279 },
3280 _ => ToolTestResult {
3281 tool: "backend-equivalence".to_string(),
3282 passed: false,
3283 exit_code: -1,
3284 stdout: String::new(),
3285 stderr: "Failed to run inference on both backends".to_string(),
3286 duration_ms,
3287 gate_id: "F-CONV-BE-001".to_string(),
3288 },
3289 }
3290 }
3291
    /// End-to-end check of `apr serve`: spawn the server, wait for readiness,
    /// probe `/health`, issue one chat completion, then shut the server down
    /// (gate `F-INTEG-003`).
    ///
    /// Assumes `apr` and `curl` are on `PATH` and that port 18080 is free on
    /// the test host — TODO confirm a fixed port cannot collide in CI.
    #[must_use]
    pub fn execute_serve_lifecycle(&self) -> ToolTestResult {
        use std::io::{BufRead, BufReader};
        use std::process::{Command, Stdio};
        use std::time::Duration;

        let start = std::time::Instant::now();
        // Fixed port; both curl probes below interpolate the same value.
        let port = 18080; let mut server_cmd = Command::new("apr");
        server_cmd
            .arg("serve")
            .arg(&self.model_path)
            .arg("--port")
            .arg(port.to_string())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped());

        if self.no_gpu {
            server_cmd.arg("--no-gpu");
        }

        // Spawn failure (e.g. `apr` missing) short-circuits with exit code -1.
        let mut server = match server_cmd.spawn() {
            Ok(child) => child,
            Err(e) => {
                return ToolTestResult {
                    tool: "serve-lifecycle".to_string(),
                    passed: false,
                    exit_code: -1,
                    stdout: String::new(),
                    stderr: format!("Failed to start server: {e}"),
                    duration_ms: start.elapsed().as_millis() as u64,
                    gate_id: "F-INTEG-003".to_string(),
                };
            }
        };

        // Readiness: scan the first 20 stderr lines for a "listening" banner;
        // when stderr wasn't captured, fall back to a fixed 3s sleep.
        let stderr = server.stderr.take();
        let ready = stderr.map_or_else(
            || {
                std::thread::sleep(Duration::from_secs(3));
                true
            },
            |stderr| {
                let reader = BufReader::new(stderr);
                let mut ready = false;
                for line in reader.lines().take(20).flatten() {
                    if line.contains("Listening") || line.contains("listening") {
                        ready = true;
                        break;
                    }
                }
                ready
            },
        );

        if !ready {
            // Grace period in case the server logs readiness differently.
            std::thread::sleep(Duration::from_secs(2));
        }

        // Health probe; `-sf` makes curl exit non-zero on HTTP errors.
        let health_result = Command::new("curl")
            .arg("-sf")
            .arg(format!("http://localhost:{port}/health"))
            .arg("--connect-timeout")
            .arg("5")
            .output();

        let health_ok = health_result.map(|o| o.status.success()).unwrap_or(false);

        // Minimal OpenAI-style chat completion request.
        let inference_result = Command::new("curl")
            .arg("-sf")
            .arg("-X")
            .arg("POST")
            .arg(format!("http://localhost:{port}/v1/chat/completions"))
            .arg("-H")
            .arg("Content-Type: application/json")
            .arg("-d")
            .arg(r#"{"messages":[{"role":"user","content":"Hi"}],"max_tokens":5}"#)
            .arg("--connect-timeout")
            .arg("10")
            .output();

        let inference_ok = inference_result
            .map(|o| o.status.success())
            .unwrap_or(false);

        // Always tear the server down; kill/wait errors are deliberately ignored.
        let _ = server.kill();
        let _ = server.wait();

        let duration_ms = start.elapsed().as_millis() as u64;

        let passed = health_ok && inference_ok;
        let stdout = format!(
            "Health check: {}\nInference: {}",
            if health_ok { "OK" } else { "FAILED" },
            if inference_ok { "OK" } else { "FAILED" }
        );
        let stderr = if passed {
            String::new()
        } else {
            format!("Serve lifecycle incomplete: health={health_ok}, inference={inference_ok}")
        };

        ToolTestResult {
            tool: "serve-lifecycle".to_string(),
            passed,
            exit_code: i32::from(!passed),
            stdout,
            stderr,
            duration_ms,
            gate_id: "F-INTEG-003".to_string(),
        }
    }
3419
    /// Run the full tool suite, excluding the serve lifecycle test.
    #[must_use]
    pub fn execute_all(&self) -> Vec<ToolTestResult> {
        self.execute_all_with_serve(false)
    }
3425
3426 #[must_use]
3428 pub fn execute_all_with_serve(&self, include_serve: bool) -> Vec<ToolTestResult> {
3429 let mut results = vec![
3430 self.execute_inspect(),
3432 self.execute_inspect_verified(), self.execute_validate(),
3434 self.execute_check(),
3435 self.execute_bench(),
3436 ];
3437
3438 for level in &["none", "basic", "layer", "payload"] {
3440 results.push(self.execute_trace(level));
3441 }
3442
3443 results.push(self.execute_profile());
3445 results.push(self.execute_profile_ci());
3446 results.push(self.execute_profile_ci_assertion_failure());
3447 results.push(self.execute_profile_ci_p99());
3448
3449 if include_serve {
3451 results.push(self.execute_serve_lifecycle());
3452 }
3453
3454 results
3455 }
3456
3457 fn build_result_from_output(
3458 &self,
3459 tool: &str,
3460 output: crate::command::CommandOutput,
3461 start: std::time::Instant,
3462 ) -> ToolTestResult {
3463 let duration_ms = start.elapsed().as_millis() as u64;
3464
3465 ToolTestResult {
3466 tool: tool.to_string(),
3467 passed: output.success,
3468 exit_code: output.exit_code,
3469 stdout: output.stdout,
3470 stderr: output.stderr,
3471 duration_ms,
3472 gate_id: format!("F-{}-001", tool.to_uppercase().replace('-', "_")),
3473 }
3474 }
3475}
3476
#[derive(Debug, Clone)]
/// Outcome of a single `apr` tool invocation against the model under test.
pub struct ToolTestResult {
    /// Name of the tool/test that ran (e.g. "inspect", "profile-ci").
    pub tool: String,
    /// Whether the test passed.
    pub passed: bool,
    /// Process exit code; negative values are synthetic (-1 run failure,
    /// -2 feature not available in the installed binary).
    pub exit_code: i32,
    /// Captured standard output.
    pub stdout: String,
    /// Captured standard error (or a synthesized explanation).
    pub stderr: String,
    /// Wall-clock duration of the test in milliseconds.
    pub duration_ms: u64,
    /// Falsification gate this test feeds (e.g. "F-PROFILE-006").
    pub gate_id: String,
}
3495
3496impl ToolTestResult {
3497 #[must_use]
3499 pub fn to_evidence(&self, model_id: &ModelId) -> Evidence {
3500 let scenario = QaScenario::new(
3501 model_id.clone(),
3502 Modality::Run,
3503 Backend::Cpu,
3504 Format::Gguf,
3505 format!("apr {} test", self.tool),
3506 0,
3507 );
3508
3509 if self.passed {
3510 Evidence::corroborated(&self.gate_id, scenario, &self.stdout, self.duration_ms)
3511 } else {
3512 Evidence::falsified(
3513 &self.gate_id,
3514 scenario,
3515 format!("Exit code: {}, stderr: {}", self.exit_code, self.stderr),
3516 &self.stdout,
3517 self.duration_ms,
3518 )
3519 }
3520 }
3521}
3522
#[derive(Debug, Clone)]
/// Aggregate outcome of executing one playbook.
pub struct ExecutionResult {
    /// Name of the playbook that was executed.
    pub playbook_name: String,
    /// Total number of scenarios scheduled.
    pub total_scenarios: usize,
    /// Number of scenarios that passed.
    pub passed: usize,
    /// Number of scenarios that failed.
    pub failed: usize,
    /// Number of scenarios skipped (e.g. dry run).
    pub skipped: usize,
    /// Total wall-clock duration in milliseconds.
    pub duration_ms: u64,
    /// Description of the gateway that failed, if any aborted the run.
    pub gateway_failed: Option<String>,
    /// All evidence records collected during the run.
    pub evidence: EvidenceCollector,
}
3543
3544impl ExecutionResult {
3545 #[must_use]
3547 pub fn is_success(&self) -> bool {
3548 self.gateway_failed.is_none() && self.failed == 0
3549 }
3550
3551 #[must_use]
3553 pub fn pass_rate(&self) -> f64 {
3554 if self.total_scenarios == 0 {
3555 return 0.0;
3556 }
3557 (self.passed as f64 / self.total_scenarios as f64) * 100.0
3558 }
3559}
3560
3561#[cfg(test)]
3562mod tests {
3563 use super::*;
3564 use apr_qa_gen::{Backend, Format, Modality, ModelId, QaScenario};
3565
3566 fn test_scenario() -> QaScenario {
3567 QaScenario::new(
3568 ModelId::new("test", "model"),
3569 Modality::Run,
3570 Backend::Cpu,
3571 Format::Gguf,
3572 "2+2=".to_string(),
3573 42,
3574 )
3575 }
3576
3577 fn test_playbook() -> Playbook {
3578 let yaml = r#"
3579name: test-playbook
3580version: "1.0.0"
3581model:
3582 hf_repo: "test/model"
3583 formats: [gguf]
3584test_matrix:
3585 modalities: [run]
3586 backends: [cpu]
3587 scenario_count: 5
3588"#;
3589 Playbook::from_yaml(yaml).expect("Failed to parse")
3590 }
3591
3592 fn create_test_model_file(format: Format) -> (tempfile::TempDir, String) {
3595 let tmp = tempfile::tempdir().unwrap();
3596 let filename = match format {
3597 Format::Gguf => "model.gguf",
3598 Format::Apr => "model.apr",
3599 Format::SafeTensors => "model.safetensors",
3600 };
3601 let file_path = tmp.path().join(filename);
3602 std::fs::write(&file_path, b"fake model data").unwrap();
3603 let path = file_path.to_string_lossy().to_string();
3604 (tmp, path)
3605 }
3606
3607 #[test]
3608 fn test_executor_dry_run() {
3609 let mock_runner = MockCommandRunner::new();
3610 let config = ExecutionConfig {
3611 dry_run: true,
3612 ..Default::default()
3613 };
3614 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
3615 let playbook = test_playbook();
3616
3617 let result = executor.execute(&playbook).expect("Execution failed");
3618
3619 assert_eq!(result.skipped, 5);
3620 assert!(result.passed >= 1);
3622 }
3623
3624 #[test]
3625 fn test_execution_result_pass_rate() {
3626 let result = ExecutionResult {
3627 playbook_name: "test".to_string(),
3628 total_scenarios: 100,
3629 passed: 95,
3630 failed: 5,
3631 skipped: 0,
3632 duration_ms: 1000,
3633 gateway_failed: None,
3634 evidence: EvidenceCollector::new(),
3635 };
3636
3637 assert!((result.pass_rate() - 95.0).abs() < f64::EPSILON);
3638 }
3639
3640 #[test]
3641 fn test_failure_policy_stop_on_first() {
3642 let config = ExecutionConfig {
3643 failure_policy: FailurePolicy::StopOnFirst,
3644 ..Default::default()
3645 };
3646 let executor = Executor::with_config(config);
3647 assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnFirst);
3648 }
3649
3650 #[test]
3651 fn test_execution_config_default() {
3652 let config = ExecutionConfig::default();
3653 assert_eq!(config.failure_policy, FailurePolicy::StopOnP0);
3654 assert_eq!(config.default_timeout_ms, 60_000);
3655 assert_eq!(config.max_workers, 4);
3656 assert!(!config.dry_run);
3657 }
3658
3659 #[test]
3660 fn test_executor_default() {
3661 let executor = Executor::default();
3662 assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnP0);
3663 }
3664
3665 #[test]
3666 fn test_executor_evidence() {
3667 let executor = Executor::new();
3668 let evidence = executor.evidence();
3669 assert_eq!(evidence.all().len(), 0);
3670 }
3671
3672 #[test]
3673 fn test_execution_result_is_success() {
3674 let success = ExecutionResult {
3675 playbook_name: "test".to_string(),
3676 total_scenarios: 10,
3677 passed: 10,
3678 failed: 0,
3679 skipped: 0,
3680 duration_ms: 100,
3681 gateway_failed: None,
3682 evidence: EvidenceCollector::new(),
3683 };
3684 assert!(success.is_success());
3685
3686 let with_failures = ExecutionResult {
3687 playbook_name: "test".to_string(),
3688 total_scenarios: 10,
3689 passed: 8,
3690 failed: 2,
3691 skipped: 0,
3692 duration_ms: 100,
3693 gateway_failed: None,
3694 evidence: EvidenceCollector::new(),
3695 };
3696 assert!(!with_failures.is_success());
3697
3698 let with_gateway_failure = ExecutionResult {
3699 playbook_name: "test".to_string(),
3700 total_scenarios: 10,
3701 passed: 0,
3702 failed: 0,
3703 skipped: 0,
3704 duration_ms: 100,
3705 gateway_failed: Some("G1 failed".to_string()),
3706 evidence: EvidenceCollector::new(),
3707 };
3708 assert!(!with_gateway_failure.is_success());
3709 }
3710
3711 #[test]
3712 fn test_execution_result_pass_rate_zero() {
3713 let result = ExecutionResult {
3714 playbook_name: "test".to_string(),
3715 total_scenarios: 0,
3716 passed: 0,
3717 failed: 0,
3718 skipped: 0,
3719 duration_ms: 0,
3720 gateway_failed: None,
3721 evidence: EvidenceCollector::new(),
3722 };
3723 assert!((result.pass_rate() - 0.0).abs() < f64::EPSILON);
3724 }
3725
3726 #[test]
3727 fn test_failure_policy_default() {
3728 let policy = FailurePolicy::default();
3729 assert_eq!(policy, FailurePolicy::StopOnP0);
3730 }
3731
3732 #[test]
3733 fn test_failure_policy_debug() {
3734 let policy = FailurePolicy::CollectAll;
3735 let debug_str = format!("{policy:?}");
3736 assert!(debug_str.contains("CollectAll"));
3737 }
3738
3739 #[test]
3740 fn test_executor_with_collect_all_policy() {
3741 let config = ExecutionConfig {
3742 failure_policy: FailurePolicy::CollectAll,
3743 ..Default::default()
3744 };
3745 let executor = Executor::with_config(config);
3746 assert_eq!(executor.config.failure_policy, FailurePolicy::CollectAll);
3747 }
3748
3749 #[test]
3750 fn test_executor_with_stop_on_p0_policy() {
3751 let config = ExecutionConfig {
3752 failure_policy: FailurePolicy::StopOnP0,
3753 ..Default::default()
3754 };
3755 let executor = Executor::with_config(config);
3756 assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnP0);
3757 }
3758
3759 #[test]
3760 fn test_executor_config_clone() {
3761 let config = ExecutionConfig::default();
3762 let cloned = config.clone();
3763 assert_eq!(cloned.failure_policy, config.failure_policy);
3764 assert_eq!(cloned.max_workers, config.max_workers);
3765 }
3766
3767 #[test]
3768 fn test_execution_result_clone() {
3769 let result = ExecutionResult {
3770 playbook_name: "test".to_string(),
3771 total_scenarios: 10,
3772 passed: 10,
3773 failed: 0,
3774 skipped: 0,
3775 duration_ms: 100,
3776 gateway_failed: None,
3777 evidence: EvidenceCollector::new(),
3778 };
3779 let cloned = result.clone();
3780 assert_eq!(cloned.playbook_name, result.playbook_name);
3781 assert_eq!(cloned.total_scenarios, result.total_scenarios);
3782 }
3783
3784 #[test]
3785 fn test_check_gateways() {
3786 let executor = Executor::new();
3787 let playbook = test_playbook();
3788
3789 let result = executor.check_gateways(&playbook);
3790 assert!(result.is_ok());
3791 }
3792
3793 #[test]
3794 fn test_executor_debug() {
3795 let executor = Executor::new();
3796 let debug_str = format!("{executor:?}");
3797 assert!(debug_str.contains("Executor"));
3798 }
3799
3800 #[test]
3801 fn test_execution_config_debug() {
3802 let config = ExecutionConfig::default();
3803 let debug_str = format!("{config:?}");
3804 assert!(debug_str.contains("ExecutionConfig"));
3805 }
3806
3807 #[test]
3808 fn test_execution_result_debug() {
3809 let result = ExecutionResult {
3810 playbook_name: "test".to_string(),
3811 total_scenarios: 10,
3812 passed: 10,
3813 failed: 0,
3814 skipped: 0,
3815 duration_ms: 100,
3816 gateway_failed: None,
3817 evidence: EvidenceCollector::new(),
3818 };
3819 let debug_str = format!("{result:?}");
3820 assert!(debug_str.contains("ExecutionResult"));
3821 }
3822
3823 #[test]
3824 fn test_failure_policy_eq() {
3825 assert_eq!(FailurePolicy::StopOnFirst, FailurePolicy::StopOnFirst);
3826 assert_ne!(FailurePolicy::StopOnFirst, FailurePolicy::CollectAll);
3827 }
3828
3829 #[test]
3830 fn test_failure_policy_clone() {
3831 let policy = FailurePolicy::StopOnP0;
3832 let cloned = policy;
3833 assert_eq!(policy, cloned);
3834 }
3835
3836 #[test]
3837 fn test_failure_policy_fail_fast() {
3838 let policy = FailurePolicy::FailFast;
3839 assert!(policy.emit_diagnostic());
3840 assert!(policy.stops_on_any_failure());
3841 }
3842
3843 #[test]
3844 fn test_failure_policy_emit_diagnostic() {
3845 assert!(FailurePolicy::FailFast.emit_diagnostic());
3846 assert!(!FailurePolicy::StopOnFirst.emit_diagnostic());
3847 assert!(!FailurePolicy::StopOnP0.emit_diagnostic());
3848 assert!(!FailurePolicy::CollectAll.emit_diagnostic());
3849 }
3850
3851 #[test]
3852 fn test_failure_policy_stops_on_any_failure() {
3853 assert!(FailurePolicy::FailFast.stops_on_any_failure());
3854 assert!(FailurePolicy::StopOnFirst.stops_on_any_failure());
3855 assert!(!FailurePolicy::StopOnP0.stops_on_any_failure());
3856 assert!(!FailurePolicy::CollectAll.stops_on_any_failure());
3857 }
3858
3859 #[test]
3860 fn test_executor_custom_timeout() {
3861 let config = ExecutionConfig {
3862 default_timeout_ms: 30_000,
3863 ..Default::default()
3864 };
3865 let executor = Executor::with_config(config);
3866 assert_eq!(executor.config.default_timeout_ms, 30_000);
3867 }
3868
3869 #[test]
3870 fn test_executor_custom_workers() {
3871 let config = ExecutionConfig {
3872 max_workers: 8,
3873 ..Default::default()
3874 };
3875 let executor = Executor::with_config(config);
3876 assert_eq!(executor.config.max_workers, 8);
3877 }
3878
3879 #[test]
3880 fn test_tool_test_result_to_evidence_passed() {
3881 let result = ToolTestResult {
3882 tool: "inspect".to_string(),
3883 passed: true,
3884 exit_code: 0,
3885 stdout: "Model info...".to_string(),
3886 stderr: String::new(),
3887 duration_ms: 100,
3888 gate_id: "F-INSPECT-001".to_string(),
3889 };
3890
3891 let model_id = ModelId::new("test", "model");
3892 let evidence = result.to_evidence(&model_id);
3893
3894 assert!(evidence.outcome.is_pass());
3895 assert_eq!(evidence.gate_id, "F-INSPECT-001");
3896 }
3897
3898 #[test]
3899 fn test_tool_test_result_to_evidence_failed() {
3900 let result = ToolTestResult {
3901 tool: "validate".to_string(),
3902 passed: false,
3903 exit_code: 5,
3904 stdout: String::new(),
3905 stderr: "Validation failed".to_string(),
3906 duration_ms: 50,
3907 gate_id: "F-VALIDATE-001".to_string(),
3908 };
3909
3910 let model_id = ModelId::new("test", "model");
3911 let evidence = result.to_evidence(&model_id);
3912
3913 assert!(evidence.outcome.is_fail());
3914 assert!(!evidence.reason.is_empty());
3915 }
3916
3917 #[test]
3918 fn test_tool_test_result_clone() {
3919 let result = ToolTestResult {
3920 tool: "bench".to_string(),
3921 passed: true,
3922 exit_code: 0,
3923 stdout: "Benchmark output".to_string(),
3924 stderr: String::new(),
3925 duration_ms: 500,
3926 gate_id: "F-BENCH-001".to_string(),
3927 };
3928
3929 let cloned = result.clone();
3930 assert_eq!(cloned.tool, result.tool);
3931 assert_eq!(cloned.passed, result.passed);
3932 assert_eq!(cloned.exit_code, result.exit_code);
3933 }
3934
3935 #[test]
3936 fn test_tool_test_result_debug() {
3937 let result = ToolTestResult {
3938 tool: "profile".to_string(),
3939 passed: true,
3940 exit_code: 0,
3941 stdout: String::new(),
3942 stderr: String::new(),
3943 duration_ms: 1000,
3944 gate_id: "F-PROFILE-001".to_string(),
3945 };
3946
3947 let debug_str = format!("{result:?}");
3948 assert!(debug_str.contains("ToolTestResult"));
3949 assert!(debug_str.contains("profile"));
3950 }
3951
3952 #[test]
3953 fn test_tool_executor_new() {
3954 let executor = ToolExecutor::new("/path/to/model.gguf".to_string(), true, 60_000);
3955 assert!(executor.no_gpu);
3956 }
3957
3958 #[test]
3959 fn test_execution_config_no_gpu() {
3960 let config = ExecutionConfig {
3961 no_gpu: true,
3962 ..Default::default()
3963 };
3964 assert!(config.no_gpu);
3965 }
3966
3967 #[test]
3968 fn test_execution_config_conversion_tests() {
3969 let config = ExecutionConfig::default();
3971 assert!(config.run_conversion_tests);
3972
3973 let config_disabled = ExecutionConfig {
3975 run_conversion_tests: false,
3976 ..Default::default()
3977 };
3978 assert!(!config_disabled.run_conversion_tests);
3979 }
3980
3981 #[test]
3982 fn test_execution_result_with_skipped() {
3983 let result = ExecutionResult {
3984 playbook_name: "test".to_string(),
3985 total_scenarios: 10,
3986 passed: 5,
3987 failed: 2,
3988 skipped: 3,
3989 duration_ms: 100,
3990 gateway_failed: None,
3991 evidence: EvidenceCollector::new(),
3992 };
3993 assert_eq!(result.skipped, 3);
3994 let executed = result.passed + result.failed;
3996 assert_eq!(executed, 7);
3997 }
3998
3999 #[test]
4000 fn test_executor_config_method() {
4001 let executor = Executor::new();
4002 let config = executor.config();
4003 assert_eq!(config.failure_policy, FailurePolicy::StopOnP0);
4004 }
4005
4006 #[test]
4007 fn test_execution_config_differential_defaults() {
4008 let config = ExecutionConfig::default();
4009 assert!(config.run_differential_tests);
4011 assert!(config.run_trace_payload);
4012 assert!(!config.run_profile_ci);
4014 }
4015
4016 #[test]
4017 fn test_execution_config_differential_custom() {
4018 let config = ExecutionConfig {
4019 run_differential_tests: false,
4020 run_profile_ci: true,
4021 run_trace_payload: false,
4022 ..Default::default()
4023 };
4024 assert!(!config.run_differential_tests);
4025 assert!(config.run_profile_ci);
4026 assert!(!config.run_trace_payload);
4027 }
4028
4029 #[test]
4030 fn test_parse_tps_from_output_valid() {
4031 let output = "Some text tok/s: 12.34 more text";
4032 let tps = Executor::parse_tps_from_output(output);
4033 assert!(tps.is_some());
4034 assert!((tps.unwrap() - 12.34).abs() < f64::EPSILON);
4035 }
4036
4037 #[test]
4038 fn test_parse_tps_from_output_with_whitespace() {
4039 let output = "tok/s: 45.67";
4040 let tps = Executor::parse_tps_from_output(output);
4041 assert!(tps.is_some());
4042 assert!((tps.unwrap() - 45.67).abs() < f64::EPSILON);
4043 }
4044
4045 #[test]
4046 fn test_parse_tps_from_output_integer() {
4047 let output = "tok/s: 100";
4048 let tps = Executor::parse_tps_from_output(output);
4049 assert!(tps.is_some());
4050 assert!((tps.unwrap() - 100.0).abs() < f64::EPSILON);
4051 }
4052
4053 #[test]
4054 fn test_parse_tps_from_output_not_found() {
4055 let output = "no tokens per second here";
4056 let tps = Executor::parse_tps_from_output(output);
4057 assert!(tps.is_none());
4058 }
4059
4060 #[test]
4061 fn test_parse_tps_from_output_empty() {
4062 let output = "";
4063 let tps = Executor::parse_tps_from_output(output);
4064 assert!(tps.is_none());
4065 }
4066
4067 #[test]
4068 fn test_parse_tps_from_output_invalid_number() {
4069 let output = "tok/s: abc";
4070 let tps = Executor::parse_tps_from_output(output);
4071 assert!(tps.is_none());
4072 }
4073
4074 #[test]
4075 fn test_extract_generated_text_simple() {
4076 let output = "Hello world\nThis is text";
4077 let result = Executor::extract_generated_text(output);
4078 assert_eq!(result, "Hello world\nThis is text");
4079 }
4080
4081 #[test]
4082 fn test_extract_generated_text_filters_separator() {
4083 let output = "Generated text\n=== BENCHMARK ===\nMore stuff";
4084 let result = Executor::extract_generated_text(output);
4085 assert!(!result.contains("==="));
4086 assert!(result.contains("Generated text"));
4087 }
4088
4089 #[test]
4090 fn test_extract_generated_text_filters_tps() {
4091 let output = "Hello world\ntok/s: 12.34\nAfter tps";
4092 let result = Executor::extract_generated_text(output);
4093 assert!(!result.contains("tok/s"));
4094 assert!(result.contains("Hello world"));
4095 assert!(result.contains("After tps"));
4096 }
4097
4098 #[test]
4099 fn test_extract_generated_text_empty() {
4100 let output = "";
4101 let result = Executor::extract_generated_text(output);
4102 assert!(result.is_empty());
4103 }
4104
4105 #[test]
4106 fn test_extract_generated_text_only_filtered() {
4107 let output = "=== START ===\ntok/s: 10\n=== END ===";
4108 let result = Executor::extract_generated_text(output);
4109 assert!(result.is_empty());
4110 }
4111
4112 #[test]
4113 fn test_extract_output_text_simple() {
4114 let output = "Some header\nOutput:\nThe answer is 4\nCompleted in 1.2s";
4115 let result = Executor::extract_output_text(output);
4116 assert_eq!(result, "The answer is 4");
4117 }
4118
4119 #[test]
4120 fn test_extract_output_text_multiline() {
4121 let output = "Header\nOutput:\nLine 1\nLine 2\nLine 3\nCompleted in 1s";
4122 let result = Executor::extract_output_text(output);
4123 assert_eq!(result, "Line 1 Line 2 Line 3");
4124 }
4125
4126 #[test]
4127 fn test_extract_output_text_no_output_marker() {
4128 let output = "Just some text without Output marker";
4129 let result = Executor::extract_output_text(output);
4130 assert!(result.is_empty());
4131 }
4132
4133 #[test]
4134 fn test_extract_output_text_empty() {
4135 let output = "";
4136 let result = Executor::extract_output_text(output);
4137 assert!(result.is_empty());
4138 }
4139
4140 #[test]
4141 fn test_extract_output_text_empty_output() {
4142 let output = "Header\nOutput:\nCompleted in 1s";
4143 let result = Executor::extract_output_text(output);
4144 assert!(result.is_empty());
4145 }
4146
4147 #[test]
4148 fn test_extract_output_text_stops_at_empty_line() {
4149 let output = "Header\nOutput:\nThe answer\n\nMore text after blank";
4150 let result = Executor::extract_output_text(output);
4151 assert_eq!(result, "The answer");
4152 }
4153
4154 #[test]
4155 fn test_golden_scenario_creation() {
4156 let model_id = ModelId::new("test", "model");
4157 let scenario = Executor::golden_scenario(&model_id);
4158 assert_eq!(scenario.model.org, "test");
4159 assert_eq!(scenario.model.name, "model");
4160 assert_eq!(scenario.modality, Modality::Run);
4161 assert_eq!(scenario.backend, Backend::Cpu);
4162 assert_eq!(scenario.format, Format::Apr);
4163 assert!(scenario.prompt.contains("Golden Rule"));
4164 }
4165
4166 #[test]
4167 fn test_execution_config_golden_rule_default() {
4168 let config = ExecutionConfig::default();
4169 assert!(config.run_golden_rule_test);
4170 assert!(config.golden_reference_path.is_none());
4171 }
4172
4173 #[test]
4174 fn test_execution_config_golden_rule_custom() {
4175 let config = ExecutionConfig {
4176 run_golden_rule_test: false,
4177 golden_reference_path: Some("/path/to/reference.json".to_string()),
4178 ..Default::default()
4179 };
4180 assert!(!config.run_golden_rule_test);
4181 assert_eq!(
4182 config.golden_reference_path.as_deref(),
4183 Some("/path/to/reference.json")
4184 );
4185 }
4186
4187 #[test]
4188 fn test_tool_executor_fields() {
4189 let executor = ToolExecutor::new("/path/model.gguf".to_string(), true, 30_000);
4190 assert_eq!(executor.model_path, "/path/model.gguf");
4191 assert!(executor.no_gpu);
4192 assert_eq!(executor.timeout_ms, 30_000);
4193 }
4194
4195 #[test]
4196 fn test_tool_executor_no_gpu_false() {
4197 let executor = ToolExecutor::new("model.gguf".to_string(), false, 60_000);
4198 assert!(!executor.no_gpu);
4199 }
4200
    /// A caller-supplied gate id is stored verbatim on `ToolTestResult`.
    #[test]
    fn test_tool_test_result_gate_id() {
        let result = ToolTestResult {
            tool: "custom-tool".to_string(),
            passed: true,
            exit_code: 0,
            stdout: String::new(),
            stderr: String::new(),
            duration_ms: 100,
            gate_id: "F-CUSTOM-001".to_string(),
        };
        assert_eq!(result.gate_id, "F-CUSTOM-001");
    }
4214
    /// All `ExecutionResult` fields round-trip through struct construction.
    #[test]
    fn test_execution_result_fields() {
        let result = ExecutionResult {
            playbook_name: "my-playbook".to_string(),
            total_scenarios: 50,
            passed: 45,
            failed: 3,
            skipped: 2,
            duration_ms: 5000,
            gateway_failed: None,
            evidence: EvidenceCollector::new(),
        };
        assert_eq!(result.playbook_name, "my-playbook");
        assert_eq!(result.total_scenarios, 50);
        assert_eq!(result.passed, 45);
        assert_eq!(result.failed, 3);
        assert_eq!(result.skipped, 2);
        assert_eq!(result.duration_ms, 5000);
    }
4234
4235 #[test]
4236 fn test_failure_policy_copy() {
4237 let policy = FailurePolicy::CollectAll;
4238 let copied: FailurePolicy = policy;
4239 assert_eq!(copied, FailurePolicy::CollectAll);
4240 }
4241
4242 #[test]
4243 fn test_extract_output_text_with_trailing_content() {
4244 let output =
4245 "Prefix\nOutput:\nAnswer is 4\nMore answer text\nCompleted in 2.5s\nExtra stuff";
4246 let result = Executor::extract_output_text(output);
4247 assert_eq!(result, "Answer is 4 More answer text");
4248 }
4249
4250 #[test]
4251 fn test_extract_generated_text_mixed_content() {
4252 let output = "Line 1\n=== SEPARATOR ===\nLine 2\ntok/s: 50.0\nLine 3";
4253 let result = Executor::extract_generated_text(output);
4254 assert!(result.contains("Line 1"));
4255 assert!(result.contains("Line 2"));
4256 assert!(result.contains("Line 3"));
4257 assert!(!result.contains("==="));
4258 assert!(!result.contains("tok/s"));
4259 }
4260
4261 #[test]
4262 fn test_parse_tps_from_output_at_end() {
4263 let output = "All output finished tok/s: 99.9";
4264 let tps = Executor::parse_tps_from_output(output);
4265 assert!(tps.is_some());
4266 assert!((tps.unwrap() - 99.9).abs() < 0.01);
4267 }
4268
4269 #[test]
4270 fn test_parse_tps_from_output_multiline() {
4271 let output = "Line 1\nLine 2\ntok/s: 25.5\nLine 4";
4272 let tps = Executor::parse_tps_from_output(output);
4273 assert!(tps.is_some());
4274 assert!((tps.unwrap() - 25.5).abs() < f64::EPSILON);
4275 }
4276
4277 #[test]
4278 fn test_extract_output_text_output_at_end() {
4279 let output = "Header info\nOutput:\nFinal answer here";
4280 let result = Executor::extract_output_text(output);
4281 assert_eq!(result, "Final answer here");
4282 }
4283
    /// A gateway failure makes the whole run unsuccessful and preserves the
    /// gate label (here "G1") in the failure message.
    #[test]
    fn test_execution_result_with_gateway_failure() {
        let result = ExecutionResult {
            playbook_name: "test".to_string(),
            total_scenarios: 10,
            passed: 0,
            failed: 10,
            skipped: 0,
            duration_ms: 100,
            gateway_failed: Some("G1: Model failed to load".to_string()),
            evidence: EvidenceCollector::new(),
        };
        assert!(!result.is_success());
        assert!(result.gateway_failed.is_some());
        assert!(result.gateway_failed.as_ref().unwrap().contains("G1"));
    }
4300
    /// Exhaustive construction of `ExecutionConfig`: the literal deliberately
    /// avoids `..Default::default()`, so adding a field to the struct breaks
    /// this test and forces it to be updated.
    #[test]
    fn test_execution_config_all_fields() {
        let config = ExecutionConfig {
            failure_policy: FailurePolicy::CollectAll,
            default_timeout_ms: 30_000,
            max_workers: 2,
            dry_run: true,
            model_path: Some("/path/to/model.gguf".to_string()),
            no_gpu: true,
            run_conversion_tests: false,
            run_differential_tests: false,
            run_profile_ci: true,
            run_trace_payload: false,
            run_golden_rule_test: false,
            golden_reference_path: Some("/path/to/ref.json".to_string()),
            lock_file_path: None,
            check_integrity: false,
            warn_implicit_skips: false,
            run_hf_parity: false,
            hf_parity_corpus_path: None,
            hf_parity_model_family: None,
            output_dir: Some("test_output".to_string()),
            run_contract_tests: false,
            run_ollama_parity: false,
        };
        assert_eq!(config.failure_policy, FailurePolicy::CollectAll);
        assert!(config.dry_run);
        assert!(config.no_gpu);
        assert!(!config.run_conversion_tests);
        assert!(!config.run_differential_tests);
        assert!(config.run_profile_ci);
        assert!(!config.run_contract_tests);
    }
4334
    /// A failing `ToolTestResult` carries its exit code and both output streams.
    #[test]
    fn test_tool_test_result_fields_comprehensive() {
        let result = ToolTestResult {
            tool: "custom-test".to_string(),
            passed: false,
            exit_code: 127,
            stdout: "stdout content".to_string(),
            stderr: "error: command not found".to_string(),
            duration_ms: 150,
            gate_id: "F-CUSTOM-001".to_string(),
        };
        assert_eq!(result.tool, "custom-test");
        assert!(!result.passed);
        assert_eq!(result.exit_code, 127);
        assert!(!result.stdout.is_empty());
        assert!(!result.stderr.is_empty());
    }
4352
4353 #[test]
4354 fn test_golden_scenario_prompt_content() {
4355 let model_id = ModelId::new("org", "name");
4356 let scenario = Executor::golden_scenario(&model_id);
4357 assert!(scenario.prompt.contains("Golden Rule"));
4358 assert!(scenario.prompt.contains("convert"));
4359 assert!(scenario.prompt.contains("inference"));
4360 }
4361
4362 #[test]
4363 fn test_executor_with_custom_timeout_and_workers() {
4364 let config = ExecutionConfig {
4365 default_timeout_ms: 120_000,
4366 max_workers: 16,
4367 ..Default::default()
4368 };
4369 let executor = Executor::with_config(config);
4370 assert_eq!(executor.config().default_timeout_ms, 120_000);
4371 assert_eq!(executor.config().max_workers, 16);
4372 }
4373
    /// `pass_rate` is a percentage: 1 passed of 3 scenarios ≈ 33.33%.
    #[test]
    fn test_execution_result_pass_rate_partial() {
        let result = ExecutionResult {
            playbook_name: "test".to_string(),
            total_scenarios: 3,
            passed: 1,
            failed: 2,
            skipped: 0,
            duration_ms: 100,
            gateway_failed: None,
            evidence: EvidenceCollector::new(),
        };
        let rate = result.pass_rate();
        // Compare with a tolerance since 100/3 is not exactly representable.
        assert!((rate - 100.0 / 3.0).abs() < 0.01);
    }
4389
    /// A passing tool result converts to passing evidence whose output
    /// preserves the tool's stdout.
    #[test]
    fn test_tool_test_result_to_evidence_with_content() {
        let result = ToolTestResult {
            tool: "validate".to_string(),
            passed: true,
            exit_code: 0,
            stdout: "Model validated successfully".to_string(),
            stderr: String::new(),
            duration_ms: 200,
            gate_id: "F-VALIDATE-001".to_string(),
        };
        let model_id = ModelId::new("org", "model");
        let evidence = result.to_evidence(&model_id);
        assert!(evidence.outcome.is_pass());
        assert!(evidence.output.contains("validated"));
    }
4406
    /// A zero duration is representable (sub-millisecond tool runs).
    #[test]
    fn test_tool_test_result_with_zero_duration() {
        let result = ToolTestResult {
            tool: "fast-test".to_string(),
            passed: true,
            exit_code: 0,
            stdout: "OK".to_string(),
            stderr: String::new(),
            duration_ms: 0,
            gate_id: "F-FAST-001".to_string(),
        };
        assert_eq!(result.duration_ms, 0);
    }
4420
4421 #[test]
4422 fn test_extract_output_text_preserves_content() {
4423 let output = "Info\nOutput:\n First line\n Second line \n Third line\nCompleted in 1s";
4424 let result = Executor::extract_output_text(output);
4425 assert!(result.contains("First line"));
4426 assert!(result.contains("Second line"));
4427 assert!(result.contains("Third line"));
4428 }
4429
4430 use crate::command::MockCommandRunner;
4435
4436 #[test]
4437 fn test_executor_with_mock_runner_subprocess_execution() {
4438 let (_tmp, model_path) = create_test_model_file(Format::Gguf);
4439 let mock_runner = MockCommandRunner::new()
4440 .with_tps(42.0)
4441 .with_inference_response("The answer is 4.");
4442
4443 let config = ExecutionConfig {
4444 model_path: Some(model_path),
4445 ..Default::default()
4446 };
4447
4448 let executor = Executor::with_runner(config, Arc::new(mock_runner));
4449
4450 let scenario = QaScenario::new(
4451 ModelId::new("test", "model"),
4452 Modality::Run,
4453 Backend::Cpu,
4454 Format::Gguf,
4455 "What is 2+2?".to_string(),
4456 0,
4457 );
4458
4459 let (output, stderr, exit_code, tps, skipped) = executor.subprocess_execution(&scenario);
4460
4461 assert!(!skipped);
4462 assert!(output.contains("4") || output.is_empty()); assert!(stderr.is_none_or(|s| s.is_empty()));
4464 assert_eq!(exit_code, 0);
4465 let _ = tps;
4467 }
4468
    /// A mocked inference failure surfaces as exit code 1 with stderr present.
    #[test]
    fn test_executor_with_mock_runner_inference_failure() {
        let (_tmp, model_path) = create_test_model_file(Format::Gguf);
        let mock_runner = MockCommandRunner::new().with_inference_failure();

        let config = ExecutionConfig {
            model_path: Some(model_path),
            ..Default::default()
        };

        let executor = Executor::with_runner(config, Arc::new(mock_runner));

        let scenario = QaScenario::new(
            ModelId::new("test", "model"),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "What is 2+2?".to_string(),
            0,
        );

        let (_, stderr, exit_code, _, _) = executor.subprocess_execution(&scenario);

        assert_eq!(exit_code, 1);
        assert!(stderr.is_some());
    }
4495
    /// `execute_scenario` always produces evidence with a populated id and
    /// gate id, regardless of the verdict.
    #[test]
    fn test_executor_with_mock_runner_execute_scenario() {
        let mock_runner = MockCommandRunner::new()
            .with_tps(30.0)
            .with_inference_response("The answer is 4.");

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            ..Default::default()
        };

        let executor = Executor::with_runner(config, Arc::new(mock_runner));

        let scenario = QaScenario::new(
            ModelId::new("test", "model"),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "What is 2+2?".to_string(),
            0,
        );

        let evidence = executor.execute_scenario(&scenario);

        assert!(!evidence.id.is_empty());
        assert!(!evidence.gate_id.is_empty());
    }
4524
4525 #[test]
4526 fn test_executor_with_mock_runner_golden_rule_test() {
4527 let mock_runner = MockCommandRunner::new()
4528 .with_tps(25.0)
4529 .with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");
4530
4531 let config = ExecutionConfig {
4532 model_path: Some("/test/model.gguf".to_string()),
4533 run_golden_rule_test: true,
4534 run_conversion_tests: false, ..Default::default()
4536 };
4537
4538 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
4539
4540 let model_id = ModelId::new("test", "model");
4541 let (passed, failed) =
4542 executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);
4543
4544 assert_eq!(passed + failed, 1);
4547 }
4548
    /// A conversion failure fails the golden-rule test and still records
    /// evidence in the collector.
    #[test]
    fn test_executor_with_mock_runner_golden_rule_conversion_failure() {
        let mock_runner = MockCommandRunner::new()
            .with_convert_failure()
            .with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let model_id = ModelId::new("test", "model");
        let (passed, failed) =
            executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);

        assert_eq!(passed, 0);
        assert_eq!(failed, 1);

        // The failure must be captured as evidence, not silently dropped.
        assert!(!executor.collector.all().is_empty());
    }
4573
    /// An inference failure also fails the golden-rule test (0 passed, 1 failed).
    #[test]
    fn test_executor_with_mock_runner_golden_rule_inference_failure() {
        let mock_runner = MockCommandRunner::new().with_inference_failure();

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let model_id = ModelId::new("test", "model");
        let (passed, failed) =
            executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);

        assert_eq!(passed, 0);
        assert_eq!(failed, 1);
    }
4593
4594 #[test]
4595 fn test_tool_executor_with_mock_runner_inspect() {
4596 let mock_runner = MockCommandRunner::new();
4597 let executor = ToolExecutor::with_runner(
4598 "/test/model.gguf".to_string(),
4599 true,
4600 60_000,
4601 Arc::new(mock_runner),
4602 );
4603
4604 let result = executor.execute_inspect();
4605
4606 assert!(result.passed);
4607 assert_eq!(result.exit_code, 0);
4608 assert!(result.stdout.contains("GGUF"));
4609 }
4610
4611 #[test]
4612 fn test_tool_executor_with_mock_runner_validate() {
4613 let mock_runner = MockCommandRunner::new();
4614 let executor = ToolExecutor::with_runner(
4615 "/test/model.gguf".to_string(),
4616 false,
4617 60_000,
4618 Arc::new(mock_runner),
4619 );
4620
4621 let result = executor.execute_validate();
4622
4623 assert!(result.passed);
4624 assert_eq!(result.exit_code, 0);
4625 }
4626
4627 #[test]
4628 fn test_tool_executor_with_mock_runner_bench() {
4629 let mock_runner = MockCommandRunner::new().with_tps(50.0);
4630 let executor = ToolExecutor::with_runner(
4631 "/test/model.gguf".to_string(),
4632 true,
4633 60_000,
4634 Arc::new(mock_runner),
4635 );
4636
4637 let result = executor.execute_bench();
4638
4639 assert!(result.passed);
4640 assert_eq!(result.exit_code, 0);
4641 assert!(result.stdout.contains("50.0"));
4642 }
4643
4644 #[test]
4645 fn test_tool_executor_with_mock_runner_check() {
4646 let mock_runner = MockCommandRunner::new();
4647 let executor = ToolExecutor::with_runner(
4648 "/test/model.gguf".to_string(),
4649 false,
4650 60_000,
4651 Arc::new(mock_runner),
4652 );
4653
4654 let result = executor.execute_check();
4655
4656 assert!(result.passed);
4657 assert_eq!(result.exit_code, 0);
4658 }
4659
4660 #[test]
4661 fn test_tool_executor_with_mock_runner_trace() {
4662 let mock_runner = MockCommandRunner::new().with_tps(25.0);
4663 let executor = ToolExecutor::with_runner(
4664 "/test/model.gguf".to_string(),
4665 true,
4666 60_000,
4667 Arc::new(mock_runner),
4668 );
4669
4670 let result = executor.execute_trace("layer");
4671
4672 assert!(result.passed);
4673 assert_eq!(result.exit_code, 0);
4674 assert!(result.tool.contains("trace"));
4675 }
4676
4677 #[test]
4678 fn test_tool_executor_with_mock_runner_profile() {
4679 let mock_runner = MockCommandRunner::new().with_tps(35.0);
4680 let executor = ToolExecutor::with_runner(
4681 "/test/model.gguf".to_string(),
4682 false,
4683 60_000,
4684 Arc::new(mock_runner),
4685 );
4686
4687 let result = executor.execute_profile();
4688
4689 assert!(result.passed);
4690 assert_eq!(result.exit_code, 0);
4691 assert!(result.stdout.contains("throughput"));
4692 }
4693
4694 #[test]
4695 fn test_tool_executor_with_mock_runner_profile_ci() {
4696 let mock_runner = MockCommandRunner::new().with_tps(20.0);
4697 let executor = ToolExecutor::with_runner(
4698 "/test/model.gguf".to_string(),
4699 false,
4700 60_000,
4701 Arc::new(mock_runner),
4702 );
4703
4704 let result = executor.execute_profile_ci();
4705
4706 assert!(result.passed);
4708 assert!(result.stdout.contains("passed"));
4709 }
4710
4711 #[test]
4712 fn test_tool_executor_with_mock_runner_profile_ci_assertion_failure() {
4713 let mock_runner = MockCommandRunner::new().with_tps(5.0);
4715 let executor = ToolExecutor::with_runner(
4716 "/test/model.gguf".to_string(),
4717 false,
4718 60_000,
4719 Arc::new(mock_runner),
4720 );
4721
4722 let result = executor.execute_profile_ci_assertion_failure();
4723
4724 assert!(result.passed); assert!(result.stdout.contains("\"passed\":false"));
4728 }
4729
4730 #[test]
4731 fn test_tool_executor_with_mock_runner_profile_ci_p99() {
4732 let mock_runner = MockCommandRunner::new().with_tps(30.0);
4733 let executor = ToolExecutor::with_runner(
4734 "/test/model.gguf".to_string(),
4735 false,
4736 60_000,
4737 Arc::new(mock_runner),
4738 );
4739
4740 let result = executor.execute_profile_ci_p99();
4741
4742 assert!(result.passed);
4744 assert!(result.stdout.contains("latency"));
4745 }
4746
4747 #[test]
4748 fn test_tool_executor_with_runner_debug() {
4749 let mock_runner = MockCommandRunner::new();
4750 let executor = ToolExecutor::with_runner(
4751 "/test/model.gguf".to_string(),
4752 true,
4753 60_000,
4754 Arc::new(mock_runner),
4755 );
4756
4757 let debug_str = format!("{executor:?}");
4758 assert!(debug_str.contains("ToolExecutor"));
4759 assert!(debug_str.contains("model_path"));
4760 }
4761
4762 #[test]
4763 fn test_executor_with_runner_debug() {
4764 let mock_runner = MockCommandRunner::new();
4765 let config = ExecutionConfig::default();
4766 let executor = Executor::with_runner(config, Arc::new(mock_runner));
4767
4768 let debug_str = format!("{executor:?}");
4769 assert!(debug_str.contains("Executor"));
4770 assert!(debug_str.contains("config"));
4771 }
4772
    /// With `no_gpu` set the scenario still executes and exits cleanly.
    #[test]
    fn test_executor_subprocess_execution_no_gpu() {
        let mock_runner = MockCommandRunner::new();
        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            no_gpu: true,
            ..Default::default()
        };

        let executor = Executor::with_runner(config, Arc::new(mock_runner));

        let scenario = QaScenario::new(
            ModelId::new("test", "model"),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "Test prompt".to_string(),
            0,
        );

        let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario);
        assert_eq!(exit_code, 0);
    }
4796
    /// Full playbook execution in subprocess mode: the generated matrix has 3
    /// scenarios and at least one of them produced a verdict. All auxiliary
    /// test phases are disabled so only the scenario matrix runs.
    #[test]
    fn test_executor_execute_playbook_with_subprocess_mode() {
        let mock_runner = MockCommandRunner::new()
            .with_tps(25.0)
            .with_inference_response("The answer is 4.");

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            run_conversion_tests: false,
            run_differential_tests: false,
            run_golden_rule_test: false,
            run_trace_payload: false,
            run_profile_ci: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let yaml = r#"
name: test-subprocess
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");

        assert_eq!(result.total_scenarios, 3);
        assert!(result.passed > 0 || result.failed > 0);
    }
4835
    /// `build_result_from_output` derives the gate id from the tool name:
    /// "test-tool" becomes "F-TEST_TOOL-001" (uppercased, dash -> underscore).
    #[test]
    fn test_build_result_from_output() {
        let mock_runner = MockCommandRunner::new();
        let executor = ToolExecutor::with_runner(
            "/test/model.gguf".to_string(),
            false,
            60_000,
            Arc::new(mock_runner),
        );

        let output = crate::command::CommandOutput::success("test output");
        let start = std::time::Instant::now();
        let result = executor.build_result_from_output("test-tool", output, start);

        assert!(result.passed);
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.tool, "test-tool");
        assert_eq!(result.gate_id, "F-TEST_TOOL-001");
    }
4855
    /// A failing `CommandOutput` maps to a failed result with the exit code
    /// and stderr preserved.
    #[test]
    fn test_build_result_from_output_failure() {
        let mock_runner = MockCommandRunner::new();
        let executor = ToolExecutor::with_runner(
            "/test/model.gguf".to_string(),
            false,
            60_000,
            Arc::new(mock_runner),
        );

        let output = crate::command::CommandOutput::failure(1, "error message");
        let start = std::time::Instant::now();
        let result = executor.build_result_from_output("failed-tool", output, start);

        assert!(!result.passed);
        assert_eq!(result.exit_code, 1);
        assert_eq!(result.stderr, "error message");
    }
4874
    /// `execute_all` runs the full tool suite (at least 12 tools) and at
    /// least one of them passes under the mock runner.
    #[test]
    fn test_tool_executor_execute_all() {
        let mock_runner = MockCommandRunner::new().with_tps(30.0);
        let executor = ToolExecutor::with_runner(
            "/test/model.gguf".to_string(),
            true,
            60_000,
            Arc::new(mock_runner),
        );

        let results = executor.execute_all();

        assert!(results.len() >= 12);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(passed_count > 0);
    }
4895
4896 #[test]
4897 fn test_tool_executor_execute_all_with_serve_false() {
4898 let mock_runner = MockCommandRunner::new().with_tps(30.0);
4899 let executor = ToolExecutor::with_runner(
4900 "/test/model.gguf".to_string(),
4901 false,
4902 60_000,
4903 Arc::new(mock_runner),
4904 );
4905
4906 let results = executor.execute_all_with_serve(false);
4907
4908 assert!(results.len() >= 12);
4910 }
4911
    /// A crashing subprocess maps to a failed outcome gated on G3-STABLE.
    #[test]
    fn test_executor_execute_scenario_crash() {
        let mock_runner = MockCommandRunner::new().with_crash();

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            ..Default::default()
        };

        let executor = Executor::with_runner(config, Arc::new(mock_runner));

        let scenario = QaScenario::new(
            ModelId::new("test", "model"),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "What is 2+2?".to_string(),
            0,
        );

        let evidence = executor.execute_scenario(&scenario);

        assert!(evidence.outcome.is_fail());
        assert_eq!(evidence.gate_id, "G3-STABLE");
    }
4939
4940 #[test]
4941 fn test_executor_run_conversion_tests_success() {
4942 let mock_runner = MockCommandRunner::new();
4943 let config = ExecutionConfig {
4944 model_path: Some("/test/model.gguf".to_string()),
4945 run_conversion_tests: true,
4946 no_gpu: true,
4947 ..Default::default()
4948 };
4949
4950 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
4951 let model_id = ModelId::new("test", "model");
4952
4953 let (passed, failed) =
4954 executor.run_conversion_tests(std::path::Path::new("/test/model.gguf"), &model_id);
4955
4956 let _ = (passed, failed); }
4959
4960 #[test]
4961 fn test_executor_execute_scenario_with_stderr() {
4962 let mock_runner =
4963 MockCommandRunner::new().with_inference_response_and_stderr("Output: 4", "Warning");
4964
4965 let config = ExecutionConfig {
4966 model_path: Some("/test/model.gguf".to_string()),
4967 ..Default::default()
4968 };
4969
4970 let executor = Executor::with_runner(config, Arc::new(mock_runner));
4971
4972 let scenario = QaScenario::new(
4973 ModelId::new("test", "model"),
4974 Modality::Run,
4975 Backend::Cpu,
4976 Format::Gguf,
4977 "What is 2+2?".to_string(),
4978 0,
4979 );
4980
4981 let evidence = executor.execute_scenario(&scenario);
4982 assert!(evidence.stderr.is_some() || evidence.stderr.is_none());
4984 }
4985
    /// Playbook execution with conversion and golden-rule phases enabled still
    /// accounts for at least the 2 scenarios of the matrix.
    #[test]
    fn test_executor_execute_with_conversion_and_golden() {
        let mock_runner = MockCommandRunner::new()
            .with_tps(25.0)
            .with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            run_conversion_tests: true,
            run_golden_rule_test: true,
            no_gpu: true,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let yaml = r#"
name: test-full
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 2
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");

        assert!(result.total_scenarios >= 2);
    }
5019
    /// With consistent mock output across runs the golden-rule comparison passes.
    #[test]
    fn test_executor_golden_rule_output_differs() {
        let mock_runner = MockCommandRunner::new()
            .with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");

        let config = ExecutionConfig::default();
        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");

        let (passed, failed) =
            executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);

        assert_eq!(passed, 1);
        assert_eq!(failed, 0);
    }
5038
    /// The mock runner's configured tps value is parsed back out of the
    /// subprocess output.
    #[test]
    fn test_executor_subprocess_with_tps_parsing() {
        let mock_runner = MockCommandRunner::new().with_tps(42.5);

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            ..Default::default()
        };

        let executor = Executor::with_runner(config, Arc::new(mock_runner));

        let scenario = test_scenario();
        let (_, _, _, tps, _) = executor.subprocess_execution(&scenario);

        assert!(tps.is_some());
        assert!((tps.unwrap() - 42.5).abs() < f64::EPSILON);
    }
5058
    /// `to_evidence` preserves the gate id and the model identity it was
    /// converted with.
    #[test]
    fn test_tool_test_result_to_evidence_gate_id() {
        let result = ToolTestResult {
            tool: "special".to_string(),
            passed: true,
            exit_code: 0,
            stdout: "OK".to_string(),
            stderr: String::new(),
            duration_ms: 50,
            gate_id: "F-SPECIAL-TEST-001".to_string(),
        };

        let model_id = ModelId::new("org", "name");
        let evidence = result.to_evidence(&model_id);

        assert_eq!(evidence.gate_id, "F-SPECIAL-TEST-001");
        assert_eq!(evidence.scenario.model.org, "org");
        assert_eq!(evidence.scenario.model.name, "name");
    }
5078
    /// Evidence added to a collector remains visible through `ExecutionResult`.
    #[test]
    fn test_execution_result_evidence_collector() {
        let mut collector = EvidenceCollector::new();
        let evidence = Evidence::corroborated("F-TEST-001", test_scenario(), "Test output", 100);
        collector.add(evidence);

        let result = ExecutionResult {
            playbook_name: "test".to_string(),
            total_scenarios: 1,
            passed: 1,
            failed: 0,
            skipped: 0,
            duration_ms: 100,
            gateway_failed: None,
            evidence: collector,
        };

        assert_eq!(result.evidence.all().len(), 1);
    }
5098
5099 #[test]
5100 fn test_executor_execute_scenario_with_metrics() {
5101 let mock_runner = MockCommandRunner::new()
5102 .with_tps(75.5)
5103 .with_inference_response("The answer is 4.");
5104
5105 let config = ExecutionConfig {
5106 model_path: Some("/test/model.gguf".to_string()),
5107 ..Default::default()
5108 };
5109
5110 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5111 let scenario = test_scenario();
5112
5113 let evidence = executor.execute_scenario(&scenario);
5114
5115 let _ = evidence.metrics.duration_ms; }
5118
5119 #[test]
5120 fn test_extract_output_text_with_whitespace_lines() {
5121 let output = "Header\nOutput:\n \nActual content\n \nCompleted in 1s";
5124 let result = Executor::extract_output_text(output);
5125 assert!(result.contains("Actual content"));
5127 }
5128
5129 #[test]
5130 fn test_extract_output_text_only_header() {
5131 let output = "Only Header no Output marker";
5132 let result = Executor::extract_output_text(output);
5133 assert!(result.is_empty());
5134 }
5135
5136 #[test]
5137 fn test_parse_tps_from_output_multiple_colons() {
5138 let output = "Info: tok/s: 88.8 more info";
5139 let tps = Executor::parse_tps_from_output(output);
5140 assert!(tps.is_some());
5141 assert!((tps.unwrap() - 88.8).abs() < f64::EPSILON);
5142 }
5143
5144 #[test]
5145 fn test_tool_executor_trace_all_levels() {
5146 let mock_runner = MockCommandRunner::new();
5147 let executor = ToolExecutor::with_runner(
5148 "/test/model.gguf".to_string(),
5149 false,
5150 60_000,
5151 Arc::new(mock_runner),
5152 );
5153
5154 for level in &["none", "basic", "layer", "payload"] {
5155 let result = executor.execute_trace(level);
5156 assert!(result.passed);
5157 assert!(result.tool.contains("trace"));
5158 assert!(result.tool.contains(level));
5159 }
5160 }
5161
5162 #[test]
5163 fn test_execution_config_partial_override() {
5164 let config = ExecutionConfig {
5165 dry_run: true,
5166 max_workers: 1,
5167 ..Default::default()
5168 };
5169
5170 assert!(config.dry_run);
5171 assert_eq!(config.max_workers, 1);
5172 assert!(config.run_conversion_tests);
5174 assert!(config.run_golden_rule_test);
5175 }
5176
5177 #[test]
5178 fn test_executor_evidence_after_execute() {
5179 let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
5180
5181 let config = ExecutionConfig {
5182 model_path: Some("/test/model.gguf".to_string()),
5183 run_conversion_tests: false,
5184 run_golden_rule_test: false,
5185 ..Default::default()
5186 };
5187
5188 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
5189
5190 let yaml = r#"
5191name: evidence-test
5192version: "1.0.0"
5193model:
5194 hf_repo: "test/model"
5195 formats: [gguf]
5196test_matrix:
5197 modalities: [run]
5198 backends: [cpu]
5199 scenario_count: 3
5200"#;
5201 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
5202 let _ = executor.execute(&playbook).expect("Execution failed");
5203
5204 assert!(!executor.evidence().all().is_empty());
5206 }
5207
5208 #[test]
5209 fn test_tool_executor_gate_id_format() {
5210 let mock_runner = MockCommandRunner::new();
5211 let executor = ToolExecutor::with_runner(
5212 "/test/model.gguf".to_string(),
5213 false,
5214 60_000,
5215 Arc::new(mock_runner),
5216 );
5217
5218 let result = executor.execute_inspect();
5219 assert_eq!(result.gate_id, "F-INSPECT-001");
5220
5221 let result = executor.execute_validate();
5222 assert_eq!(result.gate_id, "F-VALIDATE-001");
5223
5224 let result = executor.execute_bench();
5225 assert_eq!(result.gate_id, "F-BENCH-001");
5226
5227 let result = executor.execute_check();
5228 assert_eq!(result.gate_id, "F-CHECK-001");
5229
5230 let result = executor.execute_profile();
5231 assert_eq!(result.gate_id, "F-PROFILE-001");
5232 }
5233
5234 #[test]
5235 fn test_tool_executor_profile_ci_feature_unavailable() {
5236 let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
5237 let executor = ToolExecutor::with_runner(
5238 "/test/model.gguf".to_string(),
5239 false,
5240 60_000,
5241 Arc::new(mock_runner),
5242 );
5243
5244 let result = executor.execute_profile_ci();
5245
5246 assert!(!result.passed);
5248 assert_eq!(result.exit_code, -2);
5249 assert!(result.stderr.contains("Feature not available"));
5250 assert_eq!(result.gate_id, "F-PROFILE-006");
5251 }
5252
5253 #[test]
5254 fn test_tool_executor_profile_ci_assertion_unavailable() {
5255 let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
5256 let executor = ToolExecutor::with_runner(
5257 "/test/model.gguf".to_string(),
5258 false,
5259 60_000,
5260 Arc::new(mock_runner),
5261 );
5262
5263 let result = executor.execute_profile_ci_assertion_failure();
5264
5265 assert!(!result.passed);
5267 assert_eq!(result.exit_code, -2);
5268 assert_eq!(result.gate_id, "F-PROFILE-007");
5269 }
5270
5271 #[test]
5272 fn test_tool_executor_profile_ci_p99_unavailable() {
5273 let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
5274 let executor = ToolExecutor::with_runner(
5275 "/test/model.gguf".to_string(),
5276 false,
5277 60_000,
5278 Arc::new(mock_runner),
5279 );
5280
5281 let result = executor.execute_profile_ci_p99();
5282
5283 assert!(!result.passed);
5285 assert_eq!(result.exit_code, -2);
5286 assert_eq!(result.gate_id, "F-PROFILE-008");
5287 }
5288
5289 #[test]
5290 fn test_tool_executor_inspect_failure() {
5291 let mock_runner = MockCommandRunner::new().with_inspect_failure();
5292 let executor = ToolExecutor::with_runner(
5293 "/test/model.gguf".to_string(),
5294 false,
5295 60_000,
5296 Arc::new(mock_runner),
5297 );
5298
5299 let result = executor.execute_inspect();
5300
5301 assert!(!result.passed);
5302 assert_eq!(result.exit_code, 1);
5303 }
5304
5305 #[test]
5306 fn test_tool_executor_validate_failure() {
5307 let mock_runner = MockCommandRunner::new().with_validate_failure();
5308 let executor = ToolExecutor::with_runner(
5309 "/test/model.gguf".to_string(),
5310 false,
5311 60_000,
5312 Arc::new(mock_runner),
5313 );
5314
5315 let result = executor.execute_validate();
5316
5317 assert!(!result.passed);
5318 assert_eq!(result.exit_code, 1);
5319 }
5320
5321 #[test]
5322 fn test_tool_executor_bench_failure() {
5323 let mock_runner = MockCommandRunner::new().with_bench_failure();
5324 let executor = ToolExecutor::with_runner(
5325 "/test/model.gguf".to_string(),
5326 false,
5327 60_000,
5328 Arc::new(mock_runner),
5329 );
5330
5331 let result = executor.execute_bench();
5332
5333 assert!(!result.passed);
5334 assert_eq!(result.exit_code, 1);
5335 }
5336
5337 #[test]
5338 fn test_tool_executor_check_failure() {
5339 let mock_runner = MockCommandRunner::new().with_check_failure();
5340 let executor = ToolExecutor::with_runner(
5341 "/test/model.gguf".to_string(),
5342 false,
5343 60_000,
5344 Arc::new(mock_runner),
5345 );
5346
5347 let result = executor.execute_check();
5348
5349 assert!(!result.passed);
5350 assert_eq!(result.exit_code, 1);
5351 }
5352
5353 #[test]
5354 fn test_tool_executor_profile_failure() {
5355 let mock_runner = MockCommandRunner::new().with_profile_failure();
5356 let executor = ToolExecutor::with_runner(
5357 "/test/model.gguf".to_string(),
5358 false,
5359 60_000,
5360 Arc::new(mock_runner),
5361 );
5362
5363 let result = executor.execute_profile();
5364
5365 assert!(!result.passed);
5366 assert_eq!(result.exit_code, 1);
5367 }
5368
5369 #[test]
5370 fn test_tool_executor_trace_failure() {
5371 let mock_runner = MockCommandRunner::new().with_inference_failure();
5372 let executor = ToolExecutor::with_runner(
5373 "/test/model.gguf".to_string(),
5374 false,
5375 60_000,
5376 Arc::new(mock_runner),
5377 );
5378
5379 let result = executor.execute_trace("layer");
5380
5381 assert!(!result.passed);
5382 assert_eq!(result.exit_code, 1);
5383 }
5384
5385 #[test]
5386 fn test_tool_executor_profile_ci_passes_with_metrics() {
5387 let mock_runner = MockCommandRunner::new().with_tps(100.0);
5389 let executor = ToolExecutor::with_runner(
5390 "/test/model.gguf".to_string(),
5391 false,
5392 60_000,
5393 Arc::new(mock_runner),
5394 );
5395
5396 let result = executor.execute_profile_ci();
5397
5398 assert!(result.passed);
5399 assert!(result.stdout.contains("throughput"));
5400 }
5401
5402 #[test]
5403 fn test_tool_executor_with_no_gpu_true() {
5404 let mock_runner = MockCommandRunner::new();
5405 let executor = ToolExecutor::with_runner(
5406 "/test/model.gguf".to_string(),
5407 true, 30_000,
5409 Arc::new(mock_runner),
5410 );
5411
5412 let debug_str = format!("{executor:?}");
5414 assert!(debug_str.contains("no_gpu: true"));
5415 }
5416
5417 #[test]
5418 fn test_tool_executor_execute_trace_levels() {
5419 let mock_runner = MockCommandRunner::new();
5420 let executor = ToolExecutor::with_runner(
5421 "/test/model.gguf".to_string(),
5422 false,
5423 60_000,
5424 Arc::new(mock_runner),
5425 );
5426
5427 let result_layer = executor.execute_trace("layer");
5428 assert!(result_layer.tool.contains("trace-layer"));
5429
5430 let result_op = executor.execute_trace("op");
5431 assert!(result_op.tool.contains("trace-op"));
5432
5433 let result_tensor = executor.execute_trace("tensor");
5434 assert!(result_tensor.tool.contains("trace-tensor"));
5435 }
5436
5437 #[test]
5438 fn test_resolve_model_path_gguf() {
5439 let temp_dir = tempfile::tempdir().unwrap();
5440 let gguf_dir = temp_dir.path().join("gguf");
5441 std::fs::create_dir_all(&gguf_dir).unwrap();
5442 std::fs::write(gguf_dir.join("model.gguf"), b"fake").unwrap();
5443
5444 let config = ExecutionConfig {
5445 model_path: Some(temp_dir.path().to_string_lossy().to_string()),
5446 ..Default::default()
5447 };
5448 let executor = Executor::with_config(config);
5449
5450 let scenario = QaScenario::new(
5451 ModelId::new("test", "model"),
5452 Modality::Run,
5453 Backend::Cpu,
5454 Format::Gguf,
5455 "test".to_string(),
5456 0,
5457 );
5458
5459 let path = executor.resolve_model_path(&scenario);
5460 assert!(path.unwrap().contains("gguf"));
5461 }
5462
5463 #[test]
5464 fn test_resolve_model_path_apr() {
5465 let temp_dir = tempfile::tempdir().unwrap();
5466 let apr_dir = temp_dir.path().join("apr");
5467 std::fs::create_dir_all(&apr_dir).unwrap();
5468 std::fs::write(apr_dir.join("model.apr"), b"fake").unwrap();
5469
5470 let config = ExecutionConfig {
5471 model_path: Some(temp_dir.path().to_string_lossy().to_string()),
5472 ..Default::default()
5473 };
5474 let executor = Executor::with_config(config);
5475
5476 let scenario = QaScenario::new(
5477 ModelId::new("test", "model"),
5478 Modality::Run,
5479 Backend::Cpu,
5480 Format::Apr,
5481 "test".to_string(),
5482 0,
5483 );
5484
5485 let path = executor.resolve_model_path(&scenario);
5486 assert!(path.unwrap().contains("apr"));
5487 }
5488
5489 #[test]
5490 fn test_resolve_model_path_safetensors() {
5491 let temp_dir = tempfile::tempdir().unwrap();
5492 let st_dir = temp_dir.path().join("safetensors");
5493 std::fs::create_dir_all(&st_dir).unwrap();
5494 std::fs::write(st_dir.join("model.safetensors"), b"fake").unwrap();
5495
5496 let config = ExecutionConfig {
5497 model_path: Some(temp_dir.path().to_string_lossy().to_string()),
5498 ..Default::default()
5499 };
5500 let executor = Executor::with_config(config);
5501
5502 let scenario = QaScenario::new(
5503 ModelId::new("test", "model"),
5504 Modality::Run,
5505 Backend::Cpu,
5506 Format::SafeTensors,
5507 "test".to_string(),
5508 0,
5509 );
5510
5511 let path = executor.resolve_model_path(&scenario);
5512 assert!(path.unwrap().contains("safetensors"));
5513 }
5514
5515 #[test]
5516 fn test_resolve_model_path_no_cache() {
5517 let config = ExecutionConfig {
5519 model_path: None,
5520 ..Default::default()
5521 };
5522 let executor = Executor::with_config(config);
5523
5524 let scenario = QaScenario::new(
5525 ModelId::new("test", "model"),
5526 Modality::Run,
5527 Backend::Cpu,
5528 Format::Gguf,
5529 "test".to_string(),
5530 0,
5531 );
5532
5533 let path = executor.resolve_model_path(&scenario);
5534 assert!(path.is_none());
5536 }
5537
5538 #[test]
5539 fn test_executor_execute_dry_run() {
5540 let mock_runner = MockCommandRunner::new();
5541 let config = ExecutionConfig {
5542 dry_run: true,
5543 ..Default::default()
5544 };
5545 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
5546
5547 let yaml = r#"
5548name: dry-run-test
5549version: "1.0.0"
5550model:
5551 hf_repo: "test/model"
5552 formats: [gguf]
5553test_matrix:
5554 modalities: [run]
5555 backends: [cpu]
5556 scenario_count: 3
5557"#;
5558 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
5559 let result = executor.execute(&playbook).expect("Execution failed");
5560
5561 assert_eq!(result.skipped, 3);
5563 assert!(result.passed >= 1);
5565 }
5566
5567 #[test]
5568 fn test_executor_execute_with_stop_on_first_policy() {
5569 let mock_runner = MockCommandRunner::new().with_inference_failure();
5570
5571 let config = ExecutionConfig {
5572 model_path: Some("/test/model.gguf".to_string()),
5573 failure_policy: FailurePolicy::StopOnFirst,
5574 run_conversion_tests: false,
5575 run_golden_rule_test: false,
5576 ..Default::default()
5577 };
5578
5579 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
5580
5581 let yaml = r#"
5582name: stop-on-first-test
5583version: "1.0.0"
5584model:
5585 hf_repo: "test/model"
5586 formats: [gguf]
5587test_matrix:
5588 modalities: [run]
5589 backends: [cpu]
5590 scenario_count: 5
5591"#;
5592 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
5593 let result = executor.execute(&playbook).expect("Execution failed");
5594
5595 assert_eq!(result.failed, 1);
5597 }
5598
5599 #[test]
5600 fn test_executor_execute_with_collect_all_policy() {
5601 let mock_runner = MockCommandRunner::new().with_inference_failure();
5602
5603 let config = ExecutionConfig {
5604 model_path: Some("/test/model.gguf".to_string()),
5605 failure_policy: FailurePolicy::CollectAll,
5606 run_conversion_tests: false,
5607 run_golden_rule_test: false,
5608 run_contract_tests: false,
5609 ..Default::default()
5610 };
5611
5612 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
5613
5614 let yaml = r#"
5615name: collect-all-test
5616version: "1.0.0"
5617model:
5618 hf_repo: "test/model"
5619 formats: [gguf]
5620test_matrix:
5621 modalities: [run]
5622 backends: [cpu]
5623 scenario_count: 3
5624"#;
5625 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
5626 let result = executor.execute(&playbook).expect("Execution failed");
5627
5628 assert_eq!(result.failed, 3);
5630 }
5631
5632 #[test]
5633 fn test_executor_default_impl() {
5634 let executor = Executor::default();
5635 assert_eq!(executor.config().max_workers, 4);
5636 assert!(!executor.config().dry_run);
5637 }
5638
5639 #[test]
5640 fn test_parse_tps_from_output_with_tps() {
5641 let output = "Info: Loading model\ntok/s: 42.5\nDone";
5642 let tps = Executor::parse_tps_from_output(output);
5643 assert!(tps.is_some());
5644 assert!((tps.unwrap() - 42.5).abs() < 0.01);
5645 }
5646
5647 #[test]
5648 fn test_parse_tps_from_output_no_tps() {
5649 let output = "Some random output without tok/s";
5650 let tps = Executor::parse_tps_from_output(output);
5651 assert!(tps.is_none());
5652 }
5653
5654 #[test]
5655 fn test_extract_generated_text() {
5656 let output = "=== Model Info ===\nThis is generated text\ntok/s: 30.0";
5657 let text = Executor::extract_generated_text(output);
5658 assert!(text.contains("This is generated text"));
5659 assert!(!text.contains("tok/s"));
5660 assert!(!text.contains("==="));
5661 }
5662
5663 #[test]
5664 fn test_extract_output_text_multiline_detailed() {
5665 let output = "Some prefix\nOutput:\nLine 1\nLine 2\nLine 3\nCompleted in 1s";
5666 let text = Executor::extract_output_text(output);
5667 assert!(text.contains("Line 1"));
5668 assert!(text.contains("Line 2"));
5669 assert!(text.contains("Line 3"));
5670 }
5671
5672 #[test]
5673 fn test_extract_output_text_with_empty_lines() {
5674 let output = "Output:\nActual output here\n\nCompleted";
5675 let text = Executor::extract_output_text(output);
5676 assert!(text.contains("Actual output here"));
5677 }
5678
5679 #[test]
5680 fn test_failure_policy_default_is_stop_on_p0() {
5681 let policy = FailurePolicy::default();
5682 assert_eq!(policy, FailurePolicy::StopOnP0);
5683 }
5684
5685 #[test]
5686 fn test_execution_config_debug_display() {
5687 let config = ExecutionConfig::default();
5688 let debug_str = format!("{config:?}");
5689 assert!(debug_str.contains("ExecutionConfig"));
5690 assert!(debug_str.contains("failure_policy"));
5691 }
5692
5693 #[test]
5694 fn test_tool_test_result_all_fields() {
5695 let result = ToolTestResult {
5696 tool: "test-tool".to_string(),
5697 passed: true,
5698 exit_code: 0,
5699 stdout: "stdout".to_string(),
5700 stderr: String::new(),
5701 duration_ms: 100,
5702 gate_id: "F-TEST-001".to_string(),
5703 };
5704 assert_eq!(result.tool, "test-tool");
5705 assert!(result.passed);
5706 assert_eq!(result.gate_id, "F-TEST-001");
5707 }
5708
5709 #[test]
5710 fn test_executor_evidence_accessor() {
5711 let executor = Executor::new();
5712 let evidence = executor.evidence();
5713 assert_eq!(evidence.total(), 0);
5714 }
5715
5716 #[test]
5717 fn test_execution_result_is_success_false_due_to_failed() {
5718 let result = ExecutionResult {
5719 playbook_name: "test".to_string(),
5720 total_scenarios: 10,
5721 passed: 9,
5722 failed: 1,
5723 skipped: 0,
5724 duration_ms: 100,
5725 gateway_failed: None,
5726 evidence: EvidenceCollector::new(),
5727 };
5728 assert!(!result.is_success());
5729 }
5730
5731 #[test]
5732 fn test_execution_result_is_success_when_all_pass() {
5733 let result = ExecutionResult {
5734 playbook_name: "test".to_string(),
5735 total_scenarios: 10,
5736 passed: 10,
5737 failed: 0,
5738 skipped: 0,
5739 duration_ms: 100,
5740 gateway_failed: None,
5741 evidence: EvidenceCollector::new(),
5742 };
5743 assert!(result.is_success());
5744 }
5745
5746 #[test]
5747 fn test_tool_test_result_to_evidence_when_failed() {
5748 let result = ToolTestResult {
5749 tool: "validate".to_string(),
5750 passed: false,
5751 exit_code: 1,
5752 stdout: String::new(),
5753 stderr: "Validation failed".to_string(),
5754 duration_ms: 200,
5755 gate_id: "F-VALIDATE-001".to_string(),
5756 };
5757 let model_id = ModelId::new("org", "model");
5758 let evidence = result.to_evidence(&model_id);
5759 assert!(!evidence.outcome.is_pass());
5760 assert!(evidence.reason.contains("Validation failed") || evidence.output.is_empty());
5761 }
5762
5763 #[test]
5764 fn test_executor_with_mock_runner_trace_failure_case() {
5765 let mock_runner = MockCommandRunner::new().with_inference_failure();
5766
5767 let config = ExecutionConfig {
5768 model_path: Some("/test/model.gguf".to_string()),
5769 ..Default::default()
5770 };
5771
5772 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5773
5774 let scenario = QaScenario::new(
5775 ModelId::new("test", "model"),
5776 Modality::Run,
5777 Backend::Cpu,
5778 Format::Gguf,
5779 "What is 2+2?".to_string(),
5780 0,
5781 );
5782
5783 let (_, stderr, exit_code, _, _) = executor.subprocess_execution(&scenario);
5784
5785 assert_eq!(exit_code, 1);
5787 assert!(stderr.is_some());
5788 }
5789
5790 #[test]
5791 fn test_resolve_model_path_apr_format() {
5792 let tmp = tempfile::tempdir().unwrap();
5793 let apr_dir = tmp.path().join("apr");
5794 std::fs::create_dir_all(&apr_dir).unwrap();
5795 std::fs::write(apr_dir.join("model.apr"), b"fake apr").unwrap();
5796
5797 let config = ExecutionConfig {
5798 model_path: Some(tmp.path().to_string_lossy().to_string()),
5799 ..Default::default()
5800 };
5801 let executor = Executor::with_config(config);
5802 let scenario = QaScenario::new(
5803 ModelId::new("test", "model"),
5804 Modality::Run,
5805 Backend::Cpu,
5806 Format::Apr,
5807 "test".to_string(),
5808 0,
5809 );
5810 let path = executor.resolve_model_path(&scenario);
5811 assert!(path.is_some());
5812 assert!(path.unwrap().contains("apr"));
5813 }
5814
5815 #[test]
5816 fn test_resolve_model_path_safetensors_format() {
5817 let tmp = tempfile::tempdir().unwrap();
5818 let st_dir = tmp.path().join("safetensors");
5819 std::fs::create_dir_all(&st_dir).unwrap();
5820 std::fs::write(st_dir.join("model.safetensors"), b"fake st").unwrap();
5821
5822 let config = ExecutionConfig {
5823 model_path: Some(tmp.path().to_string_lossy().to_string()),
5824 ..Default::default()
5825 };
5826 let executor = Executor::with_config(config);
5827 let scenario = QaScenario::new(
5828 ModelId::new("test", "model"),
5829 Modality::Run,
5830 Backend::Cpu,
5831 Format::SafeTensors,
5832 "test".to_string(),
5833 0,
5834 );
5835 let path = executor.resolve_model_path(&scenario);
5836 assert!(path.is_some());
5837 assert!(path.unwrap().contains("safetensors"));
5838 }
5839
5840 #[test]
5841 fn test_resolve_model_path_gguf_format() {
5842 let tmp = tempfile::tempdir().unwrap();
5843 let gguf_dir = tmp.path().join("gguf");
5844 std::fs::create_dir_all(&gguf_dir).unwrap();
5845 std::fs::write(gguf_dir.join("model.gguf"), b"fake gguf").unwrap();
5846
5847 let config = ExecutionConfig {
5848 model_path: Some(tmp.path().to_string_lossy().to_string()),
5849 ..Default::default()
5850 };
5851 let executor = Executor::with_config(config);
5852 let scenario = QaScenario::new(
5853 ModelId::new("test", "model"),
5854 Modality::Run,
5855 Backend::Cpu,
5856 Format::Gguf,
5857 "test".to_string(),
5858 0,
5859 );
5860 let path = executor.resolve_model_path(&scenario);
5861 assert!(path.is_some());
5862 assert!(path.unwrap().contains("gguf"));
5863 }
5864
5865 #[test]
5866 fn test_resolve_model_path_no_model_path() {
5867 let config = ExecutionConfig {
5869 model_path: None,
5870 ..Default::default()
5871 };
5872 let executor = Executor::with_config(config);
5873 let scenario = QaScenario::new(
5874 ModelId::new("test", "model"),
5875 Modality::Run,
5876 Backend::Cpu,
5877 Format::Gguf,
5878 "test".to_string(),
5879 0,
5880 );
5881 let path = executor.resolve_model_path(&scenario);
5882 assert!(path.is_none());
5884 }
5885
5886 #[test]
5887 fn test_executor_subprocess_execution_formats() {
5888 let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
5889
5890 let config = ExecutionConfig {
5891 model_path: Some("/test/cache".to_string()),
5892 ..Default::default()
5893 };
5894
5895 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5896
5897 let scenario_apr = QaScenario::new(
5899 ModelId::new("test", "model"),
5900 Modality::Run,
5901 Backend::Cpu,
5902 Format::Apr,
5903 "What is 2+2?".to_string(),
5904 0,
5905 );
5906 let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario_apr);
5907 assert_eq!(exit_code, 0);
5908 }
5909
5910 #[test]
5911 fn test_executor_subprocess_execution_safetensors() {
5912 let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
5913
5914 let config = ExecutionConfig {
5915 model_path: Some("/test/cache".to_string()),
5916 ..Default::default()
5917 };
5918
5919 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5920
5921 let scenario = QaScenario::new(
5922 ModelId::new("test", "model"),
5923 Modality::Run,
5924 Backend::Cpu,
5925 Format::SafeTensors,
5926 "What is 2+2?".to_string(),
5927 0,
5928 );
5929 let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario);
5930 assert_eq!(exit_code, 0);
5931 }
5932
5933 #[test]
5934 fn test_execute_scenario_with_exit_code_failure() {
5935 let mock_runner = MockCommandRunner::new().with_exit_code(5);
5936
5937 let config = ExecutionConfig {
5938 model_path: Some("/test/model.gguf".to_string()),
5939 ..Default::default()
5940 };
5941
5942 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5943
5944 let scenario = QaScenario::new(
5945 ModelId::new("test", "model"),
5946 Modality::Run,
5947 Backend::Cpu,
5948 Format::Gguf,
5949 "What is 2+2?".to_string(),
5950 0,
5951 );
5952
5953 let evidence = executor.execute_scenario(&scenario);
5954
5955 assert!(evidence.outcome.is_fail());
5957 assert!(evidence.exit_code.is_some());
5958 assert_eq!(evidence.exit_code.unwrap(), 5);
5959 }
5960
5961 #[test]
5962 fn test_execute_scenario_with_stderr_corroborated() {
5963 let mock_runner = MockCommandRunner::new()
5964 .with_inference_response_and_stderr("The answer is 4.", "Some warning");
5965
5966 let config = ExecutionConfig {
5967 model_path: Some("/test/model.gguf".to_string()),
5968 ..Default::default()
5969 };
5970
5971 let executor = Executor::with_runner(config, Arc::new(mock_runner));
5972
5973 let scenario = QaScenario::new(
5974 ModelId::new("test", "model"),
5975 Modality::Run,
5976 Backend::Cpu,
5977 Format::Gguf,
5978 "What is 2+2?".to_string(),
5979 0,
5980 );
5981
5982 let evidence = executor.execute_scenario(&scenario);
5983 assert!(evidence.outcome.is_pass());
5985 }
5986
5987 #[test]
5988 fn test_executor_run_conversion_tests_no_gpu() {
5989 let mock_runner = MockCommandRunner::new();
5990 let config = ExecutionConfig {
5991 model_path: Some("/test/model.gguf".to_string()),
5992 run_conversion_tests: true,
5993 no_gpu: true,
5994 ..Default::default()
5995 };
5996
5997 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
5998 let model_id = ModelId::new("test", "model");
5999
6000 let (passed, failed) =
6002 executor.run_conversion_tests(std::path::Path::new("/test/model.gguf"), &model_id);
6003
6004 let _ = (passed, failed);
6006 }
6007
6008 #[test]
6009 fn test_executor_execute_with_stop_on_first_failure() {
6010 let mock_runner = MockCommandRunner::new().with_inference_failure();
6011
6012 let config = ExecutionConfig {
6013 model_path: Some("/test/model.gguf".to_string()),
6014 failure_policy: FailurePolicy::StopOnFirst,
6015 run_conversion_tests: false,
6016 run_golden_rule_test: false,
6017 ..Default::default()
6018 };
6019
6020 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6021
6022 let yaml = r#"
6023name: stop-on-first-test
6024version: "1.0.0"
6025model:
6026 hf_repo: "test/model"
6027 formats: [gguf]
6028test_matrix:
6029 modalities: [run]
6030 backends: [cpu]
6031 scenario_count: 5
6032"#;
6033 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6034 let result = executor.execute(&playbook).expect("Execution failed");
6035
6036 assert!(result.failed >= 1);
6038 let executed = result.passed + result.failed;
6040 assert!(executed <= result.total_scenarios);
6041 }
6042
6043 #[test]
6044 fn test_executor_execute_with_collect_all_failures() {
6045 let mock_runner = MockCommandRunner::new().with_inference_failure();
6046
6047 let config = ExecutionConfig {
6048 model_path: Some("/test/model.gguf".to_string()),
6049 failure_policy: FailurePolicy::CollectAll,
6050 run_conversion_tests: false,
6051 run_golden_rule_test: false,
6052 run_contract_tests: false,
6053 ..Default::default()
6054 };
6055
6056 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6057
6058 let yaml = r#"
6059name: collect-all-test
6060version: "1.0.0"
6061model:
6062 hf_repo: "test/model"
6063 formats: [gguf]
6064test_matrix:
6065 modalities: [run]
6066 backends: [cpu]
6067 scenario_count: 3
6068"#;
6069 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6070 let result = executor.execute(&playbook).expect("Execution failed");
6071
6072 assert_eq!(result.failed, 3);
6074 assert_eq!(result.total_scenarios, 3);
6076 }
6077
6078 #[test]
6083 fn test_executor_stop_on_p0_with_p0_gate() {
6084 let mock_runner = MockCommandRunner::new()
6086 .with_inference_failure()
6087 .with_exit_code(1);
6088
6089 let config = ExecutionConfig {
6090 model_path: Some("/test/model.gguf".to_string()),
6091 failure_policy: FailurePolicy::StopOnP0,
6092 run_conversion_tests: false,
6093 run_golden_rule_test: false,
6094 ..Default::default()
6095 };
6096
6097 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6098
6099 let yaml = r#"
6100name: p0-test
6101version: "1.0.0"
6102model:
6103 hf_repo: "test/model"
6104 formats: [gguf]
6105test_matrix:
6106 modalities: [run]
6107 backends: [cpu]
6108 scenario_count: 5
6109"#;
6110 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6111 let result = executor.execute(&playbook).expect("Execution failed");
6112
6113 assert!(result.failed >= 1);
6115 }
6116
6117 #[test]
6122 fn test_executor_run_conversion_tests_default_config() {
6123 let mock_runner = MockCommandRunner::new();
6124 let config = ExecutionConfig {
6125 model_path: Some("/test/model.gguf".to_string()),
6126 run_conversion_tests: true,
6127 run_golden_rule_test: false,
6128 no_gpu: false, ..Default::default()
6130 };
6131
6132 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6133
6134 let yaml = r#"
6135name: conv-default-test
6136version: "1.0.0"
6137model:
6138 hf_repo: "test/model"
6139 formats: [gguf]
6140test_matrix:
6141 modalities: [run]
6142 backends: [cpu]
6143 scenario_count: 1
6144"#;
6145 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6146 let result = executor.execute(&playbook).expect("Execution failed");
6147 assert!(result.total_scenarios >= 1);
6149 }
6150
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_executor_golden_rule_converted_inference_fails() {
        use crate::command::CommandOutput;

        // Runner that answers successfully for the source model but fails
        // to run any converted (.apr) model, so the golden-rule gate —
        // which compares source vs. converted inference — must fail.
        struct ConvertedFailRunner;
        impl CommandRunner for ConvertedFailRunner {
            // Fails only when the model path looks like a converted model.
            fn run_inference(
                &self,
                model_path: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _no_gpu: bool,
                _extra_args: &[&str],
            ) -> CommandOutput {
                if model_path.to_string_lossy().contains(".apr") {
                    CommandOutput {
                        stdout: String::new(),
                        stderr: "Failed to load converted model".to_string(),
                        exit_code: 1,
                        success: false,
                    }
                } else {
                    CommandOutput {
                        stdout: "Output:\nThe answer is 4.\nCompleted in 100ms".to_string(),
                        stderr: String::new(),
                        exit_code: 0,
                        success: true,
                    }
                }
            }

            // Conversion itself succeeds; only inference on the converted
            // artifact is broken.
            fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
                CommandOutput {
                    stdout: "Conversion complete".to_string(),
                    stderr: String::new(),
                    exit_code: 0,
                    success: true,
                }
            }

            // The remaining trait methods are inert stubs that return
            // canned success payloads so unrelated gates pass.
            fn inspect_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn bench_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn check_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_ci(
                &self,
                _path: &Path,
                _min_throughput: Option<f64>,
                _max_p99: Option<f64>,
                _warmup: u32,
                _measure: u32,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn compare_inference(
                &self,
                _model_a: &Path,
                _model_b: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _tolerance: f64,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_flamegraph(
                &self,
                _model_path: &Path,
                _output_path: &Path,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_focus(
                &self,
                _model_path: &Path,
                _focus: &str,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
            }
            fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
                CommandOutput::success("Path: /mock/model.safetensors")
            }
            fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
                CommandOutput::success(
                    r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
                )
            }
            fn run_ollama_inference(
                &self,
                _model_tag: &str,
                _prompt: &str,
                _temperature: f64,
            ) -> CommandOutput {
                CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
            }
            fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
                CommandOutput::success("pulling manifest... done")
            }
            fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
                CommandOutput::success("creating model... done")
            }
            fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
                CommandOutput::success(r#"{"status":"listening"}"#)
            }
            fn http_get(&self, _: &str) -> CommandOutput {
                CommandOutput::success(r#"{"models":[]}"#)
            }
            fn profile_memory(&self, _: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
            }
            fn run_chat(&self, _model_path: &Path, _prompt: &str, _no_gpu: bool, _extra_args: &[&str]) -> CommandOutput {
                CommandOutput::success("Chat output")
            }
            fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
                CommandOutput::success("{}")
            }
            fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
                CommandOutput::success("12345")
            }
        }

        // Golden-rule gate enabled; conversion tests off so only the
        // broken converted-inference path is exercised.
        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: true,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(ConvertedFailRunner));

        let yaml = r#"
name: golden-conv-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        // The converted model's inference failure must surface as at
        // least one failed gate.
        assert!(result.failed >= 1);
    }
6329
    // Golden-rule check: converting GGUF -> APR must not change inference
    // output. This runner deliberately returns a different answer for the
    // .apr artifact, so the executor must report at least one failure.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_executor_golden_rule_output_differs_with_data() {
        use crate::command::CommandOutput;

        struct DiffOutputRunner;
        impl CommandRunner for DiffOutputRunner {
            // Answers "5" for any .apr path and "4" otherwise, forcing a
            // golden-rule mismatch between source and converted models.
            fn run_inference(
                &self,
                model_path: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _no_gpu: bool,
                _extra_args: &[&str],
            ) -> CommandOutput {
                if model_path.to_string_lossy().contains(".apr") {
                    CommandOutput {
                        stdout: "Output:\nThe answer is 5.\nCompleted in 100ms".to_string(),
                        stderr: String::new(),
                        exit_code: 0,
                        success: true,
                    }
                } else {
                    CommandOutput {
                        stdout: "Output:\nThe answer is 4.\nCompleted in 100ms".to_string(),
                        stderr: String::new(),
                        exit_code: 0,
                        success: true,
                    }
                }
            }

            // Conversion "succeeds" so the pipeline reaches the output diff.
            fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
                CommandOutput {
                    stdout: "ok".to_string(),
                    stderr: String::new(),
                    exit_code: 0,
                    success: true,
                }
            }

            // Remaining trait methods are inert stubs; this test only
            // exercises run_inference and convert_model.
            fn inspect_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn bench_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn check_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_ci(
                &self,
                _path: &Path,
                _min_throughput: Option<f64>,
                _max_p99: Option<f64>,
                _warmup: u32,
                _measure: u32,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn compare_inference(
                &self,
                _model_a: &Path,
                _model_b: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _tolerance: f64,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_flamegraph(
                &self,
                _model_path: &Path,
                _output_path: &Path,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_focus(
                &self,
                _model_path: &Path,
                _focus: &str,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
            }
            fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
                CommandOutput::success("Path: /mock/model.safetensors")
            }
            fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
                CommandOutput::success(
                    r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
                )
            }
            fn run_ollama_inference(
                &self,
                _model_tag: &str,
                _prompt: &str,
                _temperature: f64,
            ) -> CommandOutput {
                CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
            }
            fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
                CommandOutput::success("pulling manifest... done")
            }
            fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
                CommandOutput::success("creating model... done")
            }
            fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
                CommandOutput::success(r#"{"status":"listening"}"#)
            }
            fn http_get(&self, _: &str) -> CommandOutput {
                CommandOutput::success(r#"{"models":[]}"#)
            }
            fn profile_memory(&self, _: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
            }
            fn run_chat(&self, _model_path: &Path, _prompt: &str, _no_gpu: bool, _extra_args: &[&str]) -> CommandOutput {
                CommandOutput::success("Chat output")
            }
            fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
                CommandOutput::success("{}")
            }
            fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
                CommandOutput::success("12345")
            }
        }

        // Golden-rule test enabled; conversion tests disabled so the only
        // failure source is the output mismatch.
        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: true,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(DiffOutputRunner));

        let yaml = r#"
name: golden-diff
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        // The diverging .apr output must register as a failure.
        assert!(result.failed >= 1);
    }
6505
    // Verifies that when a plain inference run fails, the executor re-runs
    // with --trace and captures the trace output (stdout and stderr) into
    // the evidence record.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_executor_subprocess_trace_with_stdout() {
        use crate::command::CommandOutput;

        struct TraceStdoutRunner;
        impl CommandRunner for TraceStdoutRunner {
            // Fails the normal run (exit 1); succeeds with trace data when
            // invoked with the --trace flag.
            fn run_inference(
                &self,
                _model_path: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _no_gpu: bool,
                extra_args: &[&str],
            ) -> CommandOutput {
                if extra_args.contains(&"--trace") {
                    CommandOutput {
                        stdout: "trace data: layer 0 attention".to_string(),
                        stderr: "TRACE: model loading details".to_string(),
                        exit_code: 0,
                        success: true,
                    }
                } else {
                    CommandOutput {
                        stdout: String::new(),
                        stderr: "inference error occurred".to_string(),
                        exit_code: 1,
                        success: false,
                    }
                }
            }

            // Remaining trait methods are inert stubs; this test only
            // exercises run_inference.
            fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn inspect_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn bench_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn check_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_ci(
                &self,
                _path: &Path,
                _min_throughput: Option<f64>,
                _max_p99: Option<f64>,
                _warmup: u32,
                _measure: u32,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn compare_inference(
                &self,
                _model_a: &Path,
                _model_b: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _tolerance: f64,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_flamegraph(
                &self,
                _model_path: &Path,
                _output_path: &Path,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_focus(
                &self,
                _model_path: &Path,
                _focus: &str,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
            }
            fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
                CommandOutput::success("Path: /mock/model.safetensors")
            }
            fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
                CommandOutput::success(
                    r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
                )
            }
            fn run_ollama_inference(
                &self,
                _model_tag: &str,
                _prompt: &str,
                _temperature: f64,
            ) -> CommandOutput {
                CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
            }
            fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
                CommandOutput::success("pulling manifest... done")
            }
            fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
                CommandOutput::success("creating model... done")
            }
            fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
                CommandOutput::success(r#"{"status":"listening"}"#)
            }
            fn http_get(&self, _: &str) -> CommandOutput {
                CommandOutput::success(r#"{"models":[]}"#)
            }
            fn profile_memory(&self, _: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
            }
            fn run_chat(&self, _model_path: &Path, _prompt: &str, _no_gpu: bool, _extra_args: &[&str]) -> CommandOutput {
                CommandOutput::success("Chat output")
            }
            fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
                CommandOutput::success("{}")
            }
            fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
                CommandOutput::success("12345")
            }
        }

        let config = ExecutionConfig {
            model_path: Some("/test/model.gguf".to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(TraceStdoutRunner));

        let yaml = r#"
name: trace-stdout-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        assert!(result.failed >= 1);
        let evidence = executor.evidence().all();
        assert!(!evidence.is_empty());
        // When stderr was captured for the failing scenario, it should carry
        // the trace output appended by the executor.
        let last = &evidence[evidence.len() - 1];
        if let Some(ref stderr) = last.stderr {
            assert!(stderr.contains("TRACE STDOUT") || stderr.contains("trace"));
        }
    }
6684
6685 #[test]
6690 fn test_resolve_model_path_fallback_to_extension() {
6691 let temp_dir = tempfile::tempdir().unwrap();
6692 let gguf_dir = temp_dir.path().join("gguf");
6693 std::fs::create_dir_all(&gguf_dir).unwrap();
6694
6695 let alt_model = gguf_dir.join("custom-name.gguf");
6697 std::fs::write(&alt_model, b"fake model").unwrap();
6698
6699 let config = ExecutionConfig {
6700 model_path: Some(temp_dir.path().to_string_lossy().to_string()),
6701 ..Default::default()
6702 };
6703 let executor = Executor::with_config(config);
6704
6705 let scenario = apr_qa_gen::QaScenario::new(
6706 apr_qa_gen::ModelId::new("test", "model"),
6707 apr_qa_gen::Modality::Run,
6708 apr_qa_gen::Backend::Cpu,
6709 apr_qa_gen::Format::Gguf,
6710 "test prompt".to_string(),
6711 0,
6712 );
6713
6714 let path = executor.resolve_model_path(&scenario);
6715 assert!(path.unwrap().contains("custom-name.gguf"));
6717 }
6718
6719 #[test]
6720 fn test_resolve_model_path_prefers_model_dot_ext() {
6721 let temp_dir = tempfile::tempdir().unwrap();
6722 let apr_dir = temp_dir.path().join("apr");
6723 std::fs::create_dir_all(&apr_dir).unwrap();
6724
6725 let model_file = apr_dir.join("model.apr");
6727 std::fs::write(&model_file, b"fake model").unwrap();
6728
6729 let config = ExecutionConfig {
6730 model_path: Some(temp_dir.path().to_string_lossy().to_string()),
6731 ..Default::default()
6732 };
6733 let executor = Executor::with_config(config);
6734
6735 let scenario = apr_qa_gen::QaScenario::new(
6736 apr_qa_gen::ModelId::new("test", "model"),
6737 apr_qa_gen::Modality::Run,
6738 apr_qa_gen::Backend::Cpu,
6739 apr_qa_gen::Format::Apr,
6740 "test prompt".to_string(),
6741 0,
6742 );
6743
6744 let path = executor.resolve_model_path(&scenario);
6745 assert!(path.unwrap().contains("model.apr"));
6746 }
6747
6748 #[test]
6753 fn test_resolve_model_path_file_matching_format() {
6754 let temp_dir = tempfile::tempdir().unwrap();
6755 let model_file = temp_dir.path().join("abc123.safetensors");
6756 std::fs::write(&model_file, b"fake model data").unwrap();
6757
6758 let config = ExecutionConfig {
6759 model_path: Some(model_file.to_string_lossy().to_string()),
6760 ..Default::default()
6761 };
6762 let executor = Executor::with_config(config);
6763
6764 let scenario = QaScenario::new(
6766 ModelId::new("test", "model"),
6767 Modality::Run,
6768 Backend::Cpu,
6769 Format::SafeTensors,
6770 "test".to_string(),
6771 0,
6772 );
6773 let path = executor.resolve_model_path(&scenario);
6774 assert!(path.is_some());
6775 assert!(path.unwrap().contains("abc123.safetensors"));
6776 }
6777
6778 #[test]
6779 fn test_resolve_model_path_file_nonmatching_format() {
6780 let temp_dir = tempfile::tempdir().unwrap();
6781 let model_file = temp_dir.path().join("abc123.safetensors");
6782 std::fs::write(&model_file, b"fake model data").unwrap();
6783
6784 let config = ExecutionConfig {
6785 model_path: Some(model_file.to_string_lossy().to_string()),
6786 ..Default::default()
6787 };
6788 let executor = Executor::with_config(config);
6789
6790 let scenario_gguf = QaScenario::new(
6792 ModelId::new("test", "model"),
6793 Modality::Run,
6794 Backend::Cpu,
6795 Format::Gguf,
6796 "test".to_string(),
6797 0,
6798 );
6799 assert!(executor.resolve_model_path(&scenario_gguf).is_none());
6800
6801 let scenario_apr = QaScenario::new(
6803 ModelId::new("test", "model"),
6804 Modality::Run,
6805 Backend::Cpu,
6806 Format::Apr,
6807 "test".to_string(),
6808 0,
6809 );
6810 assert!(executor.resolve_model_path(&scenario_apr).is_none());
6811 }
6812
6813 #[test]
6814 fn test_resolve_model_path_file_gguf() {
6815 let temp_dir = tempfile::tempdir().unwrap();
6816 let model_file = temp_dir.path().join("hash123.gguf");
6817 std::fs::write(&model_file, b"fake gguf").unwrap();
6818
6819 let config = ExecutionConfig {
6820 model_path: Some(model_file.to_string_lossy().to_string()),
6821 ..Default::default()
6822 };
6823 let executor = Executor::with_config(config);
6824
6825 let scenario = QaScenario::new(
6826 ModelId::new("test", "model"),
6827 Modality::Run,
6828 Backend::Cpu,
6829 Format::Gguf,
6830 "test".to_string(),
6831 0,
6832 );
6833 let path = executor.resolve_model_path(&scenario);
6834 assert!(path.is_some());
6835 assert!(path.unwrap().contains("hash123.gguf"));
6836 }
6837
6838 #[test]
6839 fn test_execute_scenario_skips_nonmatching_format() {
6840 let temp_dir = tempfile::tempdir().unwrap();
6841 let model_file = temp_dir.path().join("abc123.safetensors");
6842 std::fs::write(&model_file, b"fake model").unwrap();
6843
6844 let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
6845
6846 let config = ExecutionConfig {
6847 model_path: Some(model_file.to_string_lossy().to_string()),
6848 ..Default::default()
6849 };
6850 let executor = Executor::with_runner(config, Arc::new(mock_runner));
6851
6852 let scenario = QaScenario::new(
6854 ModelId::new("test", "model"),
6855 Modality::Run,
6856 Backend::Cpu,
6857 Format::Gguf,
6858 "2+2=".to_string(),
6859 42,
6860 );
6861 let evidence = executor.execute_scenario(&scenario);
6862 assert_eq!(evidence.outcome, Outcome::Skipped);
6863 assert!(evidence.reason.contains("Format"));
6864 }
6865
6866 #[test]
6867 fn test_find_safetensors_dir_file_mode() {
6868 let temp_dir = tempfile::tempdir().unwrap();
6869
6870 let st_file = temp_dir.path().join("model.safetensors");
6872 std::fs::write(&st_file, b"fake").unwrap();
6873 let result = Executor::find_safetensors_dir(&st_file);
6874 assert!(result.is_some());
6875 assert_eq!(result.unwrap(), temp_dir.path());
6876
6877 let gguf_file = temp_dir.path().join("model.gguf");
6879 std::fs::write(&gguf_file, b"fake").unwrap();
6880 let result = Executor::find_safetensors_dir(&gguf_file);
6881 assert!(result.is_none());
6882 }
6883
6884 #[test]
6885 fn test_subprocess_execution_skip_flag() {
6886 let temp_dir = tempfile::tempdir().unwrap();
6887 let model_file = temp_dir.path().join("abc.safetensors");
6888 std::fs::write(&model_file, b"fake").unwrap();
6889
6890 let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
6891
6892 let config = ExecutionConfig {
6893 model_path: Some(model_file.to_string_lossy().to_string()),
6894 ..Default::default()
6895 };
6896 let executor = Executor::with_runner(config, Arc::new(mock_runner));
6897
6898 let scenario_st = QaScenario::new(
6900 ModelId::new("test", "model"),
6901 Modality::Run,
6902 Backend::Cpu,
6903 Format::SafeTensors,
6904 "test".to_string(),
6905 0,
6906 );
6907 let (_, _, _, _, skipped) = executor.subprocess_execution(&scenario_st);
6908 assert!(!skipped);
6909
6910 let scenario_gguf = QaScenario::new(
6912 ModelId::new("test", "model"),
6913 Modality::Run,
6914 Backend::Cpu,
6915 Format::Gguf,
6916 "test".to_string(),
6917 0,
6918 );
6919 let (_, _, _, _, skipped) = executor.subprocess_execution(&scenario_gguf);
6920 assert!(skipped);
6921 }
6922
6923 #[test]
6928 fn test_executor_corroborated_with_stderr() {
6929 let mock_runner = MockCommandRunner::new()
6930 .with_inference_response_and_stderr("The answer is 4.", "Warning: some benign warning");
6931
6932 let config = ExecutionConfig {
6933 model_path: Some("/test/model.gguf".to_string()),
6934 run_conversion_tests: false,
6935 run_golden_rule_test: false,
6936 ..Default::default()
6937 };
6938
6939 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6940
6941 let yaml = r#"
6942name: stderr-test
6943version: "1.0.0"
6944model:
6945 hf_repo: "test/model"
6946 formats: [gguf]
6947test_matrix:
6948 modalities: [run]
6949 backends: [cpu]
6950 scenario_count: 1
6951"#;
6952 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6953 let _result = executor.execute(&playbook).expect("Execution failed");
6954
6955 let evidence = executor.evidence().all();
6956 assert!(!evidence.is_empty());
6957 let ev = evidence
6959 .iter()
6960 .find(|e| e.stderr.is_some())
6961 .expect("should have evidence with stderr");
6962 assert!(ev.stderr.as_ref().unwrap().contains("Warning"));
6963 }
6964
6965 #[test]
6970 fn test_executor_falsified_with_stderr() {
6971 let mock_runner = MockCommandRunner::new()
6972 .with_inference_response_and_stderr("", "Error: model failed")
6973 .with_exit_code(1);
6974
6975 let config = ExecutionConfig {
6976 model_path: Some("/test/model.gguf".to_string()),
6977 run_conversion_tests: false,
6978 run_golden_rule_test: false,
6979 failure_policy: FailurePolicy::CollectAll,
6980 ..Default::default()
6981 };
6982
6983 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
6984
6985 let yaml = r#"
6986name: falsified-stderr
6987version: "1.0.0"
6988model:
6989 hf_repo: "test/model"
6990 formats: [gguf]
6991test_matrix:
6992 modalities: [run]
6993 backends: [cpu]
6994 scenario_count: 1
6995"#;
6996 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
6997 let result = executor.execute(&playbook).expect("Execution failed");
6998 assert!(result.failed >= 1);
6999
7000 let evidence = executor.evidence().all();
7001 let ev = evidence
7002 .iter()
7003 .find(|e| e.stderr.is_some())
7004 .expect("should have evidence with stderr");
7005 assert!(ev.stderr.is_some());
7006 }
7007
7008 #[test]
7016 fn test_execute_profile_flamegraph_no_apr() {
7017 let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
7018 let temp_dir = tempfile::tempdir().unwrap();
7019 let result = executor.execute_profile_flamegraph(temp_dir.path());
7020 assert!(!result.passed);
7022 assert_eq!(result.tool, "profile-flamegraph");
7023 assert_eq!(result.gate_id, "F-PROFILE-002");
7024 }
7025
7026 #[test]
7027 fn test_execute_profile_flamegraph_with_mock_success() {
7028 let mock_runner = MockCommandRunner::new();
7029 let executor = ToolExecutor::with_runner(
7030 "test-model.gguf".to_string(),
7031 true,
7032 5000,
7033 Arc::new(mock_runner),
7034 );
7035 let temp_dir = tempfile::tempdir().unwrap();
7036 let result = executor.execute_profile_flamegraph(temp_dir.path());
7037 assert_eq!(result.tool, "profile-flamegraph");
7039 assert_eq!(result.gate_id, "F-PROFILE-002");
7040 assert!(!result.passed); }
7042
7043 #[test]
7044 fn test_execute_profile_flamegraph_with_svg_file() {
7045 let mock_runner = MockCommandRunner::new();
7046 let executor = ToolExecutor::with_runner(
7047 "test-model.gguf".to_string(),
7048 false,
7049 5000,
7050 Arc::new(mock_runner),
7051 );
7052 let temp_dir = tempfile::tempdir().unwrap();
7053 let svg_path = temp_dir.path().join("profile_flamegraph.svg");
7055 std::fs::write(&svg_path, "<svg><rect/></svg>").unwrap();
7056 let result = executor.execute_profile_flamegraph(temp_dir.path());
7057 assert!(result.passed);
7058 assert!(result.stdout.contains("valid: true"));
7059 }
7060
7061 #[test]
7062 fn test_execute_profile_flamegraph_with_invalid_svg() {
7063 let mock_runner = MockCommandRunner::new();
7064 let executor = ToolExecutor::with_runner(
7065 "test-model.gguf".to_string(),
7066 true,
7067 5000,
7068 Arc::new(mock_runner),
7069 );
7070 let temp_dir = tempfile::tempdir().unwrap();
7071 let svg_path = temp_dir.path().join("profile_flamegraph.svg");
7073 std::fs::write(&svg_path, "not a valid svg at all").unwrap();
7074 let result = executor.execute_profile_flamegraph(temp_dir.path());
7075 assert!(!result.passed);
7076 assert!(result.stdout.contains("valid: false"));
7077 }
7078
7079 #[test]
7080 fn test_execute_profile_flamegraph_unsupported() {
7081 let mock_runner = MockCommandRunner::new().with_profile_flamegraph_failure();
7082 let executor = ToolExecutor::with_runner(
7083 "test-model.gguf".to_string(),
7084 true,
7085 5000,
7086 Arc::new(mock_runner),
7087 );
7088 let temp_dir = tempfile::tempdir().unwrap();
7089 let result = executor.execute_profile_flamegraph(temp_dir.path());
7090 assert!(!result.passed);
7091 }
7092
7093 #[test]
7094 fn test_execute_profile_focus_no_apr() {
7095 let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
7096 let result = executor.execute_profile_focus("attention");
7097 assert!(!result.passed);
7098 assert_eq!(result.tool, "profile-focus");
7099 assert_eq!(result.gate_id, "F-PROFILE-003");
7100 }
7101
7102 #[test]
7103 fn test_execute_profile_focus_with_mock_success() {
7104 let mock_runner = MockCommandRunner::new();
7105 let executor = ToolExecutor::with_runner(
7106 "test-model.gguf".to_string(),
7107 false,
7108 5000,
7109 Arc::new(mock_runner),
7110 );
7111 let result = executor.execute_profile_focus("attention");
7112 assert!(result.passed);
7113 assert_eq!(result.tool, "profile-focus");
7114 assert_eq!(result.gate_id, "F-PROFILE-003");
7115 }
7116
7117 #[test]
7118 fn test_execute_profile_focus_unsupported() {
7119 let mock_runner = MockCommandRunner::new().with_profile_focus_failure();
7120 let executor = ToolExecutor::with_runner(
7121 "test-model.gguf".to_string(),
7122 true,
7123 5000,
7124 Arc::new(mock_runner),
7125 );
7126 let result = executor.execute_profile_focus("attention");
7127 assert!(!result.passed);
7128 }
7129
7130 #[test]
7131 fn test_execute_backend_equivalence_no_apr() {
7132 let executor = ToolExecutor::new("test-model.gguf".to_string(), false, 5000);
7133 let result = executor.execute_backend_equivalence();
7134 assert!(!result.passed);
7135 assert_eq!(result.tool, "backend-equivalence");
7136 assert_eq!(result.gate_id, "F-CONV-BE-001");
7137 }
7138
7139 #[test]
7140 fn test_execute_serve_lifecycle_no_apr() {
7141 let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
7142 let result = executor.execute_serve_lifecycle();
7143 assert!(!result.passed);
7144 assert_eq!(result.tool, "serve-lifecycle");
7145 assert_eq!(result.gate_id, "F-INTEG-003");
7146 }
7147
7148 #[test]
7149 fn test_execute_all_with_serve() {
7150 let mock_runner = MockCommandRunner::new();
7151 let executor = ToolExecutor::with_runner(
7152 "test-model.gguf".to_string(),
7153 true,
7154 5000,
7155 Arc::new(mock_runner),
7156 );
7157 let results = executor.execute_all();
7159 assert!(!results.is_empty());
7160 assert!(!results.iter().any(|r| r.tool == "serve-lifecycle"));
7162 }
7163
    // Conversion tests enabled against a nonexistent model path: the run
    // should complete (scenarios counted) even though the conversion
    // infrastructure has nothing real to work on. The tail of the test also
    // exercises the stub runner methods directly for coverage.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_executor_conversion_infrastructure_failure() {
        use crate::command::CommandOutput;

        struct FailingConversionRunner;
        impl CommandRunner for FailingConversionRunner {
            // Inference always succeeds with the expected answer.
            fn run_inference(
                &self,
                _model_path: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _no_gpu: bool,
                _extra_args: &[&str],
            ) -> CommandOutput {
                CommandOutput {
                    stdout: "The answer is 4.".to_string(),
                    stderr: String::new(),
                    exit_code: 0,
                    success: true,
                }
            }
            // Remaining trait methods are inert stubs; several are invoked
            // directly at the end of the test for coverage.
            fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn inspect_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn bench_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn check_model(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_ci(
                &self,
                _path: &Path,
                _min_throughput: Option<f64>,
                _max_p99: Option<f64>,
                _warmup: u32,
                _measure: u32,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn compare_inference(
                &self,
                _model_a: &Path,
                _model_b: &Path,
                _prompt: &str,
                _max_tokens: u32,
                _tolerance: f64,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_flamegraph(
                &self,
                _model_path: &Path,
                _output_path: &Path,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn profile_with_focus(
                &self,
                _model_path: &Path,
                _focus: &str,
                _no_gpu: bool,
            ) -> CommandOutput {
                CommandOutput::success("")
            }
            fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
                CommandOutput::success("")
            }
            fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
            }
            fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
                CommandOutput::success("Path: /mock/model.safetensors")
            }
            fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
                CommandOutput::success(
                    r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
                )
            }
            fn run_ollama_inference(
                &self,
                _model_tag: &str,
                _prompt: &str,
                _temperature: f64,
            ) -> CommandOutput {
                CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
            }
            fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
                CommandOutput::success("pulling manifest... done")
            }
            fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
                CommandOutput::success("creating model... done")
            }
            fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
                CommandOutput::success(r#"{"status":"listening"}"#)
            }
            fn http_get(&self, _: &str) -> CommandOutput {
                CommandOutput::success(r#"{"models":[]}"#)
            }
            fn profile_memory(&self, _: &Path) -> CommandOutput {
                CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
            }
            fn run_chat(&self, _model_path: &Path, _prompt: &str, _no_gpu: bool, _extra_args: &[&str]) -> CommandOutput {
                CommandOutput::success("Chat output")
            }
            fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
                CommandOutput::success("{}")
            }
            fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
                CommandOutput::success("12345")
            }
        }

        // Conversion tests ON, pointed at a path that does not exist.
        let config = ExecutionConfig {
            model_path: Some("/nonexistent/model.gguf".to_string()),
            run_conversion_tests: true,
            run_golden_rule_test: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(FailingConversionRunner));

        let yaml = r#"
name: conv-infra-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        assert!(result.total_scenarios >= 1);

        // Direct stub invocations for trait-method coverage.
        let runner = FailingConversionRunner;
        let p = Path::new("/dev/null");
        assert!(runner.validate_model(p).success);
        assert!(runner.bench_model(p).success);
        assert!(runner.check_model(p).success);
        assert!(runner.profile_model(p, 1, 1).success);
        assert!(runner.profile_ci(p, None, None, 1, 1).success);
        assert!(runner.diff_tensors(p, p, false).success);
        assert!(runner.compare_inference(p, p, "", 1, 0.0).success);
        assert!(runner.profile_with_flamegraph(p, p, false).success);
        assert!(runner.profile_with_focus(p, "", false).success);
        assert!(runner.fingerprint_model(p, false).success);
        assert!(runner.validate_stats(p, p).success);
    }
7339
7340 #[test]
7345 fn test_find_safetensors_dir_with_subdir() {
7346 use tempfile::TempDir;
7347 let dir = TempDir::new().expect("create temp dir");
7348 let st_dir = dir.path().join("safetensors");
7349 std::fs::create_dir(&st_dir).expect("create safetensors dir");
7350 std::fs::write(st_dir.join("model.safetensors"), "test").expect("write file");
7351
7352 let result = Executor::find_safetensors_dir(dir.path());
7353 assert!(result.is_some());
7354 assert_eq!(result.unwrap(), st_dir);
7355 }
7356
7357 #[test]
7358 fn test_find_safetensors_dir_direct() {
7359 use tempfile::TempDir;
7360 let dir = TempDir::new().expect("create temp dir");
7361 std::fs::write(dir.path().join("model.safetensors"), "test").expect("write file");
7362
7363 let result = Executor::find_safetensors_dir(dir.path());
7364 assert!(result.is_some());
7365 assert_eq!(result.unwrap(), dir.path());
7366 }
7367
7368 #[test]
7369 fn test_find_safetensors_dir_none() {
7370 use tempfile::TempDir;
7371 let dir = TempDir::new().expect("create temp dir");
7372 let result = Executor::find_safetensors_dir(dir.path());
7375 assert!(result.is_none());
7376 }
7377
7378 #[test]
7379 fn test_has_safetensors_files_true() {
7380 use tempfile::TempDir;
7381 let dir = TempDir::new().expect("create temp dir");
7382 std::fs::write(dir.path().join("model.safetensors"), "test").expect("write file");
7383
7384 assert!(Executor::has_safetensors_files(dir.path()));
7385 }
7386
7387 #[test]
7388 fn test_has_safetensors_files_false() {
7389 use tempfile::TempDir;
7390 let dir = TempDir::new().expect("create temp dir");
7391 std::fs::write(dir.path().join("model.gguf"), "test").expect("write file");
7392
7393 assert!(!Executor::has_safetensors_files(dir.path()));
7394 }
7395
7396 #[test]
7397 fn test_has_safetensors_files_nonexistent_dir() {
7398 let nonexistent = std::path::Path::new("/nonexistent/path/xyz123");
7399 assert!(!Executor::has_safetensors_files(nonexistent));
7400 }
7401
7402 #[test]
7407 fn test_validate_scenario_creation() {
7408 let model_id = ModelId::new("test", "model");
7409 let scenario = Executor::validate_scenario(&model_id);
7410
7411 assert_eq!(scenario.model.org, "test");
7412 assert_eq!(scenario.model.name, "model");
7413 assert_eq!(scenario.format, Format::SafeTensors);
7414 assert!(scenario.prompt.contains("G0 Validate"));
7415 }
7416
7417 #[test]
7418 fn test_pull_scenario_creation() {
7419 let model_id = ModelId::new("test", "model");
7420 let scenario = Executor::pull_scenario(&model_id);
7421
7422 assert_eq!(scenario.model.org, "test");
7423 assert_eq!(scenario.model.name, "model");
7424 assert_eq!(scenario.format, Format::SafeTensors);
7425 assert!(scenario.prompt.contains("G0 Pull"));
7426 }
7427
7428 #[test]
7429 fn test_g0_pull_pass() {
7430 let mock_runner = MockCommandRunner::new();
7431
7432 let config = ExecutionConfig {
7433 model_path: Some("/test/model.gguf".to_string()),
7434 run_conversion_tests: false,
7435 run_golden_rule_test: false,
7436 run_contract_tests: false,
7437 ..Default::default()
7438 };
7439
7440 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7441 let model_id = ModelId::new("test", "model");
7442 let (passed, failed, pulled_path) = executor.run_g0_pull_check("test/model", &model_id);
7443
7444 assert_eq!(passed, 1);
7445 assert_eq!(failed, 0);
7446 assert_eq!(pulled_path.as_deref(), Some("/mock/model.safetensors"));
7447
7448 let evidence = executor.evidence().all();
7449 let pull_ev = evidence
7450 .iter()
7451 .find(|e| e.gate_id == "G0-PULL-001")
7452 .expect("should have G0-PULL evidence");
7453 assert!(pull_ev.outcome.is_pass());
7454 assert!(pull_ev.output.contains("G0 PASS"));
7455 }
7456
7457 #[test]
7458 fn test_g0_pull_fail() {
7459 let mock_runner = MockCommandRunner::new().with_pull_failure();
7460
7461 let config = ExecutionConfig {
7462 model_path: Some("/test/model.gguf".to_string()),
7463 run_conversion_tests: false,
7464 run_golden_rule_test: false,
7465 run_contract_tests: false,
7466 ..Default::default()
7467 };
7468
7469 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7470 let model_id = ModelId::new("test", "model");
7471 let (passed, failed, pulled_path) = executor.run_g0_pull_check("test/model", &model_id);
7472
7473 assert_eq!(passed, 0);
7474 assert_eq!(failed, 1);
7475 assert!(pulled_path.is_none());
7476
7477 let evidence = executor.evidence().all();
7478 let pull_ev = evidence
7479 .iter()
7480 .find(|e| e.gate_id == "G0-PULL-001")
7481 .expect("should have G0-PULL evidence");
7482 assert!(!pull_ev.outcome.is_pass());
7483 assert!(pull_ev.reason.contains("G0 FAIL"));
7484 }
7485
    #[test]
    fn test_g0_pull_fail_stops_execution() {
        // Pull fails and there is no local model_path fallback, so the
        // executor must abort at the G0 pull gateway instead of running the
        // scenario matrix.
        let mock_runner = MockCommandRunner::new().with_pull_failure();

        let config = ExecutionConfig {
            model_path: None,
            run_conversion_tests: true,
            run_golden_rule_test: true,
            run_contract_tests: true,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let yaml = r#"
name: pull-fail-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");

        // The run is flagged as a gateway failure attributed to the pull gate.
        assert!(result.gateway_failed.is_some());
        assert!(
            result
                .gateway_failed
                .as_ref()
                .unwrap()
                .contains("G0-PULL-001")
        );

        // Nothing passes; 4 failures total — presumably the pull failure plus
        // the 3 scenarios that never ran (TODO confirm the accounting).
        assert_eq!(result.passed, 0);
        assert_eq!(result.failed, 4);
    }
7531
7532 #[test]
7533 fn test_g0_pull_sets_model_path() {
7534 let mock_runner =
7536 MockCommandRunner::new().with_pull_model_path("/pulled/model.safetensors");
7537
7538 let config = ExecutionConfig {
7539 model_path: None, run_conversion_tests: false,
7541 run_golden_rule_test: false,
7542 run_contract_tests: false,
7543 ..Default::default()
7544 };
7545
7546 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7547
7548 let yaml = r#"
7549name: pull-set-path-test
7550version: "1.0.0"
7551model:
7552 hf_repo: "test/model"
7553 formats: [gguf]
7554test_matrix:
7555 modalities: [run]
7556 backends: [cpu]
7557 scenario_count: 1
7558"#;
7559 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
7560 let result = executor.execute(&playbook).expect("Execution failed");
7561
7562 assert!(result.gateway_failed.is_none());
7564 assert!(result.passed >= 1);
7566 }
7567
7568 fn make_temp_model_dir() -> tempfile::TempDir {
7570 let dir = tempfile::TempDir::new().expect("create temp dir");
7571 let st_dir = dir.path().join("safetensors");
7572 std::fs::create_dir_all(&st_dir).expect("mkdir safetensors");
7573 std::fs::write(st_dir.join("model.safetensors"), b"fake").expect("write");
7574 dir
7575 }
7576
7577 #[test]
7578 fn test_g0_validate_pass() {
7579 let mock_runner = MockCommandRunner::new(); let dir = make_temp_model_dir();
7581
7582 let config = ExecutionConfig {
7583 model_path: Some(dir.path().to_string_lossy().to_string()),
7584 run_conversion_tests: false,
7585 run_golden_rule_test: false,
7586 run_contract_tests: false,
7587 ..Default::default()
7588 };
7589
7590 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7591 let model_id = ModelId::new("test", "model");
7592 let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
7593
7594 assert_eq!(passed, 1);
7595 assert_eq!(failed, 0);
7596
7597 let evidence = executor.evidence().all();
7598 let validate_ev = evidence
7599 .iter()
7600 .find(|e| e.gate_id == "G0-VALIDATE-001")
7601 .expect("should have G0-VALIDATE evidence");
7602 assert!(validate_ev.outcome.is_pass());
7603 assert!(validate_ev.output.contains("G0 PASS"));
7604 }
7605
7606 #[test]
7607 fn test_g0_validate_fail_corrupt_model() {
7608 let mock_runner = MockCommandRunner::new().with_validate_strict_failure();
7609 let dir = make_temp_model_dir();
7610
7611 let config = ExecutionConfig {
7612 model_path: Some(dir.path().to_string_lossy().to_string()),
7613 run_conversion_tests: false,
7614 run_golden_rule_test: false,
7615 run_contract_tests: false,
7616 ..Default::default()
7617 };
7618
7619 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7620 let model_id = ModelId::new("test", "model");
7621 let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
7622
7623 assert_eq!(passed, 0);
7624 assert_eq!(failed, 1);
7625
7626 let evidence = executor.evidence().all();
7627 let validate_ev = evidence
7628 .iter()
7629 .find(|e| e.gate_id == "G0-VALIDATE-001")
7630 .expect("should have G0-VALIDATE evidence");
7631 assert!(!validate_ev.outcome.is_pass());
7632 assert!(validate_ev.reason.contains("G0 FAIL"));
7633 }
7634
    #[test]
    fn test_g0_validate_fail_stops_execution() {
        // Strict validation fails, so the G0 validate gateway must abort the
        // run before any matrix scenarios execute.
        let mock_runner = MockCommandRunner::new().with_validate_strict_failure();
        let dir = make_temp_model_dir();

        let config = ExecutionConfig {
            model_path: Some(dir.path().to_string_lossy().to_string()),
            run_conversion_tests: true,
            run_golden_rule_test: true,
            run_contract_tests: true,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let yaml = r#"
name: validate-fail-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");

        // Gateway failure attributed to the validate gate.
        assert!(result.gateway_failed.is_some());
        assert!(
            result
                .gateway_failed
                .as_ref()
                .unwrap()
                .contains("G0-VALIDATE-001")
        );

        // Nothing passed; 4 failures — presumably the validate failure plus
        // the 3 scenarios that never ran (TODO confirm the accounting).
        assert_eq!(result.passed, 0);
        assert_eq!(result.failed, 4);
    }
7680
7681 #[test]
7682 fn test_g0_validate_pass_continues_execution() {
7683 let mock_runner = MockCommandRunner::new(); let dir = make_temp_model_dir();
7686
7687 let config = ExecutionConfig {
7688 model_path: Some(dir.path().to_string_lossy().to_string()),
7689 run_conversion_tests: false,
7690 run_golden_rule_test: false,
7691 run_contract_tests: false,
7692 ..Default::default()
7693 };
7694
7695 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7696
7697 let yaml = r#"
7698name: validate-pass-test
7699version: "1.0.0"
7700model:
7701 hf_repo: "test/model"
7702 formats: [gguf]
7703test_matrix:
7704 modalities: [run]
7705 backends: [cpu]
7706 scenario_count: 1
7707"#;
7708 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
7709 let result = executor.execute(&playbook).expect("Execution failed");
7710
7711 assert!(result.gateway_failed.is_none());
7713 assert!(result.total_scenarios >= 2);
7715 assert!(result.passed >= 1);
7716 }
7717
7718 #[test]
7719 fn test_g0_validate_no_model_path() {
7720 let mock_runner = MockCommandRunner::new();
7722
7723 let config = ExecutionConfig {
7724 model_path: None, run_conversion_tests: false,
7726 run_golden_rule_test: false,
7727 run_contract_tests: false,
7728 ..Default::default()
7729 };
7730
7731 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7732
7733 let yaml = r#"
7734name: no-model-path-test
7735version: "1.0.0"
7736model:
7737 hf_repo: "test/model"
7738 formats: [gguf]
7739test_matrix:
7740 modalities: [run]
7741 backends: [cpu]
7742 scenario_count: 1
7743"#;
7744 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
7745 let result = executor.execute(&playbook).expect("Execution failed");
7746
7747 assert!(result.gateway_failed.is_none());
7749 assert_eq!(result.total_scenarios, 2);
7751 }
7752
7753 #[test]
7754 fn test_g0_validate_no_safetensors_files() {
7755 let dir = tempfile::TempDir::new().expect("create temp dir");
7757 let mock_runner = MockCommandRunner::new();
7758
7759 let config = ExecutionConfig {
7760 model_path: Some(dir.path().to_string_lossy().to_string()),
7761 ..Default::default()
7762 };
7763
7764 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7765 let model_id = ModelId::new("test", "model");
7766 let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
7767
7768 assert_eq!(passed, 0);
7769 assert_eq!(failed, 0);
7770 }
7771
7772 #[test]
7773 fn test_g0_validate_multiple_shards() {
7774 let dir = tempfile::TempDir::new().expect("create temp dir");
7776 let st_dir = dir.path().join("safetensors");
7777 std::fs::create_dir_all(&st_dir).expect("mkdir");
7778 std::fs::write(st_dir.join("model-00001-of-00002.safetensors"), b"shard1").expect("write");
7779 std::fs::write(st_dir.join("model-00002-of-00002.safetensors"), b"shard2").expect("write");
7780
7781 let mock_runner = MockCommandRunner::new();
7782 let config = ExecutionConfig {
7783 model_path: Some(dir.path().to_string_lossy().to_string()),
7784 ..Default::default()
7785 };
7786
7787 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
7788 let model_id = ModelId::new("test", "model");
7789 let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
7790
7791 assert_eq!(passed, 2);
7793 assert_eq!(failed, 0);
7794 }
7795
7796 #[test]
7797 fn test_find_safetensors_files_single_file() {
7798 let dir = tempfile::TempDir::new().expect("create temp dir");
7799 let file = dir.path().join("model.safetensors");
7800 std::fs::write(&file, b"test").expect("write");
7801
7802 let files = Executor::find_safetensors_files(&file);
7803 assert_eq!(files.len(), 1);
7804 assert_eq!(files[0], file);
7805 }
7806
7807 #[test]
7808 fn test_find_safetensors_files_non_safetensors() {
7809 let dir = tempfile::TempDir::new().expect("create temp dir");
7810 let file = dir.path().join("model.gguf");
7811 std::fs::write(&file, b"test").expect("write");
7812
7813 let files = Executor::find_safetensors_files(&file);
7814 assert!(files.is_empty());
7815 }
7816
7817 #[test]
7818 fn test_find_safetensors_files_directory() {
7819 let dir = make_temp_model_dir();
7820 let files = Executor::find_safetensors_files(dir.path());
7821 assert_eq!(files.len(), 1);
7822 }
7823
7824 #[test]
7825 fn test_integrity_scenario_creation() {
7826 let model_id = ModelId::new("test", "model");
7827 let scenario = Executor::integrity_scenario(&model_id);
7828
7829 assert_eq!(scenario.model.org, "test");
7830 assert_eq!(scenario.model.name, "model");
7831 assert_eq!(scenario.format, Format::SafeTensors);
7832 assert!(scenario.prompt.contains("G0"));
7833 }
7834
7835 #[test]
7836 fn test_run_g0_integrity_check_no_safetensors() {
7837 use tempfile::TempDir;
7838 let dir = TempDir::new().expect("create temp dir");
7839 let mut executor = Executor::new();
7842 let model_id = ModelId::new("test", "model");
7843 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
7844
7845 assert_eq!(passed, 0);
7847 assert_eq!(failed, 0);
7848 }
7849
7850 #[test]
7851 fn test_run_g0_integrity_check_missing_config() {
7852 use tempfile::TempDir;
7853 let dir = TempDir::new().expect("create temp dir");
7854
7855 create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
7857
7858 let mut executor = Executor::new();
7859 let model_id = ModelId::new("test", "model");
7860 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
7861
7862 assert_eq!(passed, 0);
7864 assert!(failed > 0);
7865
7866 let evidence = executor.evidence();
7868 assert!(
7869 evidence
7870 .all()
7871 .iter()
7872 .any(|e| e.gate_id.starts_with("G0-INTEGRITY"))
7873 );
7874 }
7875
7876 #[test]
7877 fn test_run_g0_integrity_check_pass() {
7878 use tempfile::TempDir;
7879 let dir = TempDir::new().expect("create temp dir");
7880
7881 create_test_config_for_executor(dir.path(), 24, 896, 151_936);
7883 create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
7884
7885 let mut executor = Executor::new();
7886 let model_id = ModelId::new("test", "model");
7887 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
7888
7889 assert_eq!(passed, 1);
7890 assert_eq!(failed, 0);
7891
7892 let evidence = executor.evidence();
7894 assert!(
7895 evidence
7896 .all()
7897 .iter()
7898 .any(|e| { e.gate_id.starts_with("G0-INTEGRITY") && e.outcome.is_pass() })
7899 );
7900 }
7901
7902 #[test]
7903 fn test_run_g0_integrity_check_layer_mismatch() {
7904 use tempfile::TempDir;
7905 let dir = TempDir::new().expect("create temp dir");
7906
7907 create_test_config_for_executor(dir.path(), 14, 896, 151_936);
7909 create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
7910
7911 let mut executor = Executor::new();
7912 let model_id = ModelId::new("test", "model");
7913 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
7914
7915 assert_eq!(passed, 0);
7916 assert!(failed > 0);
7917
7918 let evidence = executor.evidence();
7920 assert!(evidence.all().iter().any(|e| e.gate_id.contains("LAYERS")));
7921 }
7922
7923 fn create_test_config_for_executor(
7925 dir: &std::path::Path,
7926 layers: usize,
7927 hidden: usize,
7928 vocab: usize,
7929 ) {
7930 let config = format!(
7931 r#"{{"num_hidden_layers": {layers}, "hidden_size": {hidden}, "vocab_size": {vocab}}}"#
7932 );
7933 std::fs::write(dir.join("config.json"), config).expect("write config");
7934 }
7935
    /// Writes a minimal fake `model.safetensors` into `dir`: an 8-byte
    /// little-endian header length, a JSON header describing an embedding
    /// tensor of shape `[vocab, hidden]` plus one `q_proj` tensor of shape
    /// `[hidden, hidden]` per layer, then 1 KiB of zero bytes standing in for
    /// tensor data (the offsets are not meant to be read back).
    #[allow(clippy::items_after_statements)]
    fn create_mock_safetensors_for_test(
        dir: &std::path::Path,
        layers: usize,
        hidden: usize,
        vocab: usize,
    ) {
        let mut header_obj = serde_json::Map::new();

        // Embedding tensor entry: [vocab, hidden], F32, plausible offsets.
        let mut embed_info = serde_json::Map::new();
        embed_info.insert("shape".to_string(), serde_json::json!([vocab, hidden]));
        embed_info.insert(
            "dtype".to_string(),
            serde_json::Value::String("F32".to_string()),
        );
        embed_info.insert(
            "data_offsets".to_string(),
            serde_json::json!([0, vocab * hidden * 4]),
        );
        header_obj.insert(
            "model.embed_tokens.weight".to_string(),
            serde_json::Value::Object(embed_info),
        );

        // One attention q_proj entry per layer; offsets are dummy zeros.
        for i in 0..layers {
            let mut layer_info = serde_json::Map::new();
            layer_info.insert("shape".to_string(), serde_json::json!([hidden, hidden]));
            layer_info.insert(
                "dtype".to_string(),
                serde_json::Value::String("F32".to_string()),
            );
            layer_info.insert("data_offsets".to_string(), serde_json::json!([0, 0]));
            header_obj.insert(
                format!("model.layers.{i}.self_attn.q_proj.weight"),
                serde_json::Value::Object(layer_info),
            );
        }

        let header_json = serde_json::to_string(&header_obj).expect("serialize header");
        let header_bytes = header_json.as_bytes();
        let header_len = header_bytes.len() as u64;

        // Safetensors layout: u64 LE header length, JSON header, raw data.
        let path = dir.join("model.safetensors");
        let mut file = std::fs::File::create(path).expect("create safetensors");
        use std::io::Write;
        file.write_all(&header_len.to_le_bytes())
            .expect("write len");
        file.write_all(header_bytes).expect("write header");
        file.write_all(&[0u8; 1024]).expect("write data");
    }
7989
7990 #[test]
7995 fn test_execute_all_with_serve_true() {
7996 let mock_runner = MockCommandRunner::new();
7997 let executor = ToolExecutor::with_runner(
7998 "test-model.gguf".to_string(),
7999 true,
8000 5000,
8001 Arc::new(mock_runner),
8002 );
8003 let results = executor.execute_all_with_serve(true);
8004 assert!(!results.is_empty());
8005 assert!(results.iter().any(|r| r.tool == "serve-lifecycle"));
8007 }
8008
8009 #[test]
8010 fn test_run_g0_integrity_check_hidden_mismatch() {
8011 use tempfile::TempDir;
8012 let dir = TempDir::new().expect("create temp dir");
8013
8014 create_test_config_for_executor(dir.path(), 24, 1024, 151_936);
8016 create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
8017
8018 let mut executor = Executor::new();
8019 let model_id = ModelId::new("test", "model");
8020 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
8021
8022 assert_eq!(passed, 0);
8023 assert!(failed > 0);
8024
8025 let evidence = executor.evidence();
8026 assert!(evidence.all().iter().any(|e| e.gate_id.contains("HIDDEN")));
8027 }
8028
8029 #[test]
8030 fn test_run_g0_integrity_check_vocab_mismatch() {
8031 use tempfile::TempDir;
8032 let dir = TempDir::new().expect("create temp dir");
8033
8034 create_test_config_for_executor(dir.path(), 24, 896, 200_000);
8036 create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
8037
8038 let mut executor = Executor::new();
8039 let model_id = ModelId::new("test", "model");
8040 let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
8041
8042 assert_eq!(passed, 0);
8043 assert!(failed > 0);
8044
8045 let evidence = executor.evidence();
8046 assert!(evidence.all().iter().any(|e| e.gate_id.contains("VOCAB")));
8047 }
8048
8049 #[test]
8052 fn test_run_g0_layout_check_no_contract() {
8053 use tempfile::TempDir;
8055 let dir = TempDir::new().expect("create temp dir");
8056
8057 let mut executor = Executor::new();
8058 let model_id = ModelId::new("test", "model");
8059 let (passed, failed) = executor.run_g0_layout_check(dir.path(), &model_id);
8060
8061 assert_eq!(passed, 0);
8063 assert_eq!(failed, 0);
8064 }
8065
8066 #[test]
8067 fn test_run_g0_layout_check_model_not_found() {
8068 use tempfile::TempDir;
8070 let dir = TempDir::new().expect("create temp dir");
8071
8072 let contract_path = dir.path().join("tensor-layout-v1.yaml");
8074 std::fs::write(
8075 &contract_path,
8076 r#"
8077metadata:
8078 version: "1.0"
8079 created: "2026-01-01"
8080 updated: "2026-01-01"
8081 author: "test"
8082 description: "test"
8083formats: {}
8084kernel:
8085 signature: "test"
8086 weight_shape: "[out, in]"
8087 computation: "y = Wx"
8088 byte_calculation: "out * in"
8089 block_sizes: {}
8090 QK_K: 256
8091tensors: {}
8092validation_rules: []
8093"#,
8094 )
8095 .expect("write contract");
8096
8097 let nonexistent_path = dir.path().join("does_not_exist.safetensors");
8099 let contract =
8100 crate::layout_contract::load_contract_from(&contract_path).expect("load contract");
8101 let result = crate::layout_contract::validate_model(&nonexistent_path, &contract)
8102 .expect("validation should return result");
8103
8104 assert!(!result.passed);
8106 assert!(!result.critical_failures.is_empty());
8107 }
8108
8109 #[test]
8110 fn test_layout_scenario_creation() {
8111 let model_id = ModelId::new("test", "model");
8112 let scenario = Executor::layout_scenario(&model_id);
8113
8114 assert_eq!(
8115 scenario.prompt,
8116 "G0 Layout: tensor shape contract validation"
8117 );
8118 assert_eq!(scenario.format, Format::SafeTensors);
8119 assert_eq!(scenario.backend, Backend::Cpu);
8120 assert_eq!(scenario.modality, Modality::Run);
8121 }
8122
8123 #[test]
8124 fn test_format_tensor_failure_with_expected_and_actual() {
8125 let tensor_result = crate::layout_contract::TensorValidationResult {
8126 tensor_name: "lm_head.weight".to_string(),
8127 rule_id: "F-LAYOUT-CONTRACT-002".to_string(),
8128 passed: false,
8129 details: "Shape mismatch".to_string(),
8130 expected: Some("[vocab, hidden]".to_string()),
8131 actual: Some("[hidden, vocab]".to_string()),
8132 };
8133
8134 let formatted = Executor::format_tensor_failure(&tensor_result);
8135 assert!(formatted.contains("F-LAYOUT-CONTRACT-002"));
8136 assert!(formatted.contains("Shape mismatch"));
8137 assert!(formatted.contains("Expected: [vocab, hidden]"));
8138 assert!(formatted.contains("Actual: [hidden, vocab]"));
8139 }
8140
8141 #[test]
8142 fn test_format_tensor_failure_without_expected() {
8143 let tensor_result = crate::layout_contract::TensorValidationResult {
8144 tensor_name: "test.weight".to_string(),
8145 rule_id: "F-LAYOUT-CONTRACT-001".to_string(),
8146 passed: false,
8147 details: "Missing transpose".to_string(),
8148 expected: None,
8149 actual: None,
8150 };
8151
8152 let formatted = Executor::format_tensor_failure(&tensor_result);
8153 assert!(formatted.contains("F-LAYOUT-CONTRACT-001"));
8154 assert!(formatted.contains("Missing transpose"));
8155 assert!(!formatted.contains("Expected:"));
8156 assert!(!formatted.contains("Actual:"));
8157 }
8158
8159 #[test]
8160 fn test_execute_inspect_verified_nonexistent_model() {
8161 let executor =
8163 ToolExecutor::new("/nonexistent/path/to/model.gguf".to_string(), false, 5000);
8164 let result = executor.execute_inspect_verified();
8165 assert!(!result.passed);
8167 assert_eq!(result.gate_id, "F-INSPECT-META-001");
8168 assert!(result.exit_code != 0);
8170 }
8171
8172 #[test]
8173 fn test_execute_scenario_stop_on_p0_gate() {
8174 let mock_runner = MockCommandRunner::new()
8176 .with_inference_failure()
8177 .with_exit_code(1);
8178
8179 let config = ExecutionConfig {
8180 model_path: Some("/test/model.gguf".to_string()),
8181 failure_policy: FailurePolicy::StopOnP0,
8182 run_conversion_tests: false,
8183 run_golden_rule_test: false,
8184 ..Default::default()
8185 };
8186
8187 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8188
8189 let yaml = r#"
8191name: p0-stop
8192version: "1.0.0"
8193model:
8194 hf_repo: "test/model"
8195 formats: [gguf]
8196test_matrix:
8197 modalities: [run]
8198 backends: [cpu]
8199 scenario_count: 3
8200"#;
8201 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
8202 let result = executor.execute(&playbook).expect("Execution failed");
8203
8204 assert!(result.failed >= 1);
8206 }
8207
8208 #[test]
8209 fn test_execute_scenario_corroborated_with_stderr_via_playbook() {
8210 let mock_runner = MockCommandRunner::new()
8214 .with_inference_response_and_stderr("correct", "warning: low memory");
8215
8216 let config = ExecutionConfig {
8217 model_path: Some("/test/model.gguf".to_string()),
8218 run_conversion_tests: false,
8219 run_golden_rule_test: false,
8220 failure_policy: FailurePolicy::CollectAll,
8221 ..Default::default()
8222 };
8223
8224 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8225
8226 let yaml = r#"
8227name: corroborated-stderr
8228version: "1.0.0"
8229model:
8230 hf_repo: "test/model"
8231 formats: [gguf]
8232test_matrix:
8233 modalities: [run]
8234 backends: [cpu]
8235 scenario_count: 1
8236"#;
8237 let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
8238 let result = executor.execute(&playbook).expect("Execution failed");
8239
8240 assert!(result.passed >= 1);
8242
8243 let evidence = executor.evidence().all();
8245 assert!(
8246 evidence
8247 .iter()
8248 .any(|e| e.outcome.is_pass() && e.stderr.is_some()),
8249 "should have corroborated evidence with stderr"
8250 );
8251 }
8252
8253 #[test]
8254 fn test_run_conversion_tests_single_file_model() {
8255 let dir = tempfile::tempdir().expect("create temp dir");
8256 let model_path = dir.path().join("model.gguf");
8257 std::fs::write(&model_path, b"fake model").expect("write model");
8258
8259 let config = ExecutionConfig {
8260 model_path: Some(model_path.to_string_lossy().to_string()),
8261 run_conversion_tests: true,
8262 ..Default::default()
8263 };
8264
8265 let mut executor = Executor::with_config(config);
8266 let model_id = ModelId::new("test", "model");
8267 let (passed, failed) = executor.run_conversion_tests(&model_path, &model_id);
8269 assert_eq!(passed, 0);
8270 assert_eq!(failed, 0);
8271 }
8272
8273 #[test]
8274 fn test_run_golden_rule_single_file_model() {
8275 let dir = tempfile::tempdir().expect("create temp dir");
8276 let model_path = dir.path().join("model.gguf");
8277 std::fs::write(&model_path, b"fake model").expect("write model");
8278
8279 let config = ExecutionConfig {
8280 model_path: Some(model_path.to_string_lossy().to_string()),
8281 run_golden_rule_test: true,
8282 ..Default::default()
8283 };
8284
8285 let mut executor = Executor::with_config(config);
8286 let model_id = ModelId::new("test", "model");
8287 let (passed, failed) = executor.run_golden_rule_test(&model_path, &model_id);
8289 assert_eq!(passed, 0);
8290 assert_eq!(failed, 0);
8291 }
8292
8293 #[test]
8294 fn test_integrity_check_refuses_on_mismatch() {
8295 use crate::playbook::{PlaybookLockEntry, PlaybookLockFile, save_lock_file};
8296 use std::collections::HashMap;
8297
8298 let dir = tempfile::tempdir().expect("create temp dir");
8299 let lock_path = dir.path().join("playbook.lock.yaml");
8300
8301 let mut entries = HashMap::new();
8303 entries.insert(
8304 "integrity-test".to_string(),
8305 PlaybookLockEntry {
8306 sha256: "0000000000000000000000000000000000000000000000000000000000000000"
8307 .to_string(),
8308 locked_fields: vec!["name".to_string()],
8309 },
8310 );
8311 let lock_file = PlaybookLockFile { entries };
8312 save_lock_file(&lock_file, &lock_path).expect("save lock");
8313
8314 let config = ExecutionConfig {
8315 check_integrity: true,
8316 lock_file_path: Some(lock_path.to_string_lossy().to_string()),
8317 run_conversion_tests: false,
8318 run_golden_rule_test: false,
8319 ..Default::default()
8320 };
8321
8322 let mut executor = Executor::with_config(config);
8323 let yaml = r#"
8324name: integrity-test
8325version: "1.0.0"
8326model:
8327 hf_repo: "test/model"
8328 formats: [gguf]
8329test_matrix:
8330 modalities: [run]
8331 backends: [cpu]
8332 scenario_count: 1
8333"#;
8334 let playbook = Playbook::from_yaml(yaml).expect("parse");
8335 let result = executor.execute(&playbook).expect("execute");
8336
8337 assert!(result.gateway_failed.is_some() || result.failed > 0);
8341 }
8342
8343 #[test]
8344 fn test_integrity_check_disabled_by_default() {
8345 let config = ExecutionConfig {
8347 run_conversion_tests: false,
8348 run_golden_rule_test: false,
8349 ..Default::default()
8350 };
8351
8352 assert!(!config.check_integrity);
8353 assert!(config.lock_file_path.is_none());
8354
8355 let mock_runner = MockCommandRunner::new();
8356 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8357 let yaml = r#"
8358name: no-integrity
8359version: "1.0.0"
8360model:
8361 hf_repo: "test/model"
8362 formats: [gguf]
8363test_matrix:
8364 modalities: [run]
8365 backends: [cpu]
8366 scenario_count: 1
8367"#;
8368 let playbook = Playbook::from_yaml(yaml).expect("parse");
8369 let result = executor.execute(&playbook).expect("execute");
8370
8371 assert!(result.gateway_failed.is_none());
8373 }
8374
8375 #[test]
8376 fn test_integrity_check_missing_lock_file_warns() {
8377 let mock_runner = MockCommandRunner::new();
8379 let config = ExecutionConfig {
8380 check_integrity: true,
8381 lock_file_path: Some("/nonexistent/playbook.lock.yaml".to_string()),
8382 run_conversion_tests: false,
8383 run_golden_rule_test: false,
8384 ..Default::default()
8385 };
8386
8387 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8388 let yaml = r#"
8389name: missing-lock
8390version: "1.0.0"
8391model:
8392 hf_repo: "test/model"
8393 formats: [gguf]
8394test_matrix:
8395 modalities: [run]
8396 backends: [cpu]
8397 scenario_count: 1
8398"#;
8399 let playbook = Playbook::from_yaml(yaml).expect("parse");
8400 let result = executor.execute(&playbook).expect("execute");
8401
8402 assert!(result.gateway_failed.is_none());
8404 }
8405
8406 #[test]
8407 fn test_warn_implicit_skips_flag() {
8408 let mock_runner = MockCommandRunner::new();
8410 let config = ExecutionConfig {
8411 warn_implicit_skips: true,
8412 run_conversion_tests: false,
8413 run_golden_rule_test: false,
8414 ..Default::default()
8415 };
8416
8417 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8418 let yaml = r#"
8419name: skip-warn-test
8420version: "1.0.0"
8421model:
8422 hf_repo: "test/model"
8423 formats: [gguf]
8424test_matrix:
8425 modalities: [run]
8426 backends: [cpu]
8427 scenario_count: 1
8428"#;
8429 let playbook = Playbook::from_yaml(yaml).expect("parse");
8430 let result = executor.execute(&playbook).expect("execute");
8431
8432 assert!(result.gateway_failed.is_none());
8434 }
8435
8436 #[test]
8437 fn test_backward_compat_new_flags_off() {
8438 let config = ExecutionConfig::default();
8440 assert!(!config.check_integrity);
8441 assert!(!config.warn_implicit_skips);
8442 assert!(config.lock_file_path.is_none());
8443 }
8444
8445 #[test]
8450 fn test_hf_parity_disabled_by_default() {
8451 let config = ExecutionConfig::default();
8453 assert!(!config.run_hf_parity);
8454 assert!(config.hf_parity_corpus_path.is_none());
8455 assert!(config.hf_parity_model_family.is_none());
8456 }
8457
8458 #[test]
8459 fn test_hf_parity_skipped_when_missing_config() {
8460 let mock_runner = MockCommandRunner::new();
8462 let config = ExecutionConfig {
8463 run_hf_parity: true,
8464 hf_parity_corpus_path: None, hf_parity_model_family: None, run_conversion_tests: false,
8467 run_golden_rule_test: false,
8468 ..Default::default()
8469 };
8470
8471 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8472 let yaml = r#"
8473name: hf-parity-test
8474version: "1.0.0"
8475model:
8476 hf_repo: "test/model"
8477 formats: [gguf]
8478test_matrix:
8479 modalities: [run]
8480 backends: [cpu]
8481 scenario_count: 1
8482"#;
8483 let playbook = Playbook::from_yaml(yaml).expect("parse");
8484 let result = executor.execute(&playbook).expect("execute");
8485
8486 assert!(result.gateway_failed.is_none());
8488
8489 let has_skip_evidence = result
8491 .evidence
8492 .all()
8493 .iter()
8494 .any(|e| e.gate_id == "F-HF-PARITY-SKIP");
8495 assert!(has_skip_evidence, "Expected F-HF-PARITY-SKIP evidence");
8496 }
8497
    /// HF parity fully configured but pointing at a nonexistent corpus: the
    /// run must record at least one failure and F-HF-PARITY-001 evidence.
    #[test]
    fn test_hf_parity_skipped_when_manifest_missing() {
        let mock_runner = MockCommandRunner::new();
        // Both parity settings present, but the paths do not exist on disk.
        let config = ExecutionConfig {
            run_hf_parity: true,
            hf_parity_corpus_path: Some("/nonexistent/corpus".to_string()),
            hf_parity_model_family: Some("nonexistent-model/v1".to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let yaml = r#"
name: hf-parity-missing-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("parse");
        let result = executor.execute(&playbook).expect("execute");

        assert!(
            result.failed >= 1,
            "Expected at least 1 failed test for missing manifest"
        );

        let has_parity_evidence = result
            .evidence
            .all()
            .iter()
            .any(|e| e.gate_id == "F-HF-PARITY-001");
        assert!(
            has_parity_evidence,
            "Expected F-HF-PARITY-001 evidence for missing manifest"
        );
    }
8543
    /// `prepare_model_workspace` builds the per-format directory layout:
    /// a `safetensors/` subdir with a `model.safetensors` link and an `apr/`
    /// subdir, with at least one format counted as passed.
    #[test]
    fn test_workspace_creates_directory_structure() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let output_dir = dir.path().join("output");

        // Single-file model input (hash-named, as produced by a pull).
        let model_file = dir.path().join("abc123.safetensors");
        std::fs::write(&model_file, b"fake-safetensors-content").expect("write model");

        let mock_runner = MockCommandRunner::new();
        let config = ExecutionConfig {
            output_dir: Some(output_dir.to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");
        let formats = vec![Format::SafeTensors, Format::Apr];

        let (workspace, passed, _failed) =
            executor.prepare_model_workspace(&model_file, &model_id, &formats);

        let ws_path = Path::new(&workspace);
        assert!(ws_path.exists(), "Workspace directory should exist");

        let st_dir = ws_path.join("safetensors");
        assert!(st_dir.exists(), "safetensors subdir should exist");
        let st_link = st_dir.join("model.safetensors");
        assert!(st_link.exists(), "model.safetensors symlink should exist");

        let apr_dir = ws_path.join("apr");
        assert!(apr_dir.exists(), "apr subdir should exist");

        assert!(passed >= 1, "At least one format conversion should pass");
    }
8590
    /// Sibling `*.config.json` / `*.tokenizer.json` files next to the model
    /// are linked into the workspace's `safetensors/` dir under canonical
    /// names (`config.json`, `tokenizer.json`).
    #[test]
    fn test_workspace_symlinks_config_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let output_dir = dir.path().join("output");

        // Model plus two same-stem sibling config files.
        let model_file = dir.path().join("abc123.safetensors");
        std::fs::write(&model_file, b"fake-model").expect("write model");
        std::fs::write(
            dir.path().join("abc123.config.json"),
            r#"{"num_hidden_layers": 24}"#,
        )
        .expect("write config");
        std::fs::write(
            dir.path().join("abc123.tokenizer.json"),
            r#"{"version": "1.0"}"#,
        )
        .expect("write tokenizer");

        let mock_runner = MockCommandRunner::new();
        let config = ExecutionConfig {
            output_dir: Some(output_dir.to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");
        let formats = vec![Format::SafeTensors];

        let (workspace, _passed, _failed) =
            executor.prepare_model_workspace(&model_file, &model_id, &formats);

        let ws_path = Path::new(&workspace);
        let st_dir = ws_path.join("safetensors");

        assert!(
            st_dir.join("config.json").exists(),
            "config.json should be symlinked"
        );
        assert!(
            st_dir.join("tokenizer.json").exists(),
            "tokenizer.json should be symlinked"
        );
    }
8639
    /// Conversion failures are non-fatal: the workspace (and its safetensors
    /// dir) still exists, both APR and GGUF count as failed, and a G0-FORMAT
    /// evidence entry is emitted per failed conversion.
    #[test]
    fn test_workspace_conversion_failure_nonfatal() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let output_dir = dir.path().join("output");

        let model_file = dir.path().join("test.safetensors");
        std::fs::write(&model_file, b"fake-model").expect("write model");

        // Mock runner forces every convert invocation to fail.
        let mock_runner = MockCommandRunner::new().with_convert_failure();
        let config = ExecutionConfig {
            output_dir: Some(output_dir.to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");
        let formats = vec![Format::SafeTensors, Format::Apr, Format::Gguf];

        let (workspace, passed, failed) =
            executor.prepare_model_workspace(&model_file, &model_id, &formats);

        assert!(
            Path::new(&workspace).exists(),
            "Workspace should exist even with conversion failures"
        );
        assert!(
            Path::new(&workspace).join("safetensors").exists(),
            "safetensors dir should exist"
        );

        assert_eq!(passed, 0, "No conversions should pass");
        assert_eq!(failed, 2, "Both APR and GGUF conversions should fail");

        let evidence = executor.evidence().all();
        let apr_evidence = evidence.iter().any(|e| e.gate_id == "G0-FORMAT-APR-001");
        let gguf_evidence = evidence.iter().any(|e| e.gate_id == "G0-FORMAT-GGUF-001");
        assert!(apr_evidence, "Should have G0-FORMAT-APR-001 evidence");
        assert!(gguf_evidence, "Should have G0-FORMAT-GGUF-001 evidence");
    }
8687
    /// When `model_path` is a directory (not a single model file), workspace
    /// preparation is skipped entirely, so no G0-FORMAT evidence appears.
    #[test]
    fn test_workspace_skipped_for_directory() {
        let mock_runner = MockCommandRunner::new();
        let config = ExecutionConfig {
            model_path: Some("/some/directory/path".to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let yaml = r#"
name: workspace-skip-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [safetensors, apr]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("parse");
        let result = executor.execute(&playbook).expect("execute");

        let has_format_evidence = result
            .evidence
            .all()
            .iter()
            .any(|e| e.gate_id.starts_with("G0-FORMAT"));
        assert!(
            !has_format_evidence,
            "No G0-FORMAT evidence expected for directory model path"
        );
    }
8726
    /// Workspace preparation emits exactly one G0-FORMAT evidence entry per
    /// conversion target (APR and GGUF here; SafeTensors is the source).
    #[test]
    fn test_workspace_evidence_emitted() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let output_dir = dir.path().join("output");

        let model_file = dir.path().join("test.safetensors");
        std::fs::write(&model_file, b"fake-model").expect("write model");

        let mock_runner = MockCommandRunner::new();
        let config = ExecutionConfig {
            output_dir: Some(output_dir.to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");
        let formats = vec![Format::SafeTensors, Format::Apr, Format::Gguf];

        let (_workspace, passed, failed) =
            executor.prepare_model_workspace(&model_file, &model_id, &formats);

        // Only the two conversion targets are counted, not the source format.
        assert_eq!(passed + failed, 2, "Should have evidence for APR and GGUF");

        let evidence = executor.evidence().all();
        let format_evidence_count = evidence
            .iter()
            .filter(|e| e.gate_id.starts_with("G0-FORMAT"))
            .count();
        assert_eq!(
            format_evidence_count, 2,
            "Should have 2 G0-FORMAT evidence entries"
        );
    }
8764
8765 #[test]
8766 fn test_find_sibling_model_files() {
8767 let dir = tempfile::tempdir().expect("create temp dir");
8768
8769 let model_file = dir.path().join("abc123.safetensors");
8771 std::fs::write(&model_file, b"model").expect("write");
8772 std::fs::write(dir.path().join("abc123.config.json"), b"config").expect("write");
8773 std::fs::write(dir.path().join("abc123.tokenizer.json"), b"tokenizer").expect("write");
8774 std::fs::write(dir.path().join("def456.safetensors"), b"other").expect("write");
8776 std::fs::write(dir.path().join("def456.config.json"), b"other-config").expect("write");
8777
8778 let siblings = Executor::find_sibling_model_files(&model_file);
8779
8780 assert_eq!(siblings.len(), 2, "Should find exactly 2 sibling files");
8782
8783 let canonical_names: Vec<&str> = siblings.iter().map(|(_, n)| n.as_str()).collect();
8784 assert!(
8785 canonical_names.contains(&"config.json"),
8786 "Should find config.json"
8787 );
8788 assert!(
8789 canonical_names.contains(&"tokenizer.json"),
8790 "Should find tokenizer.json"
8791 );
8792 }
8793
8794 #[test]
8795 fn test_find_sibling_model_files_no_siblings() {
8796 let dir = tempfile::tempdir().expect("create temp dir");
8797
8798 let model_file = dir.path().join("lonely.safetensors");
8799 std::fs::write(&model_file, b"model").expect("write");
8800
8801 let siblings = Executor::find_sibling_model_files(&model_file);
8802 assert!(siblings.is_empty(), "Should find no siblings");
8803 }
8804
8805 #[test]
8806 fn test_find_sibling_model_files_non_safetensors() {
8807 let dir = tempfile::tempdir().expect("create temp dir");
8808
8809 let model_file = dir.path().join("model.gguf");
8810 std::fs::write(&model_file, b"model").expect("write");
8811
8812 let siblings = Executor::find_sibling_model_files(&model_file);
8813 assert!(
8814 siblings.is_empty(),
8815 "Should return empty for non-safetensors files"
8816 );
8817 }
8818
    /// End-to-end: executing with a single-file `model_path` rewrites the
    /// config to point at the prepared workspace directory (not the original
    /// file) and records G0-FORMAT evidence for the conversion.
    #[test]
    fn test_workspace_execute_integration_with_single_file() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let output_dir = dir.path().join("output");

        let model_file = dir.path().join("test.safetensors");
        std::fs::write(&model_file, b"fake-model").expect("write model");

        let mock_runner =
            MockCommandRunner::new().with_pull_model_path(model_file.to_string_lossy().to_string());
        let config = ExecutionConfig {
            model_path: Some(model_file.to_string_lossy().to_string()),
            output_dir: Some(output_dir.to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let yaml = r#"
name: workspace-integration
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [safetensors, apr]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("parse");
        let result = executor.execute(&playbook).expect("execute");

        // After execute, the config's model_path must be the workspace dir.
        let final_model_path = executor.config().model_path.as_deref().unwrap_or("");
        assert!(
            final_model_path.contains("workspace"),
            "model_path should point to workspace: {final_model_path}"
        );
        assert!(
            !final_model_path.ends_with(".safetensors"),
            "model_path should not be a file: {final_model_path}"
        );

        let has_format_evidence = result
            .evidence
            .all()
            .iter()
            .any(|e| e.gate_id.starts_with("G0-FORMAT"));
        assert!(
            has_format_evidence,
            "Should have G0-FORMAT evidence for APR conversion"
        );
    }
8877
    /// Without a model family configured, the G0 tensor-template check never
    /// runs, so no G0-TENSOR-001 evidence is produced.
    #[test]
    fn test_g0_tensor_no_family_configured() {
        let mock_runner = MockCommandRunner::new();
        let dir = make_temp_model_dir();

        let config = ExecutionConfig {
            model_path: Some(dir.path().to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));

        let yaml = r#"
name: no-family-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [safetensors]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
        let playbook = Playbook::from_yaml(yaml).expect("parse");
        let result = executor.execute(&playbook).expect("execute");

        let has_tensor_evidence = result
            .evidence
            .all()
            .iter()
            .any(|e| e.gate_id == "G0-TENSOR-001");
        assert!(
            !has_tensor_evidence,
            "Should NOT have G0-TENSOR evidence when family not configured"
        );
    }
8922
    /// An unknown family name (with a nonexistent contracts dir) makes the G0
    /// tensor check a skip: zero passes/failures, with G0-TENSOR-001 evidence
    /// explaining that the family contract was not found.
    #[test]
    fn test_g0_tensor_family_contract_not_found() {
        let mock_runner = MockCommandRunner::new();
        let dir = make_temp_model_dir();

        let config = ExecutionConfig {
            model_path: Some(dir.path().to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");

        let (passed, failed) = executor.run_g0_tensor_template_check(
            dir.path(),
            &model_id,
            "nonexistent-family",
            "1b",
            Some("/nonexistent/path"),
        );

        assert_eq!(passed, 0);
        assert_eq!(failed, 0);

        let evidence = executor.evidence().all();
        let tensor_ev = evidence
            .iter()
            .find(|e| e.gate_id == "G0-TENSOR-001")
            .expect("should have G0-TENSOR evidence");
        assert!(tensor_ev.output.contains("G0 SKIP"));
        assert!(tensor_ev.output.contains("Family contract not found"));
    }
8961
8962 #[test]
8963 fn test_g0_tensor_no_safetensors_files() {
8964 let mock_runner = MockCommandRunner::new();
8966 let dir = tempfile::TempDir::new().expect("create temp dir");
8967
8968 let config = ExecutionConfig {
8969 model_path: Some(dir.path().to_string_lossy().to_string()),
8970 run_conversion_tests: false,
8971 run_golden_rule_test: false,
8972 run_contract_tests: false,
8973 ..Default::default()
8974 };
8975
8976 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
8977 let model_id = ModelId::new("test", "model");
8978
8979 let (passed, failed) = executor.run_g0_tensor_template_check(
8981 dir.path(),
8982 &model_id,
8983 "qwen2",
8984 "0.5b",
8985 Some("/nonexistent/path"), );
8987
8988 assert_eq!(passed, 0);
8990 assert_eq!(failed, 0);
8991 }
8992
8993 #[test]
8994 fn test_g0_tensor_inspect_returns_empty_names() {
8995 let mock_runner = MockCommandRunner::new().with_tensor_names(vec![]); let dir = make_temp_model_dir();
8998
8999 let config = ExecutionConfig {
9000 model_path: Some(dir.path().to_string_lossy().to_string()),
9001 run_conversion_tests: false,
9002 run_golden_rule_test: false,
9003 run_contract_tests: false,
9004 ..Default::default()
9005 };
9006
9007 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
9008 let model_id = ModelId::new("test", "model");
9009
9010 let (passed, failed) = executor.run_g0_tensor_template_check(
9013 dir.path(),
9014 &model_id,
9015 "qwen2",
9016 "0.5b",
9017 Some("/nonexistent/path"),
9018 );
9019
9020 assert_eq!(passed, 0);
9022 assert_eq!(failed, 0);
9023 }
9024
    /// A valid family contract but a failing `inspect` command counts as one
    /// G0 failure, with evidence stating the model could not be inspected.
    #[test]
    fn test_g0_tensor_inspect_failure() {
        let mock_runner = MockCommandRunner::new().with_inspect_json_failure();
        let dir = make_temp_model_dir();

        // Minimal family contract requiring one tensor.
        let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
        let family_yaml = r#"
family: testfamily
size_variants:
  1b:
    parameters: "1B"
    hidden_dim: 1024
    num_layers: 12
    num_heads: 8
tensor_template:
  embedding: "embed.weight"
"#;
        std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
            .expect("write family yaml");

        let config = ExecutionConfig {
            model_path: Some(dir.path().to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");

        let (passed, failed) = executor.run_g0_tensor_template_check(
            dir.path(),
            &model_id,
            "testfamily",
            "1b",
            Some(contracts_dir.path().to_str().expect("path")),
        );

        assert_eq!(passed, 0);
        assert_eq!(failed, 1);

        let evidence = executor.evidence().all();
        let tensor_ev = evidence
            .iter()
            .find(|e| e.gate_id == "G0-TENSOR-001")
            .expect("should have G0-TENSOR evidence");
        assert!(tensor_ev.reason.contains("G0 FAIL"));
        assert!(tensor_ev.reason.contains("Could not inspect"));
    }
9078
    /// When every tensor demanded by the family template is reported by
    /// inspect, the G0 check passes and evidence records "G0 PASS".
    #[test]
    fn test_g0_tensor_all_tensors_present() {
        // Inspect reports the required embedding plus a layer tensor.
        let mock_runner = MockCommandRunner::new().with_tensor_names(vec![
            "embed.weight".to_string(),
            "model.layers.0.self_attn.q_proj.weight".to_string(),
        ]);
        let dir = make_temp_model_dir();

        let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
        let family_yaml = r#"
family: testfamily
size_variants:
  1b:
    parameters: "1B"
    hidden_dim: 1024
    num_layers: 1
    num_heads: 8
tensor_template:
  embedding: "embed.weight"
"#;
        std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
            .expect("write family yaml");

        let config = ExecutionConfig {
            model_path: Some(dir.path().to_string_lossy().to_string()),
            run_conversion_tests: false,
            run_golden_rule_test: false,
            run_contract_tests: false,
            ..Default::default()
        };

        let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
        let model_id = ModelId::new("test", "model");

        let (passed, failed) = executor.run_g0_tensor_template_check(
            dir.path(),
            &model_id,
            "testfamily",
            "1b",
            Some(contracts_dir.path().to_str().expect("path")),
        );

        assert_eq!(passed, 1);
        assert_eq!(failed, 0);

        let evidence = executor.evidence().all();
        let tensor_ev = evidence
            .iter()
            .find(|e| e.gate_id == "G0-TENSOR-001")
            .expect("should have G0-TENSOR evidence");
        assert!(tensor_ev.output.contains("G0 PASS"));
    }
9134
9135 #[test]
9136 fn test_g0_tensor_missing_tensors() {
9137 let mock_runner = MockCommandRunner::new().with_tensor_names(vec![
9139 "some.other.tensor".to_string(), ]);
9141 let dir = make_temp_model_dir();
9142
9143 let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
9145 let family_yaml = r#"
9146family: testfamily
9147size_variants:
9148 1b:
9149 parameters: "1B"
9150 hidden_dim: 1024
9151 num_layers: 1
9152 num_heads: 8
9153tensor_template:
9154 embedding: "embed.weight"
9155"#;
9156 std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
9157 .expect("write family yaml");
9158
9159 let config = ExecutionConfig {
9160 model_path: Some(dir.path().to_string_lossy().to_string()),
9161 run_conversion_tests: false,
9162 run_golden_rule_test: false,
9163 run_contract_tests: false,
9164 ..Default::default()
9165 };
9166
9167 let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
9168 let model_id = ModelId::new("test", "model");
9169
9170 let (passed, failed) = executor.run_g0_tensor_template_check(
9171 dir.path(),
9172 &model_id,
9173 "testfamily",
9174 "1b",
9175 Some(contracts_dir.path().to_str().expect("path")),
9176 );
9177
9178 assert_eq!(passed, 0);
9180 assert_eq!(failed, 1);
9181
9182 let evidence = executor.evidence().all();
9183 let tensor_ev = evidence
9184 .iter()
9185 .find(|e| e.gate_id == "G0-TENSOR-001")
9186 .expect("should have G0-TENSOR evidence");
9187 assert!(tensor_ev.reason.contains("G0 FAIL"));
9188 assert!(tensor_ev.reason.contains("Missing"));
9189 assert!(tensor_ev.reason.contains("embed.weight"));
9190 }
9191
9192 #[test]
9195 fn test_parse_timing_ms_standard() {
9196 let output = "Output:\nHello\nCompleted in 1.5s\ntok/s: 25.0";
9197 assert!((parse_timing_ms(output).unwrap() - 1500.0).abs() < 0.1);
9198 }
9199
9200 #[test]
9201 fn test_parse_timing_ms_no_timing() {
9202 let output = "Just some output without timing";
9203 assert!(parse_timing_ms(output).is_none());
9204 }
9205
9206 #[test]
9207 fn test_parse_timing_ms_zero() {
9208 let output = "Completed in 0.0s";
9209 assert!((parse_timing_ms(output).unwrap()).abs() < 0.1);
9210 }
9211
9212 #[test]
9215 fn test_parse_throughput_json() {
9216 let output = r#"{"throughput_tps":25.0,"latency_p50_ms":78.2}"#;
9217 assert!((parse_throughput(output).unwrap() - 25.0).abs() < 0.1);
9218 }
9219
9220 #[test]
9221 fn test_parse_throughput_no_match() {
9222 let output = "no json here";
9223 assert!(parse_throughput(output).is_none());
9224 }
9225
9226 #[test]
9227 fn test_parse_throughput_integer() {
9228 let output = r#"{"throughput_tps":100,"other":0}"#;
9229 assert!((parse_throughput(output).unwrap() - 100.0).abs() < 0.1);
9230 }
9231
    /// Running ollama parity with an explicit model_tag/prompts config must
    /// produce at least two evidence items (pass or fail) for the comparison.
    #[test]
    fn test_ollama_parity_ttft_comparison() {
        let runner = MockCommandRunner::new().with_inference_response("Hello world");
        let runner = Arc::new(runner);

        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-ollama-ttft
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  model_tag: "test:latest"
  prompts: ["What is 2+2?"]
  temperature: 0.0
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let (passed, failed) =
            executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
        assert!(
            passed + failed >= 2,
            "Expected at least 2 evidence items, got passed={passed} failed={failed}"
        );
    }
9270
    /// Happy-path GGUF loadability: with a default (all-success) mock runner,
    /// the parity run passes multiple checks and records F-OLLAMA-005.
    #[test]
    fn test_ollama_gguf_loadability_success() {
        let runner = Arc::new(MockCommandRunner::new());
        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-ollama-gguf
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  prompts: ["test"]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let (passed, _failed) =
            executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
        assert!(passed >= 3, "Expected at least 3 passes, got {passed}");
        let evidence = executor.evidence().all();
        assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-005"));
    }
9304
    /// A failing `ollama create` must surface as at least one parity failure,
    /// with F-OLLAMA-005 evidence recorded as non-passing.
    #[test]
    fn test_ollama_gguf_loadability_failure() {
        let runner = Arc::new(MockCommandRunner::new().with_ollama_create_failure());
        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-ollama-gguf-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  prompts: ["test"]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let (_passed, failed) =
            executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
        assert!(
            failed >= 1,
            "Expected at least 1 failure for create failure"
        );
        let evidence = executor.evidence().all();
        let gguf_ev = evidence
            .iter()
            .find(|e| e.gate_id == "F-OLLAMA-005")
            .unwrap();
        assert!(!gguf_ev.outcome.is_pass());
    }
9342
    /// Happy-path API parity: the parity run passes and records F-OLLAMA-004
    /// evidence for the API comparison.
    #[test]
    fn test_ollama_api_parity_success() {
        let runner = Arc::new(MockCommandRunner::new());
        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-ollama-api
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  prompts: ["test"]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let (passed, _failed) =
            executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
        assert!(passed >= 1);
        let evidence = executor.evidence().all();
        assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-004"));
    }
9375
    /// A failing HTTP GET against the ollama API must be counted as a parity
    /// failure, with F-OLLAMA-004 recorded as non-passing.
    #[test]
    fn test_ollama_api_parity_failure() {
        let runner = Arc::new(MockCommandRunner::new().with_http_get_failure());
        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-ollama-api-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  prompts: ["test"]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let (_passed, failed) =
            executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
        assert!(failed >= 1);
        let evidence = executor.evidence().all();
        let api_ev = evidence
            .iter()
            .find(|e| e.gate_id == "F-OLLAMA-004")
            .unwrap();
        assert!(!api_ev.outcome.is_pass());
    }
9410
    /// Perf gates over a cpu+gpu profile_ci config: with the mock reporting a
    /// fixed 50 tok/s, both F-PERF-003 (gpu/cpu ratio) and F-PERF-005 (memory)
    /// evidence entries are produced and at least two gates pass.
    #[test]
    fn test_perf_003_gpu_cpu_ratio() {
        let runner = Arc::new(MockCommandRunner::new().with_tps(50.0));
        let config = ExecutionConfig {
            run_profile_ci: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-perf-003
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu, gpu]
  scenario_count: 1
profile_ci:
  enabled: true
  warmup: 1
  measure: 2
  formats: [safetensors]
  backends: [cpu, gpu]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let model_id = playbook.model_id();
        let (passed, _failed) =
            executor.run_perf_gates(Path::new("/mock/model"), &model_id, &playbook);
        assert!(passed >= 2, "Expected at least 2 passes, got {passed}");
        let evidence = executor.evidence().all();
        assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-003"));
        assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-005"));
    }
9449
    /// A failing memory-profiling command counts as a perf-gate failure, with
    /// F-PERF-005 evidence recorded as non-passing.
    #[test]
    fn test_perf_005_memory_profiling_failure() {
        let runner = Arc::new(MockCommandRunner::new().with_profile_memory_failure());
        let config = ExecutionConfig {
            run_profile_ci: true,
            model_path: Some("/mock/model".to_string()),
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, runner);

        let yaml = r#"
name: test-perf-005-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
profile_ci:
  enabled: true
  warmup: 1
  measure: 2
  backends: [cpu]
"#;
        let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
        let model_id = playbook.model_id();
        let (_passed, failed) =
            executor.run_perf_gates(Path::new("/mock/model"), &model_id, &playbook);
        assert!(failed >= 1);
        let evidence = executor.evidence().all();
        let mem_ev = evidence.iter().find(|e| e.gate_id == "F-PERF-005").unwrap();
        assert!(!mem_ev.outcome.is_pass());
    }
9486
    /// Full `execute` integration with ollama parity enabled: scenarios run
    /// and F-OLLAMA-001 evidence is recorded.
    #[test]
    fn test_execute_with_ollama_parity_enabled() {
        let runner =
            MockCommandRunner::new().with_inference_response("Output:\nHello\nCompleted in 0.5s");
        let config = ExecutionConfig {
            run_ollama_parity: true,
            model_path: Some("/mock/model".to_string()),
            // Keep the run on CPU only.
            no_gpu: true,
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, Arc::new(runner));

        let yaml = r#"
name: test-ollama-integration
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
ollama_parity:
  enabled: true
  prompts: ["What is 2+2?"]
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        assert!(result.total_scenarios >= 1);
        let evidence = executor.evidence().all();
        assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-001"));
    }
9520
    /// Full `execute` integration with profile_ci enabled: scenarios run and
    /// both F-PERF-003 and F-PERF-005 evidence entries are recorded.
    #[test]
    fn test_execute_with_profile_ci_perf_gates() {
        let runner = MockCommandRunner::new()
            .with_tps(50.0)
            .with_inference_response("Output:\nHello\nCompleted in 0.5s");
        let config = ExecutionConfig {
            run_profile_ci: true,
            model_path: Some("/mock/model".to_string()),
            no_gpu: true,
            ..Default::default()
        };
        let mut executor = Executor::with_runner(config, Arc::new(runner));

        let yaml = r#"
name: test-perf-integration
version: "1.0.0"
model:
  hf_repo: "test/model"
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
profile_ci:
  enabled: true
  warmup: 1
  measure: 2
  formats: [safetensors]
  backends: [cpu, gpu]
"#;
        let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
        let result = executor.execute(&playbook).expect("Execution failed");
        assert!(result.total_scenarios >= 1);
        let evidence = executor.evidence().all();
        assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-003"));
        assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-005"));
    }
9559
    /// When `model_path` is a .safetensors file and a GGUF scenario is asked
    /// for, `resolve_model_path` picks the sibling `model.gguf` file.
    #[test]
    fn test_resolve_model_path_file_sibling_gguf() {
        let temp_dir = tempfile::tempdir().unwrap();
        let st_file = temp_dir.path().join("model.safetensors");
        let gguf_file = temp_dir.path().join("model.gguf");
        std::fs::write(&st_file, b"fake safetensors").unwrap();
        std::fs::write(&gguf_file, b"fake gguf").unwrap();

        let config = ExecutionConfig {
            model_path: Some(st_file.to_string_lossy().to_string()),
            ..Default::default()
        };
        let executor = Executor::with_config(config);

        // Scenario requests GGUF although the configured path is safetensors.
        let scenario = QaScenario::new(
            ModelId::new("test", "model"),
            Modality::Run,
            Backend::Cpu,
            Format::Gguf,
            "test".to_string(),
            0,
        );
        let path = executor.resolve_model_path(&scenario);
        assert!(path.is_some(), "Should find sibling .gguf file");
        assert!(path.unwrap().contains("model.gguf"));
    }
9589
9590 #[test]
9591 fn test_resolve_model_path_file_sibling_apr() {
9592 let temp_dir = tempfile::tempdir().unwrap();
9594 let gguf_file = temp_dir.path().join("model.gguf");
9595 let apr_file = temp_dir.path().join("model.apr");
9596 std::fs::write(&gguf_file, b"fake gguf").unwrap();
9597 std::fs::write(&apr_file, b"fake apr").unwrap();
9598
9599 let config = ExecutionConfig {
9600 model_path: Some(gguf_file.to_string_lossy().to_string()),
9601 ..Default::default()
9602 };
9603 let executor = Executor::with_config(config);
9604
9605 let scenario = QaScenario::new(
9606 ModelId::new("test", "model"),
9607 Modality::Run,
9608 Backend::Cpu,
9609 Format::Apr,
9610 "test".to_string(),
9611 0,
9612 );
9613 let path = executor.resolve_model_path(&scenario);
9614 assert!(path.is_some(), "Should find sibling .apr file");
9615 assert!(path.unwrap().contains("model.apr"));
9616 }
9617
9618 #[test]
9619 fn test_resolve_model_path_file_sibling_not_found() {
9620 let temp_dir = tempfile::tempdir().unwrap();
9622 let st_file = temp_dir.path().join("model.safetensors");
9623 std::fs::write(&st_file, b"fake safetensors").unwrap();
9624
9625 let config = ExecutionConfig {
9626 model_path: Some(st_file.to_string_lossy().to_string()),
9627 ..Default::default()
9628 };
9629 let executor = Executor::with_config(config);
9630
9631 let scenario = QaScenario::new(
9632 ModelId::new("test", "model"),
9633 Modality::Run,
9634 Backend::Cpu,
9635 Format::Gguf,
9636 "test".to_string(),
9637 0,
9638 );
9639 assert!(
9640 executor.resolve_model_path(&scenario).is_none(),
9641 "No sibling .gguf exists, should return None"
9642 );
9643 }
9644
9645 #[test]
9646 fn test_resolve_model_path_file_sibling_fallback_different_stem() {
9647 let temp_dir = tempfile::tempdir().unwrap();
9650 let st_file = temp_dir.path().join("abc123.safetensors");
9651 let gguf_file = temp_dir.path().join("other-name.gguf");
9652 std::fs::write(&st_file, b"fake safetensors").unwrap();
9653 std::fs::write(&gguf_file, b"fake gguf").unwrap();
9654
9655 let config = ExecutionConfig {
9656 model_path: Some(st_file.to_string_lossy().to_string()),
9657 ..Default::default()
9658 };
9659 let executor = Executor::with_config(config);
9660
9661 let scenario = QaScenario::new(
9662 ModelId::new("test", "model"),
9663 Modality::Run,
9664 Backend::Cpu,
9665 Format::Gguf,
9666 "test".to_string(),
9667 0,
9668 );
9669 let path = executor.resolve_model_path(&scenario);
9670 assert!(
9671 path.is_none(),
9672 "Should NOT match unrelated model family"
9673 );
9674 }
9675
9676 #[test]
9677 fn test_resolve_model_path_file_sibling_prefix_match() {
9678 let temp_dir = tempfile::tempdir().unwrap();
9680 let gguf_file = temp_dir.path().join("qwen2.5-coder-7b-instruct-q4k.gguf");
9681 let apr_file = temp_dir.path().join("qwen2.5-coder-7b-instruct.apr");
9682 std::fs::write(&gguf_file, b"fake gguf").unwrap();
9683 std::fs::write(&apr_file, b"fake apr").unwrap();
9684
9685 let config = ExecutionConfig {
9686 model_path: Some(gguf_file.to_string_lossy().to_string()),
9687 ..Default::default()
9688 };
9689 let executor = Executor::with_config(config);
9690
9691 let scenario = QaScenario::new(
9692 ModelId::new("test", "model"),
9693 Modality::Run,
9694 Backend::Cpu,
9695 Format::Apr,
9696 "test".to_string(),
9697 0,
9698 );
9699 let path = executor.resolve_model_path(&scenario);
9700 assert!(
9701 path.is_some(),
9702 "Should find APR via model family prefix match"
9703 );
9704 assert!(path.unwrap().contains("qwen2.5-coder-7b-instruct.apr"));
9705 }
9706
9707 #[test]
9710 fn test_subprocess_execution_chat_modality() {
9711 let runner = MockCommandRunner::new();
9712 let config = ExecutionConfig {
9713 model_path: Some("/mock/model.gguf".to_string()),
9714 ..Default::default()
9715 };
9716 let executor = Executor::with_runner(config, Arc::new(runner));
9717
9718 let scenario = QaScenario::new(
9719 ModelId::new("test", "model"),
9720 Modality::Chat,
9721 Backend::Cpu,
9722 Format::Gguf,
9723 "What is 2+2?".to_string(),
9724 0,
9725 );
9726
9727 let (text, stderr, exit_code, _tps, skipped) = executor.subprocess_execution(&scenario);
9728 assert!(!skipped, "Chat scenario should not be skipped");
9729 assert_eq!(exit_code, 0);
9730 assert!(stderr.is_none() || stderr.as_deref() == Some(""));
9731 assert!(text.contains("4"), "Chat should return arithmetic answer");
9732 }
9733
9734 #[test]
9735 fn test_subprocess_execution_serve_modality() {
9736 let runner = MockCommandRunner::new();
9737 let config = ExecutionConfig {
9738 model_path: Some("/mock/model.gguf".to_string()),
9739 ..Default::default()
9740 };
9741 let executor = Executor::with_runner(config, Arc::new(runner));
9742
9743 let scenario = QaScenario::new(
9744 ModelId::new("test", "model"),
9745 Modality::Serve,
9746 Backend::Cpu,
9747 Format::Gguf,
9748 "What is 2+2?".to_string(),
9749 0,
9750 );
9751
9752 let (_text, _stderr, _exit_code, _tps, skipped) =
9753 executor.subprocess_execution(&scenario);
9754 assert!(!skipped, "Serve scenario should not be skipped");
9756 }
9757
9758 #[test]
9761 fn test_subprocess_execution_gpu_backend() {
9762 let runner = MockCommandRunner::new();
9764 let config = ExecutionConfig {
9765 model_path: Some("/mock/model.gguf".to_string()),
9766 no_gpu: true, ..Default::default()
9768 };
9769 let executor = Executor::with_runner(config, Arc::new(runner));
9770
9771 let scenario = QaScenario::new(
9772 ModelId::new("test", "model"),
9773 Modality::Run,
9774 Backend::Gpu,
9775 Format::Gguf,
9776 "test".to_string(),
9777 0,
9778 );
9779
9780 let (_text, _stderr, exit_code, _tps, skipped) =
9781 executor.subprocess_execution(&scenario);
9782 assert!(!skipped);
9783 assert_eq!(exit_code, 0);
9784 }
9787}