1use crate::error::Result;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::time::Instant;
10
11use super::{
12 executor::{ProtocolInput, ProtocolOutput},
13 validation::{DeepSeekValidationResult, ValidationVerdict},
14 validation_executor::{ValidatingProtocolExecutor, ValidationExecutorConfig, ValidationLevel},
15};
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct BenchmarkConfig {
20 pub scenarios: Vec<BenchmarkScenario>,
22 pub iterations: usize,
24 pub validation_levels: Vec<ValidationLevel>,
26 pub enable_statistics: bool,
28 pub timeout_secs: u64,
30}
31
32impl Default for BenchmarkConfig {
33 fn default() -> Self {
34 Self {
35 scenarios: vec![
36 BenchmarkScenario::BusinessDecision,
37 BenchmarkScenario::TechnicalArchitecture,
38 BenchmarkScenario::ComplianceAnalysis,
39 BenchmarkScenario::RiskAssessment,
40 ],
41 iterations: 10,
42 validation_levels: vec![
43 ValidationLevel::None,
44 ValidationLevel::Quick,
45 ValidationLevel::Standard,
46 ValidationLevel::Rigorous,
47 ],
48 enable_statistics: true,
49 timeout_secs: 300,
50 }
51 }
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum BenchmarkScenario {
58 BusinessDecision,
60 TechnicalArchitecture,
62 ComplianceAnalysis,
64 RiskAssessment,
66 StrategicPlanning,
68 MultiPerspectiveAnalysis,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct ScenarioResult {
75 pub scenario: BenchmarkScenario,
76 pub validation_level: ValidationLevel,
77 pub iterations: usize,
78 pub average_duration_ms: f64,
79 pub average_confidence: f64,
80 pub average_validation_confidence: f64,
81 pub success_rate: f64,
82 pub validation_success_rate: f64,
83 pub token_usage: TokenUsageMetrics,
84 pub validation_findings: Vec<ValidationFindingStat>,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct TokenUsageMetrics {
90 pub average_input_tokens: f64,
91 pub average_output_tokens: f64,
92 pub average_total_tokens: f64,
93 pub average_cost_usd: f64,
94 pub token_per_second: f64,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ValidationFindingStat {
100 pub finding_category: String,
101 pub average_severity: f64,
102 pub frequency: f64,
103 pub average_confidence_impact: f64,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct BenchmarkResults {
109 pub config: BenchmarkConfig,
110 pub scenario_results: HashMap<String, ScenarioResult>,
111 pub summary: BenchmarkSummary,
112 pub timestamp: chrono::DateTime<chrono::Utc>,
113 pub version: String,
114}
115
116#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct BenchmarkSummary {
119 pub total_duration_seconds: f64,
120 pub total_iterations: usize,
121 pub overall_success_rate: f64,
122 pub average_confidence_gain: f64,
123 pub cost_per_validation: f64,
124 pub performance_improvements: Vec<String>,
125 pub recommendations: Vec<String>,
126}
127
128#[derive(Default)]
130pub struct DeepSeekBenchmarkRunner {
131 config: BenchmarkConfig,
132}
133
134impl DeepSeekBenchmarkRunner {
135 pub fn new(config: BenchmarkConfig) -> Self {
136 Self { config }
137 }
138
139 pub async fn run_benchmark(&self) -> Result<BenchmarkResults> {
141 let start_time = Instant::now();
142 let mut scenario_results = HashMap::new();
143 let mut total_iterations = 0;
144
145 for scenario in &self.config.scenarios {
146 for validation_level in &self.config.validation_levels {
147 let result = self.run_scenario(*scenario, *validation_level).await?;
148 total_iterations += result.iterations;
149 let key = format!("{:?}_{:?}", scenario, validation_level);
150 scenario_results.insert(key, result);
151 }
152 }
153
154 let total_duration = start_time.elapsed().as_secs_f64();
155
156 let summary = self.calculate_summary(&scenario_results, total_duration, total_iterations);
157
158 Ok(BenchmarkResults {
159 config: self.config.clone(),
160 scenario_results,
161 summary,
162 timestamp: chrono::Utc::now(),
163 version: env!("CARGO_PKG_VERSION").to_string(),
164 })
165 }
166
167 async fn run_scenario(
169 &self,
170 scenario: BenchmarkScenario,
171 validation_level: ValidationLevel,
172 ) -> Result<ScenarioResult> {
173 let mut durations = Vec::new();
174 let mut confidences = Vec::new();
175 let mut validation_confidences = Vec::new();
176 let mut success_count = 0;
177 let mut validation_success_count = 0;
178 let mut token_metrics = Vec::new();
179 let mut findings_stats = HashMap::new();
180
181 for i in 0..self.config.iterations {
182 let input = self.generate_scenario_input(scenario, i);
183
184 let start_time = Instant::now();
185 let result = self
186 .execute_validation_run(&input, validation_level)
187 .await?;
188 let duration = start_time.elapsed().as_millis() as f64;
189
190 durations.push(duration);
191 confidences.push(result.confidence);
192
193 if result.success {
194 success_count += 1;
195 }
196
197 if let Some(validation_data) = result.data.get("deepseek_validation") {
199 if let Ok(validation_result) =
200 serde_json::from_value::<DeepSeekValidationResult>(validation_data.clone())
201 {
202 validation_confidences.push(validation_result.validation_confidence);
203
204 if validation_result.verdict == ValidationVerdict::Validated {
205 validation_success_count += 1;
206 }
207
208 token_metrics.push(TokenUsageMetrics {
210 average_input_tokens: validation_result.tokens_used.input_tokens as f64,
211 average_output_tokens: validation_result.tokens_used.output_tokens as f64,
212 average_total_tokens: validation_result.tokens_used.total_tokens as f64,
213 average_cost_usd: validation_result.tokens_used.cost_usd,
214 token_per_second: validation_result.performance.tokens_per_second,
215 });
216
217 self.analyze_findings(&validation_result, &mut findings_stats);
219 }
220 }
221 }
222
223 let average_duration = durations.iter().sum::<f64>() / durations.len() as f64;
225 let average_confidence = confidences.iter().sum::<f64>() / confidences.len() as f64;
226 let average_validation_confidence = if validation_confidences.is_empty() {
227 0.0
228 } else {
229 validation_confidences.iter().sum::<f64>() / validation_confidences.len() as f64
230 };
231
232 let success_rate = success_count as f64 / self.config.iterations as f64;
233 let validation_success_rate =
234 validation_success_count as f64 / self.config.iterations as f64;
235
236 let token_usage = if token_metrics.is_empty() {
238 TokenUsageMetrics::default()
239 } else {
240 TokenUsageMetrics {
241 average_input_tokens: token_metrics
242 .iter()
243 .map(|m| m.average_input_tokens)
244 .sum::<f64>()
245 / token_metrics.len() as f64,
246 average_output_tokens: token_metrics
247 .iter()
248 .map(|m| m.average_output_tokens)
249 .sum::<f64>()
250 / token_metrics.len() as f64,
251 average_total_tokens: token_metrics
252 .iter()
253 .map(|m| m.average_total_tokens)
254 .sum::<f64>()
255 / token_metrics.len() as f64,
256 average_cost_usd: token_metrics
257 .iter()
258 .map(|m| m.average_cost_usd)
259 .sum::<f64>()
260 / token_metrics.len() as f64,
261 token_per_second: token_metrics
262 .iter()
263 .map(|m| m.token_per_second)
264 .sum::<f64>()
265 / token_metrics.len() as f64,
266 }
267 };
268
269 let validation_findings = findings_stats
271 .into_iter()
272 .map(|(category, stats)| ValidationFindingStat {
273 finding_category: category,
274 average_severity: stats.average_severity / stats.count as f64,
275 frequency: stats.count as f64 / self.config.iterations as f64,
276 average_confidence_impact: stats.total_confidence_impact / stats.count as f64,
277 })
278 .collect();
279
280 Ok(ScenarioResult {
281 scenario,
282 validation_level,
283 iterations: self.config.iterations,
284 average_duration_ms: average_duration,
285 average_confidence,
286 average_validation_confidence,
287 success_rate,
288 validation_success_rate,
289 token_usage,
290 validation_findings,
291 })
292 }
293
294 async fn execute_validation_run(
296 &self,
297 input: &ProtocolInput,
298 validation_level: ValidationLevel,
299 ) -> Result<ProtocolOutput> {
300 let config = ValidationExecutorConfig {
301 validation_level,
302 ..Default::default()
303 };
304
305 let executor = ValidatingProtocolExecutor::with_configs(Default::default(), config)?;
306
307 executor
309 .execute_profile_with_validation("balanced", input.clone())
310 .await
311 }
312
313 fn generate_scenario_input(
315 &self,
316 scenario: BenchmarkScenario,
317 iteration: usize,
318 ) -> ProtocolInput {
319 match scenario {
320 BenchmarkScenario::BusinessDecision => {
321 ProtocolInput::query(format!(
322 "Should we expand to the European market? Consider market size, competition, regulatory requirements, and potential ROI. Iteration: {}",
323 iteration
324 ))
325 }
326 BenchmarkScenario::TechnicalArchitecture => {
327 ProtocolInput::query(format!(
328 "Evaluate microservices vs monolithic architecture for a 10,000 user SaaS application. Consider scalability, maintainability, deployment complexity. Iteration: {}",
329 iteration
330 ))
331 }
332 BenchmarkScenario::ComplianceAnalysis => {
333 ProtocolInput::query(format!(
334 "Analyze GDPR compliance requirements for a customer analytics platform processing EU citizen data. Iteration: {}",
335 iteration
336 ))
337 }
338 BenchmarkScenario::RiskAssessment => {
339 ProtocolInput::query(format!(
340 "Assess cybersecurity risks for a cloud-based financial application handling sensitive customer data. Iteration: {}",
341 iteration
342 ))
343 }
344 BenchmarkScenario::StrategicPlanning => {
345 ProtocolInput::query(format!(
346 "Develop a 5-year strategic plan for a technology startup in the AI infrastructure space. Iteration: {}",
347 iteration
348 ))
349 }
350 BenchmarkScenario::MultiPerspectiveAnalysis => {
351 ProtocolInput::query(format!(
352 "Analyze the impact of remote work policies from technical, cultural, productivity, and security perspectives. Iteration: {}",
353 iteration
354 ))
355 }
356 }
357 }
358
359 fn analyze_findings(
361 &self,
362 validation_result: &DeepSeekValidationResult,
363 findings_stats: &mut HashMap<String, FindingStatsAccumulator>,
364 ) {
365 for finding in &validation_result.findings {
366 let category = format!("{:?}", finding.category);
367 let stats = findings_stats.entry(category).or_default();
368
369 stats.count += 1;
370 stats.average_severity += match finding.severity {
371 super::validation::Severity::Critical => 5.0,
372 super::validation::Severity::High => 4.0,
373 super::validation::Severity::Medium => 3.0,
374 super::validation::Severity::Low => 2.0,
375 super::validation::Severity::Info => 1.0,
376 };
377
378 stats.total_confidence_impact += validation_result.validation_confidence;
380 }
381 }
382
383 fn calculate_summary(
385 &self,
386 scenario_results: &HashMap<String, ScenarioResult>,
387 total_duration: f64,
388 total_iterations: usize,
389 ) -> BenchmarkSummary {
390 let mut total_success = 0.0_f64;
393 let mut total_confidence_gain = 0.0;
394 let mut total_cost = 0.0;
395
396 for result in scenario_results.values() {
397 total_success += result.success_rate * result.iterations as f64;
398 total_confidence_gain +=
399 result.average_validation_confidence - result.average_confidence;
400 total_cost += result.token_usage.average_cost_usd * result.iterations as f64;
401 }
402
403 let overall_success_rate = if total_iterations == 0 {
404 0.0
405 } else {
406 total_success / total_iterations as f64
407 };
408 let average_confidence_gain = total_confidence_gain / scenario_results.len() as f64;
409 let cost_per_validation = total_cost / total_iterations as f64;
410
411 let mut performance_improvements = Vec::new();
413 let mut recommendations = Vec::new();
414
415 if average_confidence_gain > 0.0 {
416 performance_improvements.push(format!(
417 "Average confidence improvement: +{:.1}%",
418 average_confidence_gain * 100.0
419 ));
420 }
421
422 if cost_per_validation < 0.05 {
423 performance_improvements.push(format!(
424 "Cost-effective validation: ${:.3} per analysis",
425 cost_per_validation
426 ));
427 }
428
429 if overall_success_rate > 0.85 {
430 recommendations.push("Ready for production deployment".to_string());
431 } else if overall_success_rate >= 0.70 {
432 recommendations.push("Suitable for development and testing".to_string());
434 } else {
435 recommendations.push("Further optimization recommended".to_string());
436 }
437
438 BenchmarkSummary {
439 total_duration_seconds: total_duration,
440 total_iterations,
441 overall_success_rate,
442 average_confidence_gain,
443 cost_per_validation,
444 performance_improvements,
445 recommendations,
446 }
447 }
448}
449
/// Running totals for one finding category while a scenario is benchmarked.
#[derive(Clone, Debug, Default)]
struct FindingStatsAccumulator {
    /// Number of findings seen in this category.
    count: usize,
    /// Sum of the severity scores seen so far; despite the name, it becomes
    /// an average only once it is divided by `count`.
    average_severity: f64,
    /// Sum of the validation confidences of the results with these findings.
    total_confidence_impact: f64,
}
457
458impl Default for TokenUsageMetrics {
459 fn default() -> Self {
460 Self {
461 average_input_tokens: 0.0,
462 average_output_tokens: 0.0,
463 average_total_tokens: 0.0,
464 average_cost_usd: 0.0,
465 token_per_second: 0.0,
466 }
467 }
468}
469
470#[cfg(test)]
471mod tests {
472 use super::*;
473 use std::collections::HashMap;
474
475 #[test]
480 fn test_benchmark_config_default() {
481 let config = BenchmarkConfig::default();
482
483 assert_eq!(config.iterations, 10);
485
486 assert!(config.enable_statistics);
488
489 assert_eq!(config.timeout_secs, 300);
491
492 assert_eq!(config.scenarios.len(), 4);
494 assert!(config
495 .scenarios
496 .contains(&BenchmarkScenario::BusinessDecision));
497 assert!(config
498 .scenarios
499 .contains(&BenchmarkScenario::TechnicalArchitecture));
500 assert!(config
501 .scenarios
502 .contains(&BenchmarkScenario::ComplianceAnalysis));
503 assert!(config
504 .scenarios
505 .contains(&BenchmarkScenario::RiskAssessment));
506
507 assert_eq!(config.validation_levels.len(), 4);
509 assert!(config.validation_levels.contains(&ValidationLevel::None));
510 assert!(config.validation_levels.contains(&ValidationLevel::Quick));
511 assert!(config
512 .validation_levels
513 .contains(&ValidationLevel::Standard));
514 assert!(config
515 .validation_levels
516 .contains(&ValidationLevel::Rigorous));
517 }
518
519 #[test]
520 fn test_benchmark_config_custom() {
521 let config = BenchmarkConfig {
522 scenarios: vec![BenchmarkScenario::BusinessDecision],
523 iterations: 5,
524 validation_levels: vec![ValidationLevel::Standard],
525 enable_statistics: false,
526 timeout_secs: 60,
527 };
528
529 assert_eq!(config.scenarios.len(), 1);
530 assert_eq!(config.iterations, 5);
531 assert_eq!(config.validation_levels.len(), 1);
532 assert!(!config.enable_statistics);
533 assert_eq!(config.timeout_secs, 60);
534 }
535
536 #[test]
537 fn test_benchmark_config_serialization() {
538 let config = BenchmarkConfig::default();
539
540 let json = serde_json::to_string(&config).expect("Failed to serialize BenchmarkConfig");
542 assert!(json.contains("\"iterations\":10"));
543 assert!(json.contains("\"enable_statistics\":true"));
544
545 let deserialized: BenchmarkConfig =
547 serde_json::from_str(&json).expect("Failed to deserialize BenchmarkConfig");
548 assert_eq!(deserialized.iterations, config.iterations);
549 assert_eq!(deserialized.timeout_secs, config.timeout_secs);
550 }
551
552 #[test]
557 fn test_benchmark_scenario_enum_variants() {
558 let scenarios = [
560 BenchmarkScenario::BusinessDecision,
561 BenchmarkScenario::TechnicalArchitecture,
562 BenchmarkScenario::ComplianceAnalysis,
563 BenchmarkScenario::RiskAssessment,
564 BenchmarkScenario::StrategicPlanning,
565 BenchmarkScenario::MultiPerspectiveAnalysis,
566 ];
567
568 assert_eq!(scenarios.len(), 6);
569 }
570
571 #[test]
572 fn test_benchmark_scenario_equality() {
573 assert_eq!(
574 BenchmarkScenario::BusinessDecision,
575 BenchmarkScenario::BusinessDecision
576 );
577 assert_ne!(
578 BenchmarkScenario::BusinessDecision,
579 BenchmarkScenario::TechnicalArchitecture
580 );
581 }
582
583 #[test]
584 fn test_benchmark_scenario_serialization() {
585 let scenario = BenchmarkScenario::BusinessDecision;
586 let json = serde_json::to_string(&scenario).expect("Failed to serialize scenario");
587
588 assert_eq!(json, "\"business_decision\"");
590
591 let deserialized: BenchmarkScenario =
593 serde_json::from_str(&json).expect("Failed to deserialize scenario");
594 assert_eq!(deserialized, scenario);
595 }
596
597 #[test]
598 fn test_all_scenarios_serialize_correctly() {
599 let test_cases = vec![
600 (BenchmarkScenario::BusinessDecision, "\"business_decision\""),
601 (
602 BenchmarkScenario::TechnicalArchitecture,
603 "\"technical_architecture\"",
604 ),
605 (
606 BenchmarkScenario::ComplianceAnalysis,
607 "\"compliance_analysis\"",
608 ),
609 (BenchmarkScenario::RiskAssessment, "\"risk_assessment\""),
610 (
611 BenchmarkScenario::StrategicPlanning,
612 "\"strategic_planning\"",
613 ),
614 (
615 BenchmarkScenario::MultiPerspectiveAnalysis,
616 "\"multi_perspective_analysis\"",
617 ),
618 ];
619
620 for (scenario, expected_json) in test_cases {
621 let json = serde_json::to_string(&scenario).expect("Failed to serialize");
622 assert_eq!(
623 json, expected_json,
624 "Scenario {:?} serialization mismatch",
625 scenario
626 );
627 }
628 }
629
630 #[test]
635 fn test_scenario_input_generation_business_decision() {
636 let runner = DeepSeekBenchmarkRunner::default();
637 let input = runner.generate_scenario_input(BenchmarkScenario::BusinessDecision, 0);
638
639 let query = input.get_str("query").expect("Query field missing");
640 assert!(query.contains("European market"));
641 assert!(query.contains("Iteration: 0"));
642 }
643
644 #[test]
645 fn test_scenario_input_generation_technical_architecture() {
646 let runner = DeepSeekBenchmarkRunner::default();
647 let input = runner.generate_scenario_input(BenchmarkScenario::TechnicalArchitecture, 5);
648
649 let query = input.get_str("query").expect("Query field missing");
650 assert!(query.contains("microservices"));
651 assert!(query.contains("monolithic"));
652 assert!(query.contains("Iteration: 5"));
653 }
654
655 #[test]
656 fn test_scenario_input_generation_compliance_analysis() {
657 let runner = DeepSeekBenchmarkRunner::default();
658 let input = runner.generate_scenario_input(BenchmarkScenario::ComplianceAnalysis, 3);
659
660 let query = input.get_str("query").expect("Query field missing");
661 assert!(query.contains("GDPR"));
662 assert!(query.contains("EU citizen"));
663 assert!(query.contains("Iteration: 3"));
664 }
665
666 #[test]
667 fn test_scenario_input_generation_risk_assessment() {
668 let runner = DeepSeekBenchmarkRunner::default();
669 let input = runner.generate_scenario_input(BenchmarkScenario::RiskAssessment, 7);
670
671 let query = input.get_str("query").expect("Query field missing");
672 assert!(query.contains("cybersecurity"));
673 assert!(query.contains("financial application"));
674 assert!(query.contains("Iteration: 7"));
675 }
676
677 #[test]
678 fn test_scenario_input_generation_strategic_planning() {
679 let runner = DeepSeekBenchmarkRunner::default();
680 let input = runner.generate_scenario_input(BenchmarkScenario::StrategicPlanning, 2);
681
682 let query = input.get_str("query").expect("Query field missing");
683 assert!(query.contains("5-year strategic plan"));
684 assert!(query.contains("AI infrastructure"));
685 assert!(query.contains("Iteration: 2"));
686 }
687
688 #[test]
689 fn test_scenario_input_generation_multi_perspective() {
690 let runner = DeepSeekBenchmarkRunner::default();
691 let input = runner.generate_scenario_input(BenchmarkScenario::MultiPerspectiveAnalysis, 9);
692
693 let query = input.get_str("query").expect("Query field missing");
694 assert!(query.contains("remote work"));
695 assert!(query.contains("technical"));
696 assert!(query.contains("cultural"));
697 assert!(query.contains("Iteration: 9"));
698 }
699
700 #[test]
701 fn test_scenario_input_iteration_uniqueness() {
702 let runner = DeepSeekBenchmarkRunner::default();
703
704 let input0 = runner.generate_scenario_input(BenchmarkScenario::BusinessDecision, 0);
705 let input1 = runner.generate_scenario_input(BenchmarkScenario::BusinessDecision, 1);
706
707 let query0 = input0.get_str("query").unwrap();
708 let query1 = input1.get_str("query").unwrap();
709
710 assert_ne!(query0, query1);
712 assert!(query0.contains("Iteration: 0"));
713 assert!(query1.contains("Iteration: 1"));
714 }
715
716 #[test]
721 fn test_token_usage_metrics_default() {
722 let metrics = TokenUsageMetrics::default();
723
724 assert_eq!(metrics.average_input_tokens, 0.0);
725 assert_eq!(metrics.average_output_tokens, 0.0);
726 assert_eq!(metrics.average_total_tokens, 0.0);
727 assert_eq!(metrics.average_cost_usd, 0.0);
728 assert_eq!(metrics.token_per_second, 0.0);
729 }
730
731 #[test]
732 fn test_token_usage_metrics_custom() {
733 let metrics = TokenUsageMetrics {
734 average_input_tokens: 100.0,
735 average_output_tokens: 200.0,
736 average_total_tokens: 300.0,
737 average_cost_usd: 0.005,
738 token_per_second: 50.0,
739 };
740
741 assert_eq!(metrics.average_input_tokens, 100.0);
742 assert_eq!(metrics.average_output_tokens, 200.0);
743 assert_eq!(metrics.average_total_tokens, 300.0);
744 assert_eq!(metrics.average_cost_usd, 0.005);
745 assert_eq!(metrics.token_per_second, 50.0);
746 }
747
748 #[test]
749 fn test_token_usage_metrics_serialization() {
750 let metrics = TokenUsageMetrics {
751 average_input_tokens: 150.5,
752 average_output_tokens: 250.5,
753 average_total_tokens: 401.0,
754 average_cost_usd: 0.0075,
755 token_per_second: 100.0,
756 };
757
758 let json = serde_json::to_string(&metrics).expect("Failed to serialize");
759 let deserialized: TokenUsageMetrics =
760 serde_json::from_str(&json).expect("Failed to deserialize");
761
762 assert_eq!(
763 deserialized.average_input_tokens,
764 metrics.average_input_tokens
765 );
766 assert_eq!(
767 deserialized.average_output_tokens,
768 metrics.average_output_tokens
769 );
770 assert_eq!(
771 deserialized.average_total_tokens,
772 metrics.average_total_tokens
773 );
774 assert!((deserialized.average_cost_usd - metrics.average_cost_usd).abs() < 0.0001);
775 assert_eq!(deserialized.token_per_second, metrics.token_per_second);
776 }
777
778 #[test]
783 fn test_validation_finding_stat_creation() {
784 let stat = ValidationFindingStat {
785 finding_category: "LogicalFlow".to_string(),
786 average_severity: 3.5,
787 frequency: 0.25,
788 average_confidence_impact: 0.85,
789 };
790
791 assert_eq!(stat.finding_category, "LogicalFlow");
792 assert_eq!(stat.average_severity, 3.5);
793 assert_eq!(stat.frequency, 0.25);
794 assert_eq!(stat.average_confidence_impact, 0.85);
795 }
796
797 #[test]
798 fn test_validation_finding_stat_serialization() {
799 let stat = ValidationFindingStat {
800 finding_category: "Compliance".to_string(),
801 average_severity: 4.0,
802 frequency: 0.5,
803 average_confidence_impact: 0.75,
804 };
805
806 let json = serde_json::to_string(&stat).expect("Failed to serialize");
807 assert!(json.contains("\"finding_category\":\"Compliance\""));
808 assert!(json.contains("\"average_severity\":4.0"));
809
810 let deserialized: ValidationFindingStat =
811 serde_json::from_str(&json).expect("Failed to deserialize");
812 assert_eq!(deserialized.finding_category, stat.finding_category);
813 }
814
815 #[test]
820 fn test_scenario_result_creation() {
821 let result = ScenarioResult {
822 scenario: BenchmarkScenario::BusinessDecision,
823 validation_level: ValidationLevel::Standard,
824 iterations: 10,
825 average_duration_ms: 500.0,
826 average_confidence: 0.85,
827 average_validation_confidence: 0.90,
828 success_rate: 0.95,
829 validation_success_rate: 0.80,
830 token_usage: TokenUsageMetrics::default(),
831 validation_findings: vec![],
832 };
833
834 assert_eq!(result.scenario, BenchmarkScenario::BusinessDecision);
835 assert_eq!(result.validation_level, ValidationLevel::Standard);
836 assert_eq!(result.iterations, 10);
837 assert_eq!(result.average_duration_ms, 500.0);
838 assert_eq!(result.average_confidence, 0.85);
839 assert_eq!(result.success_rate, 0.95);
840 }
841
842 #[test]
843 fn test_scenario_result_with_findings() {
844 let findings = vec![
845 ValidationFindingStat {
846 finding_category: "LogicalFlow".to_string(),
847 average_severity: 2.5,
848 frequency: 0.3,
849 average_confidence_impact: 0.8,
850 },
851 ValidationFindingStat {
852 finding_category: "Compliance".to_string(),
853 average_severity: 4.0,
854 frequency: 0.1,
855 average_confidence_impact: 0.9,
856 },
857 ];
858
859 let result = ScenarioResult {
860 scenario: BenchmarkScenario::ComplianceAnalysis,
861 validation_level: ValidationLevel::Rigorous,
862 iterations: 5,
863 average_duration_ms: 1000.0,
864 average_confidence: 0.75,
865 average_validation_confidence: 0.88,
866 success_rate: 0.80,
867 validation_success_rate: 0.60,
868 token_usage: TokenUsageMetrics {
869 average_input_tokens: 200.0,
870 average_output_tokens: 400.0,
871 average_total_tokens: 600.0,
872 average_cost_usd: 0.01,
873 token_per_second: 75.0,
874 },
875 validation_findings: findings,
876 };
877
878 assert_eq!(result.validation_findings.len(), 2);
879 assert_eq!(
880 result.validation_findings[0].finding_category,
881 "LogicalFlow"
882 );
883 }
884
885 #[test]
890 fn test_benchmark_summary_creation() {
891 let summary = BenchmarkSummary {
892 total_duration_seconds: 120.5,
893 total_iterations: 100,
894 overall_success_rate: 0.92,
895 average_confidence_gain: 0.05,
896 cost_per_validation: 0.025,
897 performance_improvements: vec!["Improved by 10%".to_string()],
898 recommendations: vec!["Ready for production".to_string()],
899 };
900
901 assert_eq!(summary.total_duration_seconds, 120.5);
902 assert_eq!(summary.total_iterations, 100);
903 assert_eq!(summary.overall_success_rate, 0.92);
904 assert_eq!(summary.average_confidence_gain, 0.05);
905 assert_eq!(summary.cost_per_validation, 0.025);
906 assert_eq!(summary.performance_improvements.len(), 1);
907 assert_eq!(summary.recommendations.len(), 1);
908 }
909
910 #[test]
911 fn test_benchmark_summary_serialization() {
912 let summary = BenchmarkSummary {
913 total_duration_seconds: 60.0,
914 total_iterations: 50,
915 overall_success_rate: 0.88,
916 average_confidence_gain: 0.03,
917 cost_per_validation: 0.015,
918 performance_improvements: vec![],
919 recommendations: vec!["Test recommendation".to_string()],
920 };
921
922 let json = serde_json::to_string(&summary).expect("Failed to serialize");
923 let deserialized: BenchmarkSummary =
924 serde_json::from_str(&json).expect("Failed to deserialize");
925
926 assert_eq!(deserialized.total_iterations, summary.total_iterations);
927 assert_eq!(
928 deserialized.overall_success_rate,
929 summary.overall_success_rate
930 );
931 }
932
933 #[test]
938 fn test_benchmark_results_creation() {
939 let config = BenchmarkConfig::default();
940 let summary = BenchmarkSummary {
941 total_duration_seconds: 300.0,
942 total_iterations: 160,
943 overall_success_rate: 0.90,
944 average_confidence_gain: 0.04,
945 cost_per_validation: 0.02,
946 performance_improvements: vec![],
947 recommendations: vec!["Production ready".to_string()],
948 };
949
950 let results = BenchmarkResults {
951 config: config.clone(),
952 scenario_results: HashMap::new(),
953 summary,
954 timestamp: chrono::Utc::now(),
955 version: "0.1.0".to_string(),
956 };
957
958 assert_eq!(results.config.iterations, config.iterations);
959 assert!(results.scenario_results.is_empty());
960 assert_eq!(results.summary.total_iterations, 160);
961 assert!(!results.version.is_empty());
962 }
963
964 #[test]
965 fn test_benchmark_results_with_scenario_results() {
966 let mut scenario_results = HashMap::new();
967 scenario_results.insert(
968 "BusinessDecision_Standard".to_string(),
969 ScenarioResult {
970 scenario: BenchmarkScenario::BusinessDecision,
971 validation_level: ValidationLevel::Standard,
972 iterations: 10,
973 average_duration_ms: 450.0,
974 average_confidence: 0.82,
975 average_validation_confidence: 0.87,
976 success_rate: 0.90,
977 validation_success_rate: 0.70,
978 token_usage: TokenUsageMetrics::default(),
979 validation_findings: vec![],
980 },
981 );
982
983 let results = BenchmarkResults {
984 config: BenchmarkConfig::default(),
985 scenario_results,
986 summary: BenchmarkSummary {
987 total_duration_seconds: 45.0,
988 total_iterations: 10,
989 overall_success_rate: 0.90,
990 average_confidence_gain: 0.05,
991 cost_per_validation: 0.0,
992 performance_improvements: vec![],
993 recommendations: vec![],
994 },
995 timestamp: chrono::Utc::now(),
996 version: "0.1.0".to_string(),
997 };
998
999 assert_eq!(results.scenario_results.len(), 1);
1000 assert!(results
1001 .scenario_results
1002 .contains_key("BusinessDecision_Standard"));
1003 }
1004
1005 #[test]
1010 fn test_benchmark_runner_default() {
1011 let runner = DeepSeekBenchmarkRunner::default();
1012
1013 assert_eq!(runner.config.iterations, 10);
1015 assert!(runner.config.enable_statistics);
1016 }
1017
1018 #[test]
1019 fn test_benchmark_runner_with_custom_config() {
1020 let config = BenchmarkConfig {
1021 scenarios: vec![BenchmarkScenario::RiskAssessment],
1022 iterations: 3,
1023 validation_levels: vec![ValidationLevel::Quick],
1024 enable_statistics: false,
1025 timeout_secs: 30,
1026 };
1027
1028 let runner = DeepSeekBenchmarkRunner::new(config);
1029
1030 assert_eq!(runner.config.scenarios.len(), 1);
1031 assert_eq!(runner.config.iterations, 3);
1032 assert!(!runner.config.enable_statistics);
1033 }
1034
1035 #[test]
1040 fn test_calculate_summary_high_success_rate() {
1041 let runner = DeepSeekBenchmarkRunner::default();
1042
1043 let mut scenario_results = HashMap::new();
1044 scenario_results.insert(
1045 "Test_Standard".to_string(),
1046 ScenarioResult {
1047 scenario: BenchmarkScenario::BusinessDecision,
1048 validation_level: ValidationLevel::Standard,
1049 iterations: 10,
1050 average_duration_ms: 100.0,
1051 average_confidence: 0.80,
1052 average_validation_confidence: 0.85,
1053 success_rate: 0.90,
1054 validation_success_rate: 0.80,
1055 token_usage: TokenUsageMetrics {
1056 average_input_tokens: 100.0,
1057 average_output_tokens: 200.0,
1058 average_total_tokens: 300.0,
1059 average_cost_usd: 0.003,
1060 token_per_second: 100.0,
1061 },
1062 validation_findings: vec![],
1063 },
1064 );
1065
1066 let summary = runner.calculate_summary(&scenario_results, 10.0, 10);
1067
1068 assert!(summary.overall_success_rate > 0.85);
1070 assert!(summary
1071 .recommendations
1072 .iter()
1073 .any(|r| r.contains("production")));
1074 }
1075
1076 #[test]
1077 fn test_calculate_summary_medium_success_rate() {
1078 let runner = DeepSeekBenchmarkRunner::default();
1079
1080 let mut scenario_results = HashMap::new();
1081 scenario_results.insert(
1082 "Test_Standard".to_string(),
1083 ScenarioResult {
1084 scenario: BenchmarkScenario::BusinessDecision,
1085 validation_level: ValidationLevel::Standard,
1086 iterations: 10,
1087 average_duration_ms: 100.0,
1088 average_confidence: 0.70,
1089 average_validation_confidence: 0.72,
1090 success_rate: 0.75,
1091 validation_success_rate: 0.60,
1092 token_usage: TokenUsageMetrics::default(),
1093 validation_findings: vec![],
1094 },
1095 );
1096
1097 let summary = runner.calculate_summary(&scenario_results, 10.0, 10);
1098
1099 assert!(summary.overall_success_rate >= 0.70);
1102 assert!(summary.overall_success_rate <= 0.85);
1103 assert!(summary
1104 .recommendations
1105 .iter()
1106 .any(|r| r.contains("development") || r.contains("testing")));
1107 }
1108
/// A single scenario with a 0.60 success rate must keep the overall success
/// rate below 0.70 and produce a recommendation that mentions "optimization".
#[test]
fn test_calculate_summary_low_success_rate() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let weak_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.50,
        average_validation_confidence: 0.55,
        success_rate: 0.60,
        validation_success_rate: 0.40,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };
    let results_by_key: HashMap<String, ScenarioResult> =
        std::iter::once((String::from("Test_Standard"), weak_result)).collect();

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert!(summary.overall_success_rate < 0.70);

    let suggests_optimization = summary
        .recommendations
        .iter()
        .any(|rec| rec.contains("optimization"));
    assert!(suggests_optimization);
}
1139
/// When the validation confidence (0.85) exceeds the base confidence (0.70),
/// the summary must report a positive average confidence gain and list a
/// "confidence improvement" entry among the performance improvements.
#[test]
fn test_calculate_summary_confidence_gain() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let improved_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.70,
        // Higher than `average_confidence`, so a gain is expected.
        average_validation_confidence: 0.85,
        success_rate: 1.0,
        validation_success_rate: 1.0,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };
    let mut results_by_key = HashMap::new();
    results_by_key.insert("Test_Standard".to_owned(), improved_result);

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert!(summary.average_confidence_gain > 0.0);

    let reports_gain = summary
        .performance_improvements
        .iter()
        .any(|note| note.contains("confidence improvement"));
    assert!(reports_gain);
}
1170
/// With an average cost of 0.001 USD per run, the summary must report a cost
/// per validation below 0.05 and list a "Cost-effective" entry among the
/// performance improvements.
#[test]
fn test_calculate_summary_cost_effective() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let cheap_usage = TokenUsageMetrics {
        average_input_tokens: 100.0,
        average_output_tokens: 200.0,
        average_total_tokens: 300.0,
        average_cost_usd: 0.001,
        token_per_second: 100.0,
    };
    let cheap_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.80,
        average_validation_confidence: 0.85,
        success_rate: 1.0,
        validation_success_rate: 1.0,
        token_usage: cheap_usage,
        validation_findings: Vec::new(),
    };

    let mut results_by_key = HashMap::new();
    results_by_key.insert(String::from("Test_Standard"), cheap_result);

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert!(summary.cost_per_validation < 0.05);

    let flagged_cost_effective = summary
        .performance_improvements
        .iter()
        .any(|note| note.contains("Cost-effective"));
    assert!(flagged_cost_effective);
}
1207
/// Summarizing two scenarios must echo back the supplied duration (30.0) and
/// iteration count (20) and report an overall success rate in `(0.0, 1.0]`.
#[test]
fn test_calculate_summary_multiple_scenarios() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let business = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.80,
        average_validation_confidence: 0.85,
        success_rate: 1.0,
        validation_success_rate: 0.90,
        token_usage: TokenUsageMetrics {
            average_input_tokens: 100.0,
            average_output_tokens: 200.0,
            average_total_tokens: 300.0,
            average_cost_usd: 0.002,
            token_per_second: 100.0,
        },
        validation_findings: Vec::new(),
    };

    let technical = ScenarioResult {
        scenario: BenchmarkScenario::TechnicalArchitecture,
        validation_level: ValidationLevel::Rigorous,
        iterations: 10,
        average_duration_ms: 200.0,
        average_confidence: 0.75,
        average_validation_confidence: 0.82,
        success_rate: 0.90,
        validation_success_rate: 0.80,
        token_usage: TokenUsageMetrics {
            average_input_tokens: 150.0,
            average_output_tokens: 250.0,
            average_total_tokens: 400.0,
            average_cost_usd: 0.003,
            token_per_second: 80.0,
        },
        validation_findings: Vec::new(),
    };

    let results_by_key: HashMap<String, ScenarioResult> = vec![
        (String::from("Business_Standard"), business),
        (String::from("Technical_Rigorous"), technical),
    ]
    .into_iter()
    .collect();

    let summary = bench_runner.calculate_summary(&results_by_key, 30.0, 20);

    assert_eq!(summary.total_iterations, 20);
    assert_eq!(summary.total_duration_seconds, 30.0);

    let overall = summary.overall_success_rate;
    assert!(overall > 0.0);
    assert!(overall <= 1.0);
}
1267
/// A default-constructed accumulator starts with a zero count and zero totals.
#[test]
fn test_finding_stats_accumulator_default() {
    let fresh = FindingStatsAccumulator::default();

    assert_eq!(fresh.count, 0);
    assert_eq!(fresh.average_severity, 0.0);
    assert_eq!(fresh.total_confidence_impact, 0.0);
}
1280
/// Checks that two rounds of manual updates add up on a default accumulator.
///
/// `count` and the severity total only ever hold whole numbers here, so they
/// are compared exactly. The confidence-impact total is a sum of fractional
/// floats, so it is compared within a small tolerance instead of with
/// `assert_eq!`; exact equality on such sums depends on the specific fixture
/// values and breaks as soon as they change.
#[test]
fn test_finding_stats_accumulator_accumulation() {
    let mut stats = FindingStatsAccumulator::default();

    // First finding.
    stats.count += 1;
    stats.average_severity += 4.0;
    stats.total_confidence_impact += 0.85;

    assert_eq!(stats.count, 1);
    assert_eq!(stats.average_severity, 4.0);
    assert!(
        (stats.total_confidence_impact - 0.85).abs() < 1e-9,
        "expected total_confidence_impact ~= 0.85, got {}",
        stats.total_confidence_impact
    );

    // Second finding: totals keep accumulating on top of the first.
    stats.count += 1;
    stats.average_severity += 2.0;
    stats.total_confidence_impact += 0.90;

    assert_eq!(stats.count, 2);
    assert_eq!(stats.average_severity, 6.0);
    assert!(
        (stats.total_confidence_impact - 1.75).abs() < 1e-9,
        "expected total_confidence_impact ~= 1.75, got {}",
        stats.total_confidence_impact
    );
}
1302
/// Serializes a fully populated `BenchmarkResults` to pretty JSON and checks
/// that the expected top-level and nested keys appear in the output text.
#[test]
fn test_benchmark_results_json_output_format() {
    let config = BenchmarkConfig {
        scenarios: vec![BenchmarkScenario::BusinessDecision],
        iterations: 5,
        validation_levels: vec![ValidationLevel::Standard],
        enable_statistics: true,
        timeout_secs: 60,
    };

    let logical_flow_finding = ValidationFindingStat {
        finding_category: "LogicalFlow".to_string(),
        average_severity: 2.0,
        frequency: 0.4,
        average_confidence_impact: 0.85,
    };

    let business_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 5,
        average_duration_ms: 250.0,
        average_confidence: 0.82,
        average_validation_confidence: 0.88,
        success_rate: 0.80,
        validation_success_rate: 0.60,
        token_usage: TokenUsageMetrics {
            average_input_tokens: 120.0,
            average_output_tokens: 280.0,
            average_total_tokens: 400.0,
            average_cost_usd: 0.0045,
            token_per_second: 88.0,
        },
        validation_findings: vec![logical_flow_finding],
    };

    let mut scenario_results = HashMap::new();
    scenario_results.insert("BusinessDecision_Standard".to_string(), business_result);

    let summary = BenchmarkSummary {
        total_duration_seconds: 12.5,
        total_iterations: 5,
        overall_success_rate: 0.80,
        average_confidence_gain: 0.06,
        cost_per_validation: 0.0045,
        performance_improvements: vec![
            "Average confidence improvement: +6.0%".to_string(),
            "Cost-effective validation: $0.005 per analysis".to_string(),
        ],
        recommendations: vec!["Suitable for development and testing".to_string()],
    };

    let results = BenchmarkResults {
        config,
        scenario_results,
        summary,
        timestamp: chrono::Utc::now(),
        version: "0.1.0".to_string(),
    };

    let json = serde_json::to_string_pretty(&results).expect("Failed to serialize results");

    // Each entry is a quoted JSON key (or map key) that must occur in the output.
    let expected_keys = [
        "\"config\"",
        "\"scenario_results\"",
        "\"summary\"",
        "\"timestamp\"",
        "\"version\"",
        "\"BusinessDecision_Standard\"",
        "\"average_duration_ms\"",
        "\"token_usage\"",
        "\"validation_findings\"",
        "\"performance_improvements\"",
        "\"recommendations\"",
    ];
    for key in expected_keys.iter() {
        assert!(json.contains(*key));
    }
}
1379
/// Joining the `Debug` forms of a scenario and a validation level with an
/// underscore yields the `"<Scenario>_<Level>"` key string.
#[test]
fn test_scenario_result_key_format() {
    let key = format!(
        "{:?}_{:?}",
        BenchmarkScenario::TechnicalArchitecture,
        ValidationLevel::Rigorous
    );

    assert_eq!(key, "TechnicalArchitecture_Rigorous");
}
1389
/// Sanity-checks a hand-built, zeroed `BenchmarkSummary`.
///
/// NOTE(review): the runner and the empty results map are constructed but never
/// passed to `calculate_summary`, so this test only inspects the literal built
/// below. Confirm whether skipping that call is intentional.
#[test]
fn test_empty_scenario_results_summary() {
    let _runner = DeepSeekBenchmarkRunner::default();
    let _scenario_results: HashMap<String, ScenarioResult> = HashMap::new();

    let empty_summary = BenchmarkSummary {
        total_duration_seconds: 0.0,
        total_iterations: 0,
        overall_success_rate: 0.0,
        average_confidence_gain: 0.0,
        cost_per_validation: 0.0,
        performance_improvements: Vec::new(),
        recommendations: vec![String::from("Further optimization recommended")],
    };

    assert_eq!(empty_summary.total_iterations, 0);
    assert!(!empty_summary.recommendations.is_empty());
}
1415
/// A `BenchmarkConfig` can be built with zero iterations and keeps that value.
#[test]
fn test_zero_iterations_config() {
    let zero_iter_config = BenchmarkConfig {
        iterations: 0,
        scenarios: vec![BenchmarkScenario::BusinessDecision],
        validation_levels: vec![ValidationLevel::Standard],
        timeout_secs: 60,
        enable_statistics: true,
    };

    assert_eq!(zero_iter_config.iterations, 0);
}
1428
/// A `BenchmarkConfig` can be built with an empty scenario list.
#[test]
fn test_empty_scenarios_config() {
    let no_scenarios = BenchmarkConfig {
        scenarios: Vec::new(),
        validation_levels: vec![ValidationLevel::Standard],
        iterations: 10,
        timeout_secs: 60,
        enable_statistics: true,
    };

    assert!(no_scenarios.scenarios.is_empty());
}
1441
/// A `BenchmarkConfig` can be built with an empty list of validation levels.
#[test]
fn test_empty_validation_levels_config() {
    let no_levels = BenchmarkConfig {
        validation_levels: Vec::new(),
        scenarios: vec![BenchmarkScenario::BusinessDecision],
        iterations: 10,
        timeout_secs: 60,
        enable_statistics: true,
    };

    assert!(no_levels.validation_levels.is_empty());
}
1454
/// A `BenchmarkConfig` keeps a large iteration count (10000) unchanged.
#[test]
fn test_large_iteration_count() {
    let heavy_config = BenchmarkConfig {
        iterations: 10000,
        timeout_secs: 3600,
        scenarios: vec![BenchmarkScenario::BusinessDecision],
        validation_levels: vec![ValidationLevel::Standard],
        enable_statistics: true,
    };

    assert_eq!(heavy_config.iterations, 10000);
}
1467
/// A single scenario with a success rate of 1.0 must give an overall success
/// rate of exactly 1.0 and a recommendation that mentions "production".
#[test]
fn test_perfect_success_rate_summary() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let flawless_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.95,
        average_validation_confidence: 0.98,
        success_rate: 1.0,
        validation_success_rate: 1.0,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };
    let results_by_key: HashMap<String, ScenarioResult> =
        std::iter::once(("Test_Standard".to_string(), flawless_result)).collect();

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert_eq!(summary.overall_success_rate, 1.0);

    let production_ready = summary
        .recommendations
        .iter()
        .any(|rec| rec.contains("production"));
    assert!(production_ready);
}
1497
/// A single scenario with a success rate of 0.0 must give an overall success
/// rate of exactly 0.0 and a recommendation that mentions "optimization".
#[test]
fn test_zero_success_rate_summary() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let failing_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.30,
        average_validation_confidence: 0.25,
        success_rate: 0.0,
        validation_success_rate: 0.0,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };
    let mut results_by_key = HashMap::new();
    results_by_key.insert(String::from("Test_Standard"), failing_result);

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert_eq!(summary.overall_success_rate, 0.0);

    let suggests_optimization = summary
        .recommendations
        .iter()
        .any(|rec| rec.contains("optimization"));
    assert!(suggests_optimization);
}
1527
/// When the validation confidence (0.70) is below the base confidence (0.85),
/// the summary must report a negative average confidence gain and must not
/// list any "confidence improvement" entry.
#[test]
fn test_negative_confidence_gain() {
    let bench_runner = DeepSeekBenchmarkRunner::default();

    let regressed_result = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.85,
        // Lower than `average_confidence`, so the gain is expected to be negative.
        average_validation_confidence: 0.70,
        success_rate: 1.0,
        validation_success_rate: 1.0,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };
    let mut results_by_key = HashMap::new();
    results_by_key.insert("Test_Standard".to_owned(), regressed_result);

    let summary = bench_runner.calculate_summary(&results_by_key, 10.0, 10);

    assert!(summary.average_confidence_gain < 0.0);

    // "No entry mentions it" is the same condition as "not any entry mentions it".
    let no_improvement_claimed = summary
        .performance_improvements
        .iter()
        .all(|note| !note.contains("confidence improvement"));
    assert!(no_improvement_claimed);
}
1559
/// Cloning a default `BenchmarkConfig` preserves the iteration count and the
/// number of scenarios.
#[test]
fn test_benchmark_config_clone() {
    let original = BenchmarkConfig::default();
    let duplicate = original.clone();

    assert_eq!(original.iterations, duplicate.iterations);
    assert_eq!(original.scenarios.len(), duplicate.scenarios.len());
}
1572
/// `BenchmarkScenario` is `Copy`: the source binding stays usable after being
/// assigned to another binding, and both compare equal.
#[test]
fn test_benchmark_scenario_copy() {
    let original = BenchmarkScenario::BusinessDecision;
    let duplicate = original;

    assert_eq!(original, duplicate);
}
1580
/// Cloning a `ScenarioResult` preserves its scenario and iteration count.
#[test]
fn test_scenario_result_clone() {
    let original = ScenarioResult {
        scenario: BenchmarkScenario::BusinessDecision,
        validation_level: ValidationLevel::Standard,
        iterations: 10,
        average_duration_ms: 100.0,
        average_confidence: 0.80,
        average_validation_confidence: 0.85,
        success_rate: 0.90,
        validation_success_rate: 0.80,
        token_usage: TokenUsageMetrics::default(),
        validation_findings: Vec::new(),
    };

    let duplicate = original.clone();

    assert_eq!(original.scenario, duplicate.scenario);
    assert_eq!(original.iterations, duplicate.iterations);
}
1601
/// The `Debug` output of a default `BenchmarkConfig` names the type and its
/// `iterations` field.
#[test]
fn test_debug_formatting() {
    let rendered = format!("{:?}", BenchmarkConfig::default());

    assert!(rendered.contains("BenchmarkConfig"));
    assert!(rendered.contains("iterations"));
}
1610
/// Lists the five `ValidationLevel` variants this test knows about; it fails to
/// compile if any of them is removed or renamed.
#[test]
fn test_validation_level_variants() {
    let known_levels = [
        ValidationLevel::None,
        ValidationLevel::Quick,
        ValidationLevel::Standard,
        ValidationLevel::Rigorous,
        ValidationLevel::Paranoid,
    ];

    assert_eq!(known_levels.len(), 5);
}
1627
/// The default `ValidationLevel` is `Standard`.
#[test]
fn test_validation_level_default() {
    assert_eq!(ValidationLevel::default(), ValidationLevel::Standard);
}
1633
/// `ValidationLevel::Rigorous` serializes to the JSON string `"rigorous"`.
#[test]
fn test_validation_level_serialization() {
    let encoded =
        serde_json::to_string(&ValidationLevel::Rigorous).expect("Failed to serialize");

    assert_eq!(encoded, "\"rigorous\"");
}
1640}