1use crate::common::risk::RiskLevel;
2use crate::laws::benford::BenfordResult;
3use crate::laws::normal::NormalResult;
4use crate::laws::pareto::ParetoResult;
5use crate::laws::poisson::PoissonResult;
6use crate::laws::zipf::ZipfResult;
7use diffx_core::{diff, DiffResult};
8use std::collections::HashMap;
9
/// Maps an internal law identifier (e.g. "benf") to its human-readable
/// display name; unrecognized identifiers are returned unchanged.
fn get_law_display_name(law: &str) -> &str {
    const DISPLAY_NAMES: [(&str, &str); 5] = [
        ("benf", "Benford Law"),
        ("pareto", "Pareto Principle"),
        ("zipf", "Zipf Law"),
        ("normal", "Normal Distribution"),
        ("poisson", "Poisson Distribution"),
    ];

    DISPLAY_NAMES
        .iter()
        .find(|(key, _)| *key == law)
        .map(|(_, name)| *name)
        .unwrap_or(law)
}
21
/// Aggregated outcome of running several statistical laws over one dataset,
/// including per-law results, cross-law conflict analysis, and a final
/// recommendation of which law(s) to trust.
#[derive(Debug, Clone)]
pub struct IntegrationResult {
    /// Human-readable name of the analyzed dataset.
    pub dataset_name: String,
    /// Count of numeric values fed into the analysis.
    pub numbers_analyzed: usize,
    /// Short identifiers ("benf", "pareto", ...) of the laws that were run.
    pub laws_executed: Vec<String>,

    // Aggregate metrics, computed by `finalize_analysis`.
    /// Weighted average of the per-law scores (0.0–1.0).
    pub overall_quality_score: f64,
    /// Agreement between laws; 1.0 means identical scores (0.0–1.0).
    pub consistency_score: f64,
    /// Number of inter-law conflicts found.
    pub conflicts_detected: usize,
    /// Confidence in the generated recommendation (0.0–1.0).
    pub recommendation_confidence: f64,

    // Raw per-law results; populated only for laws that were executed.
    pub benford_result: Option<BenfordResult>,
    pub pareto_result: Option<ParetoResult>,
    pub zipf_result: Option<ZipfResult>,
    pub normal_result: Option<NormalResult>,
    pub poisson_result: Option<PoissonResult>,

    /// Normalized 0–1 score per law, keyed by short identifier.
    pub law_scores: HashMap<String, f64>,
    /// Detected inter-law conflicts.
    pub conflicts: Vec<Conflict>,
    /// Primary/secondary law recommendation with rationale.
    pub recommendations: Recommendation,
    /// Characteristics inferred from the raw data sample.
    pub data_characteristics: DataCharacteristics,
    /// Final qualitative grade of the dataset.
    pub overall_assessment: OverallAssessment,
    /// Risk level derived from the overall assessment.
    pub risk_level: RiskLevel,

    // NOTE(review): `focus` is never assigned in this file — presumably set
    // by callers to narrow the analysis; confirm semantics against call sites.
    pub focus: Option<String>,
}
55
/// A detected disagreement between two or more statistical laws.
#[derive(Debug, Clone)]
pub struct Conflict {
    /// Category of disagreement.
    pub conflict_type: ConflictType,
    /// Short identifiers of the laws participating in the conflict.
    pub laws_involved: Vec<String>,
    /// Severity in 0.0–1.0 (higher = more severe).
    pub conflict_score: f64,
    /// Human-readable description of the disagreement.
    pub description: String,
    /// Most plausible explanation for the disagreement.
    pub likely_cause: String,
    /// Suggested next step to resolve or investigate it.
    pub resolution_suggestion: String,
}
66
/// Categories of inter-law conflicts.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ConflictType {
    /// Competing distribution models disagree (e.g. normal vs. Poisson).
    DistributionMismatch,
    /// Laws disagree on overall data quality (typically involves Benford).
    QualityDisagreement,
    /// Laws imply different risk levels.
    RiskLevelConflict,
    /// Scale-sensitive laws disagree (e.g. Pareto vs. Zipf).
    ScaleIncompatibility,
    /// Disagreement attributable to methodology/configuration differences.
    MethodologicalConflict,
    /// A single law's score deviates strongly from the cross-law average.
    ScoreDeviation,
    /// All laws agree suspiciously exactly (possible degenerate data).
    UnexpectedConsistency,
}
78
/// Which law(s) to rely on for this dataset, with supporting context.
#[derive(Debug, Clone)]
pub struct Recommendation {
    /// Best-suited law (short identifier); empty when nothing was scored.
    pub primary_law: String,
    /// Up to two runner-up laws.
    pub secondary_laws: Vec<String>,
    /// Confidence in this recommendation (0.0–1.0).
    pub confidence: f64,
    /// Human-readable justification.
    pub rationale: String,
    /// Pre-defined multi-law combinations that also apply.
    pub alternative_combinations: Vec<LawCombination>,
}
88
/// A pre-defined pairing of laws that work well together for a given goal.
#[derive(Debug, Clone)]
pub struct LawCombination {
    /// Short identifiers of the combined laws.
    pub laws: Vec<String>,
    /// Analysis goal this combination serves (e.g. "Quality Audit").
    pub purpose: String,
    /// Fixed effectiveness rating in 0.0–1.0.
    pub effectiveness_score: f64,
    /// Explanation of what each law contributes.
    pub description: String,
}
97
/// Properties inferred from the raw numeric sample, used to weight laws.
#[derive(Debug, Clone)]
pub struct DataCharacteristics {
    /// Integer/discrete/continuous classification.
    pub data_type: DataType,
    /// Shape classification derived from skewness.
    pub distribution_shape: DistributionShape,
    /// Outlier contamination level (IQR fence rule).
    pub outlier_presence: OutlierLevel,
    /// Dynamic range (max/min ratio) bucket.
    pub scale_range: ScaleRange,
    /// Caller-declared analysis goal; defaults to `GeneralAnalysis`.
    pub analysis_purpose: AnalysisPurpose,
    /// Sample-count bucket.
    pub sample_size_category: SampleSizeCategory,
}
108
/// Coarse numeric type of the sample, as inferred by `detect_data_type`.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
    /// At least one value has a fractional part.
    Continuous,
    /// Integer-valued data containing negatives.
    Discrete,
    /// Mixture of kinds (currently never produced by `detect_data_type`).
    Mixed,
    /// Non-negative integer-valued data.
    Integer,
    /// Could not be determined.
    Unknown,
}
118
/// Shape classification of the sample's distribution.
// NOTE(review): `detect_distribution_shape` currently only produces Normal,
// Skewed, and Unknown; the remaining variants are reserved for callers or
// future detection logic.
#[derive(Debug, Clone, PartialEq)]
pub enum DistributionShape {
    Normal,
    Skewed,
    Multimodal,
    PowerLaw,
    Exponential,
    Uniform,
    Unknown,
}
130
/// Outlier contamination buckets (see `detect_outliers` for the ratios).
#[derive(Debug, Clone, PartialEq)]
pub enum OutlierLevel {
    /// No values outside the 1.5×IQR fences.
    None,
    /// Under 5% outliers.
    Low,
    /// 5–10% outliers.
    Moderate,
    /// 10–20% outliers.
    High,
    /// 20% or more outliers.
    Extreme,
}
140
/// Dynamic-range (max/min ratio) buckets for strictly positive data.
#[derive(Debug, Clone, PartialEq)]
pub enum ScaleRange {
    /// Ratio below 100.
    Narrow,
    /// Ratio below 10,000.
    Medium,
    /// Ratio of 10,000 or more.
    Wide,
    /// Sample contains non-positive values, so a ratio is meaningless.
    Mixed,
}
149
/// Caller-declared goal of the analysis; biases adaptive law weighting.
#[derive(Debug, Clone, PartialEq)]
pub enum AnalysisPurpose {
    QualityAudit,
    FraudDetection,
    ConcentrationAnalysis,
    AnomalyDetection,
    DistributionFitting,
    /// Default when no specific purpose is given.
    GeneralAnalysis,
}
160
/// Sample-count buckets (see `categorize_sample_size` for the cutoffs).
#[derive(Debug, Clone, PartialEq)]
pub enum SampleSizeCategory {
    /// Fewer than 30 values.
    Small,
    /// 30–299 values.
    Medium,
    /// 300–2,999 values.
    Large,
    /// 3,000 values or more.
    VeryLarge,
}
169
/// Final qualitative grade assigned by `assess_overall_quality`.
#[derive(Debug, Clone, PartialEq)]
pub enum OverallAssessment {
    /// Every law scored high (> 0.8).
    Excellent,
    /// At least two thirds of the laws scored high, none low.
    Good,
    /// Neither clearly good nor clearly bad.
    Mixed,
    /// Some low scores combined with multiple conflicts.
    Concerning,
    /// Half or more of the laws scored low (< 0.4).
    Problematic,
}
179
180impl IntegrationResult {
181 pub fn new(dataset_name: String, numbers: &[f64]) -> Self {
182 Self {
183 dataset_name,
184 numbers_analyzed: numbers.len(),
185 laws_executed: Vec::new(),
186 overall_quality_score: 0.0,
187 consistency_score: 0.0,
188 conflicts_detected: 0,
189 recommendation_confidence: 0.0,
190 benford_result: None,
191 pareto_result: None,
192 zipf_result: None,
193 normal_result: None,
194 poisson_result: None,
195 law_scores: HashMap::new(),
196 conflicts: Vec::new(),
197 recommendations: Recommendation::empty(),
198 data_characteristics: DataCharacteristics::analyze(numbers),
199 overall_assessment: OverallAssessment::Mixed,
200 risk_level: RiskLevel::Medium,
201 focus: None,
202 }
203 }
204
205 pub fn add_law_result(&mut self, law_name: &str, result: LawResult) {
207 match law_name {
208 "benf" => {
209 if let LawResult::Benford(r) = result {
210 let score = 1.0 - (r.mean_absolute_deviation / 100.0);
214 self.law_scores.insert("benf".to_string(), score);
215 self.benford_result = Some(r);
216 }
217 }
218 "pareto" => {
219 if let LawResult::Pareto(r) = result {
220 self.law_scores
221 .insert("pareto".to_string(), r.concentration_index);
222 self.pareto_result = Some(r);
223 }
224 }
225 "zipf" => {
226 if let LawResult::Zipf(r) = result {
227 self.law_scores
228 .insert("zipf".to_string(), r.distribution_quality);
229 self.zipf_result = Some(r);
230 }
231 }
232 "normal" => {
233 if let LawResult::Normal(r) = result {
234 self.law_scores
235 .insert("normal".to_string(), r.normality_score);
236 self.normal_result = Some(r);
237 }
238 }
239 "poisson" => {
240 if let LawResult::Poisson(r) = result {
241 self.law_scores
242 .insert("poisson".to_string(), r.goodness_of_fit_score);
243 self.poisson_result = Some(r);
244 }
245 }
246 _ => {}
247 }
248
249 if !self.laws_executed.contains(&law_name.to_string()) {
250 self.laws_executed.push(law_name.to_string());
251 }
252 }
253
    /// Runs all derived computations once every law result has been added.
    /// Call order matters: conflicts must be detected before recommendation
    /// confidence is computed, and the overall assessment (which reads the
    /// conflict count) must precede risk determination.
    pub fn finalize_analysis(&mut self) {
        self.calculate_overall_quality_score();
        self.calculate_consistency_score();
        self.detect_conflicts();
        self.generate_recommendations();
        self.assess_overall_quality();
        self.determine_risk_level();
    }
263
264 fn calculate_overall_quality_score(&mut self) {
265 if self.law_scores.is_empty() {
266 self.overall_quality_score = 0.0;
267 return;
268 }
269
270 let weights = self.get_adaptive_weights();
271 let mut weighted_sum = 0.0;
272 let mut total_weight = 0.0;
273
274 for (law, score) in &self.law_scores {
275 if let Some(&weight) = weights.get(law) {
276 weighted_sum += score * weight;
277 total_weight += weight;
278 }
279 }
280
281 self.overall_quality_score = if total_weight > 0.0 {
282 weighted_sum / total_weight
283 } else {
284 0.0
285 };
286 }
287
288 fn calculate_consistency_score(&mut self) {
289 if self.law_scores.len() < 2 {
290 self.consistency_score = 1.0;
291 return;
292 }
293
294 let scores: Vec<f64> = self.law_scores.values().cloned().collect();
295 let mean_score: f64 = scores.iter().sum::<f64>() / scores.len() as f64;
296
297 let variance: f64 = scores
298 .iter()
299 .map(|score| (score - mean_score).powi(2))
300 .sum::<f64>()
301 / scores.len() as f64;
302
303 let max_variance = 1.0;
305 self.consistency_score = 1.0 - (variance / max_variance).min(1.0);
306 }
307
    /// Rebuilds the conflict list from scratch using both detection
    /// strategies, then caches the count in `conflicts_detected`.
    fn detect_conflicts(&mut self) {
        self.conflicts.clear();

        // Strategy 1: deviation of each law from the cross-law average.
        self.detect_conflicts_with_diffx();

        // Strategy 2: pairwise comparison between individual laws.
        self.detect_score_conflicts();

        self.conflicts_detected = self.conflicts.len();
    }
319
    /// Uses diffx to compare each law's actual score against a uniform
    /// "expected" profile in which every law holds the cross-law average,
    /// and records a conflict for every significant deviation it reports.
    fn detect_conflicts_with_diffx(&mut self) {
        if self.law_scores.is_empty() {
            return;
        }

        let average_score: f64 =
            self.law_scores.values().sum::<f64>() / self.law_scores.len() as f64;
        let mut expected_scores = HashMap::new();

        // Build the expected profile: every law at the average score.
        for law in self.law_scores.keys() {
            expected_scores.insert(law.clone(), average_score);
        }

        let expected_json = serde_json::to_value(&expected_scores).unwrap_or_default();
        let actual_json = serde_json::to_value(&self.law_scores).unwrap_or_default();

        // Epsilon 0.01: score differences below 1% are treated as equal.
        let diff_results = diff(&expected_json, &actual_json, None, Some(0.01), None);

        if diff_results.is_empty() {
            if self.law_scores.len() > 1 {
                // Every law matching the average to within epsilon is itself
                // suspicious: real data rarely satisfies all laws equally well.
                let conflict = Conflict {
                    conflict_type: ConflictType::UnexpectedConsistency,
                    laws_involved: self.law_scores.keys().cloned().collect(),
                    conflict_score: 0.6,
                    description:
                        "All statistical laws show identical scores, indicating potential data or analysis issues"
                        .to_string(),
                    likely_cause: "Insufficient data diversity or analysis algorithm issues".to_string(),
                    resolution_suggestion: "Please review data quality and analysis methods".to_string(),
                };
                self.conflicts.push(conflict);
            }
        } else {
            for diff_result in &diff_results {
                match diff_result {
                    DiffResult::Modified(path, expected_val, actual_val) => {
                        if let (Some(expected), Some(actual)) =
                            (expected_val.as_f64(), actual_val.as_f64())
                        {
                            // Relative deviation; denominator floored at 0.01
                            // to avoid dividing by a (near-)zero expectation.
                            let deviation = (actual - expected).abs() / expected.max(0.01);

                            // Only deviations above 30% count as conflicts.
                            if deviation > 0.3 {
                                // diffx quotes map keys; strip the quotes to
                                // recover the law identifier.
                                let law_name = path.trim_start_matches('"').trim_end_matches('"');
                                let conflict = Conflict {
                                    conflict_type: ConflictType::ScoreDeviation,
                                    laws_involved: vec![law_name.to_string()],
                                    conflict_score: deviation.min(1.0),
                                    description: format!(
                                        "{} score {:.3} significantly deviates from expected {:.3} - deviation {:.1}%",
                                        get_law_display_name(law_name), actual, expected, deviation * 100.0
                                    ),
                                    likely_cause: format!(
                                        "{} may not be compatible with the data pattern",
                                        get_law_display_name(law_name)
                                    ),
                                    resolution_suggestion: format!(
                                        "Please review application conditions and data quality for {}",
                                        get_law_display_name(law_name)
                                    ),
                                };
                                self.conflicts.push(conflict);
                            }
                        }
                    }
                    DiffResult::Added(path, _val) | DiffResult::Removed(path, _val) => {
                        // Both profiles are built from the same key set, so an
                        // added/removed key signals a configuration mismatch.
                        let law_name = path.trim_start_matches('"').trim_end_matches('"');
                        let conflict = Conflict {
                            conflict_type: ConflictType::MethodologicalConflict,
                            laws_involved: vec![law_name.to_string()],
                            conflict_score: 0.5,
                            description: format!(
                                "Unexpected change detected for {}",
                                get_law_display_name(law_name)
                            ),
                            likely_cause: "Analysis configuration or law selection inconsistency"
                                .to_string(),
                            resolution_suggestion: "Please verify the analysis target law settings"
                                .to_string(),
                        };
                        self.conflicts.push(conflict);
                    }
                    DiffResult::TypeChanged(path, _old, _new) => {
                        // Scores are always numbers; a type change indicates a
                        // serialization problem or internal error.
                        let law_name = path.trim_start_matches('"').trim_end_matches('"');
                        let conflict = Conflict {
                            conflict_type: ConflictType::MethodologicalConflict,
                            laws_involved: vec![law_name.to_string()],
                            conflict_score: 0.8,
                            description: format!(
                                "Score type changed for {}",
                                get_law_display_name(law_name)
                            ),
                            likely_cause: "Internal analysis error or data corruption".to_string(),
                            resolution_suggestion: "Please re-run the analysis".to_string(),
                        };
                        self.conflicts.push(conflict);
                    }
                }
            }
        }
    }
429
    /// Compares every pair of executed laws. Each law gets a small JSON
    /// profile (score, confidence bucket, score category, rank); a conflict
    /// is recorded when the relative score gap exceeds 50% or when diffx
    /// finds the profiles divergent in confidence level or score category.
    fn detect_score_conflicts(&mut self) {
        let laws: Vec<String> = self.law_scores.keys().cloned().collect();

        // Unordered pairs (i, j) with i < j.
        for i in 0..laws.len() {
            for j in i + 1..laws.len() {
                let law_a = &laws[i];
                let law_b = &laws[j];

                if let (Some(&score_a), Some(&score_b)) =
                    (self.law_scores.get(law_a), self.law_scores.get(law_b))
                {
                    let law_a_profile = serde_json::json!({
                        "law_name": law_a,
                        "score": score_a,
                        "confidence_level": self.get_confidence_level(score_a),
                        "score_category": self.categorize_score(score_a),
                        "relative_rank": self.get_relative_rank(law_a)
                    });

                    let law_b_profile = serde_json::json!({
                        "law_name": law_b,
                        "score": score_b,
                        "confidence_level": self.get_confidence_level(score_b),
                        "score_category": self.categorize_score(score_b),
                        "relative_rank": self.get_relative_rank(law_b)
                    });

                    // Epsilon 0.1: only coarse structural differences matter here.
                    let diff_results = diff(&law_a_profile, &law_b_profile, None, Some(0.1), None);

                    let score_diff = (score_a - score_b).abs();
                    let max_score = score_a.max(score_b);

                    if max_score > 0.0 {
                        let conflict_ratio = score_diff / max_score;

                        // Structural conflict = the two laws land in different
                        // confidence or category buckets, not just raw scores.
                        let has_structural_conflict = !diff_results.is_empty()
                            && diff_results.iter().any(|result| {
                                if let DiffResult::Modified(path, _old_val, _new_val) = result {
                                    if path.contains("confidence_level")
                                        || path.contains("score_category")
                                    {
                                        return true;
                                    }
                                }
                                false
                            });

                        if conflict_ratio > 0.5 || has_structural_conflict {
                            // Structural disagreement amplifies severity by
                            // 1.5x (capped at 1.0 when passed on below).
                            let enhanced_conflict_score = if has_structural_conflict {
                                conflict_ratio * 1.5
                            } else {
                                conflict_ratio
                            };

                            let conflict = self.create_enhanced_conflict(
                                law_a.clone(),
                                law_b.clone(),
                                enhanced_conflict_score.min(1.0),
                                score_a,
                                score_b,
                                &diff_results,
                            );
                            self.conflicts.push(conflict);
                        }
                    }
                }
            }
        }
    }
506
507 fn create_enhanced_conflict(
509 &self,
510 law_a: String,
511 law_b: String,
512 conflict_score: f64,
513 score_a: f64,
514 score_b: f64,
515 diff_results: &[DiffResult],
516 ) -> Conflict {
517 let conflict_type = self.classify_conflict_type(&law_a, &law_b);
518
519 let mut detailed_description = format!(
521 "{} and {} show significantly different evaluations (difference: {:.3})",
522 get_law_display_name(&law_a),
523 get_law_display_name(&law_b),
524 (score_a - score_b).abs()
525 );
526
527 if !diff_results.is_empty() {
528 detailed_description.push_str(" with structural differences in: ");
529 let diff_details: Vec<String> = diff_results
530 .iter()
531 .filter_map(|result| {
532 if let DiffResult::Modified(path, old_val, new_val) = result {
533 Some(format!("{path} ({old_val:?} → {new_val:?})"))
534 } else {
535 None
536 }
537 })
538 .collect();
539 detailed_description.push_str(&diff_details.join(", "));
540 }
541
542 let likely_cause =
543 self.diagnose_enhanced_conflict_cause(&law_a, &law_b, score_a, score_b, diff_results);
544 let resolution_suggestion =
545 self.suggest_enhanced_conflict_resolution(&law_a, &law_b, &conflict_type, diff_results);
546
547 Conflict {
548 conflict_type,
549 laws_involved: vec![law_a, law_b],
550 conflict_score,
551 description: detailed_description,
552 likely_cause,
553 resolution_suggestion,
554 }
555 }
556
557 fn get_confidence_level(&self, score: f64) -> String {
559 match score {
560 s if s >= 0.8 => "high".to_string(),
561 s if s >= 0.6 => "medium".to_string(),
562 s if s >= 0.4 => "low".to_string(),
563 _ => "very_low".to_string(),
564 }
565 }
566
567 fn categorize_score(&self, score: f64) -> String {
569 match score {
570 s if s >= 0.9 => "excellent".to_string(),
571 s if s >= 0.7 => "good".to_string(),
572 s if s >= 0.5 => "fair".to_string(),
573 s if s >= 0.3 => "poor".to_string(),
574 _ => "very_poor".to_string(),
575 }
576 }
577
578 fn get_relative_rank(&self, law_name: &str) -> usize {
580 let mut scores: Vec<(String, f64)> = self
581 .law_scores
582 .iter()
583 .map(|(name, &score)| (name.clone(), score))
584 .collect();
585 scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
586
587 scores
588 .iter()
589 .position(|(name, _)| name == law_name)
590 .unwrap_or(0)
591 + 1
592 }
593
594 fn diagnose_enhanced_conflict_cause(
596 &self,
597 law_a: &str,
598 law_b: &str,
599 score_a: f64,
600 score_b: f64,
601 diff_results: &[DiffResult],
602 ) -> String {
603 let mut cause = self.diagnose_conflict_cause(law_a, law_b, score_a, score_b);
604
605 if !diff_results.is_empty() {
606 cause.push_str(" Additionally, structural analysis reveals: ");
607 let structural_issues: Vec<String> = diff_results
608 .iter()
609 .filter_map(|result| {
610 if let DiffResult::Modified(path, _, _) = result {
611 if path.contains("confidence_level") {
612 Some("confidence level mismatch".to_string())
613 } else if path.contains("score_category") {
614 Some("score category divergence".to_string())
615 } else {
616 None
617 }
618 } else {
619 None
620 }
621 })
622 .collect();
623 cause.push_str(&structural_issues.join(", "));
624 }
625
626 cause
627 }
628
629 fn suggest_enhanced_conflict_resolution(
631 &self,
632 law_a: &str,
633 law_b: &str,
634 conflict_type: &ConflictType,
635 diff_results: &[DiffResult],
636 ) -> String {
637 let mut suggestion = self.suggest_conflict_resolution(law_a, law_b, conflict_type);
638
639 if !diff_results.is_empty() {
640 suggestion.push_str(" Consider deep structural analysis of data characteristics affecting confidence levels and score categories.");
641 }
642
643 suggestion
644 }
645
646 #[allow(dead_code)]
647 fn create_conflict(
648 &self,
649 law_a: String,
650 law_b: String,
651 conflict_score: f64,
652 score_a: f64,
653 score_b: f64,
654 ) -> Conflict {
655 let conflict_type = self.classify_conflict_type(&law_a, &law_b);
656 let description = format!(
657 "{} and {} show significantly different evaluations (difference: {:.3})",
658 get_law_display_name(&law_a),
659 get_law_display_name(&law_b),
660 (score_a - score_b).abs()
661 );
662 let likely_cause = self.diagnose_conflict_cause(&law_a, &law_b, score_a, score_b);
663 let resolution_suggestion =
664 self.suggest_conflict_resolution(&law_a, &law_b, &conflict_type);
665
666 Conflict {
667 conflict_type,
668 laws_involved: vec![law_a, law_b],
669 conflict_score,
670 description,
671 likely_cause,
672 resolution_suggestion,
673 }
674 }
675
    /// Classifies which kind of conflict a disagreement between two laws
    /// represents, based purely on which laws are involved.
    fn classify_conflict_type(&self, law_a: &str, law_b: &str) -> ConflictType {
        match (law_a, law_b) {
            // Continuous vs. discrete distribution models disagreeing.
            ("normal", "poisson") | ("poisson", "normal") => ConflictType::DistributionMismatch,
            // Any disagreement involving Benford concerns data naturalness.
            ("benf", _) | (_, "benf") => ConflictType::QualityDisagreement,
            // Both are rank/concentration laws; disagreement implies scale issues.
            ("pareto", "zipf") | ("zipf", "pareto") => ConflictType::ScaleIncompatibility,
            _ => ConflictType::MethodologicalConflict,
        }
    }
684
    /// Offers the most likely explanation for a score disagreement, keyed on
    /// the detected data type and which of the two laws scored lower.
    fn diagnose_conflict_cause(
        &self,
        law_a: &str,
        law_b: &str,
        score_a: f64,
        score_b: f64,
    ) -> String {
        match (&self.data_characteristics.data_type, law_a, law_b) {
            // The model mismatched to the data type scoring lower explains
            // the gap directly.
            (DataType::Discrete, "normal", "poisson") if score_a < score_b => {
                "Normal distribution applied to discrete data".to_string()
            }
            (DataType::Continuous, "poisson", "normal") if score_a < score_b => {
                "Poisson distribution applied to continuous data".to_string()
            }
            // Benford scoring higher suggests natural data whose distribution
            // simply differs from the other law's model.
            (_, "benf", _) if score_a > score_b => {
                "Data shows naturalness but different distribution characteristics".to_string()
            }
            _ => "Laws have different applicability ranges due to complex data characteristics"
                .to_string(),
        }
    }
706
    /// Suggests a next step for the user depending on the conflict category.
    /// The law names are currently unused but kept for future, law-specific
    /// suggestions.
    fn suggest_conflict_resolution(
        &self,
        _law_a: &str,
        _law_b: &str,
        conflict_type: &ConflictType,
    ) -> String {
        match conflict_type {
            ConflictType::DistributionMismatch => {
                "Select the optimal distribution for your data type".to_string()
            }
            ConflictType::QualityDisagreement => {
                "For quality auditing, prioritize Benford's Law".to_string()
            }
            ConflictType::ScaleIncompatibility => {
                "Check the scale characteristics of your data".to_string()
            }
            _ => "Use multiple laws in combination for comprehensive analysis".to_string(),
        }
    }
726
727 fn generate_recommendations(&mut self) {
728 let scored_laws = self.score_laws_for_recommendation();
729
730 if scored_laws.is_empty() {
731 self.recommendations = Recommendation::empty();
732 self.recommendation_confidence = 0.0;
733 return;
734 }
735
736 let primary_law = scored_laws[0].0.clone();
737 let secondary_laws: Vec<String> = scored_laws
738 .iter()
739 .skip(1)
740 .take(2)
741 .map(|(law, _)| law.clone())
742 .collect();
743
744 let confidence = self.calculate_recommendation_confidence(&scored_laws);
745 let rationale = self.generate_recommendation_rationale(&primary_law, &secondary_laws);
746 let alternatives = self.generate_alternative_combinations();
747
748 self.recommendations = Recommendation {
749 primary_law,
750 secondary_laws,
751 confidence,
752 rationale,
753 alternative_combinations: alternatives,
754 };
755
756 self.recommendation_confidence = confidence;
757 }
758
759 fn score_laws_for_recommendation(&self) -> Vec<(String, f64)> {
760 let mut scored_laws = Vec::new();
761 let weights = self.get_adaptive_weights();
762
763 for (law, &base_score) in &self.law_scores {
764 let weight = weights.get(law).unwrap_or(&1.0);
765 let compatibility_bonus = self.calculate_compatibility_bonus(law);
766 let purpose_bonus = self.calculate_purpose_bonus(law);
767
768 let total_score = base_score * weight + compatibility_bonus + purpose_bonus;
769 scored_laws.push((law.clone(), total_score));
770 }
771
772 scored_laws.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
773 scored_laws
774 }
775
776 fn get_adaptive_weights(&self) -> HashMap<String, f64> {
777 let mut weights = HashMap::new();
778
779 weights.insert("benf".to_string(), 1.0);
781 weights.insert("pareto".to_string(), 1.0);
782 weights.insert("zipf".to_string(), 1.0);
783 weights.insert("normal".to_string(), 1.0);
784 weights.insert("poisson".to_string(), 1.0);
785
786 match self.data_characteristics.data_type {
788 DataType::Continuous => {
789 weights.insert("normal".to_string(), 1.5);
790 weights.insert("poisson".to_string(), 0.5);
791 }
792 DataType::Discrete => {
793 weights.insert("poisson".to_string(), 1.5);
794 weights.insert("normal".to_string(), 0.5);
795 }
796 DataType::Integer => {
797 weights.insert("poisson".to_string(), 1.3);
798 weights.insert("normal".to_string(), 0.7);
799 }
800 _ => {}
801 }
802
803 match self.data_characteristics.analysis_purpose {
805 AnalysisPurpose::QualityAudit | AnalysisPurpose::FraudDetection => {
806 weights.insert("benf".to_string(), 2.0);
807 }
808 AnalysisPurpose::ConcentrationAnalysis => {
809 weights.insert("pareto".to_string(), 2.0);
810 weights.insert("zipf".to_string(), 1.5);
811 }
812 AnalysisPurpose::AnomalyDetection => {
813 weights.insert("normal".to_string(), 1.8);
814 weights.insert("poisson".to_string(), 1.5);
815 }
816 _ => {}
817 }
818
819 weights
820 }
821
822 fn calculate_compatibility_bonus(&self, law: &str) -> f64 {
823 match (law, &self.data_characteristics.data_type) {
824 ("normal", DataType::Continuous) => 0.2,
825 ("poisson", DataType::Discrete) => 0.2,
826 ("poisson", DataType::Integer) => 0.15,
827 ("benf", _) => 0.1, _ => 0.0,
829 }
830 }
831
832 fn calculate_purpose_bonus(&self, law: &str) -> f64 {
833 match (law, &self.data_characteristics.analysis_purpose) {
834 ("benf", AnalysisPurpose::QualityAudit) => 0.3,
835 ("benf", AnalysisPurpose::FraudDetection) => 0.3,
836 ("pareto", AnalysisPurpose::ConcentrationAnalysis) => 0.25,
837 ("normal", AnalysisPurpose::AnomalyDetection) => 0.25,
838 ("poisson", AnalysisPurpose::AnomalyDetection) => 0.2,
839 _ => 0.0,
840 }
841 }
842
843 fn calculate_recommendation_confidence(&self, scored_laws: &[(String, f64)]) -> f64 {
844 if scored_laws.len() < 2 {
845 return 0.5;
846 }
847
848 let top_score = scored_laws[0].1;
849 let second_score = scored_laws[1].1;
850
851 let score_gap = top_score - second_score;
852 let consistency_factor = self.consistency_score;
853 let conflict_penalty = self.conflicts_detected as f64 * 0.1;
854
855 ((score_gap + consistency_factor) / 2.0 - conflict_penalty).clamp(0.1, 1.0)
856 }
857
858 fn generate_recommendation_rationale(&self, primary: &str, secondary: &[String]) -> String {
859 let primary_reason = match primary {
860 "benf" => "excellent data naturalness and quality",
861 "pareto" => "optimal for concentration analysis",
862 "zipf" => "good fit for frequency distribution characteristics",
863 "normal" => "normality confirmed",
864 "poisson" => "matches event occurrence patterns",
865 _ => "high overall compatibility",
866 };
867
868 let secondary_reason = if !secondary.is_empty() {
869 format!(
870 ", complementary analysis possible with {}",
871 secondary.join(" and ")
872 )
873 } else {
874 String::new()
875 };
876
877 format!("{primary_reason}{secondary_reason}")
878 }
879
880 fn generate_alternative_combinations(&self) -> Vec<LawCombination> {
881 let mut combinations = Vec::new();
882
883 if self.law_scores.contains_key("benf") && self.law_scores.contains_key("normal") {
885 combinations.push(LawCombination {
886 laws: vec!["benf".to_string(), "normal".to_string()],
887 purpose: "Quality Audit".to_string(),
888 effectiveness_score: 0.85,
889 description: "Benford's Law for naturalness, Normal distribution for statistical quality assessment".to_string(),
890 });
891 }
892
893 if self.law_scores.contains_key("pareto") && self.law_scores.contains_key("zipf") {
895 combinations.push(LawCombination {
896 laws: vec!["pareto".to_string(), "zipf".to_string()],
897 purpose: "Concentration Analysis".to_string(),
898 effectiveness_score: 0.8,
899 description:
900 "Pareto principle for 80/20 rule, Zipf's Law for rank distribution analysis"
901 .to_string(),
902 });
903 }
904
905 if self.law_scores.contains_key("normal") && self.law_scores.contains_key("poisson") {
907 combinations.push(LawCombination {
908 laws: vec!["normal".to_string(), "poisson".to_string()],
909 purpose: "Anomaly Detection".to_string(),
910 effectiveness_score: 0.75,
911 description: "Normal distribution for outliers, Poisson distribution for rare event detection".to_string(),
912 });
913 }
914
915 combinations
916 }
917
918 fn assess_overall_quality(&mut self) {
919 let high_quality_count = self
920 .law_scores
921 .values()
922 .filter(|&&score| score > 0.8)
923 .count();
924
925 let low_quality_count = self
926 .law_scores
927 .values()
928 .filter(|&&score| score < 0.4)
929 .count();
930
931 let total_laws = self.law_scores.len();
932
933 self.overall_assessment = match (high_quality_count, low_quality_count, total_laws) {
934 (h, 0, t) if h == t => OverallAssessment::Excellent,
935 (h, l, t) if h >= t * 2 / 3 && l == 0 => OverallAssessment::Good,
936 (_, l, t) if l >= t / 2 => OverallAssessment::Problematic,
937 (_, l, _) if l > 0 && self.conflicts_detected > 2 => OverallAssessment::Concerning,
938 _ => OverallAssessment::Mixed,
939 };
940 }
941
942 fn determine_risk_level(&mut self) {
943 self.risk_level = match self.overall_assessment {
944 OverallAssessment::Excellent => RiskLevel::Low,
945 OverallAssessment::Good => RiskLevel::Low,
946 OverallAssessment::Mixed => RiskLevel::Medium,
947 OverallAssessment::Concerning => RiskLevel::High,
948 OverallAssessment::Problematic => RiskLevel::Critical,
949 };
950 }
951}
952
953impl Recommendation {
954 pub fn empty() -> Self {
955 Self {
956 primary_law: String::new(),
957 secondary_laws: Vec::new(),
958 confidence: 0.0,
959 rationale: String::new(),
960 alternative_combinations: Vec::new(),
961 }
962 }
963}
964
965impl DataCharacteristics {
966 pub fn analyze(numbers: &[f64]) -> Self {
967 let data_type = detect_data_type(numbers);
968 let distribution_shape = detect_distribution_shape(numbers);
969 let outlier_presence = detect_outliers(numbers);
970 let scale_range = detect_scale_range(numbers);
971 let sample_size_category = categorize_sample_size(numbers.len());
972
973 Self {
974 data_type,
975 distribution_shape,
976 outlier_presence,
977 scale_range,
978 analysis_purpose: AnalysisPurpose::GeneralAnalysis, sample_size_category,
980 }
981 }
982}
983
/// Type-erased carrier for one law's analysis output, used to hand results
/// to `IntegrationResult::add_law_result`.
#[derive(Debug, Clone)]
pub enum LawResult {
    Benford(BenfordResult),
    Pareto(ParetoResult),
    Zipf(ZipfResult),
    Normal(NormalResult),
    Poisson(PoissonResult),
}
993
994fn detect_data_type(numbers: &[f64]) -> DataType {
997 let all_integers = numbers.iter().all(|&x| x.fract() == 0.0);
998 let all_non_negative = numbers.iter().all(|&x| x >= 0.0);
999
1000 if all_integers && all_non_negative {
1001 DataType::Integer
1002 } else if all_integers {
1003 DataType::Discrete
1004 } else {
1005 DataType::Continuous
1006 }
1007}
1008
1009fn detect_distribution_shape(numbers: &[f64]) -> DistributionShape {
1010 if numbers.len() < 10 {
1011 return DistributionShape::Unknown;
1012 }
1013
1014 let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
1015 let variance =
1016 numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (numbers.len() - 1) as f64;
1017
1018 let skewness = calculate_skewness(numbers, mean, variance.sqrt());
1020
1021 if skewness.abs() < 0.5 {
1022 DistributionShape::Normal
1023 } else if skewness > 1.0 {
1024 DistributionShape::Skewed
1025 } else {
1026 DistributionShape::Unknown
1027 }
1028}
1029
/// Standardized third moment: the mean of the cubed z-scores. Returns 0.0
/// for a degenerate sample (zero standard deviation) to avoid dividing by
/// zero.
fn calculate_skewness(numbers: &[f64], mean: f64, std_dev: f64) -> f64 {
    if std_dev == 0.0 {
        return 0.0;
    }

    let total: f64 = numbers
        .iter()
        .map(|x| (x - mean) / std_dev)
        .map(|z| z.powi(3))
        .sum();

    total / numbers.len() as f64
}
1043
1044fn detect_outliers(numbers: &[f64]) -> OutlierLevel {
1045 if numbers.len() < 10 {
1046 return OutlierLevel::None;
1047 }
1048
1049 let mut sorted_numbers = numbers.to_vec();
1050 sorted_numbers.sort_by(|a, b| a.partial_cmp(b).unwrap());
1051
1052 let q1_idx = sorted_numbers.len() / 4;
1053 let q3_idx = (sorted_numbers.len() * 3) / 4;
1054
1055 let q1 = sorted_numbers[q1_idx];
1056 let q3 = sorted_numbers[q3_idx];
1057 let iqr = q3 - q1;
1058
1059 let lower_bound = q1 - 1.5 * iqr;
1060 let upper_bound = q3 + 1.5 * iqr;
1061
1062 let outlier_count = numbers
1063 .iter()
1064 .filter(|&&x| x < lower_bound || x > upper_bound)
1065 .count();
1066
1067 let outlier_ratio = outlier_count as f64 / numbers.len() as f64;
1068
1069 match outlier_ratio {
1070 0.0 => OutlierLevel::None,
1071 r if r < 0.05 => OutlierLevel::Low,
1072 r if r < 0.1 => OutlierLevel::Moderate,
1073 r if r < 0.2 => OutlierLevel::High,
1074 _ => OutlierLevel::Extreme,
1075 }
1076}
1077
1078fn detect_scale_range(numbers: &[f64]) -> ScaleRange {
1079 if numbers.is_empty() {
1080 return ScaleRange::Narrow;
1081 }
1082
1083 let min_val = numbers.iter().fold(f64::INFINITY, |a, &b| a.min(b));
1084 let max_val = numbers.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1085
1086 if min_val <= 0.0 || max_val <= 0.0 {
1087 return ScaleRange::Mixed;
1088 }
1089
1090 let range_ratio = max_val / min_val;
1091
1092 match range_ratio {
1093 r if r < 100.0 => ScaleRange::Narrow, r if r < 10000.0 => ScaleRange::Medium, _ => ScaleRange::Wide, }
1097}
1098
1099fn categorize_sample_size(size: usize) -> SampleSizeCategory {
1100 match size {
1101 0..=29 => SampleSizeCategory::Small,
1102 30..=299 => SampleSizeCategory::Medium,
1103 300..=2999 => SampleSizeCategory::Large,
1104 _ => SampleSizeCategory::VeryLarge,
1105 }
1106}