1use crate::common::risk::RiskLevel;
2use crate::diffx_core_mock::{diff, DiffResult};
3use crate::laws::benford::BenfordResult;
4use crate::laws::normal::NormalResult;
5use crate::laws::pareto::ParetoResult;
6use crate::laws::poisson::PoissonResult;
7use crate::laws::zipf::ZipfResult;
8use std::collections::HashMap;
9
/// Maps an internal law identifier (for example `"benf"`) to its display name.
///
/// Identifiers without a known display name are returned unchanged.
fn get_law_display_name(law: &str) -> &str {
    const DISPLAY_NAMES: [(&str, &str); 5] = [
        ("benf", "Benford Law"),
        ("pareto", "Pareto Principle"),
        ("zipf", "Zipf Law"),
        ("normal", "Normal Distribution"),
        ("poisson", "Poisson Distribution"),
    ];

    DISPLAY_NAMES
        .iter()
        .find(|(id, _)| *id == law)
        .map_or(law, |(_, name)| *name)
}
21
22#[derive(Debug, Clone)]
24pub struct IntegrationResult {
25 pub dataset_name: String,
26 pub numbers_analyzed: usize,
27 pub laws_executed: Vec<String>,
28
29 pub overall_quality_score: f64, pub consistency_score: f64, pub conflicts_detected: usize, pub recommendation_confidence: f64, pub benford_result: Option<BenfordResult>,
37 pub pareto_result: Option<ParetoResult>,
38 pub zipf_result: Option<ZipfResult>,
39 pub normal_result: Option<NormalResult>,
40 pub poisson_result: Option<PoissonResult>,
41
42 pub law_scores: HashMap<String, f64>, pub conflicts: Vec<Conflict>, pub recommendations: Recommendation, pub data_characteristics: DataCharacteristics, pub overall_assessment: OverallAssessment,
50 pub risk_level: RiskLevel,
51
52 pub focus: Option<String>, }
55
56#[derive(Debug, Clone)]
58pub struct Conflict {
59 pub conflict_type: ConflictType,
60 pub laws_involved: Vec<String>,
61 pub conflict_score: f64, pub description: String,
63 pub likely_cause: String,
64 pub resolution_suggestion: String,
65}
66
/// Category assigned to a [`Conflict`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ConflictType {
    /// Assigned to the normal/poisson pair.
    DistributionMismatch,
    /// Assigned to any pair that includes Benford.
    QualityDisagreement,
    /// Not constructed by the code in this file.
    RiskLevelConflict,
    /// Assigned to the pareto/zipf pair.
    ScaleIncompatibility,
    /// Fallback for other pairs; also used for added, removed or
    /// type-changed entries reported by the diff pass.
    MethodologicalConflict,
    /// A single law's score deviates by more than 30% from the mean score.
    ScoreDeviation,
    /// Several laws produced exactly the same score.
    UnexpectedConsistency,
}
78
79#[derive(Debug, Clone)]
81pub struct Recommendation {
82 pub primary_law: String, pub secondary_laws: Vec<String>, pub confidence: f64, pub rationale: String, pub alternative_combinations: Vec<LawCombination>, }
88
/// A set of laws suggested for use together.
#[derive(Debug, Clone)]
pub struct LawCombination {
    /// Identifiers of the laws in the combination.
    pub laws: Vec<String>,
    /// Label for what the combination is meant to be used for.
    pub purpose: String,
    /// Effectiveness rating attached to the combination.
    pub effectiveness_score: f64,
    /// Human-readable explanation of the combination.
    pub description: String,
}
97
98#[derive(Debug, Clone)]
100pub struct DataCharacteristics {
101 pub data_type: DataType,
102 pub distribution_shape: DistributionShape,
103 pub outlier_presence: OutlierLevel,
104 pub scale_range: ScaleRange,
105 pub analysis_purpose: AnalysisPurpose,
106 pub sample_size_category: SampleSizeCategory,
107}
108
/// Kind of numeric values found in a dataset.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
    /// At least one value has a fractional part.
    Continuous,
    /// All values are whole numbers and at least one is negative.
    Discrete,
    /// Mixed kinds of values.
    Mixed,
    /// All values are whole numbers and none is negative.
    Integer,
    /// No classification available.
    Unknown,
}
118
/// Rough shape of a dataset's distribution.
#[derive(Debug, Clone, PartialEq)]
pub enum DistributionShape {
    /// Close to symmetric (absolute skewness below `0.5`).
    Normal,
    /// Pronounced skewness.
    Skewed,
    /// More than one mode.
    Multimodal,
    /// Power-law shaped.
    PowerLaw,
    /// Exponentially shaped.
    Exponential,
    /// Roughly uniform.
    Uniform,
    /// Shape not determined, e.g. fewer than ten values.
    Unknown,
}
130
/// Share of values lying outside the 1.5×IQR fences.
#[derive(Debug, Clone, PartialEq)]
pub enum OutlierLevel {
    /// No outliers (also reported for fewer than ten values).
    None,
    /// Some outliers, fewer than 5% of the values.
    Low,
    /// At least 5% but fewer than 10% of the values.
    Moderate,
    /// At least 10% but fewer than 20% of the values.
    High,
    /// 20% of the values or more.
    Extreme,
}
140
/// Bucketed ratio between the largest and the smallest value.
#[derive(Debug, Clone, PartialEq)]
pub enum ScaleRange {
    /// Ratio below 100 (also reported for empty input).
    Narrow,
    /// Ratio of at least 100 but below 10 000.
    Medium,
    /// Ratio of 10 000 or more.
    Wide,
    /// The data contains zero or negative values, so no ratio is computed.
    Mixed,
}
149
/// Goal of the analysis; used to bias the per-law weights.
#[derive(Debug, Clone, PartialEq)]
pub enum AnalysisPurpose {
    /// Raises the weight of Benford.
    QualityAudit,
    /// Raises the weight of Benford.
    FraudDetection,
    /// Raises the weights of Pareto and Zipf.
    ConcentrationAnalysis,
    /// Raises the weights of the normal and Poisson laws.
    AnomalyDetection,
    /// No weight adjustment.
    DistributionFitting,
    /// No weight adjustment.
    GeneralAnalysis,
}
160
/// Bucketed number of input values.
#[derive(Debug, Clone, PartialEq)]
pub enum SampleSizeCategory {
    /// 0 to 29 values.
    Small,
    /// 30 to 299 values.
    Medium,
    /// 300 to 2 999 values.
    Large,
    /// 3 000 values or more.
    VeryLarge,
}
169
/// Qualitative summary of the per-law scores; drives the risk level.
#[derive(Debug, Clone, PartialEq)]
pub enum OverallAssessment {
    /// Mapped to low risk.
    Excellent,
    /// Mapped to low risk.
    Good,
    /// Mapped to medium risk.
    Mixed,
    /// Mapped to high risk.
    Concerning,
    /// Mapped to critical risk.
    Problematic,
}
179
180impl IntegrationResult {
181 pub fn new(dataset_name: String, numbers: &[f64]) -> Self {
182 Self {
183 dataset_name,
184 numbers_analyzed: numbers.len(),
185 laws_executed: Vec::new(),
186 overall_quality_score: 0.0,
187 consistency_score: 0.0,
188 conflicts_detected: 0,
189 recommendation_confidence: 0.0,
190 benford_result: None,
191 pareto_result: None,
192 zipf_result: None,
193 normal_result: None,
194 poisson_result: None,
195 law_scores: HashMap::new(),
196 conflicts: Vec::new(),
197 recommendations: Recommendation::empty(),
198 data_characteristics: DataCharacteristics::analyze(numbers),
199 overall_assessment: OverallAssessment::Mixed,
200 risk_level: RiskLevel::Medium,
201 focus: None,
202 }
203 }
204
205 pub fn add_law_result(&mut self, law_name: &str, result: LawResult) {
207 match law_name {
208 "benf" => {
209 if let LawResult::Benford(r) = result {
210 let score = 1.0 - (r.mean_absolute_deviation / 100.0);
214 self.law_scores.insert("benf".to_string(), score);
215 self.benford_result = Some(r);
216 }
217 }
218 "pareto" => {
219 if let LawResult::Pareto(r) = result {
220 self.law_scores
221 .insert("pareto".to_string(), r.concentration_index);
222 self.pareto_result = Some(r);
223 }
224 }
225 "zipf" => {
226 if let LawResult::Zipf(r) = result {
227 self.law_scores
228 .insert("zipf".to_string(), r.distribution_quality);
229 self.zipf_result = Some(r);
230 }
231 }
232 "normal" => {
233 if let LawResult::Normal(r) = result {
234 self.law_scores
235 .insert("normal".to_string(), r.normality_score);
236 self.normal_result = Some(r);
237 }
238 }
239 "poisson" => {
240 if let LawResult::Poisson(r) = result {
241 self.law_scores
242 .insert("poisson".to_string(), r.goodness_of_fit_score);
243 self.poisson_result = Some(r);
244 }
245 }
246 _ => {}
247 }
248
249 if !self.laws_executed.contains(&law_name.to_string()) {
250 self.laws_executed.push(law_name.to_string());
251 }
252 }
253
    /// Runs every post-processing pass over the collected law scores.
    ///
    /// The order matters because later passes read fields written by earlier
    /// ones: recommendation confidence uses the consistency score and the
    /// conflict count, the overall assessment uses the conflict count, and
    /// the risk level is derived from the overall assessment.
    pub fn finalize_analysis(&mut self) {
        self.calculate_overall_quality_score();
        self.calculate_consistency_score();
        self.detect_conflicts();
        self.generate_recommendations();
        self.assess_overall_quality();
        self.determine_risk_level();
    }
263
264 fn calculate_overall_quality_score(&mut self) {
265 if self.law_scores.is_empty() {
266 self.overall_quality_score = 0.0;
267 return;
268 }
269
270 let weights = self.get_adaptive_weights();
271 let mut weighted_sum = 0.0;
272 let mut total_weight = 0.0;
273
274 for (law, score) in &self.law_scores {
275 if let Some(&weight) = weights.get(law) {
276 weighted_sum += score * weight;
277 total_weight += weight;
278 }
279 }
280
281 self.overall_quality_score = if total_weight > 0.0 {
282 weighted_sum / total_weight
283 } else {
284 0.0
285 };
286 }
287
288 fn calculate_consistency_score(&mut self) {
289 if self.law_scores.len() < 2 {
290 self.consistency_score = 1.0;
291 return;
292 }
293
294 let scores: Vec<f64> = self.law_scores.values().cloned().collect();
295 let mean_score: f64 = scores.iter().sum::<f64>() / scores.len() as f64;
296
297 let variance: f64 = scores
298 .iter()
299 .map(|score| (score - mean_score).powi(2))
300 .sum::<f64>()
301 / scores.len() as f64;
302
303 let max_variance = 1.0;
305 self.consistency_score = 1.0 - (variance / max_variance).min(1.0);
306 }
307
    /// Rebuilds `conflicts` from scratch and refreshes `conflicts_detected`.
    ///
    /// Runs the mean-deviation pass first, then the pairwise pass; both
    /// append to `conflicts`.
    fn detect_conflicts(&mut self) {
        // Drop results of any previous run so repeated calls do not accumulate.
        self.conflicts.clear();

        self.detect_conflicts_with_diffx();

        self.detect_score_conflicts();

        self.conflicts_detected = self.conflicts.len();
    }
319
320 fn detect_conflicts_with_diffx(&mut self) {
322 if self.law_scores.is_empty() {
323 return;
324 }
325
326 let average_score: f64 =
328 self.law_scores.values().sum::<f64>() / self.law_scores.len() as f64;
329 let mut expected_scores = HashMap::new();
330
331 for law in self.law_scores.keys() {
332 expected_scores.insert(law.clone(), average_score);
333 }
334
335 let expected_json = serde_json::to_value(&expected_scores).unwrap_or_default();
337 let actual_json = serde_json::to_value(&self.law_scores).unwrap_or_default();
338
339 let diff_results = diff(&expected_json, &actual_json, None);
341
342 let results = match diff_results {
343 Ok(results) => results,
344 Err(_) => return, };
346
347 if results.is_empty() {
348 if self.law_scores.len() > 1 {
350 let conflict = Conflict {
351 conflict_type: ConflictType::UnexpectedConsistency,
352 laws_involved: self.law_scores.keys().cloned().collect(),
353 conflict_score: 0.6,
354 description:
355 "All statistical laws show identical scores, indicating potential data or analysis issues"
356 .to_string(),
357 likely_cause: "Insufficient data diversity or analysis algorithm issues".to_string(),
358 resolution_suggestion: "Please review data quality and analysis methods".to_string(),
359 };
360 self.conflicts.push(conflict);
361 }
362 } else {
363 for diff_result in &results {
365 match diff_result {
366 DiffResult::Modified(path, expected_val, actual_val) => {
367 if let (Some(expected), Some(actual)) =
368 (expected_val.as_f64(), actual_val.as_f64())
369 {
370 let deviation = (actual - expected).abs() / expected.max(0.01);
371
372 if deviation > 0.3 {
373 let law_name = path.trim_start_matches('"').trim_end_matches('"');
375 let conflict = Conflict {
376 conflict_type: ConflictType::ScoreDeviation,
377 laws_involved: vec![law_name.to_string()],
378 conflict_score: deviation.min(1.0),
379 description: format!(
380 "{} score {:.3} significantly deviates from expected {:.3} - deviation {:.1}%",
381 get_law_display_name(law_name), actual, expected, deviation * 100.0
382 ),
383 likely_cause: format!(
384 "{} may not be compatible with the data pattern",
385 get_law_display_name(law_name)
386 ),
387 resolution_suggestion: format!(
388 "Please review application conditions and data quality for {}",
389 get_law_display_name(law_name)
390 ),
391 };
392 self.conflicts.push(conflict);
393 }
394 }
395 }
396 DiffResult::Added(path, _val) | DiffResult::Removed(path, _val) => {
397 let law_name = path.trim_start_matches('"').trim_end_matches('"');
399 let conflict = Conflict {
400 conflict_type: ConflictType::MethodologicalConflict,
401 laws_involved: vec![law_name.to_string()],
402 conflict_score: 0.5,
403 description: format!(
404 "Unexpected change detected for {}",
405 get_law_display_name(law_name)
406 ),
407 likely_cause: "Analysis configuration or law selection inconsistency"
408 .to_string(),
409 resolution_suggestion: "Please verify the analysis target law settings"
410 .to_string(),
411 };
412 self.conflicts.push(conflict);
413 }
414 DiffResult::TypeChanged(path, _old, _new) => {
415 let law_name = path.trim_start_matches('"').trim_end_matches('"');
417 let conflict = Conflict {
418 conflict_type: ConflictType::MethodologicalConflict,
419 laws_involved: vec![law_name.to_string()],
420 conflict_score: 0.8,
421 description: format!(
422 "Score type changed for {}",
423 get_law_display_name(law_name)
424 ),
425 likely_cause: "Internal analysis error or data corruption".to_string(),
426 resolution_suggestion: "Please re-run the analysis".to_string(),
427 };
428 self.conflicts.push(conflict);
429 }
430 DiffResult::Unchanged => {
431 }
433 }
434 }
435 }
436 }
437
438 fn detect_score_conflicts(&mut self) {
440 let laws: Vec<String> = self.law_scores.keys().cloned().collect();
441
442 for i in 0..laws.len() {
444 for j in i + 1..laws.len() {
445 let law_a = &laws[i];
446 let law_b = &laws[j];
447
448 if let (Some(&score_a), Some(&score_b)) =
449 (self.law_scores.get(law_a), self.law_scores.get(law_b))
450 {
451 let law_a_profile = serde_json::json!({
453 "law_name": law_a,
454 "score": score_a,
455 "confidence_level": self.get_confidence_level(score_a),
456 "score_category": self.categorize_score(score_a),
457 "relative_rank": self.get_relative_rank(law_a)
458 });
459
460 let law_b_profile = serde_json::json!({
462 "law_name": law_b,
463 "score": score_b,
464 "confidence_level": self.get_confidence_level(score_b),
465 "score_category": self.categorize_score(score_b),
466 "relative_rank": self.get_relative_rank(law_b)
467 });
468
469 let diff_results = diff(&law_a_profile, &law_b_profile, None);
471
472 let score_diff = (score_a - score_b).abs();
474 let max_score = score_a.max(score_b);
475
476 if max_score > 0.0 {
477 let conflict_ratio = score_diff / max_score;
478
479 let has_structural_conflict = match &diff_results {
481 Ok(results) => {
482 !results.is_empty()
483 && results.iter().any(|result| {
484 if let DiffResult::Modified(path, _old_val, _new_val) =
485 result
486 {
487 if path.contains("confidence_level")
488 || path.contains("score_category")
489 {
490 return true;
491 }
492 }
493 false
494 })
495 }
496 Err(_) => false,
497 };
498
499 if conflict_ratio > 0.5 || has_structural_conflict {
500 let enhanced_conflict_score = if has_structural_conflict {
501 conflict_ratio * 1.5 } else {
503 conflict_ratio
504 };
505
506 let conflict = match &diff_results {
507 Ok(results) => self.create_enhanced_conflict(
508 law_a.clone(),
509 law_b.clone(),
510 enhanced_conflict_score.min(1.0),
511 score_a,
512 score_b,
513 results,
514 ),
515 Err(_) => Conflict {
516 conflict_type: self.classify_conflict_type(law_a, law_b),
517 laws_involved: vec![law_a.clone(), law_b.clone()],
518 conflict_score: enhanced_conflict_score.min(1.0),
519 description: format!(
520 "Enhanced conflict detected between {} and {} (score: {:.3}, diff analysis failed)",
521 law_a, law_b, enhanced_conflict_score.min(1.0)
522 ),
523 likely_cause: "Diff analysis failed".to_string(),
524 resolution_suggestion: "Review data or retry analysis".to_string(),
525 },
526 };
527 self.conflicts.push(conflict);
528 }
529 }
530 }
531 }
532 }
533 }
534
535 fn create_enhanced_conflict(
537 &self,
538 law_a: String,
539 law_b: String,
540 conflict_score: f64,
541 score_a: f64,
542 score_b: f64,
543 diff_results: &[DiffResult],
544 ) -> Conflict {
545 let conflict_type = self.classify_conflict_type(&law_a, &law_b);
546
547 let mut detailed_description = format!(
549 "{} and {} show significantly different evaluations (difference: {:.3})",
550 get_law_display_name(&law_a),
551 get_law_display_name(&law_b),
552 (score_a - score_b).abs()
553 );
554
555 if !diff_results.is_empty() {
556 detailed_description.push_str(" with structural differences in: ");
557 let diff_details: Vec<String> = diff_results
558 .iter()
559 .filter_map(|result| {
560 if let DiffResult::Modified(path, old_val, new_val) = result {
561 Some(format!("{path} ({old_val:?} → {new_val:?})"))
562 } else {
563 None
564 }
565 })
566 .collect();
567 detailed_description.push_str(&diff_details.join(", "));
568 }
569
570 let likely_cause =
571 self.diagnose_enhanced_conflict_cause(&law_a, &law_b, score_a, score_b, diff_results);
572 let resolution_suggestion =
573 self.suggest_enhanced_conflict_resolution(&law_a, &law_b, &conflict_type, diff_results);
574
575 Conflict {
576 conflict_type,
577 laws_involved: vec![law_a, law_b],
578 conflict_score,
579 description: detailed_description,
580 likely_cause,
581 resolution_suggestion,
582 }
583 }
584
585 fn get_confidence_level(&self, score: f64) -> String {
587 match score {
588 s if s >= 0.8 => "high".to_string(),
589 s if s >= 0.6 => "medium".to_string(),
590 s if s >= 0.4 => "low".to_string(),
591 _ => "very_low".to_string(),
592 }
593 }
594
595 fn categorize_score(&self, score: f64) -> String {
597 match score {
598 s if s >= 0.9 => "excellent".to_string(),
599 s if s >= 0.7 => "good".to_string(),
600 s if s >= 0.5 => "fair".to_string(),
601 s if s >= 0.3 => "poor".to_string(),
602 _ => "very_poor".to_string(),
603 }
604 }
605
606 fn get_relative_rank(&self, law_name: &str) -> usize {
608 let mut scores: Vec<(String, f64)> = self
609 .law_scores
610 .iter()
611 .map(|(name, &score)| (name.clone(), score))
612 .collect();
613 scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
614
615 scores
616 .iter()
617 .position(|(name, _)| name == law_name)
618 .unwrap_or(0)
619 + 1
620 }
621
622 fn diagnose_enhanced_conflict_cause(
624 &self,
625 law_a: &str,
626 law_b: &str,
627 score_a: f64,
628 score_b: f64,
629 diff_results: &[DiffResult],
630 ) -> String {
631 let mut cause = self.diagnose_conflict_cause(law_a, law_b, score_a, score_b);
632
633 if !diff_results.is_empty() {
634 cause.push_str(" Additionally, structural analysis reveals: ");
635 let structural_issues: Vec<String> = diff_results
636 .iter()
637 .filter_map(|result| {
638 if let DiffResult::Modified(path, _, _) = result {
639 if path.contains("confidence_level") {
640 Some("confidence level mismatch".to_string())
641 } else if path.contains("score_category") {
642 Some("score category divergence".to_string())
643 } else {
644 None
645 }
646 } else {
647 None
648 }
649 })
650 .collect();
651 cause.push_str(&structural_issues.join(", "));
652 }
653
654 cause
655 }
656
657 fn suggest_enhanced_conflict_resolution(
659 &self,
660 law_a: &str,
661 law_b: &str,
662 conflict_type: &ConflictType,
663 diff_results: &[DiffResult],
664 ) -> String {
665 let mut suggestion = self.suggest_conflict_resolution(law_a, law_b, conflict_type);
666
667 if !diff_results.is_empty() {
668 suggestion.push_str(" Consider deep structural analysis of data characteristics affecting confidence levels and score categories.");
669 }
670
671 suggestion
672 }
673
674 #[allow(dead_code)]
675 fn create_conflict(
676 &self,
677 law_a: String,
678 law_b: String,
679 conflict_score: f64,
680 score_a: f64,
681 score_b: f64,
682 ) -> Conflict {
683 let conflict_type = self.classify_conflict_type(&law_a, &law_b);
684 let description = format!(
685 "{} and {} show significantly different evaluations (difference: {:.3})",
686 get_law_display_name(&law_a),
687 get_law_display_name(&law_b),
688 (score_a - score_b).abs()
689 );
690 let likely_cause = self.diagnose_conflict_cause(&law_a, &law_b, score_a, score_b);
691 let resolution_suggestion =
692 self.suggest_conflict_resolution(&law_a, &law_b, &conflict_type);
693
694 Conflict {
695 conflict_type,
696 laws_involved: vec![law_a, law_b],
697 conflict_score,
698 description,
699 likely_cause,
700 resolution_suggestion,
701 }
702 }
703
704 fn classify_conflict_type(&self, law_a: &str, law_b: &str) -> ConflictType {
705 match (law_a, law_b) {
706 ("normal", "poisson") | ("poisson", "normal") => ConflictType::DistributionMismatch,
707 ("benf", _) | (_, "benf") => ConflictType::QualityDisagreement,
708 ("pareto", "zipf") | ("zipf", "pareto") => ConflictType::ScaleIncompatibility,
709 _ => ConflictType::MethodologicalConflict,
710 }
711 }
712
713 fn diagnose_conflict_cause(
714 &self,
715 law_a: &str,
716 law_b: &str,
717 score_a: f64,
718 score_b: f64,
719 ) -> String {
720 match (&self.data_characteristics.data_type, law_a, law_b) {
721 (DataType::Discrete, "normal", "poisson") if score_a < score_b => {
722 "Normal distribution applied to discrete data".to_string()
723 }
724 (DataType::Continuous, "poisson", "normal") if score_a < score_b => {
725 "Poisson distribution applied to continuous data".to_string()
726 }
727 (_, "benf", _) if score_a > score_b => {
728 "Data shows naturalness but different distribution characteristics".to_string()
729 }
730 _ => "Laws have different applicability ranges due to complex data characteristics"
731 .to_string(),
732 }
733 }
734
735 fn suggest_conflict_resolution(
736 &self,
737 _law_a: &str,
738 _law_b: &str,
739 conflict_type: &ConflictType,
740 ) -> String {
741 match conflict_type {
742 ConflictType::DistributionMismatch => {
743 "Select the optimal distribution for your data type".to_string()
744 }
745 ConflictType::QualityDisagreement => {
746 "For quality auditing, prioritize Benford's Law".to_string()
747 }
748 ConflictType::ScaleIncompatibility => {
749 "Check the scale characteristics of your data".to_string()
750 }
751 _ => "Use multiple laws in combination for comprehensive analysis".to_string(),
752 }
753 }
754
755 fn generate_recommendations(&mut self) {
756 let scored_laws = self.score_laws_for_recommendation();
757
758 if scored_laws.is_empty() {
759 self.recommendations = Recommendation::empty();
760 self.recommendation_confidence = 0.0;
761 return;
762 }
763
764 let primary_law = scored_laws[0].0.clone();
765 let secondary_laws: Vec<String> = scored_laws
766 .iter()
767 .skip(1)
768 .take(2)
769 .map(|(law, _)| law.clone())
770 .collect();
771
772 let confidence = self.calculate_recommendation_confidence(&scored_laws);
773 let rationale = self.generate_recommendation_rationale(&primary_law, &secondary_laws);
774 let alternatives = self.generate_alternative_combinations();
775
776 self.recommendations = Recommendation {
777 primary_law,
778 secondary_laws,
779 confidence,
780 rationale,
781 alternative_combinations: alternatives,
782 };
783
784 self.recommendation_confidence = confidence;
785 }
786
787 fn score_laws_for_recommendation(&self) -> Vec<(String, f64)> {
788 let mut scored_laws = Vec::new();
789 let weights = self.get_adaptive_weights();
790
791 for (law, &base_score) in &self.law_scores {
792 let weight = weights.get(law).unwrap_or(&1.0);
793 let compatibility_bonus = self.calculate_compatibility_bonus(law);
794 let purpose_bonus = self.calculate_purpose_bonus(law);
795
796 let total_score = base_score * weight + compatibility_bonus + purpose_bonus;
797 scored_laws.push((law.clone(), total_score));
798 }
799
800 scored_laws.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
801 scored_laws
802 }
803
804 fn get_adaptive_weights(&self) -> HashMap<String, f64> {
805 let mut weights = HashMap::new();
806
807 weights.insert("benf".to_string(), 1.0);
809 weights.insert("pareto".to_string(), 1.0);
810 weights.insert("zipf".to_string(), 1.0);
811 weights.insert("normal".to_string(), 1.0);
812 weights.insert("poisson".to_string(), 1.0);
813
814 match self.data_characteristics.data_type {
816 DataType::Continuous => {
817 weights.insert("normal".to_string(), 1.5);
818 weights.insert("poisson".to_string(), 0.5);
819 }
820 DataType::Discrete => {
821 weights.insert("poisson".to_string(), 1.5);
822 weights.insert("normal".to_string(), 0.5);
823 }
824 DataType::Integer => {
825 weights.insert("poisson".to_string(), 1.3);
826 weights.insert("normal".to_string(), 0.7);
827 }
828 _ => {}
829 }
830
831 match self.data_characteristics.analysis_purpose {
833 AnalysisPurpose::QualityAudit | AnalysisPurpose::FraudDetection => {
834 weights.insert("benf".to_string(), 2.0);
835 }
836 AnalysisPurpose::ConcentrationAnalysis => {
837 weights.insert("pareto".to_string(), 2.0);
838 weights.insert("zipf".to_string(), 1.5);
839 }
840 AnalysisPurpose::AnomalyDetection => {
841 weights.insert("normal".to_string(), 1.8);
842 weights.insert("poisson".to_string(), 1.5);
843 }
844 _ => {}
845 }
846
847 weights
848 }
849
850 fn calculate_compatibility_bonus(&self, law: &str) -> f64 {
851 match (law, &self.data_characteristics.data_type) {
852 ("normal", DataType::Continuous) => 0.2,
853 ("poisson", DataType::Discrete) => 0.2,
854 ("poisson", DataType::Integer) => 0.15,
855 ("benf", _) => 0.1, _ => 0.0,
857 }
858 }
859
860 fn calculate_purpose_bonus(&self, law: &str) -> f64 {
861 match (law, &self.data_characteristics.analysis_purpose) {
862 ("benf", AnalysisPurpose::QualityAudit) => 0.3,
863 ("benf", AnalysisPurpose::FraudDetection) => 0.3,
864 ("pareto", AnalysisPurpose::ConcentrationAnalysis) => 0.25,
865 ("normal", AnalysisPurpose::AnomalyDetection) => 0.25,
866 ("poisson", AnalysisPurpose::AnomalyDetection) => 0.2,
867 _ => 0.0,
868 }
869 }
870
871 fn calculate_recommendation_confidence(&self, scored_laws: &[(String, f64)]) -> f64 {
872 if scored_laws.len() < 2 {
873 return 0.5;
874 }
875
876 let top_score = scored_laws[0].1;
877 let second_score = scored_laws[1].1;
878
879 let score_gap = top_score - second_score;
880 let consistency_factor = self.consistency_score;
881 let conflict_penalty = self.conflicts_detected as f64 * 0.1;
882
883 ((score_gap + consistency_factor) / 2.0 - conflict_penalty).clamp(0.1, 1.0)
884 }
885
886 fn generate_recommendation_rationale(&self, primary: &str, secondary: &[String]) -> String {
887 let primary_reason = match primary {
888 "benf" => "excellent data naturalness and quality",
889 "pareto" => "optimal for concentration analysis",
890 "zipf" => "good fit for frequency distribution characteristics",
891 "normal" => "normality confirmed",
892 "poisson" => "matches event occurrence patterns",
893 _ => "high overall compatibility",
894 };
895
896 let secondary_reason = if !secondary.is_empty() {
897 format!(
898 ", complementary analysis possible with {}",
899 secondary.join(" and ")
900 )
901 } else {
902 String::new()
903 };
904
905 format!("{primary_reason}{secondary_reason}")
906 }
907
908 fn generate_alternative_combinations(&self) -> Vec<LawCombination> {
909 let mut combinations = Vec::new();
910
911 if self.law_scores.contains_key("benf") && self.law_scores.contains_key("normal") {
913 combinations.push(LawCombination {
914 laws: vec!["benf".to_string(), "normal".to_string()],
915 purpose: "Quality Audit".to_string(),
916 effectiveness_score: 0.85,
917 description: "Benford's Law for naturalness, Normal distribution for statistical quality assessment".to_string(),
918 });
919 }
920
921 if self.law_scores.contains_key("pareto") && self.law_scores.contains_key("zipf") {
923 combinations.push(LawCombination {
924 laws: vec!["pareto".to_string(), "zipf".to_string()],
925 purpose: "Concentration Analysis".to_string(),
926 effectiveness_score: 0.8,
927 description:
928 "Pareto principle for 80/20 rule, Zipf's Law for rank distribution analysis"
929 .to_string(),
930 });
931 }
932
933 if self.law_scores.contains_key("normal") && self.law_scores.contains_key("poisson") {
935 combinations.push(LawCombination {
936 laws: vec!["normal".to_string(), "poisson".to_string()],
937 purpose: "Anomaly Detection".to_string(),
938 effectiveness_score: 0.75,
939 description: "Normal distribution for outliers, Poisson distribution for rare event detection".to_string(),
940 });
941 }
942
943 combinations
944 }
945
946 fn assess_overall_quality(&mut self) {
947 let high_quality_count = self
948 .law_scores
949 .values()
950 .filter(|&&score| score > 0.8)
951 .count();
952
953 let low_quality_count = self
954 .law_scores
955 .values()
956 .filter(|&&score| score < 0.4)
957 .count();
958
959 let total_laws = self.law_scores.len();
960
961 self.overall_assessment = match (high_quality_count, low_quality_count, total_laws) {
962 (h, 0, t) if h == t => OverallAssessment::Excellent,
963 (h, l, t) if h >= t * 2 / 3 && l == 0 => OverallAssessment::Good,
964 (_, l, t) if l >= t / 2 => OverallAssessment::Problematic,
965 (_, l, _) if l > 0 && self.conflicts_detected > 2 => OverallAssessment::Concerning,
966 _ => OverallAssessment::Mixed,
967 };
968 }
969
970 fn determine_risk_level(&mut self) {
971 self.risk_level = match self.overall_assessment {
972 OverallAssessment::Excellent => RiskLevel::Low,
973 OverallAssessment::Good => RiskLevel::Low,
974 OverallAssessment::Mixed => RiskLevel::Medium,
975 OverallAssessment::Concerning => RiskLevel::High,
976 OverallAssessment::Problematic => RiskLevel::Critical,
977 };
978 }
979}
980
981impl Recommendation {
982 pub fn empty() -> Self {
983 Self {
984 primary_law: String::new(),
985 secondary_laws: Vec::new(),
986 confidence: 0.0,
987 rationale: String::new(),
988 alternative_combinations: Vec::new(),
989 }
990 }
991}
992
993impl DataCharacteristics {
994 pub fn analyze(numbers: &[f64]) -> Self {
995 let data_type = detect_data_type(numbers);
996 let distribution_shape = detect_distribution_shape(numbers);
997 let outlier_presence = detect_outliers(numbers);
998 let scale_range = detect_scale_range(numbers);
999 let sample_size_category = categorize_sample_size(numbers.len());
1000
1001 Self {
1002 data_type,
1003 distribution_shape,
1004 outlier_presence,
1005 scale_range,
1006 analysis_purpose: AnalysisPurpose::GeneralAnalysis, sample_size_category,
1008 }
1009 }
1010}
1011
1012#[derive(Debug, Clone)]
1014pub enum LawResult {
1015 Benford(BenfordResult),
1016 Pareto(ParetoResult),
1017 Zipf(ZipfResult),
1018 Normal(NormalResult),
1019 Poisson(PoissonResult),
1020}
1021
1022fn detect_data_type(numbers: &[f64]) -> DataType {
1025 let all_integers = numbers.iter().all(|&x| x.fract() == 0.0);
1026 let all_non_negative = numbers.iter().all(|&x| x >= 0.0);
1027
1028 if all_integers && all_non_negative {
1029 DataType::Integer
1030 } else if all_integers {
1031 DataType::Discrete
1032 } else {
1033 DataType::Continuous
1034 }
1035}
1036
1037fn detect_distribution_shape(numbers: &[f64]) -> DistributionShape {
1038 if numbers.len() < 10 {
1039 return DistributionShape::Unknown;
1040 }
1041
1042 let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
1043 let variance =
1044 numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (numbers.len() - 1) as f64;
1045
1046 let skewness = calculate_skewness(numbers, mean, variance.sqrt());
1048
1049 if skewness.abs() < 0.5 {
1050 DistributionShape::Normal
1051 } else if skewness > 1.0 {
1052 DistributionShape::Skewed
1053 } else {
1054 DistributionShape::Unknown
1055 }
1056}
1057
/// Mean of the cubed standardised deviations of `numbers`.
///
/// `mean` and `std_dev` are supplied by the caller. A zero `std_dev` gives a
/// skewness of `0.0`, since no value deviates from the mean.
fn calculate_skewness(numbers: &[f64], mean: f64, std_dev: f64) -> f64 {
    if std_dev != 0.0 {
        let cubed_total = numbers
            .iter()
            .map(|&value| {
                let standardised = (value - mean) / std_dev;
                standardised.powi(3)
            })
            .sum::<f64>();
        cubed_total / numbers.len() as f64
    } else {
        0.0
    }
}
1071
1072fn detect_outliers(numbers: &[f64]) -> OutlierLevel {
1073 if numbers.len() < 10 {
1074 return OutlierLevel::None;
1075 }
1076
1077 let mut sorted_numbers = numbers.to_vec();
1078 sorted_numbers.sort_by(|a, b| a.partial_cmp(b).unwrap());
1079
1080 let q1_idx = sorted_numbers.len() / 4;
1081 let q3_idx = (sorted_numbers.len() * 3) / 4;
1082
1083 let q1 = sorted_numbers[q1_idx];
1084 let q3 = sorted_numbers[q3_idx];
1085 let iqr = q3 - q1;
1086
1087 let lower_bound = q1 - 1.5 * iqr;
1088 let upper_bound = q3 + 1.5 * iqr;
1089
1090 let outlier_count = numbers
1091 .iter()
1092 .filter(|&&x| x < lower_bound || x > upper_bound)
1093 .count();
1094
1095 let outlier_ratio = outlier_count as f64 / numbers.len() as f64;
1096
1097 match outlier_ratio {
1098 0.0 => OutlierLevel::None,
1099 r if r < 0.05 => OutlierLevel::Low,
1100 r if r < 0.1 => OutlierLevel::Moderate,
1101 r if r < 0.2 => OutlierLevel::High,
1102 _ => OutlierLevel::Extreme,
1103 }
1104}
1105
1106fn detect_scale_range(numbers: &[f64]) -> ScaleRange {
1107 if numbers.is_empty() {
1108 return ScaleRange::Narrow;
1109 }
1110
1111 let min_val = numbers.iter().fold(f64::INFINITY, |a, &b| a.min(b));
1112 let max_val = numbers.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1113
1114 if min_val <= 0.0 || max_val <= 0.0 {
1115 return ScaleRange::Mixed;
1116 }
1117
1118 let range_ratio = max_val / min_val;
1119
1120 match range_ratio {
1121 r if r < 100.0 => ScaleRange::Narrow, r if r < 10000.0 => ScaleRange::Medium, _ => ScaleRange::Wide, }
1125}
1126
1127fn categorize_sample_size(size: usize) -> SampleSizeCategory {
1128 match size {
1129 0..=29 => SampleSizeCategory::Small,
1130 30..=299 => SampleSizeCategory::Medium,
1131 300..=2999 => SampleSizeCategory::Large,
1132 _ => SampleSizeCategory::VeryLarge,
1133 }
1134}