// datasynth_eval/process_mining/variant_analysis.rs
use crate::error::EvalResult;
use serde::{Deserialize, Serialize};

/// Input record describing one process variant and how many cases followed it.
///
/// A slice of these is what `VariantAnalyzer::analyze` consumes.
#[derive(Debug, Clone)]
pub struct VariantData {
    /// Identifier of the variant; copied verbatim into the reported top variants.
    pub variant_id: String,
    /// Number of cases attributed to this variant.
    pub case_count: usize,
    /// Whether this variant's cases count toward the happy-path concentration.
    pub is_happy_path: bool,
}

20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct VariantThresholds {
23 pub min_entropy: f64,
25 pub max_happy_path_concentration: f64,
27 pub min_variant_count: usize,
29}
30
31impl Default for VariantThresholds {
32 fn default() -> Self {
33 Self {
34 min_entropy: 1.0,
35 max_happy_path_concentration: 0.95,
36 min_variant_count: 2,
37 }
38 }
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct VariantAnalysis {
44 pub variant_count: usize,
46 pub total_cases: usize,
48 pub variant_entropy: f64,
50 pub happy_path_concentration: f64,
52 pub top_variants: Vec<(String, f64)>,
54 pub passes: bool,
56 pub issues: Vec<String>,
58}
59
60pub struct VariantAnalyzer {
62 thresholds: VariantThresholds,
63}
64
65impl VariantAnalyzer {
66 pub fn new() -> Self {
68 Self {
69 thresholds: VariantThresholds::default(),
70 }
71 }
72
73 pub fn with_thresholds(thresholds: VariantThresholds) -> Self {
75 Self { thresholds }
76 }
77
78 pub fn analyze(&self, variants: &[VariantData]) -> EvalResult<VariantAnalysis> {
80 let mut issues = Vec::new();
81
82 if variants.is_empty() {
83 return Ok(VariantAnalysis {
84 variant_count: 0,
85 total_cases: 0,
86 variant_entropy: 0.0,
87 happy_path_concentration: 0.0,
88 top_variants: Vec::new(),
89 passes: true,
90 issues: Vec::new(),
91 });
92 }
93
94 let total_cases: usize = variants.iter().map(|v| v.case_count).sum();
95 let variant_count = variants.len();
96
97 let variant_entropy = if total_cases > 0 {
99 let mut entropy = 0.0_f64;
100 for v in variants {
101 if v.case_count > 0 {
102 let p = v.case_count as f64 / total_cases as f64;
103 entropy -= p * p.ln();
104 }
105 }
106 entropy
107 } else {
108 0.0
109 };
110
111 let happy_cases: usize = variants
113 .iter()
114 .filter(|v| v.is_happy_path)
115 .map(|v| v.case_count)
116 .sum();
117 let happy_path_concentration = if total_cases > 0 {
118 happy_cases as f64 / total_cases as f64
119 } else {
120 0.0
121 };
122
123 let mut sorted: Vec<&VariantData> = variants.iter().collect();
125 sorted.sort_by(|a, b| b.case_count.cmp(&a.case_count));
126 let top_variants: Vec<(String, f64)> = sorted
127 .iter()
128 .take(5)
129 .map(|v| {
130 (
131 v.variant_id.clone(),
132 if total_cases > 0 {
133 v.case_count as f64 / total_cases as f64
134 } else {
135 0.0
136 },
137 )
138 })
139 .collect();
140
141 if variant_count < self.thresholds.min_variant_count {
143 issues.push(format!(
144 "Only {} variants (minimum {})",
145 variant_count, self.thresholds.min_variant_count
146 ));
147 }
148 if variant_entropy < self.thresholds.min_entropy && variant_count > 1 {
149 issues.push(format!(
150 "Variant entropy {:.3} < {:.3}",
151 variant_entropy, self.thresholds.min_entropy
152 ));
153 }
154 if happy_path_concentration > self.thresholds.max_happy_path_concentration {
155 issues.push(format!(
156 "Happy path concentration {:.3} > {:.3}",
157 happy_path_concentration, self.thresholds.max_happy_path_concentration
158 ));
159 }
160
161 let passes = issues.is_empty();
162
163 Ok(VariantAnalysis {
164 variant_count,
165 total_cases,
166 variant_entropy,
167 happy_path_concentration,
168 top_variants,
169 passes,
170 issues,
171 })
172 }
173}
174
175impl Default for VariantAnalyzer {
176 fn default() -> Self {
177 Self::new()
178 }
179}
180
#[cfg(test)]
// Tests may unwrap: a failed unwrap is a test failure, which is the intent.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a `VariantData` without repeating the struct literal in every test.
    fn variant(id: &str, case_count: usize, is_happy_path: bool) -> VariantData {
        VariantData {
            variant_id: id.to_string(),
            case_count,
            is_happy_path,
        }
    }

    #[test]
    fn test_diverse_variants() {
        let analyzer = VariantAnalyzer::new();
        let variants = vec![
            variant("A->B->C", 50, true),
            variant("A->B->D->C", 30, false),
            variant("A->E->C", 20, false),
        ];

        let result = analyzer.analyze(&variants).unwrap();
        assert!(result.passes);
        assert!(result.issues.is_empty());
        assert_eq!(result.variant_count, 3);
        assert_eq!(result.total_cases, 100);
        assert!(result.variant_entropy > 0.0);
        assert!((result.happy_path_concentration - 0.5).abs() < 1e-12);
        assert_eq!(result.top_variants[0].0, "A->B->C");
    }

    #[test]
    fn test_all_happy_path() {
        let analyzer = VariantAnalyzer::new();
        let variants = vec![variant("A->B->C", 100, true), variant("A->B->D", 1, false)];

        let result = analyzer.analyze(&variants).unwrap();
        assert!(!result.passes);
        assert!(result.happy_path_concentration > 0.95);
        assert!(result
            .issues
            .iter()
            .any(|issue| issue.starts_with("Happy path concentration")));
    }

    #[test]
    fn test_single_variant() {
        let analyzer = VariantAnalyzer::new();
        let variants = vec![variant("A->B", 100, true)];

        let result = analyzer.analyze(&variants).unwrap();
        // One variant is below the default minimum of two.
        assert!(!result.passes);
        assert!(result
            .issues
            .iter()
            .any(|issue| issue.contains("Only 1 variants")));
    }

    #[test]
    fn test_empty() {
        let analyzer = VariantAnalyzer::new();
        let result = analyzer.analyze(&[]).unwrap();
        assert!(result.passes);
        assert_eq!(result.variant_count, 0);
        assert!(result.issues.is_empty());
        assert!(result.top_variants.is_empty());
    }
}
253}