1use serde::{Deserialize, Serialize};
54use std::collections::HashMap;
55use tracing::{debug, instrument};
56
57use crate::analyzers::profiler::{ColumnProfile, DetectedDataType};
58
/// A single data-quality constraint proposed for one column.
///
/// Values of this type are returned by `ConstraintSuggestionRule::apply` and
/// are filtered, sorted and truncated by `SuggestionEngine::suggest_constraints`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SuggestedConstraint {
    /// Identifier of the suggested check, e.g. `"is_complete"` or `"has_min"`.
    pub check_type: String,
    /// Name of the column the suggestion applies to.
    pub column: String,
    /// Named parameters for the check (the rules in this file use keys such
    /// as `"threshold"`, `"expected_type"` and `"valid_values"`).
    pub parameters: HashMap<String, ConstraintParameter>,
    /// Score the engine compares against its confidence threshold and sorts by.
    pub confidence: f64,
    /// Human-readable explanation of why the constraint was suggested.
    pub rationale: String,
    /// Importance assigned by the rule that produced the suggestion.
    pub priority: SuggestionPriority,
}
75
/// A typed value stored in `SuggestedConstraint::parameters`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ConstraintParameter {
    /// A floating-point value, e.g. a ratio or numeric bound.
    Float(f64),
    /// A signed integer value, e.g. a cardinality limit.
    Integer(i64),
    /// A textual value, e.g. a type name or a comma-joined value list.
    String(String),
    /// A boolean flag.
    Boolean(bool),
}
84
/// Importance level attached to a `SuggestedConstraint`.
///
/// `priority_order` ranks the variants `Critical` = 0, `High` = 1,
/// `Medium` = 2 and `Low` = 3.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SuggestionPriority {
    // Listed from most to least urgent.
    Critical, High, Medium, Low, }
93
/// A heuristic that inspects a column profile and proposes constraints.
///
/// Implementations must be `Send + Sync`; `SuggestionEngine` stores them as
/// boxed trait objects.
pub trait ConstraintSuggestionRule: Send + Sync {
    /// Returns the suggestions this rule derives from `profile`.
    ///
    /// An empty vector means the rule has nothing to suggest for the column.
    fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint>;

    /// Short identifier of the rule; the engine includes it in debug logs.
    fn name(&self) -> &str;

    /// One-sentence, human-readable description of what the rule looks for.
    fn description(&self) -> &str;
}
105
/// Runs a list of `ConstraintSuggestionRule`s against column profiles and
/// post-processes the combined suggestions.
pub struct SuggestionEngine {
    /// Rules evaluated for every profile, in insertion order.
    rules: Vec<Box<dyn ConstraintSuggestionRule>>,
    /// Suggestions with a confidence below this value are discarded.
    confidence_threshold: f64,
    /// Upper bound on the number of suggestions returned for one column.
    max_suggestions_per_column: usize,
}
112
113impl SuggestionEngine {
114 pub fn new() -> Self {
116 Self {
117 rules: Vec::new(),
118 confidence_threshold: 0.5,
119 max_suggestions_per_column: 10,
120 }
121 }
122
123 pub fn add_rule(mut self, rule: Box<dyn ConstraintSuggestionRule>) -> Self {
125 self.rules.push(rule);
126 self
127 }
128
129 pub fn confidence_threshold(mut self, threshold: f64) -> Self {
131 self.confidence_threshold = threshold.clamp(0.0, 1.0);
132 self
133 }
134
135 pub fn max_suggestions_per_column(mut self, max: usize) -> Self {
137 self.max_suggestions_per_column = max;
138 self
139 }
140
141 #[instrument(skip(self, profile))]
143 pub fn suggest_constraints(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
144 debug!(
145 column = profile.column_name,
146 rules_count = self.rules.len(),
147 "Generating constraint suggestions"
148 );
149
150 let mut all_suggestions = Vec::new();
151
152 for rule in &self.rules {
154 let rule_suggestions = rule.apply(profile);
155 debug!(
156 rule = rule.name(),
157 suggestions_count = rule_suggestions.len(),
158 "Applied suggestion rule"
159 );
160 all_suggestions.extend(rule_suggestions);
161 }
162
163 all_suggestions.retain(|s| s.confidence >= self.confidence_threshold);
165
166 all_suggestions.sort_by(|a, b| {
168 b.confidence
169 .partial_cmp(&a.confidence)
170 .unwrap_or(std::cmp::Ordering::Equal)
171 .then_with(|| priority_order(&b.priority).cmp(&priority_order(&a.priority)))
172 });
173
174 all_suggestions.truncate(self.max_suggestions_per_column);
176
177 debug!(
178 column = profile.column_name,
179 suggestions_count = all_suggestions.len(),
180 "Generated constraint suggestions"
181 );
182
183 all_suggestions
184 }
185
186 pub fn suggest_constraints_batch(
188 &self,
189 profiles: &[ColumnProfile],
190 ) -> HashMap<String, Vec<SuggestedConstraint>> {
191 profiles
192 .iter()
193 .map(|profile| {
194 (
195 profile.column_name.clone(),
196 self.suggest_constraints(profile),
197 )
198 })
199 .collect()
200 }
201}
202
203impl Default for SuggestionEngine {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
209fn priority_order(priority: &SuggestionPriority) -> u8 {
211 match priority {
212 SuggestionPriority::Critical => 0,
213 SuggestionPriority::High => 1,
214 SuggestionPriority::Medium => 2,
215 SuggestionPriority::Low => 3,
216 }
217}
218
/// Rule that derives completeness suggestions from a column's null percentage.
pub struct CompletenessRule {
    /// Completeness ratio at or above which the column is treated as complete.
    high_completeness_threshold: f64,
    /// Completeness ratio at or above which a threshold check is suggested.
    medium_completeness_threshold: f64,
}

impl CompletenessRule {
    /// Creates the rule with the default thresholds `0.98` (high) and `0.90`
    /// (medium).
    pub fn new() -> Self {
        CompletenessRule {
            high_completeness_threshold: 0.98,
            medium_completeness_threshold: 0.90,
        }
    }

    /// Creates the rule with custom thresholds.
    ///
    /// Both values are clamped to `0.0..=1.0`.
    pub fn with_thresholds(high: f64, medium: f64) -> Self {
        let into_unit_interval = |ratio: f64| ratio.clamp(0.0, 1.0);

        CompletenessRule {
            high_completeness_threshold: into_unit_interval(high),
            medium_completeness_threshold: into_unit_interval(medium),
        }
    }
}

/// The default rule uses the same thresholds as `CompletenessRule::new`.
impl Default for CompletenessRule {
    fn default() -> Self {
        CompletenessRule::new()
    }
}
248
249impl ConstraintSuggestionRule for CompletenessRule {
250 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
251 let completeness = 1.0 - profile.basic_stats.null_percentage;
252 let mut suggestions = Vec::new();
253
254 if completeness >= self.high_completeness_threshold {
255 suggestions.push(SuggestedConstraint {
256 check_type: "is_complete".to_string(),
257 column: profile.column_name.clone(),
258 parameters: HashMap::new(),
259 confidence: 0.9,
260 rationale: format!(
261 "Column is {:.1}%+ complete, suggesting completeness constraint",
262 completeness * 100.0
263 ),
264 priority: SuggestionPriority::High,
265 });
266 } else if completeness >= self.medium_completeness_threshold {
267 let mut params = HashMap::new();
268 params.insert(
269 "threshold".to_string(),
270 ConstraintParameter::Float(completeness - 0.02),
271 );
272
273 suggestions.push(SuggestedConstraint {
274 check_type: "has_completeness".to_string(),
275 column: profile.column_name.clone(),
276 parameters: params,
277 confidence: 0.8,
278 rationale: format!(
279 "Column has {:.1}% completeness, suggesting threshold constraint",
280 completeness * 100.0
281 ),
282 priority: SuggestionPriority::Medium,
283 });
284 } else if completeness < 0.5 {
285 suggestions.push(SuggestedConstraint {
287 check_type: "monitor_completeness".to_string(),
288 column: profile.column_name.clone(),
289 parameters: HashMap::new(),
290 confidence: 0.7,
291 rationale: format!(
292 "Column has only {:.1}% completeness, suggesting monitoring",
293 completeness * 100.0
294 ),
295 priority: SuggestionPriority::Critical,
296 });
297 }
298
299 suggestions
300 }
301
302 fn name(&self) -> &str {
303 "CompletenessRule"
304 }
305
306 fn description(&self) -> &str {
307 "Analyzes null percentage to suggest completeness constraints"
308 }
309}
310
/// Rule that derives uniqueness suggestions from the ratio of distinct
/// values to rows.
pub struct UniquenessRule {
    /// Distinct-to-row ratio at or above which `is_unique` is suggested.
    high_uniqueness_threshold: f64,
    /// Distinct-to-row ratio at or above which `has_uniqueness` is suggested.
    medium_uniqueness_threshold: f64,
}

impl UniquenessRule {
    /// Creates the rule with the default thresholds `0.95` (high) and `0.80`
    /// (medium).
    pub fn new() -> Self {
        UniquenessRule {
            high_uniqueness_threshold: 0.95,
            medium_uniqueness_threshold: 0.80,
        }
    }

    /// Creates the rule with custom thresholds.
    ///
    /// Both values are clamped to `0.0..=1.0`.
    pub fn with_thresholds(high: f64, medium: f64) -> Self {
        let into_unit_interval = |ratio: f64| ratio.clamp(0.0, 1.0);

        UniquenessRule {
            high_uniqueness_threshold: into_unit_interval(high),
            medium_uniqueness_threshold: into_unit_interval(medium),
        }
    }
}

/// The default rule uses the same thresholds as `UniquenessRule::new`.
impl Default for UniquenessRule {
    fn default() -> Self {
        UniquenessRule::new()
    }
}
340
341impl ConstraintSuggestionRule for UniquenessRule {
342 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
343 let total_rows = profile.basic_stats.row_count as f64;
344 let unique_ratio = if total_rows > 0.0 {
345 profile.basic_stats.approximate_cardinality as f64 / total_rows
346 } else {
347 0.0
348 };
349
350 let mut suggestions = Vec::new();
351
352 if unique_ratio >= self.high_uniqueness_threshold {
353 suggestions.push(SuggestedConstraint {
354 check_type: "is_unique".to_string(),
355 column: profile.column_name.clone(),
356 parameters: HashMap::new(),
357 confidence: 0.9,
358 rationale: format!(
359 "Column has {:.1}% unique values, suggesting uniqueness constraint",
360 unique_ratio * 100.0
361 ),
362 priority: SuggestionPriority::High,
363 });
364 } else if unique_ratio >= self.medium_uniqueness_threshold {
365 let mut params = HashMap::new();
366 params.insert(
367 "threshold".to_string(),
368 ConstraintParameter::Float(unique_ratio - 0.05),
369 );
370
371 suggestions.push(SuggestedConstraint {
372 check_type: "has_uniqueness".to_string(),
373 column: profile.column_name.clone(),
374 parameters: params,
375 confidence: 0.7,
376 rationale: format!(
377 "Column has {:.1}% unique values, suggesting uniqueness monitoring",
378 unique_ratio * 100.0
379 ),
380 priority: SuggestionPriority::Medium,
381 });
382 }
383
384 let column_lower = profile.column_name.to_lowercase();
386 if (column_lower.contains("id") || column_lower.contains("key")) && unique_ratio > 0.7 {
387 suggestions.push(SuggestedConstraint {
388 check_type: "primary_key_candidate".to_string(),
389 column: profile.column_name.clone(),
390 parameters: HashMap::new(),
391 confidence: 0.8,
392 rationale: "Column name suggests identifier and has high uniqueness".to_string(),
393 priority: SuggestionPriority::High,
394 });
395 }
396
397 suggestions
398 }
399
400 fn name(&self) -> &str {
401 "UniquenessRule"
402 }
403
404 fn description(&self) -> &str {
405 "Analyzes cardinality to suggest uniqueness constraints for potential keys"
406 }
407}
408
/// Rule that inspects sampled string values for e-mail, date and phone-number
/// shapes.
pub struct PatternRule;

impl PatternRule {
    /// Number of leading sample values each heuristic inspects.
    const SAMPLE_LIMIT: usize = 10;

    /// Creates the rule; it has no configuration.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` when `samples` is non-empty and every one of the first
    /// `SAMPLE_LIMIT` values satisfies `looks_right`.
    ///
    /// The explicit emptiness check matters: `Iterator::all` is vacuously
    /// true for an empty iterator, which would report a pattern match for a
    /// column with no samples at all.
    fn all_samples_match(samples: &[String], looks_right: impl Fn(&str) -> bool) -> bool {
        !samples.is_empty()
            && samples
                .iter()
                .take(Self::SAMPLE_LIMIT)
                .all(|sample| looks_right(sample))
    }

    /// Heuristic e-mail check: a non-empty part before the first `@` and a
    /// `.` somewhere after it.
    ///
    /// Requiring the dot in the part after `@` rejects values such as
    /// `"a.b@c"`, which merely contain both characters.
    fn is_email_pattern(&self, samples: &[String]) -> bool {
        Self::all_samples_match(samples, |sample| {
            sample
                .split_once('@')
                .map_or(false, |(local, domain)| {
                    !local.is_empty() && domain.contains('.')
                })
        })
    }

    /// Heuristic date check: the value contains `-` or `/`, or is exactly
    /// eight ASCII digits (a compact form such as `20240131`).
    ///
    /// The digit requirement keeps arbitrary eight-character strings from
    /// being classified as dates.
    fn is_date_pattern(&self, samples: &[String]) -> bool {
        Self::all_samples_match(samples, |sample| {
            sample.contains('-')
                || sample.contains('/')
                || (sample.len() == 8 && sample.bytes().all(|byte| byte.is_ascii_digit()))
        })
    }

    /// Heuristic phone check: the value contains at least ten numeric
    /// characters, ignoring any separators.
    fn is_phone_pattern(&self, samples: &[String]) -> bool {
        Self::all_samples_match(samples, |sample| {
            sample.chars().filter(|c| c.is_numeric()).count() >= 10
        })
    }
}

/// The default rule is identical to the one built by `PatternRule::new`.
impl Default for PatternRule {
    fn default() -> Self {
        Self::new()
    }
}
448
449impl ConstraintSuggestionRule for PatternRule {
450 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
451 let mut suggestions = Vec::new();
452
453 if profile.data_type == DetectedDataType::String
454 && !profile.basic_stats.sample_values.is_empty()
455 {
456 let samples = &profile.basic_stats.sample_values;
457
458 if self.is_email_pattern(samples) {
459 suggestions.push(SuggestedConstraint {
460 check_type: "matches_email_pattern".to_string(),
461 column: profile.column_name.clone(),
462 parameters: HashMap::new(),
463 confidence: 0.85,
464 rationale: "Sample values suggest email format".to_string(),
465 priority: SuggestionPriority::Medium,
466 });
467 }
468
469 if self.is_date_pattern(samples) {
470 suggestions.push(SuggestedConstraint {
471 check_type: "matches_date_pattern".to_string(),
472 column: profile.column_name.clone(),
473 parameters: HashMap::new(),
474 confidence: 0.75,
475 rationale: "Sample values suggest date format".to_string(),
476 priority: SuggestionPriority::Medium,
477 });
478 }
479
480 if self.is_phone_pattern(samples) {
481 suggestions.push(SuggestedConstraint {
482 check_type: "matches_phone_pattern".to_string(),
483 column: profile.column_name.clone(),
484 parameters: HashMap::new(),
485 confidence: 0.70,
486 rationale: "Sample values suggest phone number format".to_string(),
487 priority: SuggestionPriority::Low,
488 });
489 }
490 }
491
492 suggestions
493 }
494
495 fn name(&self) -> &str {
496 "PatternRule"
497 }
498
499 fn description(&self) -> &str {
500 "Identifies common data patterns like emails, dates, and phone numbers"
501 }
502}
503
/// Rule that proposes range and outlier checks for numeric columns.
pub struct RangeRule;

impl RangeRule {
    /// Creates the rule; it has no configuration.
    pub fn new() -> Self {
        RangeRule
    }
}

/// The default rule is identical to the one built by `RangeRule::new`.
impl Default for RangeRule {
    fn default() -> Self {
        RangeRule::new()
    }
}
519
520impl ConstraintSuggestionRule for RangeRule {
521 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
522 let mut suggestions = Vec::new();
523
524 match profile.data_type {
525 DetectedDataType::Integer | DetectedDataType::Double => {
526 if let (Some(ref min_str), Some(ref max_str)) = (
528 &profile.basic_stats.min_value,
529 &profile.basic_stats.max_value,
530 ) {
531 if let (Ok(min_val), Ok(max_val)) =
532 (min_str.parse::<f64>(), max_str.parse::<f64>())
533 {
534 let range = max_val - min_val;
535
536 if range > 0.0 && min_val >= 0.0 {
538 let mut min_params = HashMap::new();
539 min_params.insert(
540 "threshold".to_string(),
541 ConstraintParameter::Float(min_val),
542 );
543
544 suggestions.push(SuggestedConstraint {
545 check_type: "has_min".to_string(),
546 column: profile.column_name.clone(),
547 parameters: min_params,
548 confidence: 0.8,
549 rationale: format!("Minimum value observed: {min_val}"),
550 priority: SuggestionPriority::Medium,
551 });
552
553 let mut max_params = HashMap::new();
554 max_params.insert(
555 "threshold".to_string(),
556 ConstraintParameter::Float(max_val),
557 );
558
559 suggestions.push(SuggestedConstraint {
560 check_type: "has_max".to_string(),
561 column: profile.column_name.clone(),
562 parameters: max_params,
563 confidence: 0.8,
564 rationale: format!("Maximum value observed: {max_val}"),
565 priority: SuggestionPriority::Medium,
566 });
567
568 if min_val >= 0.0 {
570 suggestions.push(SuggestedConstraint {
571 check_type: "is_positive".to_string(),
572 column: profile.column_name.clone(),
573 parameters: HashMap::new(),
574 confidence: 0.9,
575 rationale: "All observed values are non-negative".to_string(),
576 priority: SuggestionPriority::High,
577 });
578 }
579 }
580 }
581 }
582
583 if let Some(distribution) = &profile.numeric_distribution {
585 if let Some(quantiles) = distribution.quantiles.get("P99") {
586 let mut outlier_params = HashMap::new();
587 outlier_params.insert(
588 "threshold".to_string(),
589 ConstraintParameter::Float(*quantiles),
590 );
591
592 suggestions.push(SuggestedConstraint {
593 check_type: "has_no_outliers".to_string(),
594 column: profile.column_name.clone(),
595 parameters: outlier_params,
596 confidence: 0.7,
597 rationale: "Suggests outlier detection based on P99".to_string(),
598 priority: SuggestionPriority::Low,
599 });
600 }
601 }
602 }
603 _ => {}
604 }
605
606 suggestions
607 }
608
609 fn name(&self) -> &str {
610 "RangeRule"
611 }
612
613 fn description(&self) -> &str {
614 "Suggests min/max constraints and outlier detection for numeric columns"
615 }
616}
617
/// Rule that proposes data-type validation checks from the detected type.
pub struct DataTypeRule;

impl DataTypeRule {
    /// Creates the rule; it has no configuration.
    pub fn new() -> Self {
        DataTypeRule
    }
}

/// The default rule is identical to the one built by `DataTypeRule::new`.
impl Default for DataTypeRule {
    fn default() -> Self {
        DataTypeRule::new()
    }
}
633
634impl ConstraintSuggestionRule for DataTypeRule {
635 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
636 let mut suggestions = Vec::new();
637
638 match &profile.data_type {
639 DetectedDataType::Mixed => {
640 suggestions.push(SuggestedConstraint {
641 check_type: "has_consistent_type".to_string(),
642 column: profile.column_name.clone(),
643 parameters: HashMap::new(),
644 confidence: 0.9,
645 rationale: "Column has mixed data types, suggesting type consistency check"
646 .to_string(),
647 priority: SuggestionPriority::Critical,
648 });
649 }
650 DetectedDataType::Unknown => {
651 suggestions.push(SuggestedConstraint {
652 check_type: "validate_data_type".to_string(),
653 column: profile.column_name.clone(),
654 parameters: HashMap::new(),
655 confidence: 0.8,
656 rationale: "Unable to determine data type, suggesting validation".to_string(),
657 priority: SuggestionPriority::High,
658 });
659 }
660 detected_type => {
661 let mut params = HashMap::new();
662 params.insert(
663 "expected_type".to_string(),
664 ConstraintParameter::String(format!("{detected_type:?}")),
665 );
666
667 suggestions.push(SuggestedConstraint {
668 check_type: "has_data_type".to_string(),
669 column: profile.column_name.clone(),
670 parameters: params,
671 confidence: 0.85,
672 rationale: format!("Column consistently contains {detected_type:?} values"),
673 priority: SuggestionPriority::Medium,
674 });
675 }
676 }
677
678 suggestions
679 }
680
681 fn name(&self) -> &str {
682 "DataTypeRule"
683 }
684
685 fn description(&self) -> &str {
686 "Suggests data type validation constraints based on detected types"
687 }
688}
689
/// Rule that detects categorical columns from their distinct-value count.
pub struct CardinalityRule {
    /// Distinct-value count up to which cardinality monitoring is suggested.
    categorical_threshold: u64,
    /// Distinct-value count up to which the column is treated as categorical.
    low_cardinality_threshold: u64,
}

impl CardinalityRule {
    /// Creates the rule with the default thresholds `50` (categorical) and
    /// `10` (low cardinality).
    pub fn new() -> Self {
        CardinalityRule::with_thresholds(50, 10)
    }

    /// Creates the rule with custom thresholds; the values are stored as given.
    pub fn with_thresholds(categorical: u64, low_cardinality: u64) -> Self {
        CardinalityRule {
            categorical_threshold: categorical,
            low_cardinality_threshold: low_cardinality,
        }
    }
}

/// The default rule uses the same thresholds as `CardinalityRule::new`.
impl Default for CardinalityRule {
    fn default() -> Self {
        CardinalityRule::new()
    }
}
719
720impl ConstraintSuggestionRule for CardinalityRule {
721 fn apply(&self, profile: &ColumnProfile) -> Vec<SuggestedConstraint> {
722 let mut suggestions = Vec::new();
723 let cardinality = profile.basic_stats.approximate_cardinality;
724 let total_rows = profile.basic_stats.row_count;
725
726 if cardinality <= self.low_cardinality_threshold {
727 suggestions.push(SuggestedConstraint {
728 check_type: "is_categorical".to_string(),
729 column: profile.column_name.clone(),
730 parameters: HashMap::new(),
731 confidence: 0.9,
732 rationale: format!(
733 "Column has only {cardinality} distinct values, suggesting categorical constraint"
734 ),
735 priority: SuggestionPriority::High,
736 });
737
738 if let Some(histogram) = &profile.categorical_histogram {
740 let valid_values: Vec<String> =
741 histogram.buckets.iter().map(|b| b.value.clone()).collect();
742
743 let mut params = HashMap::new();
744 params.insert(
745 "valid_values".to_string(),
746 ConstraintParameter::String(valid_values.join(",")),
747 );
748
749 suggestions.push(SuggestedConstraint {
750 check_type: "is_in_set".to_string(),
751 column: profile.column_name.clone(),
752 parameters: params,
753 confidence: 0.85,
754 rationale: "Column has well-defined categorical values".to_string(),
755 priority: SuggestionPriority::Medium,
756 });
757 }
758 } else if cardinality <= self.categorical_threshold {
759 let mut params = HashMap::new();
760 params.insert(
761 "threshold".to_string(),
762 ConstraintParameter::Integer(cardinality as i64),
763 );
764
765 suggestions.push(SuggestedConstraint {
766 check_type: "has_max_cardinality".to_string(),
767 column: profile.column_name.clone(),
768 parameters: params,
769 confidence: 0.7,
770 rationale: format!(
771 "Column has {cardinality} distinct values, suggesting cardinality monitoring"
772 ),
773 priority: SuggestionPriority::Medium,
774 });
775 }
776
777 if total_rows > 0 && cardinality as f64 / total_rows as f64 > 0.8 {
779 suggestions.push(SuggestedConstraint {
780 check_type: "monitor_cardinality".to_string(),
781 column: profile.column_name.clone(),
782 parameters: HashMap::new(),
783 confidence: 0.6,
784 rationale: "High cardinality might indicate data quality issues".to_string(),
785 priority: SuggestionPriority::Low,
786 });
787 }
788
789 suggestions
790 }
791
792 fn name(&self) -> &str {
793 "CardinalityRule"
794 }
795
796 fn description(&self) -> &str {
797 "Detects categorical columns and suggests cardinality constraints"
798 }
799}
800
// Unit tests for the suggestion engine and the individual rules.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzers::profiler::{
        BasicStatistics, CategoricalBucket, CategoricalHistogram, DetectedDataType,
        NumericDistribution,
    };
    use std::collections::HashMap;

    /// Builds a 1000-row `String` column profile with 500 distinct values.
    ///
    /// `null_percentage` is a fraction (0.01 means 1% nulls); the null count
    /// is derived from it.
    fn create_test_profile(column_name: &str, null_percentage: f64) -> ColumnProfile {
        ColumnProfile {
            column_name: column_name.to_string(),
            data_type: DetectedDataType::String,
            basic_stats: BasicStatistics {
                row_count: 1000,
                null_count: (1000.0 * null_percentage) as u64,
                null_percentage,
                approximate_cardinality: 500,
                min_value: None,
                max_value: None,
                sample_values: vec!["A".to_string(), "B".to_string()],
            },
            categorical_histogram: None,
            numeric_distribution: None,
            profiling_time_ms: 100,
            passes_executed: vec![1],
        }
    }

    /// Builds a 1000-row `Double` column profile with the given bounds, 800
    /// distinct values and a `"P99"` quantile set to 99% of `max_val`.
    fn create_numeric_profile(column_name: &str, min_val: f64, max_val: f64) -> ColumnProfile {
        let mut quantiles = HashMap::new();
        quantiles.insert("P99".to_string(), max_val * 0.99);

        ColumnProfile {
            column_name: column_name.to_string(),
            data_type: DetectedDataType::Double,
            basic_stats: BasicStatistics {
                row_count: 1000,
                null_count: 0,
                null_percentage: 0.0,
                approximate_cardinality: 800,
                min_value: Some(min_val.to_string()),
                max_value: Some(max_val.to_string()),
                sample_values: vec![min_val.to_string(), max_val.to_string()],
            },
            categorical_histogram: None,
            numeric_distribution: Some(NumericDistribution {
                mean: Some((min_val + max_val) / 2.0),
                std_dev: Some(10.0),
                variance: Some(100.0),
                quantiles,
                outlier_count: 0,
                skewness: None,
                kurtosis: None,
            }),
            profiling_time_ms: 100,
            passes_executed: vec![1, 3],
        }
    }

    /// Builds a 1000-row `String` column profile with a four-bucket histogram
    /// (A, B, C, D) and the given approximate cardinality.
    fn create_categorical_profile(column_name: &str, cardinality: u64) -> ColumnProfile {
        let buckets = vec![
            CategoricalBucket {
                value: "A".to_string(),
                count: 400,
            },
            CategoricalBucket {
                value: "B".to_string(),
                count: 300,
            },
            CategoricalBucket {
                value: "C".to_string(),
                count: 200,
            },
            CategoricalBucket {
                value: "D".to_string(),
                count: 100,
            },
        ];

        ColumnProfile {
            column_name: column_name.to_string(),
            data_type: DetectedDataType::String,
            basic_stats: BasicStatistics {
                row_count: 1000,
                null_count: 0,
                null_percentage: 0.0,
                approximate_cardinality: cardinality,
                min_value: None,
                max_value: None,
                sample_values: vec!["A".to_string(), "B".to_string()],
            },
            categorical_histogram: Some(CategoricalHistogram {
                buckets,
                total_count: 1000,
                entropy: 1.5,
                top_values: vec![("A".to_string(), 400), ("B".to_string(), 300)],
            }),
            numeric_distribution: None,
            profiling_time_ms: 100,
            passes_executed: vec![1, 2],
        }
    }

    #[test]
    fn test_completeness_rule_high_completeness() {
        let rule = CompletenessRule::new();
        // 1% nulls gives 99% completeness, above the 0.98 default high threshold.
        let profile = create_test_profile("test_col", 0.01); let suggestions = rule.apply(&profile);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].check_type, "is_complete");
        assert_eq!(suggestions[0].confidence, 0.9);
        assert_eq!(suggestions[0].priority, SuggestionPriority::High);
    }

    #[test]
    fn test_completeness_rule_medium_completeness() {
        let rule = CompletenessRule::new();
        // 5% nulls gives 95% completeness, between the medium and high thresholds.
        let profile = create_test_profile("test_col", 0.05); let suggestions = rule.apply(&profile);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].check_type, "has_completeness");
        assert_eq!(suggestions[0].confidence, 0.8);
        assert_eq!(suggestions[0].priority, SuggestionPriority::Medium);

        // The suggested threshold sits 0.02 below the observed completeness.
        if let Some(ConstraintParameter::Float(threshold)) =
            suggestions[0].parameters.get("threshold")
        {
            assert!(*threshold < 0.95);
            assert!(*threshold > 0.90);
        } else {
            panic!("Expected threshold parameter");
        }
    }

    #[test]
    fn test_completeness_rule_low_completeness() {
        let rule = CompletenessRule::new();
        // 60% nulls gives 40% completeness, below the fixed 0.5 monitoring cutoff.
        let profile = create_test_profile("test_col", 0.6); let suggestions = rule.apply(&profile);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].check_type, "monitor_completeness");
        assert_eq!(suggestions[0].confidence, 0.7);
        assert_eq!(suggestions[0].priority, SuggestionPriority::Critical);
    }

    #[test]
    fn test_suggestion_engine_confidence_filtering() {
        let engine = SuggestionEngine::new()
            .confidence_threshold(0.85)
            .add_rule(Box::new(CompletenessRule::new()));

        // The medium-completeness suggestion has confidence 0.8, below 0.85.
        let profile = create_test_profile("test_col", 0.05); let suggestions = engine.suggest_constraints(&profile);

        assert_eq!(suggestions.len(), 0);
    }

    #[test]
    fn test_suggestion_engine_max_suggestions() {
        let engine = SuggestionEngine::new()
            .max_suggestions_per_column(1)
            .add_rule(Box::new(CompletenessRule::new()));

        let profile = create_test_profile("test_col", 0.01); let suggestions = engine.suggest_constraints(&profile);

        // The limit is an upper bound only.
        assert!(suggestions.len() <= 1);
    }

    #[test]
    fn test_uniqueness_rule_high_uniqueness() {
        let rule = UniquenessRule::new();
        let mut profile = create_test_profile("test_col", 0.0);
        // 980 distinct values in 1000 rows is a 0.98 ratio, above the 0.95 default.
        profile.basic_stats.approximate_cardinality = 980; let suggestions = rule.apply(&profile);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].check_type, "is_unique");
        assert_eq!(suggestions[0].confidence, 0.9);
    }

    #[test]
    fn test_uniqueness_rule_id_column() {
        let rule = UniquenessRule::new();
        let mut profile = create_test_profile("user_id", 0.0);
        // The name contains "id" and the 0.8 ratio exceeds the 0.7 key cutoff.
        profile.basic_stats.approximate_cardinality = 800; let suggestions = rule.apply(&profile);
        assert!(suggestions
            .iter()
            .any(|s| s.check_type == "primary_key_candidate"));
    }

    #[test]
    fn test_pattern_rule_email() {
        let rule = PatternRule::new();
        let mut profile = create_test_profile("email", 0.0);
        profile.basic_stats.sample_values = vec![
            "user@example.com".to_string(),
            "test@domain.org".to_string(),
        ];

        let suggestions = rule.apply(&profile);
        assert!(suggestions
            .iter()
            .any(|s| s.check_type == "matches_email_pattern"));
    }

    #[test]
    fn test_range_rule_numeric() {
        let rule = RangeRule::new();
        // Non-negative minimum with a non-empty range.
        let profile = create_numeric_profile("age", 0.0, 100.0);

        let suggestions = rule.apply(&profile);
        assert!(suggestions.iter().any(|s| s.check_type == "has_min"));
        assert!(suggestions.iter().any(|s| s.check_type == "has_max"));
        assert!(suggestions.iter().any(|s| s.check_type == "is_positive"));
    }

    #[test]
    fn test_data_type_rule_mixed() {
        let rule = DataTypeRule::new();
        let mut profile = create_test_profile("mixed_col", 0.0);
        profile.data_type = DetectedDataType::Mixed;

        let suggestions = rule.apply(&profile);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].check_type, "has_consistent_type");
        assert_eq!(suggestions[0].priority, SuggestionPriority::Critical);
    }

    #[test]
    fn test_cardinality_rule_categorical() {
        let rule = CardinalityRule::new();
        // Four distinct values is at or below the default low threshold of 10.
        let profile = create_categorical_profile("status", 4);

        let suggestions = rule.apply(&profile);
        assert!(suggestions.iter().any(|s| s.check_type == "is_categorical"));
        assert!(suggestions.iter().any(|s| s.check_type == "is_in_set"));
    }

    #[test]
    fn test_suggestion_engine_with_all_rules() {
        let engine = SuggestionEngine::new()
            .add_rule(Box::new(CompletenessRule::new()))
            .add_rule(Box::new(UniquenessRule::new()))
            .add_rule(Box::new(PatternRule::new()))
            .add_rule(Box::new(RangeRule::new()))
            .add_rule(Box::new(DataTypeRule::new()))
            .add_rule(Box::new(CardinalityRule::new()));

        let profile = create_numeric_profile("price", 0.0, 999.99);
        let suggestions = engine.suggest_constraints(&profile);

        assert!(!suggestions.is_empty());

        // The result must be ordered by non-increasing confidence.
        for i in 1..suggestions.len() {
            assert!(suggestions[i - 1].confidence >= suggestions[i].confidence);
        }
    }

    #[test]
    fn test_suggestion_batch_processing() {
        let engine = SuggestionEngine::new().add_rule(Box::new(CompletenessRule::new()));

        let profiles = vec![
            create_test_profile("col1", 0.01),
            create_test_profile("col2", 0.05),
        ];

        // One map entry is expected per distinct column name.
        let batch_results = engine.suggest_constraints_batch(&profiles);
        assert_eq!(batch_results.len(), 2);
        assert!(batch_results.contains_key("col1"));
        assert!(batch_results.contains_key("col2"));
    }
}