1use crate::anomaly::{AnomalyDetector, AnomalyMethod};
6use crate::profiling::DataProfiler;
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct QualityReport {
13 pub file_path: String,
14 pub timestamp: String,
15 pub overall_score: f64,
16 pub categories: QualityCategories,
17 pub issues: Vec<QualityIssue>,
18 pub recommendations: Vec<String>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct QualityCategories {
24 pub completeness: f64,
25 pub accuracy: f64,
26 pub consistency: f64,
27 pub validity: f64,
28 pub uniqueness: f64,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct QualityIssue {
34 pub severity: IssueSeverity,
35 pub category: String,
36 pub description: String,
37 pub affected_rows: Option<usize>,
38 pub affected_columns: Option<Vec<String>>,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub enum IssueSeverity {
44 Critical,
45 High,
46 Medium,
47 Low,
48 Info,
49}
50
51pub struct QualityReportGenerator {
53 profiler: DataProfiler,
54}
55
56impl QualityReportGenerator {
57 pub fn new() -> Self {
58 Self {
59 profiler: DataProfiler::new(),
60 }
61 }
62
63 pub fn generate(&self, data: &[Vec<String>], file_path: &str) -> Result<QualityReport> {
65 let profile = self.profiler.profile(data, file_path)?;
67
68 let completeness = 100.0 - profile.null_percentage;
70 let uniqueness = 100.0 - profile.duplicate_percentage;
71
72 let mut issues = Vec::new();
74 let mut accuracy_score = 100.0;
75
76 for (col_idx, col_profile) in profile.columns.iter().enumerate() {
77 if col_profile.null_percentage > 50.0 {
78 issues.push(QualityIssue {
79 severity: IssueSeverity::High,
80 category: "Completeness".to_string(),
81 description: format!(
82 "Column '{}' has {:.1}% null values",
83 col_profile.name, col_profile.null_percentage
84 ),
85 affected_rows: None,
86 affected_columns: Some(vec![col_profile.name.clone()]),
87 });
88 accuracy_score -= 10.0;
89 }
90
91 if matches!(
93 col_profile.data_type,
94 crate::profiling::DataType::Integer | crate::profiling::DataType::Float
95 ) {
96 let detector = AnomalyDetector::new(AnomalyMethod::ZScore { threshold: 3.0 });
97 if let Ok(anomaly_result) = detector.detect(data, col_idx) {
98 if anomaly_result.anomaly_percentage > 5.0 {
99 issues.push(QualityIssue {
100 severity: IssueSeverity::Medium,
101 category: "Accuracy".to_string(),
102 description: format!(
103 "Column '{}' has {:.1}% anomalies",
104 col_profile.name, anomaly_result.anomaly_percentage
105 ),
106 affected_rows: Some(anomaly_result.total_anomalies),
107 affected_columns: Some(vec![col_profile.name.clone()]),
108 });
109 accuracy_score -= anomaly_result.anomaly_percentage;
110 }
111 }
112 }
113 }
114
115 let consistency = profile.data_quality_score;
116 let validity = 100.0 - (issues.len() as f64 * 5.0).min(50.0);
117
118 let categories = QualityCategories {
119 completeness,
120 accuracy: accuracy_score.max(0.0),
121 consistency,
122 validity,
123 uniqueness,
124 };
125
126 let overall_score = (categories.completeness
127 + categories.accuracy
128 + categories.consistency
129 + categories.validity
130 + categories.uniqueness)
131 / 5.0;
132
133 let recommendations = self.generate_recommendations(&profile, &issues);
135
136 Ok(QualityReport {
137 file_path: file_path.to_string(),
138 timestamp: chrono::Utc::now().to_rfc3339(),
139 overall_score,
140 categories,
141 issues,
142 recommendations,
143 })
144 }
145
146 fn generate_recommendations(
147 &self,
148 profile: &crate::profiling::DataProfile,
149 issues: &[QualityIssue],
150 ) -> Vec<String> {
151 let mut recommendations = Vec::new();
152
153 if profile.null_percentage > 10.0 {
154 recommendations.push(format!(
155 "Consider filling {}% null values using fillna command",
156 profile.null_percentage
157 ));
158 }
159
160 if profile.duplicate_percentage > 5.0 {
161 recommendations.push(format!(
162 "Remove {}% duplicate rows using dedupe command",
163 profile.duplicate_percentage
164 ));
165 }
166
167 for issue in issues {
168 match issue.severity {
169 IssueSeverity::Critical | IssueSeverity::High => {
170 recommendations.push(format!(
171 "Fix {} issue: {}",
172 issue.category, issue.description
173 ));
174 }
175 _ => {}
176 }
177 }
178
179 recommendations
180 }
181
182 pub fn save_report(&self, report: &QualityReport, path: &str) -> Result<()> {
184 let json = serde_json::to_string_pretty(report)?;
185 std::fs::write(path, json)?;
186 Ok(())
187 }
188}