1use crate::common::string;
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13#[serde(tag = "type")]
14pub enum ValidationRule {
15 NotNull,
17 Regex { pattern: String },
19 Range { min: Option<f64>, max: Option<f64> },
21 Enum { values: Vec<String> },
23 Length {
25 min: Option<usize>,
26 max: Option<usize>,
27 },
28 Email,
30 Url,
32 Numeric,
34 Date { format: String },
36 Custom { expression: String },
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct ValidationResult {
43 pub is_valid: bool,
44 pub errors: Vec<ValidationError>,
45 pub warnings: Vec<ValidationWarning>,
46 pub stats: ValidationStats,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct ValidationError {
52 pub row: usize,
53 pub column: String,
54 pub value: String,
55 pub rule: String,
56 pub message: String,
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct ValidationWarning {
62 pub row: usize,
63 pub column: String,
64 pub value: String,
65 pub message: String,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct ValidationStats {
71 pub total_rows: usize,
72 pub valid_rows: usize,
73 pub invalid_rows: usize,
74 pub total_errors: usize,
75 pub total_warnings: usize,
76 pub columns_validated: usize,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct ValidationConfig {
82 pub rules: HashMap<String, Vec<ValidationRule>>,
83 pub strict_mode: bool,
84 pub stop_on_first_error: bool,
85 pub max_errors: Option<usize>,
86}
87
88impl Default for ValidationConfig {
89 fn default() -> Self {
90 Self {
91 rules: HashMap::new(),
92 strict_mode: false,
93 stop_on_first_error: false,
94 max_errors: None,
95 }
96 }
97}
98
99pub struct DataValidator {
101 config: ValidationConfig,
102}
103
104impl DataValidator {
105 pub fn new(config: ValidationConfig) -> Self {
107 Self { config }
108 }
109
110 pub fn from_config_file(path: &str) -> Result<Self> {
112 let content = std::fs::read_to_string(path)?;
113 let config: ValidationConfig = serde_json::from_str(&content)?;
114 Ok(Self::new(config))
115 }
116
117 pub fn validate(&self, data: &[Vec<String>]) -> Result<ValidationResult> {
119 if data.is_empty() {
120 return Ok(ValidationResult {
121 is_valid: true,
122 errors: Vec::new(),
123 warnings: Vec::new(),
124 stats: ValidationStats {
125 total_rows: 0,
126 valid_rows: 0,
127 invalid_rows: 0,
128 total_errors: 0,
129 total_warnings: 0,
130 columns_validated: 0,
131 },
132 });
133 }
134
135 let header = &data[0];
136 let mut errors = Vec::new();
137 let warnings = Vec::new();
138 let mut valid_rows = 0;
139
140 for (row_idx, row) in data.iter().enumerate().skip(1) {
141 let mut row_valid = true;
142
143 for (col_idx, cell_value) in row.iter().enumerate() {
144 if let Some(column_name) = header.get(col_idx) {
145 if let Some(rules) = self.config.rules.get(column_name) {
146 for rule in rules {
147 match self.validate_value(cell_value, rule) {
148 Ok(()) => {} Err(e) => {
150 let error = ValidationError {
151 row: row_idx,
152 column: column_name.clone(),
153 value: cell_value.clone(),
154 rule: format!("{:?}", rule),
155 message: e.to_string(),
156 };
157 errors.push(error);
158 row_valid = false;
159
160 if self.config.stop_on_first_error {
161 break;
162 }
163
164 if let Some(max) = self.config.max_errors {
165 if errors.len() >= max {
166 break;
167 }
168 }
169 }
170 }
171 }
172 }
173 }
174 }
175
176 if row_valid {
177 valid_rows += 1;
178 }
179
180 if self.config.stop_on_first_error && !errors.is_empty() {
181 break;
182 }
183
184 if let Some(max) = self.config.max_errors {
185 if errors.len() >= max {
186 break;
187 }
188 }
189 }
190
191 let total_rows = data.len() - 1; let invalid_rows = total_rows - valid_rows;
193 let is_valid = if self.config.strict_mode {
194 errors.is_empty() && warnings.is_empty()
195 } else {
196 errors.is_empty()
197 };
198
199 let total_errors = errors.len();
200 let total_warnings = warnings.len();
201
202 Ok(ValidationResult {
203 is_valid,
204 errors,
205 warnings,
206 stats: ValidationStats {
207 total_rows,
208 valid_rows,
209 invalid_rows,
210 total_errors,
211 total_warnings,
212 columns_validated: self.config.rules.len(),
213 },
214 })
215 }
216
217 fn validate_value(&self, value: &str, rule: &ValidationRule) -> Result<()> {
219 match rule {
220 ValidationRule::NotNull => {
221 if string::is_empty_or_whitespace(value) {
222 return Err(anyhow::anyhow!("Value cannot be null or empty"));
223 }
224 }
225 ValidationRule::Regex { pattern } => {
226 let re = regex::Regex::new(pattern)?;
227 if !re.is_match(value) {
228 return Err(anyhow::anyhow!("Value does not match pattern: {}", pattern));
229 }
230 }
231 ValidationRule::Range { min, max } => {
232 if let Some(num) = string::to_number(value) {
233 if let Some(min_val) = min {
234 if num < *min_val {
235 return Err(anyhow::anyhow!(
236 "Value {} is below minimum {}",
237 num,
238 min_val
239 ));
240 }
241 }
242 if let Some(max_val) = max {
243 if num > *max_val {
244 return Err(anyhow::anyhow!(
245 "Value {} is above maximum {}",
246 num,
247 max_val
248 ));
249 }
250 }
251 } else {
252 return Err(anyhow::anyhow!("Value is not numeric"));
253 }
254 }
255 ValidationRule::Enum { values } => {
256 if !values.contains(&value.to_string()) {
257 return Err(anyhow::anyhow!(
258 "Value '{}' is not in allowed values: {:?}",
259 value,
260 values
261 ));
262 }
263 }
264 ValidationRule::Length { min, max } => {
265 let len = value.len();
266 if let Some(min_len) = min {
267 if len < *min_len {
268 return Err(anyhow::anyhow!(
269 "Length {} is below minimum {}",
270 len,
271 min_len
272 ));
273 }
274 }
275 if let Some(max_len) = max {
276 if len > *max_len {
277 return Err(anyhow::anyhow!(
278 "Length {} is above maximum {}",
279 len,
280 max_len
281 ));
282 }
283 }
284 }
285 ValidationRule::Email => {
286 let email_regex =
287 regex::Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")?;
288 if !email_regex.is_match(value) {
289 return Err(anyhow::anyhow!("Invalid email format"));
290 }
291 }
292 ValidationRule::Url => {
293 let url_regex = regex::Regex::new(r"^https?://[^\s/$.?#].[^\s]*$")?;
294 if !url_regex.is_match(value) {
295 return Err(anyhow::anyhow!("Invalid URL format"));
296 }
297 }
298 ValidationRule::Numeric => {
299 if !string::is_numeric(value) {
300 return Err(anyhow::anyhow!("Value is not numeric"));
301 }
302 }
303 ValidationRule::Date { format } => {
304 chrono::NaiveDate::parse_from_str(value, format)
305 .map_err(|_| anyhow::anyhow!("Invalid date format for {}", format))?;
306 }
307 ValidationRule::Custom { expression } => {
308 if expression.contains("not_empty") && string::is_empty_or_whitespace(value) {
311 return Err(anyhow::anyhow!("Custom validation failed: {}", expression));
312 }
313 }
314 }
315
316 Ok(())
317 }
318
319 pub fn generate_report(&self, result: &ValidationResult) -> String {
321 let mut report = String::new();
322
323 report.push_str("# Data Validation Report\n\n");
324
325 report.push_str("## Summary\n\n");
327 report.push_str(&format!(
328 "- **Total Rows**: {}\n\
329 - **Valid Rows**: {}\n\
330 - **Invalid Rows**: {}\n\
331 - **Total Errors**: {}\n\
332 - **Total Warnings**: {}\n\
333 - **Columns Validated**: {}\n\
334 - **Overall Status**: {}\n\n",
335 result.stats.total_rows,
336 result.stats.valid_rows,
337 result.stats.invalid_rows,
338 result.stats.total_errors,
339 result.stats.total_warnings,
340 result.stats.columns_validated,
341 if result.is_valid {
342 "✅ PASSED"
343 } else {
344 "❌ FAILED"
345 }
346 ));
347
348 if !result.errors.is_empty() {
350 report.push_str("## Errors\n\n");
351 for error in &result.errors {
352 report.push_str(&format!(
353 "- **Row {}**, Column `{}`: {} (value: `{}`)\n",
354 error.row + 1,
355 error.column,
356 error.message,
357 error.value
358 ));
359 }
360 report.push('\n');
361 }
362
363 if !result.warnings.is_empty() {
365 report.push_str("## Warnings\n\n");
366 for warning in &result.warnings {
367 report.push_str(&format!(
368 "- **Row {}**, Column `{}`: {} (value: `{}`)\n",
369 warning.row + 1,
370 warning.column,
371 warning.message,
372 warning.value
373 ));
374 }
375 report.push('\n');
376 }
377
378 report
379 }
380
381 pub fn save_result(&self, result: &ValidationResult, path: &str) -> Result<()> {
383 let json = serde_json::to_string_pretty(result)?;
384 std::fs::write(path, json)?;
385 Ok(())
386 }
387}
388
389pub fn create_sample_config() -> ValidationConfig {
391 let mut rules = HashMap::new();
392
393 rules.insert(
395 "email".to_string(),
396 vec![ValidationRule::Email, ValidationRule::NotNull],
397 );
398
399 rules.insert(
401 "age".to_string(),
402 vec![
403 ValidationRule::Numeric,
404 ValidationRule::Range {
405 min: Some(0.0),
406 max: Some(150.0),
407 },
408 ],
409 );
410
411 rules.insert(
413 "name".to_string(),
414 vec![
415 ValidationRule::NotNull,
416 ValidationRule::Length {
417 min: Some(1),
418 max: Some(100),
419 },
420 ],
421 );
422
423 rules.insert(
425 "status".to_string(),
426 vec![ValidationRule::Enum {
427 values: vec![
428 "active".to_string(),
429 "inactive".to_string(),
430 "pending".to_string(),
431 ],
432 }],
433 );
434
435 ValidationConfig {
436 rules,
437 strict_mode: false,
438 stop_on_first_error: false,
439 max_errors: Some(1000),
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// Validator with no configured rules; the tests call `validate_value`
    /// directly with an explicit rule.
    fn validator() -> DataValidator {
        DataValidator::new(ValidationConfig::default())
    }

    #[test]
    fn test_validation_not_null() {
        let checker = validator();
        let rule = ValidationRule::NotNull;

        assert!(checker.validate_value("test", &rule).is_ok());

        for blank in ["", " "] {
            assert!(
                checker.validate_value(blank, &rule).is_err(),
                "expected {:?} to be rejected",
                blank
            );
        }
    }

    #[test]
    fn test_validation_numeric() {
        let checker = validator();
        let rule = ValidationRule::Numeric;

        for accepted in ["123", "-45.67"] {
            assert!(
                checker.validate_value(accepted, &rule).is_ok(),
                "expected {:?} to be accepted",
                accepted
            );
        }

        for rejected in ["abc", ""] {
            assert!(
                checker.validate_value(rejected, &rule).is_err(),
                "expected {:?} to be rejected",
                rejected
            );
        }
    }

    #[test]
    fn test_validation_range() {
        let checker = validator();
        let rule = ValidationRule::Range {
            min: Some(0.0),
            max: Some(100.0),
        };

        // Both bounds are inclusive.
        for inside in ["50", "0", "100"] {
            assert!(
                checker.validate_value(inside, &rule).is_ok(),
                "expected {:?} to be accepted",
                inside
            );
        }

        for outside in ["-1", "101"] {
            assert!(
                checker.validate_value(outside, &rule).is_err(),
                "expected {:?} to be rejected",
                outside
            );
        }
    }

    #[test]
    fn test_validation_enum() {
        let checker = validator();
        let rule = ValidationRule::Enum {
            values: vec!["red".to_string(), "green".to_string(), "blue".to_string()],
        };

        for member in ["red", "green"] {
            assert!(
                checker.validate_value(member, &rule).is_ok(),
                "expected {:?} to be accepted",
                member
            );
        }

        assert!(checker.validate_value("yellow", &rule).is_err());
    }
}