term_guard/core/check.rs
1//! Check type and builder for grouping constraints.
2#![allow(deprecated)] // Allow deprecated constraints for backward compatibility
3#![allow(clippy::expect_used)] // Allow expect() in builder methods as they would require API changes
4//!
5//! This module provides the [`Check`] type and [`CheckBuilder`] for creating validation checks.
6//! Starting with v0.2.0, we provide a new unified API through the `builder_extensions` module
7//! that offers improved consistency and performance.
8//!
9//! ## New Unified API Example (v0.2.0+)
10//!
11//! ```rust
12//! use term_guard::core::{Check, Level};
13//! use term_guard::core::builder_extensions::{CompletenessOptions, StatisticalOptions};
14//! use term_guard::constraints::{Assertion, FormatType, FormatOptions};
15//!
16//! # use term_guard::prelude::*;
17//! # fn example() -> Result<Check> {
18//! let check = Check::builder("user_validation")
19//! .level(Level::Error)
20//! // Completeness with options
21//! .completeness("user_id", CompletenessOptions::full().into_constraint_options())
22//! .completeness("email", CompletenessOptions::threshold(0.95).into_constraint_options())
23//! // Format validation
24//! .has_format("email", FormatType::Email, 0.95, FormatOptions::default())
25//! // Combined statistics (single query)
26//! .statistics(
27//! "age",
28//! StatisticalOptions::new()
29//! .min(Assertion::GreaterThanOrEqual(18.0))
30//! .max(Assertion::LessThan(100.0))
31//! .mean(Assertion::Between(25.0, 65.0))
32//! )?
33//! // Convenience method
34//! .primary_key(vec!["user_id"])
35//! .build();
36//! # Ok(check)
37//! # }
38//! ```
39
40use super::{constraint::BoxedConstraint, Constraint, Level};
41use crate::constraints::{
42 ApproxCountDistinctConstraint, Assertion, ColumnCountConstraint, CorrelationConstraint,
43 CustomSqlConstraint, DataTypeConstraint, FormatConstraint, FormatOptions, FormatType,
44 HistogramAssertion, HistogramConstraint, NullHandling, QuantileConstraint, SizeConstraint,
45 UniquenessConstraint, UniquenessOptions, UniquenessType,
46};
47use std::sync::Arc;
48
49/// A validation check containing one or more constraints.
50///
51/// A `Check` groups related constraints together and assigns them a severity level.
52/// Checks are the building blocks of validation suites. When a check runs, all its
53/// constraints are evaluated, and the check fails if any constraint fails.
54///
55/// # Examples
56///
57/// ## Basic Check
58///
59/// ```rust
60/// use term_guard::core::{Check, Level};
61///
62/// let check = Check::builder("user_data_quality")
63/// .level(Level::Error)
64/// .description("Validates user data quality")
65/// .build();
66/// ```
67///
68/// ## Check with Constraints
69///
70/// ```rust
71/// use term_guard::core::{Check, Level, ConstraintOptions};
72/// use term_guard::constraints::{Assertion, UniquenessType, StatisticType, FormatType, FormatOptions};
73///
74/// let check = Check::builder("customer_validation")
75/// .level(Level::Error)
76/// .description("Ensure customer data integrity")
77/// // Completeness checks using unified API
78/// .completeness("customer_id", ConstraintOptions::new().with_threshold(1.0))
79/// .completeness("email", ConstraintOptions::new().with_threshold(0.99))
80/// // Uniqueness checks using unified API
81/// .validates_uniqueness(vec!["customer_id"], 1.0)
82/// .validates_uniqueness(vec!["email", "region"], 1.0)
83/// // Pattern validation using format
84/// .has_format("phone", FormatType::Regex(r"^\+?\d{3}-\d{3}-\d{4}$".to_string()), 0.95, FormatOptions::default())
85/// // Range checks using statistic
86/// .statistic("age", StatisticType::Min, Assertion::GreaterThanOrEqual(18.0))
87/// .statistic("age", StatisticType::Max, Assertion::LessThanOrEqual(120.0))
88/// .build();
89/// ```
90///
91/// ## Data Quality Check
92///
93/// ```rust
94/// use term_guard::core::{Check, Level};
95/// use term_guard::constraints::{Assertion, StatisticType};
96///
97/// let check = Check::builder("data_types_and_formats")
98/// .level(Level::Warning)
99/// // Ensure consistent data types using the unified API
100/// .has_consistent_data_type("order_date", 0.99)
101/// .has_consistent_data_type("product_id", 0.95)
102/// // String length validation
103/// .has_min_length("password", 8)
104/// .has_max_length("username", 20)
105/// // Check for PII
106/// .validates_credit_card("comments", 0.0, true) // Should be 0%
107/// .validates_email("email_field", 0.98)
108/// // Statistical checks
109/// .statistic("order_value", StatisticType::Mean, Assertion::Between(50.0, 500.0))
110/// .statistic("response_time", StatisticType::StandardDeviation, Assertion::LessThan(100.0))
111/// .build();
112/// ```
113///
114/// ## Enhanced Format Validation
115///
116/// ```rust
117/// use term_guard::core::{Check, Level};
118/// use term_guard::constraints::FormatOptions;
119///
120/// let check = Check::builder("enhanced_format_validation")
121/// .level(Level::Error)
122/// // Basic format validation
123/// .validates_email("email", 0.95)
124/// .validates_url("website", 0.90, false)
125/// .validates_phone("phone", 0.85, Some("US"))
126/// // Enhanced format validation with options
127/// .validates_email_with_options(
128/// "secondary_email",
129/// 0.80,
130/// FormatOptions::lenient() // Case insensitive, trimming, nulls allowed
131/// )
132/// .validates_url_with_options(
133/// "dev_url",
134/// 0.75,
135/// true, // allow localhost
136/// FormatOptions::case_insensitive().trim_before_check(true)
137/// )
138/// .validates_regex_with_options(
139/// "product_code",
140/// r"^[A-Z]{2}\d{4}$",
141/// 0.98,
142/// FormatOptions::strict() // Case sensitive, no nulls, no trimming
143/// )
144/// .build();
145/// ```
#[derive(Debug, Clone)]
pub struct Check {
    /// Name identifying this check; exposed through [`Check::name`].
    name: String,
    /// Severity level assigned to this check; exposed through [`Check::level`].
    level: Level,
    /// Optional free-form description of what this check validates (`None` when unset).
    description: Option<String>,
    /// The constraints grouped under this check, held as shared trait objects.
    /// Cloning a `Check` clones the `Arc` handles, not the constraints themselves.
    constraints: Vec<Arc<dyn Constraint>>,
}
157
158impl Check {
159 /// Creates a new builder for constructing a check.
160 ///
161 /// # Arguments
162 ///
163 /// * `name` - The name of the check
164 ///
165 /// # Examples
166 ///
167 /// ```rust
168 /// use term_guard::core::Check;
169 ///
170 /// let builder = Check::builder("data_quality");
171 /// ```
172 pub fn builder(name: impl Into<String>) -> CheckBuilder {
173 CheckBuilder::new(name)
174 }
175
176 /// Returns the name of the check.
177 pub fn name(&self) -> &str {
178 &self.name
179 }
180
181 /// Returns the severity level of the check.
182 pub fn level(&self) -> Level {
183 self.level
184 }
185
186 /// Returns the description of the check if available.
187 pub fn description(&self) -> Option<&str> {
188 self.description.as_deref()
189 }
190
191 /// Returns the constraints in this check.
192 pub fn constraints(&self) -> &[Arc<dyn Constraint>] {
193 &self.constraints
194 }
195}
196
197/// Builder for constructing `Check` instances.
198///
199/// # Examples
200///
201/// ```rust
202/// use term_guard::core::{Check, Level};
203///
204/// let check = Check::builder("completeness_check")
205/// .level(Level::Error)
206/// .description("Ensures all required fields are present")
207/// .build();
208/// ```
#[derive(Debug)]
pub struct CheckBuilder {
    /// Name supplied to [`CheckBuilder::new`].
    name: String,
    /// Severity level; initialised to `Level::default()` and changed via [`CheckBuilder::level`].
    level: Level,
    /// Optional description; `None` until [`CheckBuilder::description`] is called.
    description: Option<String>,
    /// Constraints accumulated so far, in the order they were added.
    constraints: Vec<Arc<dyn Constraint>>,
}
216
217impl CheckBuilder {
218 /// Creates a new check builder with the given name.
219 pub fn new(name: impl Into<String>) -> Self {
220 Self {
221 name: name.into(),
222 level: Level::default(),
223 description: None,
224 constraints: Vec::new(),
225 }
226 }
227
228 /// Sets the severity level for the check.
229 ///
230 /// # Arguments
231 ///
232 /// * `level` - The severity level
233 ///
234 /// # Examples
235 ///
236 /// ```rust
237 /// use term_guard::core::{Check, Level};
238 ///
239 /// let check = Check::builder("critical_check")
240 /// .level(Level::Error)
241 /// .build();
242 /// ```
243 pub fn level(mut self, level: Level) -> Self {
244 self.level = level;
245 self
246 }
247
248 /// Sets the description for the check.
249 ///
250 /// # Arguments
251 ///
252 /// * `description` - A description of what this check validates
253 pub fn description(mut self, description: impl Into<String>) -> Self {
254 self.description = Some(description.into());
255 self
256 }
257
258 /// Adds a constraint to the check.
259 ///
260 /// # Arguments
261 ///
262 /// * `constraint` - The constraint to add
263 pub fn constraint(mut self, constraint: impl Constraint + 'static) -> Self {
264 self.constraints.push(Arc::new(constraint));
265 self
266 }
267
268 /// Adds a boxed constraint to the check.
269 ///
270 /// # Arguments
271 ///
272 /// * `constraint` - The boxed constraint to add
273 pub fn boxed_constraint(mut self, constraint: BoxedConstraint) -> Self {
274 self.constraints.push(Arc::from(constraint));
275 self
276 }
277
278 /// Adds an Arc constraint to the check.
279 ///
280 /// # Arguments
281 ///
282 /// * `constraint` - The Arc constraint to add
283 pub fn arc_constraint(mut self, constraint: Arc<dyn Constraint>) -> Self {
284 self.constraints.push(constraint);
285 self
286 }
287
288 /// Adds multiple constraints to the check.
289 ///
290 /// # Arguments
291 ///
292 /// * `constraints` - An iterator of constraints to add
293 pub fn constraints<I>(mut self, constraints: I) -> Self
294 where
295 I: IntoIterator<Item = BoxedConstraint>,
296 {
297 self.constraints
298 .extend(constraints.into_iter().map(Arc::from));
299 self
300 }
301
302 // Builder methods
303
304 /// Adds a constraint that checks the dataset size (row count).
305 ///
306 /// # Arguments
307 ///
308 /// * `assertion` - The assertion to evaluate against the row count
309 ///
310 /// # Examples
311 ///
312 /// ```rust
313 /// use term_guard::core::{Check, Level};
314 /// use term_guard::constraints::Assertion;
315 ///
316 /// let check = Check::builder("size_validation")
317 /// .level(Level::Error)
318 /// .has_size(Assertion::GreaterThan(1000.0))
319 /// .build();
320 /// ```
321 pub fn has_size(mut self, assertion: Assertion) -> Self {
322 self.constraints
323 .push(Arc::new(SizeConstraint::new(assertion)));
324 self
325 }
326
327 /// Adds a constraint that checks the number of columns in the dataset.
328 ///
329 /// This constraint validates that the dataset has the expected number of columns
330 /// by examining the schema.
331 ///
332 /// # Arguments
333 ///
334 /// * `assertion` - The assertion to evaluate against the column count
335 ///
336 /// # Examples
337 ///
338 /// ```rust
339 /// use term_guard::core::{Check, Level};
340 /// use term_guard::constraints::Assertion;
341 ///
342 /// let check = Check::builder("schema_validation")
343 /// .level(Level::Error)
344 /// .has_column_count(Assertion::Equals(15.0))
345 /// .has_column_count(Assertion::GreaterThanOrEqual(10.0))
346 /// .build();
347 /// ```
348 pub fn has_column_count(mut self, assertion: Assertion) -> Self {
349 self.constraints
350 .push(Arc::new(ColumnCountConstraint::new(assertion)));
351 self
352 }
353
354 /// Adds a constraint that checks the approximate count of distinct values in a column.
355 ///
356 /// Uses DataFusion's APPROX_DISTINCT function which provides an approximate count
357 /// using HyperLogLog algorithm. This is much faster than exact COUNT(DISTINCT)
358 /// for large datasets while maintaining accuracy within 2-3% error margin.
359 ///
360 /// # Arguments
361 ///
362 /// * `column` - The column to count distinct values in
363 /// * `assertion` - The assertion to evaluate against the approximate distinct count
364 ///
365 /// # Examples
366 ///
367 /// ```rust
368 /// use term_guard::core::{Check, Level};
369 /// use term_guard::constraints::Assertion;
370 ///
371 /// let check = Check::builder("cardinality_validation")
372 /// .level(Level::Warning)
373 /// // High cardinality check (e.g., user IDs)
374 /// .has_approx_count_distinct("user_id", Assertion::GreaterThan(1000000.0))
375 /// // Low cardinality check (e.g., country codes)
376 /// .has_approx_count_distinct("country_code", Assertion::LessThan(200.0))
377 /// .build();
378 /// ```
379 pub fn has_approx_count_distinct(
380 mut self,
381 column: impl Into<String>,
382 assertion: Assertion,
383 ) -> Self {
384 self.constraints
385 .push(Arc::new(ApproxCountDistinctConstraint::new(
386 column, assertion,
387 )));
388 self
389 }
390
391 /// Adds a constraint that checks an approximate quantile of a column.
392 ///
393 /// # Arguments
394 ///
395 /// * `column` - The column to check
396 /// * `quantile` - The quantile to compute (0.0 to 1.0)
397 /// * `assertion` - The assertion to evaluate against the quantile value
398 ///
399 /// # Examples
400 ///
401 /// ```rust
402 /// use term_guard::core::{Check, Level};
403 /// use term_guard::constraints::Assertion;
404 ///
405 /// let check = Check::builder("quantile_validation")
406 /// .level(Level::Warning)
407 /// .has_approx_quantile("response_time", 0.95, Assertion::LessThan(1000.0))
408 /// .build();
409 /// ```
410 ///
411 /// # Panics
412 ///
413 /// Panics if quantile is not between 0.0 and 1.0
414 pub fn has_approx_quantile(
415 mut self,
416 column: impl Into<String>,
417 quantile: f64,
418 assertion: Assertion,
419 ) -> Self {
420 self.constraints.push(Arc::new(
421 QuantileConstraint::percentile(column, quantile, assertion)
422 .expect("Invalid quantile parameters"),
423 ));
424 self
425 }
426
427 /// Adds a constraint that checks the mutual information between two columns.
428 ///
429 /// # Arguments
430 ///
431 /// * `column1` - The first column
432 /// * `column2` - The second column
433 /// * `assertion` - The assertion to evaluate against the mutual information
434 ///
435 /// # Examples
436 ///
437 /// ```rust
438 /// use term_guard::core::{Check, Level};
439 /// use term_guard::constraints::Assertion;
440 ///
441 /// let check = Check::builder("mi_validation")
442 /// .level(Level::Info)
443 /// .has_mutual_information("feature1", "feature2", Assertion::GreaterThan(0.5))
444 /// .build();
445 /// ```
446 pub fn has_mutual_information(
447 mut self,
448 column1: impl Into<String>,
449 column2: impl Into<String>,
450 assertion: Assertion,
451 ) -> Self {
452 self.constraints.push(Arc::new(
453 CorrelationConstraint::mutual_information(column1, column2, 10, assertion)
454 .expect("Invalid mutual information parameters"),
455 ));
456 self
457 }
458
459 /// Adds a constraint that checks the correlation between two columns.
460 ///
461 /// # Arguments
462 ///
463 /// * `column1` - The first column
464 /// * `column2` - The second column
465 /// * `assertion` - The assertion to evaluate against the correlation
466 ///
467 /// # Examples
468 ///
469 /// ```rust
470 /// use term_guard::core::{Check, Level};
471 /// use term_guard::constraints::Assertion;
472 ///
473 /// let check = Check::builder("correlation_validation")
474 /// .level(Level::Warning)
475 /// .has_correlation("height", "weight", Assertion::GreaterThan(0.7))
476 /// .build();
477 /// ```
478 pub fn has_correlation(
479 mut self,
480 column1: impl Into<String>,
481 column2: impl Into<String>,
482 assertion: Assertion,
483 ) -> Self {
484 self.constraints.push(Arc::new(
485 CorrelationConstraint::pearson(column1, column2, assertion)
486 .expect("Invalid correlation parameters"),
487 ));
488 self
489 }
490
491 /// Adds a constraint that checks minimum string length.
492 ///
493 /// **Note**: Consider using the new `length()` method for more flexibility.
494 ///
495 /// # Arguments
496 ///
497 /// * `column` - The column to check
498 /// * `min_length` - The minimum acceptable string length
499 ///
500 /// # Examples
501 ///
502 /// ```rust
503 /// use term_guard::core::{Check, Level};
504 /// use term_guard::constraints::LengthAssertion;
505 ///
506 /// // Using the convenience method:
507 /// let check = Check::builder("password_validation")
508 /// .level(Level::Error)
509 /// .has_min_length("password", 8)
510 /// .build();
511 ///
512 /// // Or using the unified length API:
513 /// let check = Check::builder("password_validation")
514 /// .level(Level::Error)
515 /// .length("password", LengthAssertion::Min(8))
516 /// .build();
517 /// ```
518 pub fn has_min_length(mut self, column: impl Into<String>, min_length: usize) -> Self {
519 use crate::constraints::LengthConstraint;
520 self.constraints
521 .push(Arc::new(LengthConstraint::min(column, min_length)));
522 self
523 }
524
525 /// Adds a constraint that checks maximum string length.
526 ///
527 /// # Arguments
528 ///
529 /// * `column` - The column to check
530 /// * `max_length` - The maximum acceptable string length
531 ///
532 /// # Examples
533 ///
534 /// ```rust
535 /// use term_guard::core::{Check, Level};
536 ///
537 /// let check = Check::builder("username_validation")
538 /// .level(Level::Error)
539 /// .has_max_length("username", 20)
540 /// .build();
541 /// ```
542 pub fn has_max_length(mut self, column: impl Into<String>, max_length: usize) -> Self {
543 use crate::constraints::LengthConstraint;
544 self.constraints
545 .push(Arc::new(LengthConstraint::max(column, max_length)));
546 self
547 }
548
549 /// Adds a constraint that checks string length is between bounds (inclusive).
550 ///
551 /// # Arguments
552 ///
553 /// * `column` - The column to check
554 /// * `min_length` - The minimum acceptable string length
555 /// * `max_length` - The maximum acceptable string length
556 ///
557 /// # Examples
558 ///
559 /// ```rust
560 /// use term_guard::core::{Check, Level};
561 ///
562 /// let check = Check::builder("description_validation")
563 /// .level(Level::Warning)
564 /// .has_length_between("description", 10, 500)
565 /// .build();
566 /// ```
567 pub fn has_length_between(
568 mut self,
569 column: impl Into<String>,
570 min_length: usize,
571 max_length: usize,
572 ) -> Self {
573 use crate::constraints::LengthConstraint;
574 self.constraints.push(Arc::new(LengthConstraint::between(
575 column, min_length, max_length,
576 )));
577 self
578 }
579
580 /// Adds a constraint that checks string has exact length.
581 ///
582 /// # Arguments
583 ///
584 /// * `column` - The column to check
585 /// * `length` - The required exact string length
586 ///
587 /// # Examples
588 ///
589 /// ```rust
590 /// use term_guard::core::{Check, Level};
591 ///
592 /// let check = Check::builder("code_validation")
593 /// .level(Level::Error)
594 /// .has_exact_length("verification_code", 6)
595 /// .build();
596 /// ```
597 pub fn has_exact_length(mut self, column: impl Into<String>, length: usize) -> Self {
598 use crate::constraints::LengthConstraint;
599 self.constraints
600 .push(Arc::new(LengthConstraint::exactly(column, length)));
601 self
602 }
603
604 /// Adds a constraint that checks strings are not empty.
605 ///
606 /// # Arguments
607 ///
608 /// * `column` - The column to check
609 ///
610 /// # Examples
611 ///
612 /// ```rust
613 /// use term_guard::core::{Check, Level};
614 ///
615 /// let check = Check::builder("name_validation")
616 /// .level(Level::Error)
617 /// .is_not_empty("name")
618 /// .build();
619 /// ```
620 pub fn is_not_empty(mut self, column: impl Into<String>) -> Self {
621 use crate::constraints::LengthConstraint;
622 self.constraints
623 .push(Arc::new(LengthConstraint::not_empty(column)));
624 self
625 }
626
627 /// Adds a constraint that checks data type consistency.
628 ///
629 /// This analyzes the actual data types present in a column and reports on consistency,
630 /// helping identify columns with mixed types.
631 ///
632 /// # Arguments
633 ///
634 /// * `column` - The column to check
635 /// * `threshold` - The minimum ratio of values that must have the most common type (0.0 to 1.0)
636 ///
637 /// # Examples
638 ///
639 /// ```rust
640 /// use term_guard::core::{Check, Level};
641 ///
642 /// let check = Check::builder("consistency_validation")
643 /// .level(Level::Warning)
644 /// .has_consistent_data_type("user_id", 0.95)
645 /// .build();
646 /// ```
647 ///
648 /// # Panics
649 ///
650 /// Panics if threshold is not between 0.0 and 1.0
651 pub fn has_consistent_data_type(mut self, column: impl Into<String>, threshold: f64) -> Self {
652 self.constraints.push(Arc::new(
653 DataTypeConstraint::type_consistency(column, threshold)
654 .expect("Invalid data type consistency parameters"),
655 ));
656 self
657 }
658
659 /// Adds a constraint that evaluates a custom SQL expression.
660 ///
661 /// This allows users to define custom validation logic using SQL expressions.
662 /// The expression should evaluate to a boolean value for each row.
663 /// For safety, the expression cannot contain data-modifying operations.
664 ///
665 /// # Arguments
666 ///
667 /// * `sql_expression` - The SQL expression to evaluate (must return boolean)
668 /// * `hint` - Optional hint message to provide context when the constraint fails
669 ///
670 /// # Examples
671 ///
672 /// ```rust
673 /// use term_guard::core::{Check, Level};
674 ///
675 /// let check = Check::builder("business_rules")
676 /// .level(Level::Error)
677 /// .satisfies("price > 0 AND price < 1000000", Some("Price must be positive and reasonable"))
678 /// .satisfies("order_date <= ship_date", Some("Orders cannot ship before being placed"))
679 /// .build();
680 /// ```
681 ///
682 /// # Panics
683 ///
684 /// Panics if the SQL expression contains dangerous operations like DROP, DELETE, UPDATE, etc.
685 pub fn satisfies(
686 mut self,
687 sql_expression: impl Into<String>,
688 hint: Option<impl Into<String>>,
689 ) -> Self {
690 self.constraints.push(Arc::new(
691 CustomSqlConstraint::new(sql_expression, hint).expect("Invalid SQL expression"),
692 ));
693 self
694 }
695
696 /// Adds a constraint that analyzes value distribution and applies custom assertions.
697 ///
698 /// This constraint computes a histogram of value frequencies in the specified column
699 /// and allows custom assertion functions to validate distribution characteristics.
700 ///
701 /// # Arguments
702 ///
703 /// * `column` - The column to analyze
704 /// * `assertion` - The assertion function to apply to the histogram
705 ///
706 /// # Examples
707 ///
708 /// ```rust
709 /// use term_guard::core::{Check, Level};
710 /// use term_guard::constraints::Histogram;
711 /// use std::sync::Arc;
712 ///
713 /// let check = Check::builder("distribution_validation")
714 /// .level(Level::Warning)
715 /// // No single value should dominate
716 /// .has_histogram("status", Arc::new(|hist: &Histogram| {
717 /// hist.most_common_ratio() < 0.5
718 /// }))
719 /// // Check expected number of categories
720 /// .has_histogram("category", Arc::new(|hist| {
721 /// hist.bucket_count() >= 5 && hist.bucket_count() <= 10
722 /// }))
723 /// .build();
724 /// ```
725 pub fn has_histogram(
726 mut self,
727 column: impl Into<String>,
728 assertion: HistogramAssertion,
729 ) -> Self {
730 self.constraints
731 .push(Arc::new(HistogramConstraint::new(column, assertion)));
732 self
733 }
734
735 /// Adds a constraint that analyzes value distribution with a custom description.
736 ///
737 /// This is similar to `has_histogram` but allows providing a description of what
738 /// the assertion checks, which is useful for error messages.
739 ///
740 /// # Arguments
741 ///
742 /// * `column` - The column to analyze
743 /// * `assertion` - The assertion function to apply to the histogram
744 /// * `description` - A description of what the assertion checks
745 ///
746 /// # Examples
747 ///
748 /// ```rust
749 /// use term_guard::core::{Check, Level};
750 /// use term_guard::constraints::Histogram;
751 /// use std::sync::Arc;
752 ///
753 /// let check = Check::builder("distribution_validation")
754 /// .level(Level::Error)
755 /// .has_histogram_with_description(
756 /// "age",
757 /// Arc::new(|hist: &Histogram| hist.is_roughly_uniform(2.0)),
758 /// "age distribution is roughly uniform"
759 /// )
760 /// .build();
761 /// ```
762 pub fn has_histogram_with_description(
763 mut self,
764 column: impl Into<String>,
765 assertion: HistogramAssertion,
766 description: impl Into<String>,
767 ) -> Self {
768 self.constraints
769 .push(Arc::new(HistogramConstraint::new_with_description(
770 column,
771 assertion,
772 description,
773 )));
774 self
775 }
776
777 // ========================================================================
778 // NEW UNIFIED FORMAT VALIDATION METHODS
779 // ========================================================================
780
781 /// Adds a general format validation constraint with full configuration options.
782 ///
783 /// This is the most flexible format validation method, supporting all format types
784 /// and configuration options through the unified FormatConstraint API.
785 ///
786 /// # Arguments
787 ///
788 /// * `column` - The column to validate
789 /// * `format` - The format type to validate against
790 /// * `threshold` - The minimum ratio of values that must match (0.0 to 1.0)
791 /// * `options` - Configuration options for the validation
792 ///
793 /// # Examples
794 ///
795 /// ```rust
796 /// use term_guard::core::{Check, Level};
797 /// use term_guard::constraints::{FormatType, FormatOptions};
798 ///
799 /// let check = Check::builder("format_validation")
800 /// .level(Level::Error)
801 /// // Custom regex with case-insensitive matching
802 /// .has_format(
803 /// "phone",
804 /// FormatType::Regex(r"^\+?\d{3}-\d{3}-\d{4}$".to_string()),
805 /// 0.95,
806 /// FormatOptions::default().case_sensitive(false).trim_before_check(true)
807 /// )
808 /// // Email validation with custom options
809 /// .has_format(
810 /// "email",
811 /// FormatType::Email,
812 /// 0.99,
813 /// FormatOptions::default().null_is_valid(true)
814 /// )
815 /// // UUID validation
816 /// .has_format(
817 /// "user_id",
818 /// FormatType::UUID,
819 /// 1.0,
820 /// FormatOptions::default()
821 /// )
822 /// .build();
823 /// ```
824 ///
825 /// # Errors
826 ///
827 /// Returns error if column name is invalid, threshold is out of range,
828 /// or regex pattern is invalid.
829 pub fn has_format(
830 mut self,
831 column: impl Into<String>,
832 format: FormatType,
833 threshold: f64,
834 options: FormatOptions,
835 ) -> Self {
836 self.constraints.push(Arc::new(
837 FormatConstraint::new(column, format, threshold, options)
838 .expect("Invalid column, format, threshold, or options"),
839 ));
840 self
841 }
842
843 /// Adds a regex pattern validation constraint.
844 ///
845 /// This is a convenience method for `has_format()` with `FormatType::Regex`.
846 ///
847 /// # Arguments
848 ///
849 /// * `column` - The column to validate
850 /// * `pattern` - The regular expression pattern
851 /// * `threshold` - The minimum ratio of values that must match (0.0 to 1.0)
852 ///
853 /// # Examples
854 ///
855 /// ```rust
856 /// use term_guard::core::{Check, Level};
857 ///
858 /// let check = Check::builder("regex_validation")
859 /// .level(Level::Error)
860 /// .validates_regex("phone", r"^\+?\d{3}-\d{3}-\d{4}$", 0.95)
861 /// .validates_regex("product_code", r"^[A-Z]{2}\d{6}$", 1.0)
862 /// .build();
863 /// ```
864 ///
865 /// # Errors
866 ///
867 /// Returns error if column name is invalid, threshold is out of range,
868 /// or regex pattern is invalid.
869 pub fn validates_regex(
870 mut self,
871 column: impl Into<String>,
872 pattern: impl Into<String>,
873 threshold: f64,
874 ) -> Self {
875 self.constraints.push(Arc::new(
876 FormatConstraint::regex(column, pattern, threshold)
877 .expect("Invalid column, pattern, or threshold"),
878 ));
879 self
880 }
881
882 /// Adds an email address validation constraint.
883 ///
884 /// This is a convenience method for `has_format()` with `FormatType::Email`.
885 ///
886 /// # Arguments
887 ///
888 /// * `column` - The column to validate
889 /// * `threshold` - The minimum ratio of values that must be valid emails (0.0 to 1.0)
890 ///
891 /// # Examples
892 ///
893 /// ```rust
894 /// use term_guard::core::{Check, Level};
895 ///
896 /// let check = Check::builder("email_validation")
897 /// .level(Level::Error)
898 /// .validates_email("primary_email", 0.99)
899 /// .validates_email("secondary_email", 0.80)
900 /// .build();
901 /// ```
902 ///
903 /// # Errors
904 ///
905 /// Returns error if column name is invalid or threshold is out of range.
906 pub fn validates_email(mut self, column: impl Into<String>, threshold: f64) -> Self {
907 self.constraints.push(Arc::new(
908 FormatConstraint::email(column, threshold).expect("Invalid column or threshold"),
909 ));
910 self
911 }
912
913 /// Adds a URL validation constraint.
914 ///
915 /// This is a convenience method for `has_format()` with `FormatType::Url`.
916 ///
917 /// # Arguments
918 ///
919 /// * `column` - The column to validate
920 /// * `threshold` - The minimum ratio of values that must be valid URLs (0.0 to 1.0)
921 /// * `allow_localhost` - Whether to allow localhost URLs
922 ///
923 /// # Examples
924 ///
925 /// ```rust
926 /// use term_guard::core::{Check, Level};
927 ///
928 /// let check = Check::builder("url_validation")
929 /// .level(Level::Error)
930 /// .validates_url("website", 0.90, false)
931 /// .validates_url("dev_endpoint", 0.80, true)
932 /// .build();
933 /// ```
934 ///
935 /// # Errors
936 ///
937 /// Returns error if column name is invalid or threshold is out of range.
938 pub fn validates_url(
939 mut self,
940 column: impl Into<String>,
941 threshold: f64,
942 allow_localhost: bool,
943 ) -> Self {
944 self.constraints.push(Arc::new(
945 FormatConstraint::url(column, threshold, allow_localhost)
946 .expect("Invalid column or threshold"),
947 ));
948 self
949 }
950
951 /// Adds a credit card number detection constraint.
952 ///
953 /// This is a convenience method for `has_format()` with `FormatType::CreditCard`.
954 /// Note: For PII detection, you typically want a low threshold (e.g., 0.01 or 0.05)
955 /// to catch any potential credit card numbers.
956 ///
957 /// # Arguments
958 ///
959 /// * `column` - The column to validate
960 /// * `threshold` - The maximum ratio of values that can be credit card numbers (0.0 to 1.0)
961 /// * `detect_only` - If true, optimizes for detection; if false, for validation
962 ///
963 /// # Examples
964 ///
965 /// ```rust
966 /// use term_guard::core::{Check, Level};
967 ///
968 /// let check = Check::builder("pii_detection")
969 /// .level(Level::Error)
970 /// // PII detection - should find very few or no credit cards
971 /// .validates_credit_card("comments", 0.01, true)
972 /// .validates_credit_card("description", 0.0, true)
973 /// // Credit card validation - most values should be valid credit cards
974 /// .validates_credit_card("payment_info", 0.95, false)
975 /// .build();
976 /// ```
977 ///
978 /// # Errors
979 ///
980 /// Returns error if column name is invalid or threshold is out of range.
981 pub fn validates_credit_card(
982 mut self,
983 column: impl Into<String>,
984 threshold: f64,
985 detect_only: bool,
986 ) -> Self {
987 self.constraints.push(Arc::new(
988 FormatConstraint::credit_card(column, threshold, detect_only)
989 .expect("Invalid column or threshold"),
990 ));
991 self
992 }
993
994 /// Adds a phone number validation constraint.
995 ///
996 /// This is a convenience method for `has_format()` with `FormatType::Phone`.
997 ///
998 /// # Arguments
999 ///
1000 /// * `column` - The column to validate
1001 /// * `threshold` - The minimum ratio of values that must be valid phone numbers (0.0 to 1.0)
1002 /// * `country` - Optional country code for country-specific validation (e.g., "US", "CA")
1003 ///
1004 /// # Examples
1005 ///
1006 /// ```rust
1007 /// use term_guard::core::{Check, Level};
1008 ///
1009 /// let check = Check::builder("phone_validation")
1010 /// .level(Level::Error)
1011 /// .validates_phone("phone", 0.95, Some("US"))
1012 /// .validates_phone("international_phone", 0.90, None)
1013 /// .build();
1014 /// ```
1015 ///
1016 /// # Errors
1017 ///
1018 /// Returns error if column name is invalid or threshold is out of range.
1019 pub fn validates_phone(
1020 mut self,
1021 column: impl Into<String>,
1022 threshold: f64,
1023 country: Option<&str>,
1024 ) -> Self {
1025 self.constraints.push(Arc::new(
1026 FormatConstraint::phone(column, threshold, country.map(|s| s.to_string()))
1027 .expect("Invalid column or threshold"),
1028 ));
1029 self
1030 }
1031
1032 /// Adds a postal code validation constraint.
1033 ///
1034 /// This is a convenience method for `has_format()` with `FormatType::PostalCode`.
1035 ///
1036 /// # Arguments
1037 ///
1038 /// * `column` - The column to validate
1039 /// * `threshold` - The minimum ratio of values that must be valid postal codes (0.0 to 1.0)
1040 /// * `country` - Country code for country-specific validation (e.g., "US", "CA", "UK")
1041 ///
1042 /// # Examples
1043 ///
1044 /// ```rust
1045 /// use term_guard::core::{Check, Level};
1046 ///
1047 /// let check = Check::builder("postal_code_validation")
1048 /// .level(Level::Error)
1049 /// .validates_postal_code("zip_code", 0.98, "US")
1050 /// .validates_postal_code("postal_code", 0.95, "CA")
1051 /// .build();
1052 /// ```
1053 ///
1054 /// # Errors
1055 ///
1056 /// Returns error if column name is invalid or threshold is out of range.
1057 pub fn validates_postal_code(
1058 mut self,
1059 column: impl Into<String>,
1060 threshold: f64,
1061 country: &str,
1062 ) -> Self {
1063 self.constraints.push(Arc::new(
1064 FormatConstraint::postal_code(column, threshold, country)
1065 .expect("Invalid column or threshold"),
1066 ));
1067 self
1068 }
1069
1070 /// Adds a UUID validation constraint.
1071 ///
1072 /// This is a convenience method for `has_format()` with `FormatType::UUID`.
1073 ///
1074 /// # Arguments
1075 ///
1076 /// * `column` - The column to validate
1077 /// * `threshold` - The minimum ratio of values that must be valid UUIDs (0.0 to 1.0)
1078 ///
1079 /// # Examples
1080 ///
1081 /// ```rust
1082 /// use term_guard::core::{Check, Level};
1083 ///
1084 /// let check = Check::builder("uuid_validation")
1085 /// .level(Level::Error)
1086 /// .validates_uuid("user_id", 1.0)
1087 /// .validates_uuid("session_id", 0.99)
1088 /// .build();
1089 /// ```
1090 ///
1091 /// # Errors
1092 ///
1093 /// Returns error if column name is invalid or threshold is out of range.
1094 pub fn validates_uuid(mut self, column: impl Into<String>, threshold: f64) -> Self {
1095 self.constraints.push(Arc::new(
1096 FormatConstraint::uuid(column, threshold).expect("Invalid column or threshold"),
1097 ));
1098 self
1099 }
1100
1101 /// Adds an IPv4 address validation constraint.
1102 ///
1103 /// This is a convenience method for `has_format()` with `FormatType::IPv4`.
1104 ///
1105 /// # Arguments
1106 ///
1107 /// * `column` - The column to validate
1108 /// * `threshold` - The minimum ratio of values that must be valid IPv4 addresses (0.0 to 1.0)
1109 ///
1110 /// # Examples
1111 ///
1112 /// ```rust
1113 /// use term_guard::core::{Check, Level};
1114 ///
1115 /// let check = Check::builder("ip_validation")
1116 /// .level(Level::Error)
1117 /// .validates_ipv4("client_ip", 0.98)
1118 /// .validates_ipv4("server_ip", 1.0)
1119 /// .build();
1120 /// ```
1121 ///
1122 /// # Errors
1123 ///
1124 /// Returns error if column name is invalid or threshold is out of range.
1125 pub fn validates_ipv4(mut self, column: impl Into<String>, threshold: f64) -> Self {
1126 self.constraints.push(Arc::new(
1127 FormatConstraint::ipv4(column, threshold).expect("Invalid column or threshold"),
1128 ));
1129 self
1130 }
1131
1132 /// Adds an IPv6 address validation constraint.
1133 ///
1134 /// This is a convenience method for `has_format()` with `FormatType::IPv6`.
1135 ///
1136 /// # Arguments
1137 ///
1138 /// * `column` - The column to validate
1139 /// * `threshold` - The minimum ratio of values that must be valid IPv6 addresses (0.0 to 1.0)
1140 ///
1141 /// # Examples
1142 ///
1143 /// ```rust
1144 /// use term_guard::core::{Check, Level};
1145 ///
1146 /// let check = Check::builder("ipv6_validation")
1147 /// .level(Level::Error)
1148 /// .validates_ipv6("client_ipv6", 0.95)
1149 /// .build();
1150 /// ```
1151 ///
1152 /// # Errors
1153 ///
1154 /// Returns error if column name is invalid or threshold is out of range.
1155 pub fn validates_ipv6(mut self, column: impl Into<String>, threshold: f64) -> Self {
1156 self.constraints.push(Arc::new(
1157 FormatConstraint::ipv6(column, threshold).expect("Invalid column or threshold"),
1158 ));
1159 self
1160 }
1161
1162 /// Adds a JSON format validation constraint.
1163 ///
1164 /// This is a convenience method for `has_format()` with `FormatType::Json`.
1165 ///
1166 /// # Arguments
1167 ///
1168 /// * `column` - The column to validate
1169 /// * `threshold` - The minimum ratio of values that must be valid JSON (0.0 to 1.0)
1170 ///
1171 /// # Examples
1172 ///
1173 /// ```rust
1174 /// use term_guard::core::{Check, Level};
1175 ///
1176 /// let check = Check::builder("json_validation")
1177 /// .level(Level::Error)
1178 /// .validates_json("metadata", 0.99)
1179 /// .validates_json("config", 1.0)
1180 /// .build();
1181 /// ```
1182 ///
1183 /// # Errors
1184 ///
1185 /// Returns error if column name is invalid or threshold is out of range.
1186 pub fn validates_json(mut self, column: impl Into<String>, threshold: f64) -> Self {
1187 self.constraints.push(Arc::new(
1188 FormatConstraint::json(column, threshold).expect("Invalid column or threshold"),
1189 ));
1190 self
1191 }
1192
1193 /// Adds an ISO 8601 datetime validation constraint.
1194 ///
1195 /// This is a convenience method for `has_format()` with `FormatType::Iso8601DateTime`.
1196 ///
1197 /// # Arguments
1198 ///
1199 /// * `column` - The column to validate
1200 /// * `threshold` - The minimum ratio of values that must be valid ISO 8601 datetimes (0.0 to 1.0)
1201 ///
1202 /// # Examples
1203 ///
1204 /// ```rust
1205 /// use term_guard::core::{Check, Level};
1206 ///
1207 /// let check = Check::builder("datetime_validation")
1208 /// .level(Level::Error)
1209 /// .validates_iso8601_datetime("order_date", 1.0)
1210 /// .validates_iso8601_datetime("modified_date", 0.98)
1211 /// .build();
1212 /// ```
1213 ///
1214 /// # Errors
1215 ///
1216 /// Returns error if column name is invalid or threshold is out of range.
1217 pub fn validates_iso8601_datetime(mut self, column: impl Into<String>, threshold: f64) -> Self {
1218 self.constraints.push(Arc::new(
1219 FormatConstraint::iso8601_datetime(column, threshold)
1220 .expect("Invalid column or threshold"),
1221 ));
1222 self
1223 }
1224
1225 // ========================================================================
1226 // ENHANCED FORMAT VALIDATION METHODS WITH OPTIONS
1227 // ========================================================================
1228
1229 /// Adds an enhanced email validation constraint with configurable options.
1230 ///
1231 /// This method provides more control than `validates_email()` by supporting
1232 /// case sensitivity, whitespace trimming, and null handling options.
1233 ///
1234 /// # Arguments
1235 ///
1236 /// * `column` - The column to validate
1237 /// * `threshold` - The minimum ratio of values that must be valid emails (0.0 to 1.0)
1238 /// * `options` - Format validation options
1239 ///
1240 /// # Examples
1241 ///
1242 /// ```rust
1243 /// use term_guard::core::{Check, Level};
1244 /// use term_guard::constraints::FormatOptions;
1245 ///
1246 /// let check = Check::builder("enhanced_email_validation")
1247 /// .level(Level::Error)
1248 /// // Case-insensitive email validation with trimming
1249 /// .validates_email_with_options(
1250 /// "email",
1251 /// 0.95,
1252 /// FormatOptions::new()
1253 /// .case_sensitive(false)
1254 /// .trim_before_check(true)
1255 /// .null_is_valid(false)
1256 /// )
1257 /// .build();
1258 /// ```
1259 pub fn validates_email_with_options(
1260 mut self,
1261 column: impl Into<String>,
1262 threshold: f64,
1263 options: FormatOptions,
1264 ) -> Self {
1265 self.constraints.push(Arc::new(
1266 FormatConstraint::new(column, FormatType::Email, threshold, options)
1267 .expect("Invalid column, threshold, or options"),
1268 ));
1269 self
1270 }
1271
1272 /// Adds an enhanced URL validation constraint with configurable options.
1273 ///
1274 /// This method provides more control than `validates_url()` by supporting
1275 /// case sensitivity, whitespace trimming, and null handling options.
1276 ///
1277 /// # Arguments
1278 ///
1279 /// * `column` - The column to validate
1280 /// * `threshold` - The minimum ratio of values that must be valid URLs (0.0 to 1.0)
1281 /// * `allow_localhost` - Whether to allow localhost URLs
1282 /// * `options` - Format validation options
1283 ///
1284 /// # Examples
1285 ///
1286 /// ```rust
1287 /// use term_guard::core::{Check, Level};
1288 /// use term_guard::constraints::FormatOptions;
1289 ///
1290 /// let check = Check::builder("enhanced_url_validation")
1291 /// .level(Level::Error)
1292 /// // Case-insensitive URL validation with trimming
1293 /// .validates_url_with_options(
1294 /// "website",
1295 /// 0.90,
1296 /// true, // allow localhost
1297 /// FormatOptions::new()
1298 /// .case_sensitive(false)
1299 /// .trim_before_check(true)
1300 /// )
1301 /// .build();
1302 /// ```
1303 pub fn validates_url_with_options(
1304 mut self,
1305 column: impl Into<String>,
1306 threshold: f64,
1307 allow_localhost: bool,
1308 options: FormatOptions,
1309 ) -> Self {
1310 self.constraints.push(Arc::new(
1311 FormatConstraint::new(
1312 column,
1313 FormatType::Url { allow_localhost },
1314 threshold,
1315 options,
1316 )
1317 .expect("Invalid column, threshold, or options"),
1318 ));
1319 self
1320 }
1321
1322 /// Adds an enhanced phone number validation constraint with configurable options.
1323 ///
1324 /// This method provides more control than `validates_phone()` by supporting
1325 /// case sensitivity, whitespace trimming, and null handling options.
1326 ///
1327 /// # Arguments
1328 ///
1329 /// * `column` - The column to validate
1330 /// * `threshold` - The minimum ratio of values that must be valid phone numbers (0.0 to 1.0)
1331 /// * `country` - Optional country code for region-specific validation
1332 /// * `options` - Format validation options
1333 ///
1334 /// # Examples
1335 ///
1336 /// ```rust
1337 /// use term_guard::core::{Check, Level};
1338 /// use term_guard::constraints::FormatOptions;
1339 ///
1340 /// let check = Check::builder("enhanced_phone_validation")
1341 /// .level(Level::Error)
1342 /// // US phone validation with trimming
1343 /// .validates_phone_with_options(
1344 /// "phone",
1345 /// 0.95,
1346 /// Some("US".to_string()),
1347 /// FormatOptions::new().trim_before_check(true)
1348 /// )
1349 /// .build();
1350 /// ```
1351 pub fn validates_phone_with_options(
1352 mut self,
1353 column: impl Into<String>,
1354 threshold: f64,
1355 country: Option<String>,
1356 options: FormatOptions,
1357 ) -> Self {
1358 self.constraints.push(Arc::new(
1359 FormatConstraint::new(column, FormatType::Phone { country }, threshold, options)
1360 .expect("Invalid column, threshold, or options"),
1361 ));
1362 self
1363 }
1364
1365 /// Adds an enhanced regex pattern validation constraint with configurable options.
1366 ///
1367 /// This method provides more control than `validates_regex()` by supporting
1368 /// case sensitivity, whitespace trimming, and null handling options.
1369 ///
1370 /// # Arguments
1371 ///
1372 /// * `column` - The column to validate
1373 /// * `pattern` - The regular expression pattern
1374 /// * `threshold` - The minimum ratio of values that must match (0.0 to 1.0)
1375 /// * `options` - Format validation options
1376 ///
1377 /// # Examples
1378 ///
1379 /// ```rust
1380 /// use term_guard::core::{Check, Level};
1381 /// use term_guard::constraints::FormatOptions;
1382 ///
1383 /// let check = Check::builder("enhanced_regex_validation")
1384 /// .level(Level::Error)
1385 /// // Case-insensitive product code validation
1386 /// .validates_regex_with_options(
1387 /// "product_code",
1388 /// r"^[A-Z]{2}\d{4}$",
1389 /// 0.98,
1390 /// FormatOptions::new()
1391 /// .case_sensitive(false)
1392 /// .trim_before_check(true)
1393 /// )
1394 /// .build();
1395 /// ```
1396 pub fn validates_regex_with_options(
1397 mut self,
1398 column: impl Into<String>,
1399 pattern: impl Into<String>,
1400 threshold: f64,
1401 options: FormatOptions,
1402 ) -> Self {
1403 self.constraints.push(Arc::new(
1404 FormatConstraint::new(
1405 column,
1406 FormatType::Regex(pattern.into()),
1407 threshold,
1408 options,
1409 )
1410 .expect("Invalid column, pattern, threshold, or options"),
1411 ));
1412 self
1413 }
1414
1415 // ========================================================================
1416 // NEW UNIFIED API METHODS
1417 // ========================================================================
1418
1419 /// Adds a unified uniqueness constraint with full control over validation type and options.
1420 ///
1421 /// This method provides a comprehensive alternative to `is_unique`, `are_unique`, `has_uniqueness`,
1422 /// `is_primary_key`, `has_distinctness`, and `has_unique_value_ratio` by supporting all uniqueness
1423 /// validation types with flexible configuration options.
1424 ///
1425 /// # Arguments
1426 ///
1427 /// * `columns` - The column(s) to check (single string, vec, or array)
1428 /// * `uniqueness_type` - The type of uniqueness validation to perform
1429 /// * `options` - Configuration options for null handling, case sensitivity, etc.
1430 ///
1431 /// # Examples
1432 ///
1433 /// ```rust
1434 /// use term_guard::core::{Check, Level};
1435 /// use term_guard::constraints::{UniquenessType, UniquenessOptions, NullHandling, Assertion};
1436 ///
1437 /// let check = Check::builder("unified_uniqueness")
1438 /// // Full uniqueness with threshold
1439 /// .uniqueness(
1440 /// vec!["user_id"],
1441 /// UniquenessType::FullUniqueness { threshold: 1.0 },
1442 /// UniquenessOptions::default()
1443 /// )
1444 /// // Primary key validation
1445 /// .uniqueness(
1446 /// vec!["order_id", "line_item_id"],
1447 /// UniquenessType::PrimaryKey,
1448 /// UniquenessOptions::default()
1449 /// )
1450 /// // Distinctness check
1451 /// .uniqueness(
1452 /// vec!["category"],
1453 /// UniquenessType::Distinctness(Assertion::LessThan(0.1)),
1454 /// UniquenessOptions::default()
1455 /// )
1456 /// // Unique value ratio
1457 /// .uniqueness(
1458 /// vec!["transaction_id"],
1459 /// UniquenessType::UniqueValueRatio(Assertion::GreaterThan(0.99)),
1460 /// UniquenessOptions::default()
1461 /// )
1462 /// // Composite uniqueness with null handling
1463 /// .uniqueness(
1464 /// vec!["email", "domain"],
1465 /// UniquenessType::UniqueComposite {
1466 /// threshold: 0.95,
1467 /// null_handling: NullHandling::Exclude,
1468 /// case_sensitive: false
1469 /// },
1470 /// UniquenessOptions::new()
1471 /// .with_null_handling(NullHandling::Exclude)
1472 /// .case_sensitive(false)
1473 /// )
1474 /// .build();
1475 /// ```
1476 ///
1477 /// # Errors
1478 ///
1479 /// Returns error if column names are invalid or thresholds are out of range.
1480 pub fn uniqueness<I, S>(
1481 mut self,
1482 columns: I,
1483 uniqueness_type: UniquenessType,
1484 options: UniquenessOptions,
1485 ) -> Self
1486 where
1487 I: IntoIterator<Item = S>,
1488 S: Into<String>,
1489 {
1490 self.constraints.push(Arc::new(
1491 UniquenessConstraint::new(columns, uniqueness_type, options)
1492 .expect("Invalid columns, uniqueness type, or options"),
1493 ));
1494 self
1495 }
1496
1497 /// Adds a full uniqueness constraint for single or multiple columns.
1498 ///
1499 /// This is a convenience method for `uniqueness()` with `UniquenessType::FullUniqueness`.
1500 ///
1501 /// # Arguments
1502 ///
1503 /// * `columns` - The column(s) to check for uniqueness
1504 /// * `threshold` - The minimum acceptable uniqueness ratio (0.0 to 1.0)
1505 ///
1506 /// # Examples
1507 ///
1508 /// ```rust
1509 /// use term_guard::core::{Check, Level};
1510 ///
1511 /// let check = Check::builder("uniqueness_validation")
1512 /// .level(Level::Error)
1513 /// .validates_uniqueness(vec!["user_id"], 1.0)
1514 /// .validates_uniqueness(vec!["email", "domain"], 0.95)
1515 /// .build();
1516 /// ```
1517 ///
1518 /// # Errors
1519 ///
1520 /// Returns error if column names are invalid or threshold is out of range.
1521 pub fn validates_uniqueness<I, S>(mut self, columns: I, threshold: f64) -> Self
1522 where
1523 I: IntoIterator<Item = S>,
1524 S: Into<String>,
1525 {
1526 self.constraints.push(Arc::new(
1527 UniquenessConstraint::new(
1528 columns,
1529 UniquenessType::FullUniqueness { threshold },
1530 UniquenessOptions::default(),
1531 )
1532 .expect("Invalid columns or threshold"),
1533 ));
1534 self
1535 }
1536
1537 /// Adds a distinctness constraint with assertion-based validation.
1538 ///
1539 /// This is a convenience method for `uniqueness()` with `UniquenessType::Distinctness`.
1540 ///
1541 /// # Arguments
1542 ///
1543 /// * `columns` - The column(s) to check for distinctness
1544 /// * `assertion` - The assertion to apply to the distinctness ratio
1545 ///
1546 /// # Examples
1547 ///
1548 /// ```rust
1549 /// use term_guard::core::{Check, Level};
1550 /// use term_guard::constraints::Assertion;
1551 ///
1552 /// let check = Check::builder("distinctness_validation")
1553 /// .level(Level::Warning)
1554 /// .validates_distinctness(vec!["status"], Assertion::LessThan(0.1))
1555 /// .validates_distinctness(vec!["user_id"], Assertion::GreaterThan(0.95))
1556 /// .build();
1557 /// ```
1558 ///
1559 /// # Errors
1560 ///
1561 /// Returns error if column names are invalid.
1562 pub fn validates_distinctness<I, S>(mut self, columns: I, assertion: Assertion) -> Self
1563 where
1564 I: IntoIterator<Item = S>,
1565 S: Into<String>,
1566 {
1567 self.constraints.push(Arc::new(
1568 UniquenessConstraint::new(
1569 columns,
1570 UniquenessType::Distinctness(assertion),
1571 UniquenessOptions::default(),
1572 )
1573 .expect("Invalid columns"),
1574 ));
1575 self
1576 }
1577
1578 /// Adds a unique value ratio constraint with assertion-based validation.
1579 ///
1580 /// This is a convenience method for `uniqueness()` with `UniquenessType::UniqueValueRatio`.
1581 ///
1582 /// # Arguments
1583 ///
1584 /// * `columns` - The column(s) to check for unique value ratio
1585 /// * `assertion` - The assertion to apply to the unique value ratio
1586 ///
1587 /// # Examples
1588 ///
1589 /// ```rust
1590 /// use term_guard::core::{Check, Level};
1591 /// use term_guard::constraints::Assertion;
1592 ///
1593 /// let check = Check::builder("unique_ratio_validation")
1594 /// .level(Level::Warning)
1595 /// .validates_unique_value_ratio(vec!["transaction_id"], Assertion::GreaterThan(0.99))
1596 /// .validates_unique_value_ratio(vec!["category"], Assertion::LessThan(0.01))
1597 /// .build();
1598 /// ```
1599 ///
1600 /// # Errors
1601 ///
1602 /// Returns error if column names are invalid.
1603 pub fn validates_unique_value_ratio<I, S>(mut self, columns: I, assertion: Assertion) -> Self
1604 where
1605 I: IntoIterator<Item = S>,
1606 S: Into<String>,
1607 {
1608 self.constraints.push(Arc::new(
1609 UniquenessConstraint::new(
1610 columns,
1611 UniquenessType::UniqueValueRatio(assertion),
1612 UniquenessOptions::default(),
1613 )
1614 .expect("Invalid columns"),
1615 ));
1616 self
1617 }
1618
1619 /// Adds a primary key constraint (unique + non-null).
1620 ///
1621 /// This is a convenience method for `uniqueness()` with `UniquenessType::PrimaryKey`.
1622 ///
1623 /// # Arguments
1624 ///
1625 /// * `columns` - The column(s) that form the primary key
1626 ///
1627 /// # Examples
1628 ///
1629 /// ```rust
1630 /// use term_guard::core::{Check, Level};
1631 ///
1632 /// let check = Check::builder("primary_key_validation")
1633 /// .level(Level::Error)
1634 /// .validates_primary_key(vec!["user_id"])
1635 /// .validates_primary_key(vec!["order_id", "line_item_id"])
1636 /// .build();
1637 /// ```
1638 ///
1639 /// # Errors
1640 ///
1641 /// Returns error if column names are invalid.
1642 pub fn validates_primary_key<I, S>(mut self, columns: I) -> Self
1643 where
1644 I: IntoIterator<Item = S>,
1645 S: Into<String>,
1646 {
1647 self.constraints.push(Arc::new(
1648 UniquenessConstraint::new(
1649 columns,
1650 UniquenessType::PrimaryKey,
1651 UniquenessOptions::default(),
1652 )
1653 .expect("Invalid columns"),
1654 ));
1655 self
1656 }
1657
1658 /// Adds a uniqueness constraint that allows NULL values with configurable handling.
1659 ///
1660 /// This is a convenience method for `uniqueness()` with `UniquenessType::UniqueWithNulls`.
1661 ///
1662 /// # Arguments
1663 ///
1664 /// * `columns` - The column(s) to check for uniqueness
1665 /// * `threshold` - The minimum acceptable uniqueness ratio (0.0 to 1.0)
1666 /// * `null_handling` - How to handle NULL values in uniqueness calculations
1667 ///
1668 /// # Examples
1669 ///
1670 /// ```rust
1671 /// use term_guard::core::{Check, Level};
1672 /// use term_guard::constraints::NullHandling;
1673 ///
1674 /// let check = Check::builder("null_handling_validation")
1675 /// .level(Level::Warning)
1676 /// .validates_uniqueness_with_nulls(vec!["optional_id"], 0.9, NullHandling::Exclude)
1677 /// .validates_uniqueness_with_nulls(vec!["reference"], 0.8, NullHandling::Include)
1678 /// .build();
1679 /// ```
1680 ///
1681 /// # Errors
1682 ///
1683 /// Returns error if column names are invalid or threshold is out of range.
1684 pub fn validates_uniqueness_with_nulls<I, S>(
1685 mut self,
1686 columns: I,
1687 threshold: f64,
1688 null_handling: NullHandling,
1689 ) -> Self
1690 where
1691 I: IntoIterator<Item = S>,
1692 S: Into<String>,
1693 {
1694 self.constraints.push(Arc::new(
1695 UniquenessConstraint::new(
1696 columns,
1697 UniquenessType::UniqueWithNulls {
1698 threshold,
1699 null_handling,
1700 },
1701 UniquenessOptions::new().with_null_handling(null_handling),
1702 )
1703 .expect("Invalid columns or threshold"),
1704 ));
1705 self
1706 }
1707
1708 /// Adds a completeness constraint using the unified options pattern.
1709 ///
1710 /// This method provides a more flexible alternative to `is_complete`, `has_completeness`,
1711 /// `are_complete`, and `are_any_complete` by supporting arbitrary logical operators
1712 /// and thresholds.
1713 ///
1714 /// # Arguments
1715 ///
1716 /// * `columns` - The column(s) to check (single string, vec, or array)
1717 /// * `options` - Configuration options including threshold and logical operator
1718 ///
1719 /// # Examples
1720 ///
1721 /// ```rust
1722 /// use term_guard::core::{Check, ConstraintOptions, LogicalOperator};
1723 ///
1724 /// let check = Check::builder("unified_completeness")
1725 /// // Single column with threshold
1726 /// .completeness("email", ConstraintOptions::new().with_threshold(0.95))
1727 /// // Multiple columns - all must be complete
1728 /// .completeness(
1729 /// vec!["first_name", "last_name"],
1730 /// ConstraintOptions::new()
1731 /// .with_operator(LogicalOperator::All)
1732 /// .with_threshold(1.0)
1733 /// )
1734 /// // At least 2 of 4 contact methods must be 90% complete
1735 /// .completeness(
1736 /// vec!["email", "phone", "address", "postal_code"],
1737 /// ConstraintOptions::new()
1738 /// .with_operator(LogicalOperator::AtLeast(2))
1739 /// .with_threshold(0.9)
1740 /// )
1741 /// .build();
1742 /// ```
1743 pub fn completeness(
1744 mut self,
1745 columns: impl Into<crate::core::ColumnSpec>,
1746 options: crate::core::ConstraintOptions,
1747 ) -> Self {
1748 use crate::constraints::CompletenessConstraint;
1749 self.constraints
1750 .push(Arc::new(CompletenessConstraint::new(columns, options)));
1751 self
1752 }
1753
1754 /// Adds a string length constraint using the unified options pattern.
1755 ///
1756 /// This method provides a more flexible alternative to the individual length methods
1757 /// by supporting all length assertion types in a single interface.
1758 ///
1759 /// # Arguments
1760 ///
1761 /// * `column` - The column to check
1762 /// * `assertion` - The length assertion (Min, Max, Between, Exactly, NotEmpty)
1763 ///
1764 /// # Examples
1765 ///
1766 /// ```rust
1767 /// use term_guard::core::Check;
1768 /// use term_guard::constraints::LengthAssertion;
1769 ///
1770 /// let check = Check::builder("length_validation")
1771 /// .length("password", LengthAssertion::Min(8))
1772 /// .length("username", LengthAssertion::Between(3, 20))
1773 /// .length("verification_code", LengthAssertion::Exactly(6))
1774 /// .length("name", LengthAssertion::NotEmpty)
1775 /// .build();
1776 /// ```
1777 pub fn length(
1778 mut self,
1779 column: impl Into<String>,
1780 assertion: crate::constraints::LengthAssertion,
1781 ) -> Self {
1782 use crate::constraints::LengthConstraint;
1783 self.constraints
1784 .push(Arc::new(LengthConstraint::new(column, assertion)));
1785 self
1786 }
1787
1788 /// Adds a statistical constraint using the unified options pattern.
1789 ///
1790 /// This method provides a unified interface for all statistical constraints
1791 /// (min, max, mean, sum, standard deviation) with consistent assertion patterns.
1792 ///
1793 /// # Arguments
1794 ///
1795 /// * `column` - The column to analyze
1796 /// * `statistic` - The type of statistic to compute
1797 /// * `assertion` - The assertion to apply to the statistic
1798 ///
1799 /// # Examples
1800 ///
1801 /// ```rust
1802 /// use term_guard::core::Check;
1803 /// use term_guard::constraints::{StatisticType, Assertion};
1804 ///
1805 /// let check = Check::builder("statistical_validation")
1806 /// .statistic("age", StatisticType::Min, Assertion::GreaterThanOrEqual(0.0))
1807 /// .statistic("age", StatisticType::Max, Assertion::LessThanOrEqual(120.0))
1808 /// .statistic("salary", StatisticType::Mean, Assertion::Between(50000.0, 100000.0))
1809 /// .statistic("response_time", StatisticType::StandardDeviation, Assertion::LessThan(100.0))
1810 /// .build();
1811 /// ```
1812 pub fn statistic(
1813 mut self,
1814 column: impl Into<String>,
1815 statistic: crate::constraints::StatisticType,
1816 assertion: Assertion,
1817 ) -> Self {
1818 use crate::constraints::StatisticalConstraint;
1819 self.constraints.push(Arc::new(
1820 StatisticalConstraint::new(column, statistic, assertion)
1821 .expect("Invalid column name or statistic"),
1822 ));
1823 self
1824 }
1825
1826 /// Adds a minimum value constraint for a column.
1827 ///
1828 /// This is a convenience method for `statistic()` with `StatisticType::Min`.
1829 ///
1830 /// # Examples
1831 ///
1832 /// ```rust
1833 /// use term_guard::core::{Check, Level};
1834 /// use term_guard::constraints::Assertion;
1835 ///
1836 /// let check = Check::builder("age_validation")
1837 /// .level(Level::Error)
1838 /// .has_min("age", Assertion::GreaterThanOrEqual(0.0))
1839 /// .build();
1840 /// ```
1841 pub fn has_min(self, column: impl Into<String>, assertion: Assertion) -> Self {
1842 self.statistic(column, crate::constraints::StatisticType::Min, assertion)
1843 }
1844
1845 /// Adds a maximum value constraint for a column.
1846 ///
1847 /// This is a convenience method for `statistic()` with `StatisticType::Max`.
1848 ///
1849 /// # Examples
1850 ///
1851 /// ```rust
1852 /// use term_guard::core::{Check, Level};
1853 /// use term_guard::constraints::Assertion;
1854 ///
1855 /// let check = Check::builder("age_validation")
1856 /// .level(Level::Error)
1857 /// .has_max("age", Assertion::LessThanOrEqual(120.0))
1858 /// .build();
1859 /// ```
1860 pub fn has_max(self, column: impl Into<String>, assertion: Assertion) -> Self {
1861 self.statistic(column, crate::constraints::StatisticType::Max, assertion)
1862 }
1863
1864 /// Adds a mean (average) value constraint for a column.
1865 ///
1866 /// This is a convenience method for `statistic()` with `StatisticType::Mean`.
1867 ///
1868 /// # Examples
1869 ///
1870 /// ```rust
1871 /// use term_guard::core::{Check, Level};
1872 /// use term_guard::constraints::Assertion;
1873 ///
1874 /// let check = Check::builder("salary_validation")
1875 /// .level(Level::Warning)
1876 /// .has_mean("salary", Assertion::Between(50000.0, 100000.0))
1877 /// .build();
1878 /// ```
1879 pub fn has_mean(self, column: impl Into<String>, assertion: Assertion) -> Self {
1880 self.statistic(column, crate::constraints::StatisticType::Mean, assertion)
1881 }
1882
1883 /// Adds a sum constraint for a column.
1884 ///
1885 /// This is a convenience method for `statistic()` with `StatisticType::Sum`.
1886 ///
1887 /// # Examples
1888 ///
1889 /// ```rust
1890 /// use term_guard::core::{Check, Level};
1891 /// use term_guard::constraints::Assertion;
1892 ///
1893 /// let check = Check::builder("revenue_validation")
1894 /// .level(Level::Error)
1895 /// .has_sum("revenue", Assertion::GreaterThan(1000000.0))
1896 /// .build();
1897 /// ```
1898 pub fn has_sum(self, column: impl Into<String>, assertion: Assertion) -> Self {
1899 self.statistic(column, crate::constraints::StatisticType::Sum, assertion)
1900 }
1901
1902 /// Adds a standard deviation constraint for a column.
1903 ///
1904 /// This is a convenience method for `statistic()` with `StatisticType::StandardDeviation`.
1905 ///
1906 /// # Examples
1907 ///
1908 /// ```rust
1909 /// use term_guard::core::{Check, Level};
1910 /// use term_guard::constraints::Assertion;
1911 ///
1912 /// let check = Check::builder("response_time_validation")
1913 /// .level(Level::Warning)
1914 /// .has_standard_deviation("response_time", Assertion::LessThan(100.0))
1915 /// .build();
1916 /// ```
1917 pub fn has_standard_deviation(self, column: impl Into<String>, assertion: Assertion) -> Self {
1918 self.statistic(
1919 column,
1920 crate::constraints::StatisticType::StandardDeviation,
1921 assertion,
1922 )
1923 }
1924
1925 /// Adds a variance constraint for a column.
1926 ///
1927 /// This is a convenience method for `statistic()` with `StatisticType::Variance`.
1928 ///
1929 /// # Examples
1930 ///
1931 /// ```rust
1932 /// use term_guard::core::{Check, Level};
1933 /// use term_guard::constraints::Assertion;
1934 ///
1935 /// let check = Check::builder("score_validation")
1936 /// .level(Level::Warning)
1937 /// .has_variance("score", Assertion::LessThan(250.0))
1938 /// .build();
1939 /// ```
1940 pub fn has_variance(self, column: impl Into<String>, assertion: Assertion) -> Self {
1941 self.statistic(
1942 column,
1943 crate::constraints::StatisticType::Variance,
1944 assertion,
1945 )
1946 }
1947
1948 /// Adds a foreign key constraint for referential integrity validation.
1949 ///
1950 /// This constraint ensures that all values in the child table's foreign key column
1951 /// exist as values in the parent table's referenced column. This is essential for
1952 /// maintaining data consistency and preventing orphaned records in joined datasets.
1953 ///
1954 /// # Arguments
1955 ///
1956 /// * `child_column` - The column in the child table (qualified as "table.column")
1957 /// * `parent_column` - The column in the parent table (qualified as "table.column")
1958 ///
1959 /// # Examples
1960 ///
1961 /// ```rust
1962 /// use term_guard::core::{Check, Level};
1963 ///
1964 /// let check = Check::builder("referential_integrity")
1965 /// .level(Level::Error)
1966 /// .foreign_key("orders.customer_id", "customers.id")
1967 /// .foreign_key("order_items.order_id", "orders.id")
1968 /// .build();
1969 /// ```
1970 ///
1971 /// # Foreign Key Configuration
1972 ///
1973 /// For more advanced configuration (null handling, join strategy, violation reporting),
1974 /// use `ForeignKeyConstraint::new()` directly with the `constraint()` method:
1975 ///
1976 /// ```rust
1977 /// use term_guard::core::Check;
1978 /// use term_guard::constraints::ForeignKeyConstraint;
1979 ///
1980 /// let check = Check::builder("advanced_foreign_key")
1981 /// .constraint(
1982 /// ForeignKeyConstraint::new("orders.customer_id", "customers.id")
1983 /// .allow_nulls(true)
1984 /// .use_left_join(false)
1985 /// .max_violations_reported(50)
1986 /// )
1987 /// .build();
1988 /// ```
1989 ///
1990 /// # Requirements
1991 ///
1992 /// This constraint requires that both referenced tables are available in the DataFusion
1993 /// session context. When using with `JoinedSource`, ensure the joined source is registered
1994 /// with the appropriate table name.
1995 pub fn foreign_key(
1996 mut self,
1997 child_column: impl Into<String>,
1998 parent_column: impl Into<String>,
1999 ) -> Self {
2000 use crate::constraints::ForeignKeyConstraint;
2001 self.constraints.push(Arc::new(ForeignKeyConstraint::new(
2002 child_column,
2003 parent_column,
2004 )));
2005 self
2006 }
2007
2008 /// Adds a constraint that validates sums between two tables match within tolerance.
2009 ///
2010 /// This is essential for Phase 2 joined data sources validation, ensuring that aggregated
2011 /// values are consistent across related tables. Common use cases include validating that
2012 /// order totals match payment amounts, or inventory quantities align with transaction logs.
2013 ///
2014 /// # Arguments
2015 ///
2016 /// * `left_column` - Left side column in table.column format (e.g., "orders.total")
2017 /// * `right_column` - Right side column in table.column format (e.g., "payments.amount")
2018 ///
2019 /// # Examples
2020 ///
2021 /// ## Basic Cross-Table Sum Validation
2022 ///
2023 /// ```rust
2024 /// use term_guard::core::{Check, Level};
2025 ///
2026 /// let check = Check::builder("financial_integrity")
2027 /// .level(Level::Error)
2028 /// .cross_table_sum("orders.total", "payments.amount")
2029 /// .build();
2030 /// ```
2031 ///
2032 /// ## For Advanced Configuration, Use the Constraint Directly
2033 ///
2034 /// ```rust
2035 /// use term_guard::core::Check;
2036 /// use term_guard::constraints::CrossTableSumConstraint;
2037 ///
2038 /// let check = Check::builder("advanced_cross_table")
2039 /// .constraint(
2040 /// CrossTableSumConstraint::new("orders.total", "payments.amount")
2041 /// .group_by(vec!["customer_id"])
2042 /// .tolerance(0.01)
2043 /// .max_violations_reported(50)
2044 /// )
2045 /// .build();
2046 /// ```
2047 ///
2048 /// # Requirements
2049 ///
2050 /// This constraint requires that both referenced tables are available in the DataFusion
2051 /// session context. When using with `JoinedSource`, ensure the joined source is registered
2052 /// with the appropriate table names and that the tables can be joined on the specified
2053 /// group-by columns if provided.
2054 pub fn cross_table_sum(
2055 mut self,
2056 left_column: impl Into<String>,
2057 right_column: impl Into<String>,
2058 ) -> Self {
2059 use crate::constraints::CrossTableSumConstraint;
2060 self.constraints.push(Arc::new(CrossTableSumConstraint::new(
2061 left_column,
2062 right_column,
2063 )));
2064 self
2065 }
2066
2067 /// Adds a join coverage constraint for validating join quality.
2068 ///
2069 /// This constraint measures what percentage of rows from the left table successfully
2070 /// join with the right table, helping identify missing reference data, data quality
2071 /// issues in foreign key relationships, and incomplete data loads.
2072 ///
2073 /// # Arguments
2074 ///
2075 /// * `left_table` - Name of the left table in the join
2076 /// * `right_table` - Name of the right table in the join
2077 ///
2078 /// # Examples
2079 ///
2080 /// ## Basic Usage
2081 ///
2082 /// ```rust
2083 /// use term_guard::core::{Check, Level};
2084 ///
2085 /// let check = Check::builder("join_quality")
2086 /// .level(Level::Warning)
2087 /// .join_coverage("sales", "customers")
2088 /// .build();
2089 /// ```
2090 ///
2091 /// ## For Advanced Configuration, Use the Constraint Directly
2092 ///
2093 /// ```rust
2094 /// use term_guard::core::Check;
2095 /// use term_guard::constraints::{JoinCoverageConstraint, CoverageType};
2096 ///
2097 /// let check = Check::builder("advanced_join_coverage")
2098 /// .constraint(
2099 /// JoinCoverageConstraint::new("orders", "products")
2100 /// .on_multiple(vec![("product_id", "id"), ("variant", "variant_code")])
2101 /// .expect_match_rate(0.98)
2102 /// .coverage_type(CoverageType::BidirectionalCoverage)
2103 /// .distinct_only(true)
2104 /// )
2105 /// .build();
2106 /// ```
2107 ///
2108 /// # Requirements
2109 ///
2110 /// Both tables must be registered with the DataFusion session context.
2111 /// Use `.on()` or `.on_multiple()` on the constraint to specify join keys.
2112 pub fn join_coverage(
2113 mut self,
2114 left_table: impl Into<String>,
2115 right_table: impl Into<String>,
2116 ) -> Self {
2117 use crate::constraints::JoinCoverageConstraint;
2118 self.constraints.push(Arc::new(JoinCoverageConstraint::new(
2119 left_table,
2120 right_table,
2121 )));
2122 self
2123 }
2124
2125 /// Adds a temporal ordering constraint for time-based validation.
2126 ///
2127 /// This constraint ensures that temporal data follows expected patterns, including
2128 /// chronological ordering between columns, business hour compliance, date range
2129 /// validation, and event sequence validation.
2130 ///
2131 /// # Arguments
2132 ///
2133 /// * `table_name` - Name of the table to validate
2134 ///
2135 /// # Examples
2136 ///
2137 /// ## Basic Temporal Ordering
2138 ///
2139 /// ```rust
2140 /// use term_guard::core::{Check, Level};
2141 ///
2142 /// let check = Check::builder("temporal_consistency")
2143 /// .level(Level::Error)
2144 /// .temporal_ordering("events")
2145 /// .build();
2146 /// ```
2147 ///
2148 /// ## For Advanced Configuration, Use the Constraint Directly
2149 ///
2150 /// ```rust
2151 /// use term_guard::core::Check;
2152 /// use term_guard::constraints::TemporalOrderingConstraint;
2153 ///
2154 /// let check = Check::builder("advanced_temporal")
2155 /// .constraint(
2156 /// TemporalOrderingConstraint::new("transactions")
2157 /// .business_hours("timestamp", "09:00", "17:00")
2158 /// .weekdays_only(true)
2159 /// .with_timezone("America/New_York")
2160 /// )
2161 /// .constraint(
2162 /// TemporalOrderingConstraint::new("events")
2163 /// .before_after("created_at", "processed_at")
2164 /// .tolerance_seconds(60)
2165 /// .allow_nulls(true)
2166 /// )
2167 /// .build();
2168 /// ```
2169 ///
2170 /// # Requirements
2171 ///
2172 /// The table must be registered with the DataFusion session context.
2173 /// Configure the specific temporal validation type using the constraint's methods.
2174 pub fn temporal_ordering(mut self, table_name: impl Into<String>) -> Self {
2175 use crate::constraints::TemporalOrderingConstraint;
2176 self.constraints
2177 .push(Arc::new(TemporalOrderingConstraint::new(table_name)));
2178 self
2179 }
2180
2181 /// Adds a constraint using a fluent constraint builder.
2182 ///
2183 /// This method provides the most flexible API for building complex constraints
2184 /// with full access to all unified constraint features.
2185 ///
2186 /// # Arguments
2187 ///
2188 /// * `constraint` - A constraint built using the fluent API
2189 ///
2190 /// # Examples
2191 ///
2192 /// ```rust
2193 /// use term_guard::core::{Check, ConstraintOptions, LogicalOperator};
2194 /// use term_guard::constraints::{CompletenessConstraint, LengthConstraint, LengthAssertion};
2195 ///
2196 /// let check = Check::builder("advanced_validation")
2197 /// .with_constraint(
2198 /// CompletenessConstraint::new(
2199 /// vec!["phone", "email"],
2200 /// ConstraintOptions::new()
2201 /// .with_operator(LogicalOperator::Any)
2202 /// .with_threshold(0.99)
2203 /// )
2204 /// )
2205 /// .with_constraint(
2206 /// LengthConstraint::new("description", LengthAssertion::Between(50, 2000))
2207 /// )
2208 /// .build();
2209 /// ```
2210 pub fn with_constraint(mut self, constraint: impl crate::core::Constraint + 'static) -> Self {
2211 self.constraints.push(Arc::new(constraint));
2212 self
2213 }
2214
2215 // ========================================================================
2216 // CONVENIENCE METHODS FOR COMMON PATTERNS
2217 // ========================================================================
2218
2219 /// Convenience method for requiring any of multiple columns to be complete.
2220 ///
2221 /// Equivalent to `completeness(columns, ConstraintOptions::new().with_operator(LogicalOperator::Any).with_threshold(1.0))`
2222 /// but more concise for this common pattern.
2223 ///
2224 /// # Examples
2225 ///
2226 /// ```rust
2227 /// use term_guard::core::Check;
2228 ///
2229 /// let check = Check::builder("contact_validation")
2230 /// .any_complete(vec!["phone", "email", "address"])
2231 /// .build();
2232 /// ```
2233 pub fn any_complete<I, S>(self, columns: I) -> Self
2234 where
2235 I: IntoIterator<Item = S>,
2236 S: Into<String>,
2237 {
2238 use crate::core::{ConstraintOptions, LogicalOperator};
2239 let cols: Vec<String> = columns.into_iter().map(Into::into).collect();
2240 self.completeness(
2241 cols,
2242 ConstraintOptions::new()
2243 .with_operator(LogicalOperator::Any)
2244 .with_threshold(1.0),
2245 )
2246 }
2247
2248 /// Convenience method for requiring at least N columns to meet a threshold.
2249 ///
2250 /// # Examples
2251 ///
2252 /// ```rust
2253 /// use term_guard::core::Check;
2254 ///
2255 /// let check = Check::builder("contact_validation")
2256 /// .at_least_complete(2, vec!["email", "phone", "address", "postal_code"], 0.9)
2257 /// .build();
2258 /// ```
2259 pub fn at_least_complete<I, S>(self, n: usize, columns: I, threshold: f64) -> Self
2260 where
2261 I: IntoIterator<Item = S>,
2262 S: Into<String>,
2263 {
2264 use crate::core::{ConstraintOptions, LogicalOperator};
2265 let cols: Vec<String> = columns.into_iter().map(Into::into).collect();
2266 self.completeness(
2267 cols,
2268 ConstraintOptions::new()
2269 .with_operator(LogicalOperator::AtLeast(n))
2270 .with_threshold(threshold),
2271 )
2272 }
2273
2274 /// Convenience method for exactly N columns meeting a threshold.
2275 ///
2276 /// # Examples
2277 ///
2278 /// ```rust
2279 /// use term_guard::core::Check;
2280 ///
2281 /// let check = Check::builder("balance_validation")
2282 /// .exactly_complete(1, vec!["primary_phone", "secondary_phone"], 1.0)
2283 /// .build();
2284 /// ```
2285 pub fn exactly_complete<I, S>(self, n: usize, columns: I, threshold: f64) -> Self
2286 where
2287 I: IntoIterator<Item = S>,
2288 S: Into<String>,
2289 {
2290 use crate::core::{ConstraintOptions, LogicalOperator};
2291 let cols: Vec<String> = columns.into_iter().map(Into::into).collect();
2292 self.completeness(
2293 cols,
2294 ConstraintOptions::new()
2295 .with_operator(LogicalOperator::Exactly(n))
2296 .with_threshold(threshold),
2297 )
2298 }
2299
2300 /// Builds the `Check` instance.
2301 ///
2302 /// # Returns
2303 ///
2304 /// The constructed `Check`
2305 pub fn build(self) -> Check {
2306 Check {
2307 name: self.name,
2308 level: self.level,
2309 description: self.description,
2310 constraints: self.constraints,
2311 }
2312 }
2313}
2314
#[cfg(test)]
mod tests {
    //! Unit tests for `CheckBuilder`: each test assembles a check through the
    //! fluent API and inspects the resulting `Check` (name, level, description
    //! and number of registered constraints). No constraint is evaluated here.

    use super::*;
    use crate::prelude::*;
    use async_trait::async_trait;
    use datafusion::prelude::*;

    /// Minimal constraint that always succeeds; used to exercise the generic
    /// `constraint()` entry point without depending on a real constraint type.
    #[derive(Debug)]
    struct DummyConstraint {
        name: String,
    }

    #[async_trait]
    impl Constraint for DummyConstraint {
        async fn evaluate(&self, _ctx: &SessionContext) -> Result<crate::core::ConstraintResult> {
            Ok(crate::core::ConstraintResult::success())
        }

        fn name(&self) -> &str {
            &self.name
        }

        fn metadata(&self) -> crate::core::ConstraintMetadata {
            crate::core::ConstraintMetadata::new()
        }
    }

    /// Name, level, description and a custom constraint all reach the built check.
    #[test]
    fn test_check_builder() {
        let check = Check::builder("test_check")
            .level(Level::Error)
            .description("Test check description")
            .constraint(DummyConstraint {
                name: "constraint1".to_string(),
            })
            .build();

        assert_eq!(check.name(), "test_check");
        assert_eq!(check.level(), Level::Error);
        assert_eq!(check.description(), Some("Test check description"));
        assert_eq!(check.constraints().len(), 1);
    }

    /// A builder without an explicit level yields a warning-level check.
    #[test]
    fn test_check_default_level() {
        let check = Check::builder("test_check").build();
        assert_eq!(check.level(), Level::Warning);
    }

    /// Each `completeness()` call registers one constraint.
    #[test]
    fn test_check_builder_completeness() {
        use crate::core::ConstraintOptions;

        let check = Check::builder("completeness_check")
            .level(Level::Error)
            .completeness("user_id", ConstraintOptions::new().with_threshold(1.0))
            .completeness("email", ConstraintOptions::new().with_threshold(0.95))
            .build();

        assert_eq!(check.name(), "completeness_check");
        assert_eq!(check.level(), Level::Error);
        assert_eq!(check.constraints().len(), 2);
    }

    /// Single- and multi-column uniqueness checks each register one constraint.
    #[test]
    fn test_check_builder_uniqueness() {
        let check = Check::builder("uniqueness_check")
            .validates_uniqueness(vec!["user_id"], 1.0)
            .validates_uniqueness(vec!["first_name", "last_name"], 1.0)
            .validates_uniqueness(vec!["email"], 0.99)
            .build();

        assert_eq!(check.constraints().len(), 3);
    }

    /// Metadata setters and constraint methods can be chained freely.
    #[test]
    fn test_check_builder_method_chaining() {
        use crate::core::ConstraintOptions;

        let check = Check::builder("comprehensive_check")
            .level(Level::Error)
            .description("Comprehensive data quality check")
            .completeness("id", ConstraintOptions::new().with_threshold(1.0))
            .completeness("name", ConstraintOptions::new().with_threshold(0.9))
            .validates_uniqueness(vec!["id"], 1.0)
            .validates_uniqueness(vec!["email", "phone"], 1.0)
            .build();

        assert_eq!(check.name(), "comprehensive_check");
        assert_eq!(check.level(), Level::Error);
        assert_eq!(
            check.description(),
            Some("Comprehensive data quality check")
        );
        assert_eq!(check.constraints().len(), 4);
    }

    /// A multi-column completeness rule counts as a single constraint.
    #[test]
    fn test_check_builder_multiple_completeness() {
        use crate::core::{ConstraintOptions, LogicalOperator};

        let check = Check::builder("multi_completeness_check")
            .completeness(
                vec!["user_id", "email", "name"],
                ConstraintOptions::new()
                    .with_operator(LogicalOperator::All)
                    .with_threshold(1.0),
            )
            .any_complete(vec!["phone", "mobile", "fax"])
            .build();

        assert_eq!(check.constraints().len(), 2);
    }

    /// A completeness threshold above 1.0 is rejected with a panic.
    #[test]
    #[should_panic(expected = "Threshold must be between 0.0 and 1.0")]
    fn test_check_builder_invalid_completeness_threshold() {
        use crate::core::ConstraintOptions;

        Check::builder("test")
            .completeness("column", ConstraintOptions::new().with_threshold(1.5))
            .build();
    }

    /// A negative uniqueness threshold is rejected with a panic.
    #[test]
    #[should_panic(expected = "Invalid columns or threshold")]
    fn test_check_builder_invalid_uniqueness_threshold() {
        Check::builder("test")
            .validates_uniqueness(vec!["column"], -0.1)
            .build();
    }

    /// Min/max length shortcuts each register one constraint.
    #[test]
    fn test_check_builder_string_length() {
        let check = Check::builder("string_length_check")
            .has_min_length("password", 8)
            .has_max_length("username", 20)
            .build();

        assert_eq!(check.constraints().len(), 2);
    }

    /// The unified completeness API accepts single columns and column lists.
    #[test]
    fn test_unified_completeness_api() {
        use crate::core::{ConstraintOptions, LogicalOperator};

        let check = Check::builder("unified_completeness_test")
            // Single column with threshold
            .completeness("email", ConstraintOptions::new().with_threshold(0.95))
            // Multiple columns with ANY operator
            .completeness(
                vec!["phone", "email", "address"],
                ConstraintOptions::new()
                    .with_operator(LogicalOperator::Any)
                    .with_threshold(1.0),
            )
            // At least 2 columns with threshold
            .completeness(
                vec!["a", "b", "c", "d"],
                ConstraintOptions::new()
                    .with_operator(LogicalOperator::AtLeast(2))
                    .with_threshold(0.9),
            )
            .build();

        assert_eq!(check.constraints().len(), 3);
    }

    /// Every `LengthAssertion` variant used here registers one constraint.
    #[test]
    fn test_unified_length_api() {
        use crate::constraints::LengthAssertion;

        let check = Check::builder("unified_length_test")
            .length("password", LengthAssertion::Min(8))
            .length("username", LengthAssertion::Between(3, 20))
            .length("code", LengthAssertion::Exactly(6))
            .length("name", LengthAssertion::NotEmpty)
            .build();

        assert_eq!(check.constraints().len(), 4);
    }

    /// Each `statistic()` call registers one constraint.
    #[test]
    fn test_unified_statistics_api() {
        use crate::constraints::{Assertion, StatisticType};

        let check = Check::builder("unified_statistics_test")
            .statistic(
                "age",
                StatisticType::Min,
                Assertion::GreaterThanOrEqual(0.0),
            )
            .statistic("age", StatisticType::Max, Assertion::LessThanOrEqual(120.0))
            .statistic(
                "salary",
                StatisticType::Mean,
                Assertion::Between(50000.0, 100000.0),
            )
            .statistic(
                "response_time",
                StatisticType::StandardDeviation,
                Assertion::LessThan(100.0),
            )
            .build();

        assert_eq!(check.constraints().len(), 4);
    }

    /// The variance shortcut registers exactly one constraint.
    #[test]
    fn test_has_variance_builder_method() {
        use crate::constraints::Assertion;

        let check = Check::builder("variance_test")
            .level(Level::Warning)
            .has_variance("score", Assertion::LessThan(250.0))
            .build();

        assert_eq!(check.name(), "variance_test");
        assert_eq!(check.level(), Level::Warning);
        assert_eq!(check.constraints().len(), 1);
    }

    /// The completeness shortcuts each register one constraint.
    #[test]
    fn test_convenience_methods() {
        let check = Check::builder("convenience_test")
            .any_complete(vec!["phone", "email", "address"])
            .at_least_complete(2, vec!["a", "b", "c", "d"], 0.9)
            .exactly_complete(1, vec!["primary", "secondary"], 1.0)
            .build();

        assert_eq!(check.constraints().len(), 3);
    }

    /// `with_constraint()` accepts a pre-built constraint value.
    #[test]
    fn test_with_constraint_method() {
        use crate::constraints::{LengthAssertion, LengthConstraint};

        let constraint = LengthConstraint::new("test", LengthAssertion::Between(5, 50));
        let check = Check::builder("with_constraint_test")
            .with_constraint(constraint)
            .build();

        assert_eq!(check.constraints().len(), 1);
    }

    /// The `*_with_options` format validators each register one constraint.
    #[test]
    fn test_enhanced_format_validation_methods() {
        let check = Check::builder("enhanced_format_test")
            // Enhanced email validation with options
            .validates_email_with_options(
                "email",
                0.95,
                FormatOptions::new()
                    .case_sensitive(false)
                    .trim_before_check(true)
                    .null_is_valid(false),
            )
            // Enhanced URL validation with options
            .validates_url_with_options(
                "website",
                0.90,
                true, // allow localhost
                FormatOptions::new()
                    .case_sensitive(false)
                    .trim_before_check(true),
            )
            // Enhanced phone validation with options
            .validates_phone_with_options(
                "phone",
                0.95,
                Some("US".to_string()),
                FormatOptions::new().trim_before_check(true),
            )
            // Enhanced regex validation with options
            .validates_regex_with_options(
                "product_code",
                r"^[A-Z]{2}\d{4}$",
                0.98,
                FormatOptions::new()
                    .case_sensitive(false)
                    .trim_before_check(true),
            )
            .build();

        assert_eq!(check.constraints().len(), 4);
        assert_eq!(check.name(), "enhanced_format_test");
    }

    /// Basic and option-taking format validators can be mixed in one check.
    #[test]
    fn test_enhanced_vs_basic_format_methods() {
        // Test that basic and enhanced methods can be used together
        let check = Check::builder("mixed_format_test")
            // Basic methods
            .validates_email("basic_email", 0.90)
            .validates_url("basic_url", 0.85, false)
            .validates_phone("basic_phone", 0.80, None)
            .validates_regex("basic_pattern", r"^\d+$", 0.75)
            // Enhanced methods
            .validates_email_with_options(
                "enhanced_email",
                0.95,
                FormatOptions::new().case_sensitive(false),
            )
            .validates_url_with_options(
                "enhanced_url",
                0.90,
                true,
                FormatOptions::new().trim_before_check(true),
            )
            .build();

        assert_eq!(check.constraints().len(), 6);
    }

    /// Each `foreign_key()` call registers one constraint.
    #[test]
    fn test_foreign_key_builder_method() {
        let check = Check::builder("foreign_key_test")
            .level(Level::Error)
            .foreign_key("orders.customer_id", "customers.id")
            .foreign_key("order_items.order_id", "orders.id")
            .build();

        assert_eq!(check.name(), "foreign_key_test");
        assert_eq!(check.level(), Level::Error);
        assert_eq!(check.constraints().len(), 2);
    }

    /// Each `cross_table_sum()` call registers one `cross_table_sum` constraint.
    #[test]
    fn test_cross_table_sum_builder_method() {
        let check = Check::builder("cross_table_sum_test")
            .level(Level::Error)
            .cross_table_sum("orders.total", "payments.amount")
            .cross_table_sum("inventory.quantity", "transactions.quantity")
            .build();

        assert_eq!(check.constraints().len(), 2);
        assert_eq!(check.name(), "cross_table_sum_test");
        assert_eq!(check.level(), Level::Error);

        // Verify that both constraints are cross-table sum constraints
        for constraint in check.constraints() {
            assert_eq!(constraint.name(), "cross_table_sum");
        }
    }

    /// `join_coverage()` registers exactly one constraint.
    #[test]
    fn test_join_coverage_builder_method() {
        let check = Check::builder("join_coverage_test")
            .level(Level::Warning)
            .join_coverage("sales", "customers")
            .build();

        assert_eq!(check.name(), "join_coverage_test");
        assert_eq!(check.level(), Level::Warning);
        assert_eq!(check.constraints().len(), 1);
    }

    /// `temporal_ordering()` registers exactly one constraint.
    #[test]
    fn test_temporal_ordering_builder_method() {
        let check = Check::builder("temporal_ordering_test")
            .level(Level::Error)
            .temporal_ordering("events")
            .build();

        assert_eq!(check.name(), "temporal_ordering_test");
        assert_eq!(check.level(), Level::Error);
        assert_eq!(check.constraints().len(), 1);
    }

    /// The multi-table shortcuts compose: one constraint per call.
    #[test]
    fn test_multi_table_builder_methods_compose() {
        let check = Check::builder("multi_table_test")
            .foreign_key("orders.customer_id", "customers.id")
            .cross_table_sum("orders.total", "payments.amount")
            .join_coverage("sales", "customers")
            .temporal_ordering("events")
            .build();

        assert_eq!(check.constraints().len(), 4);
    }
}