term-guard 0.0.2

A Rust data validation library providing Deequ-like capabilities without Spark dependencies
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
//! Error types for the Term data validation library.
//!
//! This module provides a comprehensive error handling strategy using `thiserror`
//! for automatic error trait implementations. All errors in the Term library
//! are represented by the `TermError` enum.

use thiserror::Error;

/// The main error type for the Term library.
///
/// This enum represents all possible errors that can occur during
/// data validation operations. `Display` and `std::error::Error` are
/// derived through `thiserror`; the `#[error("...")]` attribute on each
/// variant is the template for its display message.
///
/// The `DataFusion`, `Arrow` and `Io` variants are marked `#[from]`, so the
/// corresponding upstream error types convert into `TermError` automatically
/// (for example when propagated with `?`).
#[derive(Error, Debug)]
pub enum TermError {
    /// Error that occurs when a validation check fails.
    ///
    /// Only `message` appears in the display text; `check` is carried as
    /// structured data for callers that inspect the variant.
    #[error("Validation failed: {message}")]
    ValidationFailed {
        /// Human-readable error message
        message: String,
        /// Name of the check that failed
        check: String,
        /// Optional underlying error that caused the validation failure
        #[source]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },

    /// Error that occurs when a constraint evaluation fails.
    ///
    /// Both the constraint name and the message appear in the display text.
    #[error("Constraint evaluation failed for '{constraint}': {message}")]
    ConstraintEvaluation {
        /// Name of the constraint that failed
        constraint: String,
        /// Detailed error message
        message: String,
    },

    /// Error from DataFusion operations.
    ///
    /// Created automatically from `DataFusionError` via `#[from]`.
    #[error("DataFusion error: {0}")]
    DataFusion(#[from] datafusion::error::DataFusionError),

    /// Error from Arrow operations.
    ///
    /// Created automatically from `ArrowError` via `#[from]`.
    #[error("Arrow error: {0}")]
    Arrow(#[from] arrow::error::ArrowError),

    /// Error from data source operations.
    ///
    /// Only `message` appears in the display text; `source_type` is carried
    /// as structured data.
    #[error("Data source error: {message}")]
    DataSource {
        /// Type of data source (e.g., "CSV", "Parquet", "Database")
        source_type: String,
        /// Detailed error message
        message: String,
        /// Optional underlying error
        #[source]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },

    /// Error from I/O operations.
    ///
    /// Created automatically from `std::io::Error` via `#[from]`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Error when parsing or processing data.
    ///
    /// The payload is the message shown after `Parse error: `.
    #[error("Parse error: {0}")]
    Parse(String),

    /// Error related to configuration.
    ///
    /// The payload is the message shown after `Configuration error: `.
    #[error("Configuration error: {0}")]
    Configuration(String),

    /// Error from serialization/deserialization operations.
    ///
    /// The payload is the message shown after `Serialization error: `.
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// Error from OpenTelemetry operations.
    ///
    /// The payload is the message shown after `OpenTelemetry error: `.
    #[error("OpenTelemetry error: {0}")]
    OpenTelemetry(String),

    /// Error when a required column is not found in the dataset.
    ///
    /// `column` is the name of the missing column; it is quoted in the
    /// display text.
    #[error("Column '{column}' not found in dataset")]
    ColumnNotFound { column: String },

    /// Error when data types don't match expected types.
    ///
    /// `expected` and `found` are textual descriptions of the two types and
    /// are both included in the display text.
    #[error("Type mismatch: expected {expected}, found {found}")]
    TypeMismatch { expected: String, found: String },

    /// Error when an operation is not supported.
    ///
    /// The payload is the message shown after `Operation not supported: `.
    #[error("Operation not supported: {0}")]
    NotSupported(String),

    /// Generic internal error for unexpected conditions.
    ///
    /// The payload is the message shown after `Internal error: `.
    #[error("Internal error: {0}")]
    Internal(String),

    /// Security-related error.
    ///
    /// The payload is the message shown after `Security error: `.
    #[error("Security error: {0}")]
    SecurityError(String),

    /// Error from repository operations.
    ///
    /// The display text includes `operation`, `repository_type` and `message`.
    #[error("Repository error ({operation} on {repository_type}): {message}")]
    Repository {
        /// Type of repository (e.g., "in_memory", "filesystem", "s3")
        repository_type: String,
        /// Operation that failed (e.g., "save", "load", "delete", "query")
        operation: String,
        /// Detailed error message
        message: String,
        /// Optional underlying error
        #[source]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },

    /// Error when a repository key is invalid or malformed.
    ///
    /// Only `message` appears in the display text; `key` is carried as
    /// structured data.
    #[error("Invalid repository key: {message}")]
    InvalidRepositoryKey {
        /// The invalid key that caused the error
        key: String,
        /// Detailed error message explaining why the key is invalid
        message: String,
    },

    /// Error when repository query parameters are invalid.
    ///
    /// Only `message` appears in the display text; `query_info` is carried
    /// as structured data.
    #[error("Invalid repository query: {message}")]
    InvalidRepositoryQuery {
        /// Detailed error message describing the invalid query
        message: String,
        /// The query parameters that caused the error
        query_info: String,
    },

    /// Error when a repository key collision is detected.
    ///
    /// Only `message` appears in the display text; `key` is carried as
    /// structured data.
    #[error("Repository key collision detected: {message}")]
    RepositoryKeyCollision {
        /// The key that caused the collision
        key: String,
        /// Detailed error message
        message: String,
    },

    /// Error when repository validation fails.
    ///
    /// Only `message` appears in the display text; `field` and
    /// `invalid_value` are carried as structured data.
    #[error("Repository validation error: {message}")]
    RepositoryValidation {
        /// The field or component that failed validation
        field: String,
        /// Detailed error message
        message: String,
        /// The invalid value that caused the error
        invalid_value: String,
    },
}

/// A type alias for `Result<T, TermError>`.
///
/// This is the standard `Result` type used throughout the Term library.
/// Only the error type is fixed; the success type `T` stays generic.
/// Importing this alias shadows the prelude `Result` in the importing scope,
/// so spell out `std::result::Result` where a different error type is needed.
///
/// # Examples
///
/// ```rust,ignore
/// use term_guard::error::Result;
///
/// fn validate_data() -> Result<()> {
///     // validation logic here
///     Ok(())
/// }
/// ```
pub type Result<T> = std::result::Result<T, TermError>;

impl TermError {
    /// Creates a new validation failed error with the given message and check name.
    pub fn validation_failed(check: impl Into<String>, message: impl Into<String>) -> Self {
        Self::ValidationFailed {
            message: message.into(),
            check: check.into(),
            source: None,
        }
    }

    /// Creates a new validation failed error with a source error.
    pub fn validation_failed_with_source(
        check: impl Into<String>,
        message: impl Into<String>,
        source: Box<dyn std::error::Error + Send + Sync>,
    ) -> Self {
        Self::ValidationFailed {
            message: message.into(),
            check: check.into(),
            source: Some(source),
        }
    }

    /// Creates a new data source error.
    pub fn data_source(source_type: impl Into<String>, message: impl Into<String>) -> Self {
        Self::DataSource {
            source_type: source_type.into(),
            message: message.into(),
            source: None,
        }
    }

    /// Creates a new data source error with a source error.
    pub fn data_source_with_source(
        source_type: impl Into<String>,
        message: impl Into<String>,
        source: Box<dyn std::error::Error + Send + Sync>,
    ) -> Self {
        Self::DataSource {
            source_type: source_type.into(),
            message: message.into(),
            source: Some(source),
        }
    }

    /// Creates a new repository error.
    pub fn repository(
        repository_type: impl Into<String>,
        operation: impl Into<String>,
        message: impl Into<String>,
    ) -> Self {
        Self::Repository {
            repository_type: repository_type.into(),
            operation: operation.into(),
            message: message.into(),
            source: None,
        }
    }

    /// Creates a new repository error with a source error.
    pub fn repository_with_source(
        repository_type: impl Into<String>,
        operation: impl Into<String>,
        message: impl Into<String>,
        source: Box<dyn std::error::Error + Send + Sync>,
    ) -> Self {
        Self::Repository {
            repository_type: repository_type.into(),
            operation: operation.into(),
            message: message.into(),
            source: Some(source),
        }
    }

    /// Creates a new invalid repository key error.
    pub fn invalid_repository_key(key: impl Into<String>, message: impl Into<String>) -> Self {
        Self::InvalidRepositoryKey {
            key: key.into(),
            message: message.into(),
        }
    }

    /// Creates a new invalid repository query error.
    pub fn invalid_repository_query(
        message: impl Into<String>,
        query_info: impl Into<String>,
    ) -> Self {
        Self::InvalidRepositoryQuery {
            message: message.into(),
            query_info: query_info.into(),
        }
    }

    /// Creates a new repository key collision error.
    pub fn repository_key_collision(key: impl Into<String>, message: impl Into<String>) -> Self {
        Self::RepositoryKeyCollision {
            key: key.into(),
            message: message.into(),
        }
    }

    /// Creates a new repository validation error.
    pub fn repository_validation(
        field: impl Into<String>,
        message: impl Into<String>,
        invalid_value: impl Into<String>,
    ) -> Self {
        Self::RepositoryValidation {
            field: field.into(),
            message: message.into(),
            invalid_value: invalid_value.into(),
        }
    }

    /// Creates a new constraint evaluation error.
    pub fn constraint_evaluation(
        constraint: impl Into<String>,
        message: impl Into<String>,
    ) -> Self {
        Self::ConstraintEvaluation {
            constraint: constraint.into(),
            message: message.into(),
        }
    }
}

/// Maps every `AnalyzerError` variant onto a `TermError` variant.
impl From<crate::analyzers::AnalyzerError> for TermError {
    fn from(err: crate::analyzers::AnalyzerError) -> Self {
        use crate::analyzers::AnalyzerError as Analyzer;

        match err {
            // Upstream engine errors are moved across unchanged.
            Analyzer::QueryExecution(inner) => Self::DataFusion(inner),
            Analyzer::ArrowComputation(inner) => Self::Arrow(inner),

            // Variants whose payload is passed through as the message.
            Analyzer::InvalidConfiguration(text) => Self::Configuration(text),
            Analyzer::InvalidData(text) => Self::Parse(text),
            Analyzer::Serialization(text) => Self::Serialization(text),
            Analyzer::Custom(text) => Self::Internal(text),

            // The remaining variants become internal errors with a descriptive prefix.
            Analyzer::StateComputation(text) => {
                Self::Internal(format!("Analyzer state computation failed: {text}"))
            }
            Analyzer::MetricComputation(text) => {
                Self::Internal(format!("Analyzer metric computation failed: {text}"))
            }
            Analyzer::StateMerge(text) => {
                Self::Internal(format!("Analyzer state merge failed: {text}"))
            }
            Analyzer::NoData => Self::Internal(String::from("No data available for analysis")),
        }
    }
}

/// Extension trait for adding context to errors.
///
/// This module implements the trait for every `std::result::Result<T, E>`
/// whose error type satisfies `E: Into<TermError>`, which covers results that
/// already carry a `TermError` as well as results with convertible errors.
pub trait ErrorContext<T> {
    /// Adds context to an error.
    ///
    /// `msg` is the context text. In the implementation provided by this
    /// module, an `Err` is turned into `TermError::Internal` whose message is
    /// `msg`, then `": "`, then the original error's text (the inner message
    /// for `TermError::Internal`, the `Display` output for any other variant).
    /// An `Ok` value is returned unchanged.
    fn context(self, msg: &str) -> Result<T>;

    /// Adds context with a lazy message.
    ///
    /// Behaves like `context`, except that the message is produced by `f`.
    /// The implementation provided by this module only calls `f` when the
    /// result is an `Err`.
    fn with_context<F>(self, f: F) -> Result<T>
    where
        F: FnOnce() -> String;
}

impl<T, E> ErrorContext<T> for std::result::Result<T, E>
where
    E: Into<TermError>,
{
    /// Prefixes the error, if any, with `msg` and reports it as `TermError::Internal`.
    fn context(self, msg: &str) -> Result<T> {
        self.map_err(|cause| attach_context(msg, cause.into()))
    }

    /// Like `context`, but the prefix is produced by `f`, which runs only on the error path.
    fn with_context<F>(self, f: F) -> Result<T>
    where
        F: FnOnce() -> String,
    {
        self.map_err(|cause| {
            // Build the message first, then convert the error.
            let prefix = f();
            attach_context(&prefix, cause.into())
        })
    }
}

/// Wraps `err` in `TermError::Internal`, prefixing its text with `prefix` and `": "`.
fn attach_context(prefix: &str, err: TermError) -> TermError {
    let detail = match err {
        // Reuse the bare message so the "Internal error: " display prefix is not embedded.
        TermError::Internal(inner) => inner,
        other => other.to_string(),
    };
    TermError::Internal(format!("{prefix}: {detail}"))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;

    /// The display text of a validation failure contains only the message.
    #[test]
    fn test_validation_failed_error() {
        let rendered =
            TermError::validation_failed("completeness_check", "Too many null values").to_string();
        assert_eq!(rendered, "Validation failed: Too many null values");
    }

    /// A supplied cause must be reachable through `Error::source`.
    #[test]
    fn test_error_with_source() {
        let io_cause = std::io::Error::new(std::io::ErrorKind::NotFound, "File not found");
        let wrapped = TermError::validation_failed_with_source(
            "file_check",
            "Could not read validation file",
            Box::new(io_cause),
        );

        assert!(wrapped.source().is_some());
    }

    /// The display text of a data source error contains only the message.
    #[test]
    fn test_data_source_error() {
        let rendered = TermError::data_source("CSV", "Invalid file format").to_string();
        assert_eq!(rendered, "Data source error: Invalid file format");
    }

    /// The missing column name is quoted in the display text.
    #[test]
    fn test_column_not_found() {
        let column = String::from("user_id");
        let rendered = TermError::ColumnNotFound { column }.to_string();
        assert_eq!(rendered, "Column 'user_id' not found in dataset");
    }

    /// Both the expected and the found type appear in the display text.
    #[test]
    fn test_type_mismatch() {
        let expected = String::from("Int64");
        let found = String::from("Utf8");
        let rendered = TermError::TypeMismatch { expected, found }.to_string();
        assert_eq!(rendered, "Type mismatch: expected Int64, found Utf8");
    }

    /// `context` keeps the result an error and puts the context text into its message.
    #[test]
    fn test_error_context() {
        fn failing_operation() -> Result<()> {
            Err(TermError::Internal("Something went wrong".to_string()))
        }

        let outcome = failing_operation().context("During data validation");
        assert!(outcome.is_err());
        let rendered = outcome.unwrap_err().to_string();
        assert!(rendered.contains("During data validation"));
    }
}