Skip to main content

fraiseql_db/filters/
validators.rs

1//! Validation framework for extended operator parameters.
2//!
3//! This module provides reusable validators that can be configured via TOML
4//! at compile time. Validators are applied before SQL generation to ensure
5//! parameters are valid before executing queries.
6//!
7//! # Design
8//!
9//! Validators are expressed as rules in fraiseql.toml:
10//!
11//! ```toml
12//! [fraiseql.validation]
13//! email_domain_eq = { pattern = "^[a-z0-9]..." }
14//! vin_wmi_eq = { length = 3, pattern = "^[A-Z0-9]{3}$" }
15//! iban_country_eq = { checksum = "mod97" }
16//! ```
17//!
18//! Rules are compiled into schema.compiled.json and applied at runtime.
19
20use fraiseql_error::{FraiseQLError, Result};
21use regex::{Regex, RegexBuilder};
22use serde_json::Value;
23
/// Maximum byte length for a validation regex pattern.
///
/// [`compile_pattern`] compares `pattern.len()` (UTF-8 bytes, not characters)
/// against this limit and rejects longer patterns before attempting to compile
/// them. This is a defence-in-depth measure against expensive or abusive
/// patterns (ReDoS), applied on top of the size cap that [`compile_pattern`]
/// sets through `RegexBuilder::size_limit`.
const MAX_PATTERN_BYTES: usize = 1024;
30
31/// Compile a regex pattern with ReDoS defence-in-depth guards.
32///
33/// # Guards
34/// 1. Rejects patterns longer than [`MAX_PATTERN_BYTES`] before compilation.
35/// 2. Caps the DFA state-machine size at 1 MiB via [`RegexBuilder::size_limit`]. Patterns that
36///    require a larger DFA are rejected rather than executed.
37///
38/// # Errors
39///
40/// Returns [`FraiseQLError::Validation`] if the pattern is too long, contains
41/// invalid syntax, or exceeds the DFA size limit.
42fn compile_pattern(pattern: &str) -> Result<Regex> {
43    if pattern.len() > MAX_PATTERN_BYTES {
44        return Err(FraiseQLError::validation(format!(
45            "Validation pattern too long ({} bytes, max {MAX_PATTERN_BYTES})",
46            pattern.len()
47        )));
48    }
49    RegexBuilder::new(pattern)
50        .size_limit(1 << 20) // 1 MiB DFA cap
51        .build()
52        .map_err(|e| {
53            FraiseQLError::validation(format!("Invalid validation pattern '{pattern}': {e}"))
54        })
55}
56
/// Validation rule for an operator parameter.
///
/// A rule is checked against a string value with [`ValidationRule::validate`] and is
/// usually built from compiled configuration with [`ValidationRule::from_json`].
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ValidationRule {
    /// Pattern matching (pre-compiled regex); the value passes when `Regex::is_match`
    /// returns `true` for it.
    Pattern(Regex),
    /// Exact length, compared against `str::len` (i.e. measured in UTF-8 bytes).
    Length(usize),
    /// Min and max length, compared against `str::len` (i.e. measured in UTF-8 bytes).
    LengthRange {
        /// Minimum allowed length (inclusive).
        min: usize,
        /// Maximum allowed length (inclusive).
        max: usize,
    },
    /// Checksum algorithm the value must satisfy.
    Checksum(ChecksumType),
    /// Range of numeric values; the value is parsed as an `f64` before comparison.
    NumericRange {
        /// Minimum allowed numeric value (inclusive).
        min: f64,
        /// Maximum allowed numeric value (inclusive).
        max: f64,
    },
    /// Value must be exactly equal (case-sensitive) to one of these options.
    Enum(Vec<String>),
    /// Composite rule (all must pass); rules are checked in order and the first
    /// failure is reported.
    All(Vec<ValidationRule>),
}
86
/// Supported checksum algorithms.
///
/// Selected in configuration through the `checksum` key (`"mod97"` or `"luhn"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ChecksumType {
    /// IBAN MOD-97 checksum
    Mod97,
    /// Luhn (mod 10) algorithm, e.g. payment card numbers
    Luhn,
}
96
97impl ValidationRule {
98    /// Validate a string value against this rule.
99    ///
100    /// # Errors
101    ///
102    /// Returns [`FraiseQLError::Validation`] if the value fails the validation rule,
103    /// or if the pattern is an invalid regex.
104    pub fn validate(&self, value: &str) -> Result<()> {
105        match self {
106            ValidationRule::Pattern(re) => {
107                if !re.is_match(value) {
108                    return Err(FraiseQLError::validation(format!(
109                        "Value '{}' does not match pattern '{}'",
110                        value,
111                        re.as_str()
112                    )));
113                }
114                Ok(())
115            },
116
117            ValidationRule::Length(expected) => {
118                if value.len() != *expected {
119                    return Err(FraiseQLError::validation(format!(
120                        "Value '{}' has length {}, expected {}",
121                        value,
122                        value.len(),
123                        expected
124                    )));
125                }
126                Ok(())
127            },
128
129            ValidationRule::LengthRange { min, max } => {
130                let len = value.len();
131                if len < *min || len > *max {
132                    return Err(FraiseQLError::validation(format!(
133                        "Value '{}' has length {}, expected between {} and {}",
134                        value, len, min, max
135                    )));
136                }
137                Ok(())
138            },
139
140            ValidationRule::Checksum(checksum_type) => {
141                match checksum_type {
142                    ChecksumType::Mod97 => validate_mod97(value)?,
143                    ChecksumType::Luhn => validate_luhn(value)?,
144                }
145                Ok(())
146            },
147
148            ValidationRule::NumericRange { min, max } => {
149                let num: f64 = value.parse().map_err(|_| {
150                    FraiseQLError::validation(format!("Value '{}' is not a valid number", value))
151                })?;
152
153                if num < *min || num > *max {
154                    return Err(FraiseQLError::validation(format!(
155                        "Value {} is outside range [{}, {}]",
156                        num, min, max
157                    )));
158                }
159                Ok(())
160            },
161
162            ValidationRule::Enum(options) => {
163                if !options.contains(&value.to_string()) {
164                    return Err(FraiseQLError::validation(format!(
165                        "Value '{}' must be one of: {}",
166                        value,
167                        options.join(", ")
168                    )));
169                }
170                Ok(())
171            },
172
173            ValidationRule::All(rules) => {
174                for rule in rules {
175                    rule.validate(value)?;
176                }
177                Ok(())
178            },
179        }
180    }
181
182    /// Parse validation rules from JSON (compiled from TOML).
183    ///
184    /// # Errors
185    ///
186    /// Returns [`FraiseQLError::Validation`] if the JSON structure does not match any
187    /// known validation rule format.
188    ///
189    /// # Panics
190    ///
191    /// Cannot panic: the internal `.expect("len checked == 1")` is only reached
192    /// after verifying `rules.len() == 1`.
193    pub fn from_json(value: &Value) -> Result<Self> {
194        match value {
195            Value::String(s) => {
196                // Simple case: just a pattern — compile at parse time
197                let re = compile_pattern(s)?;
198                Ok(ValidationRule::Pattern(re))
199            },
200
201            Value::Object(map) => {
202                let mut rules = Vec::new();
203
204                // Pattern rule — compile at parse time
205                if let Some(Value::String(pattern)) = map.get("pattern") {
206                    rules.push(ValidationRule::Pattern(compile_pattern(pattern)?));
207                }
208
209                // Length rule
210                if let Some(Value::Number(n)) = map.get("length") {
211                    if let Some(length) = n.as_u64() {
212                        #[allow(clippy::cast_possible_truncation)]
213                        // Reason: value is bounded; truncation cannot occur in practice
214                        let length_usize = usize::try_from(length).unwrap_or(usize::MAX);
215                        rules.push(ValidationRule::Length(length_usize));
216                    }
217                }
218
219                // Length range rule
220                if let (Some(Value::Number(min)), Some(Value::Number(max))) =
221                    (map.get("min_length"), map.get("max_length"))
222                {
223                    if let (Some(min_val), Some(max_val)) = (min.as_u64(), max.as_u64()) {
224                        #[allow(clippy::cast_possible_truncation)]
225                        // Reason: value is bounded; truncation cannot occur in practice
226                        let (min, max) = (
227                            usize::try_from(min_val).unwrap_or(usize::MAX),
228                            usize::try_from(max_val).unwrap_or(usize::MAX),
229                        );
230                        rules.push(ValidationRule::LengthRange { min, max });
231                    }
232                }
233
234                // Checksum rule
235                if let Some(Value::String(checksum)) = map.get("checksum") {
236                    let checksum_type = match checksum.as_str() {
237                        "mod97" => ChecksumType::Mod97,
238                        "luhn" => ChecksumType::Luhn,
239                        _ => {
240                            return Err(FraiseQLError::validation(format!(
241                                "Unknown checksum type: {}",
242                                checksum
243                            )));
244                        },
245                    };
246                    rules.push(ValidationRule::Checksum(checksum_type));
247                }
248
249                // Enum rule
250                if let Some(Value::Array(options)) = map.get("enum") {
251                    let enum_values: Vec<String> =
252                        options.iter().filter_map(|v| v.as_str().map(|s| s.to_string())).collect();
253
254                    if !enum_values.is_empty() {
255                        rules.push(ValidationRule::Enum(enum_values));
256                    }
257                }
258
259                // Numeric range rule
260                if let (Some(Value::Number(min)), Some(Value::Number(max))) =
261                    (map.get("min"), map.get("max"))
262                {
263                    if let (Some(min_val), Some(max_val)) = (min.as_f64(), max.as_f64()) {
264                        rules.push(ValidationRule::NumericRange {
265                            min: min_val,
266                            max: max_val,
267                        });
268                    }
269                }
270
271                if rules.is_empty() {
272                    return Err(FraiseQLError::validation(
273                        "No valid validation rules found".to_string(),
274                    ));
275                }
276
277                if rules.len() == 1 {
278                    Ok(rules.into_iter().next().expect("len checked == 1"))
279                } else {
280                    Ok(ValidationRule::All(rules))
281                }
282            },
283
284            _ => Err(FraiseQLError::validation(
285                "Validation rule must be string or object".to_string(),
286            )),
287        }
288    }
289}
290
291/// MOD-97 checksum validation for IBAN and similar formats.
292fn validate_mod97(value: &str) -> Result<()> {
293    // Move country code (first 4 chars) to end
294    if value.len() < 4 {
295        return Err(FraiseQLError::validation("IBAN must be at least 4 characters".to_string()));
296    }
297
298    let rearranged = format!("{}{}", &value[4..], &value[..4]);
299
300    // Convert letters to numbers (A=10, B=11, ..., Z=35)
301    let numeric_string: String = rearranged
302        .chars()
303        .map(|c| {
304            if c.is_ascii_digit() {
305                c.to_string()
306            } else {
307                ((c.to_ascii_uppercase() as u32 - 'A' as u32) + 10).to_string()
308            }
309        })
310        .collect();
311
312    // Compute MOD 97
313    let mut remainder: u64 = 0;
314    for digit_char in numeric_string.chars() {
315        if let Some(digit) = digit_char.to_digit(10) {
316            remainder = (remainder * 10 + u64::from(digit)) % 97;
317        }
318    }
319
320    if remainder == 1 {
321        Ok(())
322    } else {
323        Err(FraiseQLError::validation("Invalid IBAN checksum".to_string()))
324    }
325}
326
327/// Luhn algorithm checksum validation (used for VINs, credit cards, etc.).
328fn validate_luhn(value: &str) -> Result<()> {
329    let digits: Vec<u32> = value.chars().filter_map(|c| c.to_digit(10)).collect();
330
331    if digits.is_empty() {
332        return Err(FraiseQLError::validation("Value must contain at least one digit".to_string()));
333    }
334
335    let mut sum = 0u32;
336    let mut is_even = false;
337
338    for digit in digits.iter().rev() {
339        let mut n = *digit;
340        if is_even {
341            n *= 2;
342            if n > 9 {
343                n -= 9;
344            }
345        }
346        sum += n;
347        is_even = !is_even;
348    }
349
350    if sum.is_multiple_of(10) {
351        Ok(())
352    } else {
353        Err(FraiseQLError::validation("Invalid Luhn checksum".to_string()))
354    }
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Fails the calling test, reporting `context` and the error, when `outcome` is an `Err`.
    #[track_caller]
    fn assert_valid(outcome: Result<()>, context: &str) {
        if let Err(e) = outcome {
            panic!("expected Ok for {context}: {e}");
        }
    }

    /// Fails the calling test, reporting `context`, when `outcome` is `Ok`.
    #[track_caller]
    fn assert_invalid(outcome: Result<()>, context: &str) {
        assert!(outcome.is_err(), "expected Err for {context}");
    }

    #[test]
    fn test_pattern_validation() {
        let lowercase_only = ValidationRule::Pattern(Regex::new("^[a-z]+$").expect("valid regex"));
        assert_valid(lowercase_only.validate("hello"), "'hello'");
        assert_invalid(lowercase_only.validate("Hello"), "'Hello' (uppercase)");
    }

    #[test]
    fn test_length_validation() {
        let exactly_three = ValidationRule::Length(3);
        assert_valid(exactly_three.validate("abc"), "len=3 string");
        assert_invalid(exactly_three.validate("ab"), "len=2 string");
        assert_invalid(exactly_three.validate("abcd"), "len=4 string");
    }

    #[test]
    fn test_mod97_valid() {
        // Valid IBAN: GB82 WEST 1234 5698 7654 32
        assert_valid(validate_mod97("GB82WEST12345698765432"), "valid IBAN");
    }

    #[test]
    fn test_luhn_valid() {
        // Valid credit card number
        assert_valid(validate_luhn("4532015112830366"), "valid Luhn number");
    }

    #[test]
    fn test_enum_validation() {
        let countries = ValidationRule::Enum(vec!["US".to_string(), "CA".to_string()]);
        assert_valid(countries.validate("US"), "'US'");
        assert_invalid(countries.validate("UK"), "'UK' (not in enum)");
    }

    #[test]
    fn test_numeric_range_validation() {
        let latitude = ValidationRule::NumericRange {
            min: 0.0,
            max: 90.0,
        };
        assert_valid(latitude.validate("45.5"), "45.5");
        assert_invalid(latitude.validate("91"), "91 (out of range)");
    }
}
408}