Skip to main content

fastapi_core/
validation.rs

1//! Validation helper functions for the `#[derive(Validate)]` macro.
2//!
3//! These functions provide runtime validation for common constraints like
4//! email format, URL format, and regex pattern matching.
5
6use crate::error::ValidationErrors;
7
8/// Trait for types that can be validated.
9///
10/// Types implementing this trait can have their values checked against
11/// defined constraints, returning validation errors if any constraints
12/// are violated.
13///
14/// # Deriving
15///
16/// Use the `#[derive(Validate)]` macro from `fastapi_macros` to automatically
17/// implement this trait based on field attributes.
18///
19/// # Example
20///
21/// ```ignore
22/// use fastapi_macros::Validate;
23/// use fastapi_core::validation::Validate;
24///
25/// #[derive(Validate)]
26/// struct CreateUser {
27///     #[validate(email)]
28///     email: String,
29///     #[validate(length(min = 3, max = 50))]
30///     username: String,
31/// }
32///
33/// let user = CreateUser {
34///     email: "test@example.com".to_string(),
35///     username: "testuser".to_string(),
36/// };
37///
38/// assert!(user.validate().is_ok());
39/// ```
40pub trait Validate {
41    /// Validate this value against all defined constraints.
42    ///
43    /// # Errors
44    ///
45    /// Returns `ValidationErrors` if any constraints are violated.
46    fn validate(&self) -> Result<(), Box<ValidationErrors>>;
47}
48
/// Check if a string is a valid email address.
///
/// This is a simple but practical check rather than a full RFC 5322 parser:
///
/// - the value must contain exactly one `@`;
/// - the local part must consist of non-empty, dot-separated atoms (so no
///   leading, trailing, or consecutive dots) made of alphanumerics and the
///   characters ``!#$%&'*+/=?^_`{|}~-``;
/// - the domain must contain at least one dot, and every dot-separated label
///   must be non-empty, made of alphanumerics and `-`, and must not start or
///   end with `-`.
///
/// # Examples
///
/// ```
/// use fastapi_core::validation::is_valid_email;
///
/// assert!(is_valid_email("user@example.com"));
/// assert!(is_valid_email("user.name+tag@sub.domain.org"));
/// assert!(!is_valid_email("invalid"));
/// assert!(!is_valid_email("@domain.com"));
/// assert!(!is_valid_email("user@"));
/// assert!(!is_valid_email("user..name@example.com"));
/// ```
#[must_use]
pub fn is_valid_email(value: &str) -> bool {
    // Punctuation allowed inside a local-part atom ('.' only separates atoms).
    const LOCAL_SPECIALS: &str = "!#$%&'*+/=?^_`{|}~-";

    // Exactly one '@': split at the first and make sure no other one remains.
    let Some((local, domain)) = value.split_once('@') else {
        return false;
    };
    if domain.contains('@') {
        return false;
    }

    // Requiring every atom to be non-empty rejects an empty local part as
    // well as leading, trailing, and consecutive dots in one pass.
    let local_ok = local.split('.').all(|atom| {
        !atom.is_empty()
            && atom
                .chars()
                .all(|c| c.is_alphanumeric() || LOCAL_SPECIALS.contains(c))
    });
    if !local_ok {
        return false;
    }

    // A bare host name such as "localhost" is not accepted as a mail domain.
    if !domain.contains('.') {
        return false;
    }

    // Every label must be non-empty (no leading/trailing/consecutive dots),
    // contain only alphanumerics or '-', and not begin or end with '-'.
    domain.split('.').all(|label| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label.chars().all(|c| c.is_alphanumeric() || c == '-')
    })
}
115
/// Check if a string is a valid URL.
///
/// Only `http://` and `https://` URLs are accepted. The authority (everything
/// up to the first `/`, `?`, or `#`) is split into a host and an optional
/// port:
///
/// - the host must be `localhost` or contain at least one dot;
/// - every dot-separated host label must be non-empty, consist of
///   alphanumerics and `-`, and must not start or end with `-` (a single
///   trailing root dot, as in `example.com.`, is tolerated);
/// - the port, when present, must consist solely of ASCII digits.
///
/// The path, query, and fragment are not inspected. User-info
/// (`user@host`) and bracketed IPv6 hosts are not supported and are rejected.
///
/// # Examples
///
/// ```
/// use fastapi_core::validation::is_valid_url;
///
/// assert!(is_valid_url("https://example.com"));
/// assert!(is_valid_url("http://sub.domain.org/path?query=value"));
/// assert!(!is_valid_url("not-a-url"));
/// assert!(!is_valid_url("ftp://example.com")); // Only http/https
/// assert!(!is_valid_url("http://.com")); // Empty host label
/// ```
#[must_use]
pub fn is_valid_url(value: &str) -> bool {
    // Must start with http:// or https://
    let Some(rest) = value
        .strip_prefix("https://")
        .or_else(|| value.strip_prefix("http://"))
    else {
        return false;
    };

    // The authority ends at whichever of '/', '?', '#' comes first.
    let authority = rest.split(['/', '?', '#']).next().unwrap_or("");

    // Separate an optional ":port" suffix from the host.
    let (host, port) = match authority.split_once(':') {
        Some((host, port)) => (host, Some(port)),
        None => (authority, None),
    };

    // Anything after ':' must be numeric; this also rejects a second ':'.
    if let Some(port) = port {
        if !port.chars().all(|c| c.is_ascii_digit()) {
            return false;
        }
    }

    if host.is_empty() {
        return false;
    }

    // Must have at least one dot (or be localhost)
    if host != "localhost" && !host.contains('.') {
        return false;
    }

    // Tolerate one trailing root dot, then require well-formed labels so that
    // inputs like "http://." or "http://a..b" are no longer accepted.
    let labels = host.strip_suffix('.').unwrap_or(host);
    labels.split('.').all(|label| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label.chars().all(|c| c.is_alphanumeric() || c == '-')
    })
}
181
182/// Check if a string matches a regex pattern.
183///
184/// This function compiles the regex on each call, which is fine for
185/// validation but may not be ideal for hot paths. Consider caching
186/// the compiled regex if validating many values.
187///
188/// # Examples
189///
190/// ```
191/// use fastapi_core::validation::matches_pattern;
192///
193/// // Simple exact match patterns (no regex features)
194/// assert!(matches_pattern("hello", r"^hello$"));
195/// assert!(!matches_pattern("world", r"^hello$"));
196/// assert!(matches_pattern("anything", "")); // Empty pattern matches all
197/// ```
198#[must_use]
199pub fn matches_pattern(value: &str, pattern: &str) -> bool {
200    // Pattern matching without pulling in a full regex engine.
201    //
202    // This intentionally supports a small, practical subset used by our derive
203    // tests and common validation cases:
204    // - anchors: ^ and $
205    // - literals and '.' wildcard
206    // - escapes: \\d (digit)
207    // - character classes: [a-z0-9-] with ranges and literals
208    // - quantifiers: +, *, ?, and {n}
209    //
210    // Anything outside this subset returns false (no match).
211
212    // Handle common simple patterns.
213    if pattern.is_empty() {
214        return true;
215    }
216
217    // Fast path: exact match patterns.
218    if pattern.starts_with('^') && pattern.ends_with('$') {
219        let inner = &pattern[1..pattern.len() - 1];
220        // If no special chars, it's an exact match
221        if !inner.contains(['[', ']', '*', '+', '?', '\\', '(', ')', '|', '.']) {
222            return value == inner;
223        }
224    }
225
226    let Ok(compiled) = SimpleRegex::compile(pattern) else {
227        return false;
228    };
229
230    compiled.is_match(value)
231}
232
/// Check if a string is a "reasonably formatted" phone number.
///
/// This is not a full E.164 validator. It is tuned to catch obvious bad input
/// while allowing common human formats (spaces, parens, hyphens, dots).
///
/// After trimming surrounding whitespace the value must:
///
/// - be non-empty and contain only ASCII digits, `+`, spaces, `-`, `.`,
///   `(` and `)`;
/// - use `+` only as the very first character;
/// - not start or end with `-`, `.`, or a space;
/// - not contain two directly adjacent `-`/`.` separators;
/// - have balanced parentheses, each `)` preceded by at least one digit
///   since the most recent `(`;
/// - contain at least 10 digits.
///
/// # Examples
///
/// ```
/// use fastapi_core::validation::is_valid_phone;
///
/// assert!(is_valid_phone("+1 (555) 123-4567"));
/// assert!(is_valid_phone("555.123.4567"));
/// assert!(!is_valid_phone("555-1234")); // too few digits
/// assert!(!is_valid_phone("555--123-4567")); // doubled separator
/// ```
#[must_use]
pub fn is_valid_phone(value: &str) -> bool {
    let trimmed = value.trim();

    let (Some(head), Some(tail)) = (trimmed.chars().next(), trimmed.chars().next_back()) else {
        // Nothing left after trimming.
        return false;
    };

    // A '+' anywhere but the first position is never valid.
    if trimmed.chars().skip(1).any(|c| c == '+') {
        return false;
    }

    // Separators may not open or close the number.
    let is_edge_separator = |c: char| matches!(c, '-' | '.' | ' ');
    if is_edge_separator(head) || is_edge_separator(tail) {
        return false;
    }

    let mut total_digits = 0usize;
    let mut depth = 0usize; // currently open '(' count
    let mut group_digits = 0usize; // digits seen since the latest '('
    let mut after_punct = false; // previous char was '-' or '.'

    for ch in trimmed.chars() {
        let is_punct = matches!(ch, '-' | '.');

        // "--", "..", "-." and ".-" are all rejected; a space in between
        // resets the check.
        if is_punct && after_punct {
            return false;
        }

        match ch {
            '-' | '.' | ' ' | '+' => {}
            '(' => {
                depth += 1;
                group_digits = 0;
            }
            ')' => {
                // Unmatched ')' or a group without any digit, e.g. "()".
                if depth == 0 || group_digits == 0 {
                    return false;
                }
                depth -= 1;
            }
            d if d.is_ascii_digit() => {
                total_digits += 1;
                if depth > 0 {
                    group_digits += 1;
                }
            }
            // Letters or any other punctuation.
            _ => return false,
        }

        after_punct = is_punct;
    }

    // All groups closed, and the practical minimum of 10 digits reached.
    depth == 0 && total_digits >= 10
}
324
325#[derive(Debug, Clone)]
326struct SimpleRegex {
327    anchored_start: bool,
328    anchored_end: bool,
329    tokens: Vec<Token>,
330}
331
332#[derive(Debug, Clone)]
333struct Token {
334    atom: Atom,
335    min: usize,
336    max: Option<usize>, // None means unbounded
337}
338
339#[derive(Debug, Clone)]
340enum Atom {
341    Any,
342    Literal(char),
343    Digit,
344    CharClass(CharClass),
345}
346
347#[derive(Debug, Clone)]
348struct CharClass {
349    parts: Vec<CharClassPart>,
350}
351
/// One member of a character class.
#[derive(Clone, Debug)]
enum CharClassPart {
    /// A single character.
    Single(char),
    /// An inclusive range of characters, lower bound first.
    Range(char, char),
}
357
358impl CharClass {
359    fn matches(&self, c: char) -> bool {
360        for part in &self.parts {
361            match *part {
362                CharClassPart::Single(x) if c == x => return true,
363                CharClassPart::Range(a, b) if a <= c && c <= b => return true,
364                _ => {}
365            }
366        }
367        false
368    }
369}
370
371impl Atom {
372    fn matches(&self, c: char) -> bool {
373        match self {
374            Atom::Any => true,
375            Atom::Literal(x) => *x == c,
376            Atom::Digit => c.is_ascii_digit(),
377            Atom::CharClass(cc) => cc.matches(c),
378        }
379    }
380}
381
382impl SimpleRegex {
383    fn compile(pattern: &str) -> Result<Self, ()> {
384        let mut chars: Vec<char> = pattern.chars().collect();
385        let mut anchored_start = false;
386        let mut anchored_end = false;
387
388        if chars.first() == Some(&'^') {
389            anchored_start = true;
390            chars.remove(0);
391        }
392        if chars.last() == Some(&'$') {
393            anchored_end = true;
394            chars.pop();
395        }
396
397        let mut i = 0usize;
398        let mut tokens = Vec::<Token>::new();
399
400        while i < chars.len() {
401            let atom = match chars[i] {
402                '.' => {
403                    i += 1;
404                    Atom::Any
405                }
406                '\\' => {
407                    i += 1;
408                    if i >= chars.len() {
409                        return Err(());
410                    }
411                    let esc = chars[i];
412                    i += 1;
413                    match esc {
414                        'd' => Atom::Digit,
415                        other => Atom::Literal(other),
416                    }
417                }
418                '[' => {
419                    i += 1;
420                    let (cc, next) = parse_char_class(&chars, i)?;
421                    i = next;
422                    Atom::CharClass(cc)
423                }
424                c => {
425                    i += 1;
426                    Atom::Literal(c)
427                }
428            };
429
430            // Quantifier (optional).
431            let mut min = 1usize;
432            let mut max: Option<usize> = Some(1);
433
434            if i < chars.len() {
435                match chars[i] {
436                    '+' => {
437                        min = 1;
438                        max = None;
439                        i += 1;
440                    }
441                    '*' => {
442                        min = 0;
443                        max = None;
444                        i += 1;
445                    }
446                    '?' => {
447                        min = 0;
448                        max = Some(1);
449                        i += 1;
450                    }
451                    '{' => {
452                        i += 1;
453                        let (n, next) = parse_braced_number(&chars, i)?;
454                        i = next;
455                        min = n;
456                        max = Some(n);
457                    }
458                    _ => {}
459                }
460            }
461
462            tokens.push(Token { atom, min, max });
463        }
464
465        Ok(Self {
466            anchored_start,
467            anchored_end,
468            tokens,
469        })
470    }
471
472    fn is_match(&self, value: &str) -> bool {
473        let s: Vec<char> = value.chars().collect();
474
475        if self.anchored_start {
476            return self.is_match_at(&s, 0) && (!self.anchored_end || self.matches_end(&s));
477        }
478
479        // Unanchored: allow a match starting at any position.
480        for start in 0..=s.len() {
481            if self.is_match_at(&s, start) && (!self.anchored_end || self.matches_end(&s)) {
482                return true;
483            }
484        }
485        false
486    }
487
488    fn matches_end(&self, s: &[char]) -> bool {
489        // If anchored_end, we require a match that consumes to the end of the string.
490        // Our DP matcher returns true only for full consumption from the chosen start.
491        // So here we just return true and rely on is_match_at's end-check.
492        //
493        // This function exists to keep the calling logic clear if we later extend
494        // the matcher to support partial-consumption modes.
495        let _ = s;
496        true
497    }
498
499    fn is_match_at(&self, s: &[char], start: usize) -> bool {
500        use std::collections::HashMap;
501
502        fn dp(
503            tokens: &[Token],
504            s: &[char],
505            ti: usize,
506            si: usize,
507            memo: &mut HashMap<(usize, usize), bool>,
508        ) -> bool {
509            if let Some(&v) = memo.get(&(ti, si)) {
510                return v;
511            }
512
513            let ans = if ti == tokens.len() {
514                si == s.len()
515            } else {
516                let t = &tokens[ti];
517                let remaining = s.len().saturating_sub(si);
518                let max_rep = t.max.unwrap_or(remaining).min(remaining);
519
520                // Try all repetition counts in [min, max_rep].
521                let mut ok = false;
522                for rep in t.min..=max_rep {
523                    let mut good = true;
524                    for k in 0..rep {
525                        if !t.atom.matches(s[si + k]) {
526                            good = false;
527                            break;
528                        }
529                    }
530                    if good && dp(tokens, s, ti + 1, si + rep, memo) {
531                        ok = true;
532                        break;
533                    }
534                }
535                ok
536            };
537
538            memo.insert((ti, si), ans);
539            ans
540        }
541
542        // Match must consume to end of string if anchored_end is set; otherwise we accept
543        // consumption until token exhaustion (and ignore trailing chars) similar to a
544        // plain "find" match. Our tests always anchor with ^...$ so we keep strict
545        // behavior when anchored_end is true.
546        if self.anchored_end {
547            let mut memo = HashMap::new();
548            dp(&self.tokens, s, 0, start, &mut memo)
549        } else {
550            // Non-anchored end: accept any prefix match from start.
551            // Implement by checking dp and allowing trailing characters.
552            // We do this by running dp against all possible end positions.
553            for end in start..=s.len() {
554                let slice = &s[..end];
555                let mut memo = HashMap::new();
556                if dp(&self.tokens, slice, 0, start, &mut memo) {
557                    return true;
558                }
559            }
560            false
561        }
562    }
563}
564
565fn parse_char_class(chars: &[char], mut i: usize) -> Result<(CharClass, usize), ()> {
566    let mut parts = Vec::<CharClassPart>::new();
567    if i >= chars.len() {
568        return Err(());
569    }
570
571    while i < chars.len() {
572        if chars[i] == ']' {
573            return Ok((CharClass { parts }, i + 1));
574        }
575
576        let first = chars[i];
577        i += 1;
578
579        if i + 1 < chars.len() && chars[i] == '-' && chars[i + 1] != ']' {
580            // Range like a-z.
581            let second = chars[i + 1];
582            i += 2;
583            parts.push(CharClassPart::Range(first, second));
584        } else {
585            parts.push(CharClassPart::Single(first));
586        }
587    }
588
589    Err(())
590}
591
/// Parse the `n}` tail of a `{n}` quantifier.
///
/// `i` is the index of the first character after the opening `{`. On success
/// returns the decimal value `n` and the index just past the closing `}`.
///
/// # Errors
///
/// Returns `Err(())` when there is no closing `}`, when no digit precedes
/// it, when a non-digit character appears before it, or when the number does
/// not fit in a `usize`.
fn parse_braced_number(chars: &[char], i: usize) -> Result<(usize, usize), ()> {
    let tail = chars.get(i..).ok_or(())?;
    let close = tail.iter().position(|&c| c == '}').ok_or(())?;

    let digits = &tail[..close];
    if digits.is_empty() {
        return Err(());
    }

    // Accumulate the value, bailing out on a non-digit or on overflow.
    let value = digits
        .iter()
        .try_fold(0usize, |acc, c| {
            let digit = c.to_digit(10)?;
            acc.checked_mul(10)?.checked_add(digit as usize)
        })
        .ok_or(())?;

    Ok((value, i + close + 1))
}
616
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed addresses are accepted.
    #[test]
    fn test_valid_emails() {
        for email in [
            "user@example.com",
            "user.name@example.com",
            "user+tag@example.com",
            "user@sub.domain.org",
        ] {
            assert!(is_valid_email(email), "{email:?} should be accepted");
        }
    }

    /// Missing parts, doubled `@`, and misplaced dots are rejected.
    #[test]
    fn test_invalid_emails() {
        for email in [
            "",
            "invalid",
            "@domain.com",
            "user@",
            "user@@domain.com",
            ".user@domain.com",
            "user.@domain.com",
            "user@.domain.com",
            "user@domain.com.",
            "user@domain..com",
        ] {
            assert!(!is_valid_email(email), "{email:?} should be rejected");
        }
    }

    /// http/https URLs with paths, queries, ports, and localhost are accepted.
    #[test]
    fn test_valid_urls() {
        for url in [
            "https://example.com",
            "http://example.com",
            "https://sub.domain.org",
            "https://example.com/path",
            "https://example.com/path?query=value",
            "https://example.com:8080",
            "http://localhost",
        ] {
            assert!(is_valid_url(url), "{url:?} should be accepted");
        }
    }

    /// Other schemes and URLs without a host are rejected.
    #[test]
    fn test_invalid_urls() {
        for url in ["", "not-a-url", "ftp://example.com", "https://", "http://"] {
            assert!(!is_valid_url(url), "{url:?} should be rejected");
        }
    }

    /// Anchored and unanchored literal patterns.
    #[test]
    fn test_simple_patterns() {
        for (value, pattern, expected) in [
            ("hello", "^hello$", true),
            ("test", "^test$", true),
            ("hello", "^world$", false),
            ("abc", "abc", true),
        ] {
            assert_eq!(
                matches_pattern(value, pattern),
                expected,
                "value {value:?} against pattern {pattern:?}"
            );
        }
    }
}