Skip to main content

formatparse_core/parser/
mod.rs

//! Parser module for formatparse-core
2pub mod pattern;
3pub mod regex;
4
/// Security constants for input validation
///
/// Upper bound on a pattern's length. `validate_pattern_length` compares it against
/// `pattern.len()`, which for a `&str` is the UTF-8 byte length.
pub const MAX_PATTERN_LENGTH: usize = 10_000;
/// Upper bound on an input string's length. `validate_input_length` compares it against
/// `input.len()`, which for a `&str` is the UTF-8 byte length.
pub const MAX_INPUT_LENGTH: usize = 10_000_000; // 10MB
/// Upper bound on the number of fields.
/// NOTE(review): not referenced in the visible part of this file; confirm where it is enforced.
pub const MAX_FIELDS: usize = 100;
/// Upper bound on a field name's length. `validate_field_name` compares it against
/// `field_name.len()`, which for a `&str` is the UTF-8 byte length.
pub const MAX_FIELD_NAME_LENGTH: usize = 200;
10
11/// Validate pattern length
12pub fn validate_pattern_length(pattern: &str) -> Result<(), String> {
13    if pattern.len() > MAX_PATTERN_LENGTH {
14        return Err(format!(
15            "Pattern length {} exceeds maximum allowed length of {} characters",
16            pattern.len(),
17            MAX_PATTERN_LENGTH
18        ));
19    }
20    Ok(())
21}
22
23/// Validate input string length
24pub fn validate_input_length(input: &str) -> Result<(), String> {
25    if input.len() > MAX_INPUT_LENGTH {
26        return Err(format!(
27            "Input length {} exceeds maximum allowed length of {} characters",
28            input.len(),
29            MAX_INPUT_LENGTH
30        ));
31    }
32    Ok(())
33}
34
35/// Validate field name length and characters
36pub fn validate_field_name(field_name: &str) -> Result<(), String> {
37    if field_name.len() > MAX_FIELD_NAME_LENGTH {
38        return Err(format!(
39            "Field name length {} exceeds maximum allowed length of {} characters",
40            field_name.len(),
41            MAX_FIELD_NAME_LENGTH
42        ));
43    }
44
45    // Check for null bytes
46    if field_name.contains('\0') {
47        return Err("Field name contains null byte".to_string());
48    }
49
50    Ok(())
51}
52
/// Count capturing groups in a regex pattern string (for validating `with_pattern` / custom types).
///
/// This is a lightweight lexical scan, not a full regex parse:
///
/// * A backslash escapes the character after it, so `\(` is never counted.
/// * Parentheses inside a bracketed character class (`[(]`, `[^()]`) are literals and are not
///   counted. A `]` placed directly after `[` or `[^` is treated as a member of the class, not
///   as its terminator. Nested bracket expressions are not tracked: the class ends at the
///   first unescaped `]`.
/// * `(?P<name>...)` counts as one capturing group.
/// * Every other `(?...)` extension is treated as non-capturing at the opening parenthesis
///   (same rule of thumb as skipping `(?:...)`, `(?=...)`, etc.). This includes `(?<name>...)`
///   written without the `P`.
/// * Any other `(` counts as one capturing group.
///
/// The pattern is not validated; unbalanced or unterminated constructs are scanned
/// best-effort and never cause a panic.
pub fn count_capturing_groups(pattern: &str) -> usize {
    // Every character with syntactic meaning here is ASCII, and the bytes of a multi-byte
    // UTF-8 sequence are never ASCII, so scanning bytes cannot misread part of a character
    // as syntax. It also avoids allocating a `Vec<char>`.
    let bytes = pattern.as_bytes();
    let mut count = 0;
    let mut i = 0;

    while i < bytes.len() {
        match bytes[i] {
            // Skip the backslash and whatever it escapes; a trailing backslash ends the scan.
            b'\\' => i += 2,
            // Nothing inside `[...]` can open a group.
            b'[' => i = skip_character_class(bytes, i),
            b'(' => {
                if bytes.get(i + 1) == Some(&b'?') {
                    i += 2;
                    if bytes.get(i) == Some(&b'P') && bytes.get(i + 1) == Some(&b'<') {
                        // Named group: step over the name so its characters are not
                        // re-examined, then count the group itself.
                        i += 2;
                        while i < bytes.len() && bytes[i] != b'>' {
                            i += 1;
                        }
                        if i < bytes.len() {
                            i += 1;
                        }
                        count += 1;
                    }
                    // Any other `(?` form is non-capturing; scanning resumes right after `?`.
                } else {
                    count += 1;
                    i += 1;
                }
            }
            _ => i += 1,
        }
    }
    count
}

/// Return the index just past the character class that opens at `bytes[open]` (a `[`), or
/// `bytes.len()` when the class is never closed.
fn skip_character_class(bytes: &[u8], open: usize) -> usize {
    let mut i = open + 1;
    if bytes.get(i) == Some(&b'^') {
        i += 1;
    }
    // A `]` in first position is a literal member of the class, not its terminator.
    if bytes.get(i) == Some(&b']') {
        i += 1;
    }
    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i += 2,
            b']' => return i + 1,
            _ => i += 1,
        }
    }
    bytes.len()
}
92
#[cfg(test)]
mod count_capturing_groups_tests {
    use super::count_capturing_groups;

    // A single `(?P<name>...)` group contributes exactly one capture.
    #[test]
    fn named_group_counts_as_one() {
        let groups = count_capturing_groups(r"(?P<foo>\d+)");
        assert_eq!(groups, 1);
    }

    // A plain group nested inside a named group is counted in addition to the named one.
    #[test]
    fn named_group_with_nested_capture() {
        let groups = count_capturing_groups(r"(?P<outer>\d(\d))");
        assert_eq!(groups, 2);
    }

    // `(?:...)` groups never capture.
    #[test]
    fn non_capturing_zero() {
        let groups = count_capturing_groups(r"(?:ab)");
        assert_eq!(groups, 0);
    }

    // Plain and named groups are both counted when they appear side by side.
    #[test]
    fn plain_capture_plus_named() {
        let groups = count_capturing_groups(r"(\w)(?P<n>\d+)");
        assert_eq!(groups, 2);
    }

    // `(?P=x)` refers back to a group; it does not introduce a new one.
    #[test]
    fn backreference_not_counted_as_capture_here() {
        let groups = count_capturing_groups(r"(?P<x>a)(?P=x)");
        assert_eq!(groups, 1);
    }
}
121}