Skip to main content

textfsm_core/template/
value.rs

1//! Value definition parsing and representation.
2
3use fancy_regex::Regex;
4use std::collections::HashSet;
5
6use crate::error::TemplateError;
7use crate::types::{ValueOption, ValueOptions};
8
9/// A Value definition from the template header.
10#[derive(Debug, Clone)]
11pub struct ValueDef {
12    /// Name of the value (used as column header).
13    pub name: String,
14
15    /// Original regex pattern from template.
16    pub pattern: String,
17
18    /// Options applied to this value.
19    pub options: ValueOptions,
20
21    /// Regex pattern transformed for named capture: `(...)` -> `(?P<name>...)`.
22    pub(crate) template_pattern: String,
23
24    /// Compiled regex for List values with nested groups.
25    pub(crate) compiled_regex: Option<Regex>,
26}
27
28impl ValueDef {
29    /// Maximum allowed length for a value name.
30    pub const MAX_NAME_LEN: usize = 48;
31
32    /// Parse a Value line: `Value [Options] Name (regex)`
33    pub fn parse(line: &str, line_num: usize) -> Result<Self, TemplateError> {
34        let trimmed = line.trim();
35
36        if !trimmed.starts_with("Value ") {
37            return Err(TemplateError::InvalidValue {
38                line: line_num,
39                message: "line must start with 'Value '".into(),
40            });
41        }
42
43        // Remove "Value " prefix
44        let rest = &trimmed[6..];
45
46        // Find where the regex starts (first '(')
47        let regex_start = rest.find('(').ok_or_else(|| TemplateError::InvalidValue {
48            line: line_num,
49            message: "regex pattern must be wrapped in parentheses".into(),
50        })?;
51
52        let before_regex = rest[..regex_start].trim();
53        let pattern = rest[regex_start..].trim();
54
55        // Parse the part before regex for options and name
56        let mut parts = before_regex.split_whitespace();
57        let first = parts.next();
58        let second = parts.next();
59        let third = parts.next();
60
61        let (options, name) = match (first, second, third) {
62            // missing name
63            (None, _, _) => {
64                return Err(TemplateError::InvalidValue {
65                    line: line_num,
66                    message: "missing value name".into(),
67                });
68            }
69            // just name, no options
70            (Some(name), None, _) => (HashSet::new(), name.to_string()),
71            // names + options
72            (Some(opts), Some(name), None) => {
73                // Options contain commas or are valid option names
74                if opts.contains(',') || ValueOption::parse(opts).is_some() {
75                    let options = Self::parse_options(opts, line_num)?;
76                    (options, name.to_string())
77                } else {
78                    // First part is not a valid option, error
79                    return Err(TemplateError::InvalidValue {
80                        line: line_num,
81                        message: format!(
82                            "invalid format - expected 'Value [Options] Name (regex)', got unknown token '{}'",
83                            opts
84                        ),
85                    });
86                }
87            }
88            // too many tokens
89            (Some(_), Some(_), Some(_)) => {
90                return Err(TemplateError::InvalidValue {
91                    line: line_num,
92                    message: "too many tokens before regex pattern".into(),
93                });
94            }
95        };
96
97        if name.len() > Self::MAX_NAME_LEN {
98            return Err(TemplateError::InvalidValue {
99                line: line_num,
100                message: format!(
101                    "name '{}' exceeds maximum length of {}",
102                    name,
103                    Self::MAX_NAME_LEN
104                ),
105            });
106        }
107
108        if !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
109            return Err(TemplateError::InvalidValue {
110                line: line_num,
111                message: format!("name '{}' contains invalid characters", name),
112            });
113        }
114
115        if !pattern.starts_with('(') || !pattern.ends_with(')') {
116            return Err(TemplateError::InvalidValue {
117                line: line_num,
118                message: "regex must be wrapped in parentheses".into(),
119            });
120        }
121
122        if pattern.ends_with("\\)") {
123            return Err(TemplateError::InvalidValue {
124                line: line_num,
125                message: "regex cannot end with escaped parenthesis".into(),
126            });
127        }
128
129        // Normalize pattern for Python-to-Rust regex compatibility
130        let pattern = normalize_pattern(pattern);
131
132        Regex::new(&pattern).map_err(|e| TemplateError::InvalidRegex {
133            pattern: pattern.to_string(),
134            message: e.to_string(),
135        })?;
136
137        // Create the named capture group version: (pattern) -> (?P<name>pattern)
138        let inner_pattern = &pattern[1..pattern.len() - 1];
139        let template_pattern = format!("(?P<{}>{})", name, inner_pattern);
140
141        // For List values with nested groups, compile the regex
142        let compiled_regex = if options.contains(&ValueOption::List) {
143            let re = Regex::new(&pattern).ok();
144            // Only store if there are nested groups
145            re.filter(|r| r.captures_len() > 1)
146        } else {
147            None
148        };
149
150        Ok(Self {
151            name,
152            pattern,
153            options,
154            template_pattern,
155            compiled_regex,
156        })
157    }
158
159    fn parse_options(opts_str: &str, _line_num: usize) -> Result<ValueOptions, TemplateError> {
160        let mut options = HashSet::new();
161
162        // Note that the python implementation strictly requires no spaces between
163        // commas in the options. It must be "Required,Filldown" not "Required, Filldown"
164        for opt_name in opts_str.split(',') {
165            let opt_name = opt_name.trim();
166            let opt = ValueOption::parse(opt_name)
167                .ok_or_else(|| TemplateError::UnknownOption(opt_name.into()))?;
168
169            if !options.insert(opt) {
170                return Err(TemplateError::DuplicateOption(opt_name.into()));
171            }
172        }
173
174        Ok(options)
175    }
176
177    /// Check if this value has a specific option.
178    pub fn has_option(&self, opt: ValueOption) -> bool {
179        self.options.contains(&opt)
180    }
181
182}
183
184/// Normalize a regex pattern for Python-to-Rust compatibility.
185///
186/// Python's `re` module is more lenient than Rust's `fancy-regex` in two
187/// specific ways that affect real-world TextFSM templates:
188///
189/// ## 1. Backslash angle brackets
190///
191/// Python treats `\<` and `\>` as literal `<` and `>` because they are not
192/// recognized escape sequences. Rust's fancy-regex treats them as word
193/// boundary assertions (GNU-style `\<` = start-of-word, `\>` = end-of-word).
194///
195/// ## 2. Quantifiers on lookaround assertions
196///
197/// Lookaround assertions (`(?<=...)`, `(?<!...)`, `(?=...)`, `(?!...)`) are
198/// zero-width — they match a position, not characters. Quantifying a
199/// zero-width match (`(?<=x)+`) is semantically meaningless.
200/// Python silently ignores the quantifier. fancy-regex rejects it as invalid.
201///
202/// Handled quantifiers: `+`, `*`, `?`, `{n}`, `{n,}`, `{n,m}` (and lazy variants).
203pub(crate) fn normalize_pattern(pattern: &str) -> String {
204    let mut result = String::with_capacity(pattern.len());
205    let chars: Vec<char> = pattern.chars().collect();
206    let len = chars.len();
207    let mut i = 0;
208
209    // Stack to track group types. Each entry is true if the group is a lookaround.
210    let mut group_stack: Vec<bool> = Vec::new();
211
212    while i < len {
213        // Handle escape sequences
214        if chars[i] == '\\' && i + 1 < len {
215            if chars[i + 1] == '<' || chars[i + 1] == '>' {
216                // Normalization 1: \< and \> → literal < and >
217                result.push(chars[i + 1]);
218                i += 2;
219                continue;
220            }
221            // Other escape: copy both chars verbatim
222            result.push(chars[i]);
223            result.push(chars[i + 1]);
224            i += 2;
225            continue;
226        }
227
228        // Skip character class contents (parens inside [...] are literal)
229        if chars[i] == '[' {
230            result.push(chars[i]);
231            i += 1;
232            // Handle negation
233            if i < len && chars[i] == '^' {
234                result.push(chars[i]);
235                i += 1;
236            }
237            // Handle literal ] at start of class
238            if i < len && chars[i] == ']' {
239                result.push(chars[i]);
240                i += 1;
241            }
242            while i < len && chars[i] != ']' {
243                if chars[i] == '\\' && i + 1 < len {
244                    result.push(chars[i]);
245                    result.push(chars[i + 1]);
246                    i += 2;
247                } else {
248                    result.push(chars[i]);
249                    i += 1;
250                }
251            }
252            if i < len {
253                result.push(chars[i]); // the ']'
254                i += 1;
255            }
256            continue;
257        }
258
259        // Track group openings
260        if chars[i] == '(' {
261            let is_lookaround = if i + 2 < len && chars[i + 1] == '?' {
262                // (?= or (?!
263                chars[i + 2] == '=' || chars[i + 2] == '!'
264                // (?<= or (?<!
265                || (i + 3 < len
266                    && chars[i + 2] == '<'
267                    && (chars[i + 3] == '=' || chars[i + 3] == '!'))
268            } else {
269                false
270            };
271            group_stack.push(is_lookaround);
272            result.push(chars[i]);
273            i += 1;
274            continue;
275        }
276
277        // Track group closings
278        if chars[i] == ')' {
279            let is_lookaround = group_stack.pop().unwrap_or(false);
280            result.push(chars[i]);
281            i += 1;
282
283            // Normalization 2: strip quantifiers after lookaround close
284            if is_lookaround && i < len {
285                i = skip_quantifier(&chars, i);
286            }
287            continue;
288        }
289
290        result.push(chars[i]);
291        i += 1;
292    }
293
294    result
295}
296
/// Advance past a quantifier starting at index `i` and return the index of the
/// first character after it.
///
/// Recognized forms are `+`, `*`, `?` and the brace forms `{n}`, `{n,}`,
/// `{n,m}`, `{,m}` and `{,}` (Python's `re` treats an omitted lower bound as
/// zero), each optionally followed by the lazy modifier `?`.
///
/// If `i` is out of range or no complete quantifier starts at `i` — including
/// the empty `{}` and an unterminated or non-numeric `{...` — `i` is returned
/// unchanged.
fn skip_quantifier(chars: &[char], i: usize) -> usize {
    let at = |idx: usize| chars.get(idx).copied();
    let is_digit_at = |idx: usize| at(idx).map_or(false, |c| c.is_ascii_digit());

    let end = match at(i) {
        Some('+') | Some('*') | Some('?') => i + 1,
        Some('{') => {
            let mut j = i + 1;
            // `{}` carries no bounds at all and is a literal, not a quantifier.
            if at(j) == Some('}') {
                return i;
            }
            // Optional lower bound.
            while is_digit_at(j) {
                j += 1;
            }
            // Optional `,` followed by an optional upper bound.
            if at(j) == Some(',') {
                j += 1;
                while is_digit_at(j) {
                    j += 1;
                }
            }
            // Anything other than a closing brace here means this was not a
            // quantifier after all; leave the input position untouched.
            if at(j) != Some('}') {
                return i;
            }
            j + 1
        }
        // Not a quantifier (or past the end): nothing to skip.
        _ => return i,
    };

    // Also skip the lazy modifier, if present.
    if at(end) == Some('?') {
        end + 1
    } else {
        end
    }
}
342
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `normalize_pattern` over `(input, expected)` pairs.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(normalize_pattern(input), *expected);
        }
    }

    #[test]
    fn test_parse_simple_value() {
        let parsed = ValueDef::parse("Value Interface (\\S+)", 1).unwrap();
        assert_eq!(parsed.name, "Interface");
        assert_eq!(parsed.pattern, "(\\S+)");
        assert!(parsed.options.is_empty());
        assert_eq!(parsed.template_pattern, "(?P<Interface>\\S+)");
    }

    #[test]
    fn test_parse_value_with_options() {
        let parsed = ValueDef::parse("Value Required,Filldown Hostname (\\S+)", 1).unwrap();
        assert_eq!(parsed.name, "Hostname");
        // Both listed options are set; an unlisted one is not.
        assert!(parsed.has_option(ValueOption::Required));
        assert!(parsed.has_option(ValueOption::Filldown));
        assert!(!parsed.has_option(ValueOption::List));
    }

    #[test]
    fn test_parse_value_with_spaces_in_regex() {
        // Whitespace inside the parenthesized regex belongs to the pattern.
        let parsed = ValueDef::parse("Value Status (up|down|administratively down)", 1).unwrap();
        assert_eq!(parsed.name, "Status");
        assert_eq!(parsed.pattern, "(up|down|administratively down)");
    }

    #[test]
    fn test_invalid_regex() {
        let outcome = ValueDef::parse("Value Bad ([invalid)", 1);
        assert!(matches!(outcome, Err(TemplateError::InvalidRegex { .. })));
    }

    #[test]
    fn test_missing_parens() {
        let outcome = ValueDef::parse("Value Name \\S+", 1);
        assert!(matches!(outcome, Err(TemplateError::InvalidValue { .. })));
    }

    #[test]
    fn test_normalize_angle_brackets() {
        // Parsing applies the `\<` / `\>` normalization to the stored pattern.
        let parsed = ValueDef::parse(r"Value DateTime (\S+\s+\d+\s+\d+|\<no date\>)", 1).unwrap();
        assert!(parsed.pattern.contains("<no date>"));
        assert!(!parsed.pattern.contains(r"\<"));
    }

    #[test]
    fn test_normalize_pattern_angle_brackets() {
        assert_normalizes(&[
            (r"^\s*\<\S+", r"^\s*<\S+"),
            (r"\<omited\>", "<omited>"),
            // Ordinary escapes pass through untouched.
            (r"\s+\d+", r"\s+\d+"),
            // Bare angle brackets are already literal.
            ("<already>", "<already>"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_lookaround_quantifiers() {
        assert_normalizes(&[
            // Lookbehind followed by `+`.
            (r"(?<=[^()\s])+", r"(?<=[^()\s])"),
            // Lookahead followed by `*`.
            (r"(?=foo)*", r"(?=foo)"),
            // Negative lookbehind followed by `?`.
            (r"(?<!bar)?", r"(?<!bar)"),
            // Negative lookahead followed by a counted range.
            (r"(?!baz){2,3}", r"(?!baz)"),
            // Lazy variant.
            (r"(?<=x)+?", r"(?<=x)"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_preserves_normal_groups() {
        // Capturing, non-capturing and named groups keep their quantifiers.
        assert_normalizes(&[
            (r"(foo)+", r"(foo)+"),
            (r"(?:bar)*", r"(?:bar)*"),
            (r"(?P<name>baz){2}", r"(?P<name>baz){2}"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_combined() {
        // Both rewrites applied within a single rule-like pattern.
        assert_normalizes(&[(
            r"^\s+\<omited\s+output\>(?<=[^()\s])+",
            r"^\s+<omited\s+output>(?<=[^()\s])",
        )]);
    }

    #[test]
    fn test_normalize_pattern_char_class_with_parens() {
        // Parentheses inside `[...]` are literal and must not affect group tracking.
        assert_normalizes(&[(r"(?<=[^()\s])+(\s+foo)", r"(?<=[^()\s])(\s+foo)")]);
    }
}