textfsm-core 0.3.1

Core parsing library for TextFSM template-based state machine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! Value definition parsing and representation.

use fancy_regex::Regex;
use std::collections::HashSet;

use crate::error::TemplateError;
use crate::types::{ValueOption, ValueOptions};

/// A Value definition from the template header.
///
/// Instances are produced by `ValueDef::parse`, which validates the name and
/// the regex before constructing the struct.
#[derive(Debug, Clone)]
pub struct ValueDef {
    /// Name of the value (used as column header).
    ///
    /// `parse` only accepts names made of alphanumeric characters and `_`
    /// that do not exceed `ValueDef::MAX_NAME_LEN`.
    pub name: String,

    /// Regex pattern for this value, including its enclosing parentheses.
    ///
    /// When built by `parse` this is the template's pattern *after*
    /// `normalize_pattern` has been applied, so it can differ from the
    /// literal template text (e.g. `\<` is stored as `<`).
    pub pattern: String,

    /// Options applied to this value.
    pub options: ValueOptions,

    /// Regex pattern transformed for named capture: `(...)` -> `(?P<name>...)`.
    ///
    /// Derived from `pattern` by replacing the outermost parentheses with a
    /// capture group named after `name`.
    pub(crate) template_pattern: String,

    /// Compiled regex for List values with nested groups.
    ///
    /// `parse` sets this to `Some` only when the `List` option is present and
    /// the compiled pattern reports more than one capture group; otherwise it
    /// is `None`.
    pub(crate) compiled_regex: Option<Regex>,
}

impl ValueDef {
    /// Maximum allowed length for a value name, counted in characters.
    pub const MAX_NAME_LEN: usize = 48;

    /// Parse a Value line: `Value [Options] Name (regex)`.
    ///
    /// `Options` is an optional comma-separated list written as a single
    /// whitespace-free token (e.g. `Required,Filldown`). The regex is
    /// everything from the first `(` to the end of the trimmed line and must
    /// be enclosed in parentheses. The stored `pattern` is the regex after
    /// `normalize_pattern` has been applied.
    ///
    /// `line_num` is only used to label `InvalidValue` errors.
    ///
    /// # Errors
    ///
    /// * [`TemplateError::InvalidValue`] if the line does not start with
    ///   `Value `, has no `(`, has no name, has too many tokens before the
    ///   regex, starts with an unknown single option token, has a name that is
    ///   too long or contains characters other than alphanumerics and `_`, or
    ///   has a regex that is not wrapped in parentheses or ends with `\)`.
    /// * [`TemplateError::UnknownOption`] / [`TemplateError::DuplicateOption`]
    ///   if the option list contains an unrecognized or repeated option.
    /// * [`TemplateError::InvalidRegex`] if the normalized pattern does not
    ///   compile.
    pub fn parse(line: &str, line_num: usize) -> Result<Self, TemplateError> {
        let trimmed = line.trim();

        // Remove the "Value " prefix.
        let rest = trimmed
            .strip_prefix("Value ")
            .ok_or_else(|| Self::invalid_value(line_num, "line must start with 'Value '"))?;

        // The regex starts at the first '('; everything before it is the
        // optional option list followed by the name.
        let regex_start = rest.find('(').ok_or_else(|| {
            Self::invalid_value(line_num, "regex pattern must be wrapped in parentheses")
        })?;
        let pattern = rest[regex_start..].trim();

        let mut tokens = rest[..regex_start].split_whitespace();
        let (options, name) = match (tokens.next(), tokens.next(), tokens.next()) {
            // Missing name.
            (None, _, _) => {
                return Err(Self::invalid_value(line_num, "missing value name"));
            }
            // Just a name, no options.
            (Some(name), None, _) => (HashSet::new(), name.to_string()),
            // Options followed by a name.
            (Some(opts), Some(name), None) => {
                // A first token without commas that is not itself an option is
                // reported as a malformed line rather than an unknown option.
                if !opts.contains(',') && ValueOption::parse(opts).is_none() {
                    return Err(Self::invalid_value(
                        line_num,
                        format!(
                            "invalid format - expected 'Value [Options] Name (regex)', got unknown token '{}'",
                            opts
                        ),
                    ));
                }
                (Self::parse_options(opts, line_num)?, name.to_string())
            }
            // Too many tokens.
            (Some(_), Some(_), Some(_)) => {
                return Err(Self::invalid_value(
                    line_num,
                    "too many tokens before regex pattern",
                ));
            }
        };

        // Count characters, not bytes: the character check below accepts
        // non-ASCII alphanumerics, which occupy more than one byte each.
        if name.chars().count() > Self::MAX_NAME_LEN {
            return Err(Self::invalid_value(
                line_num,
                format!(
                    "name '{}' exceeds maximum length of {}",
                    name,
                    Self::MAX_NAME_LEN
                ),
            ));
        }

        if !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
            return Err(Self::invalid_value(
                line_num,
                format!("name '{}' contains invalid characters", name),
            ));
        }

        if !pattern.starts_with('(') || !pattern.ends_with(')') {
            return Err(Self::invalid_value(
                line_num,
                "regex must be wrapped in parentheses",
            ));
        }

        if pattern.ends_with("\\)") {
            return Err(Self::invalid_value(
                line_num,
                "regex cannot end with escaped parenthesis",
            ));
        }

        // Normalize pattern for Python-to-Rust regex compatibility.
        let pattern = normalize_pattern(pattern);

        // Compile once: this both validates the pattern and, for List values,
        // provides the regex that is stored on the definition.
        let compiled = Regex::new(&pattern).map_err(|e| TemplateError::InvalidRegex {
            pattern: pattern.clone(),
            message: e.to_string(),
        })?;

        // Create the named capture group version: (pattern) -> (?P<name>pattern).
        // The pattern is known to start with '(' and end with ')' here, so
        // stripping one byte from each side is safe.
        let inner_pattern = &pattern[1..pattern.len() - 1];
        let template_pattern = format!("(?P<{}>{})", name, inner_pattern);

        // Only List values with nested groups (more than the single outer
        // capture group) keep their compiled regex.
        let compiled_regex =
            if options.contains(&ValueOption::List) && compiled.captures_len() > 1 {
                Some(compiled)
            } else {
                None
            };

        Ok(Self {
            name,
            pattern,
            options,
            template_pattern,
            compiled_regex,
        })
    }

    /// Build an `InvalidValue` error for `line` carrying `message`.
    fn invalid_value(line: usize, message: impl Into<String>) -> TemplateError {
        TemplateError::InvalidValue {
            line,
            message: message.into(),
        }
    }

    /// Parse a comma-separated option list such as `Required,Filldown`.
    ///
    /// Returns `UnknownOption` for an unrecognized name (including an empty
    /// segment) and `DuplicateOption` when the same option appears twice.
    fn parse_options(opts_str: &str, _line_num: usize) -> Result<ValueOptions, TemplateError> {
        let mut options = HashSet::new();

        // Note that the python implementation strictly requires no spaces between
        // commas in the options. It must be "Required,Filldown" not "Required, Filldown".
        // `parse` passes a single whitespace-delimited token, so the `trim`
        // below never changes the outcome for that caller.
        for opt_name in opts_str.split(',') {
            let opt_name = opt_name.trim();
            let opt = ValueOption::parse(opt_name)
                .ok_or_else(|| TemplateError::UnknownOption(opt_name.into()))?;

            if !options.insert(opt) {
                return Err(TemplateError::DuplicateOption(opt_name.into()));
            }
        }

        Ok(options)
    }

    /// Check if this value has a specific option.
    pub fn has_option(&self, opt: ValueOption) -> bool {
        self.options.contains(&opt)
    }
}

/// Rewrite a Python-flavoured regex so that `fancy-regex` accepts it with the
/// same meaning.
///
/// Two differences between Python's `re` module and Rust's `fancy-regex`
/// show up in real-world TextFSM templates and are smoothed over here:
///
/// ## 1. Backslash angle brackets
///
/// In Python `\<` and `\>` are not recognized escapes and therefore match a
/// literal `<` / `>`. `fancy-regex` reads them as GNU-style word boundary
/// assertions (`\<` = start-of-word, `\>` = end-of-word). Outside character
/// classes they are rewritten to the bare `<` / `>`.
///
/// ## 2. Quantifiers on lookaround assertions
///
/// Lookarounds (`(?=...)`, `(?!...)`, `(?<=...)`, `(?<!...)`) are zero-width,
/// so a quantifier on them (`(?<=x)+`) carries no meaning. Python silently
/// ignores it; `fancy-regex` rejects the pattern. The single quantifier that
/// directly follows a lookaround's closing parenthesis is dropped.
///
/// Handled quantifiers: `+`, `*`, `?`, `{n}`, `{n,}`, `{n,m}` (and lazy variants).
///
/// Character classes (`[...]`) and all other escape sequences are copied
/// through unchanged; parentheses inside a class do not count as groups.
pub(crate) fn normalize_pattern(pattern: &str) -> String {
    let src: Vec<char> = pattern.chars().collect();
    let total = src.len();
    let mut out = String::with_capacity(pattern.len());
    let mut pos = 0;

    // One flag per currently open group: `true` when that group is a lookaround.
    let mut open_groups: Vec<bool> = Vec::new();

    while pos < total {
        match src[pos] {
            // Escape sequence (a lone trailing backslash falls through to the
            // catch-all arm and is copied as-is).
            '\\' if pos + 1 < total => {
                let escaped = src[pos + 1];
                // Normalization 1: `\<` / `\>` lose their backslash; every
                // other escape keeps it.
                if !matches!(escaped, '<' | '>') {
                    out.push('\\');
                }
                out.push(escaped);
                pos += 2;
            }

            // Character class: copy verbatim up to and including the closing `]`.
            '[' => {
                out.push('[');
                pos += 1;
                // Optional negation marker.
                if src.get(pos) == Some(&'^') {
                    out.push('^');
                    pos += 1;
                }
                // A `]` in first position is a literal member of the class.
                if src.get(pos) == Some(&']') {
                    out.push(']');
                    pos += 1;
                }
                while let Some(&member) = src.get(pos) {
                    if member == ']' {
                        break;
                    }
                    out.push(member);
                    pos += 1;
                    // Keep an escaped character glued to its backslash so an
                    // escaped `]` does not end the class.
                    if member == '\\' {
                        if let Some(&escaped) = src.get(pos) {
                            out.push(escaped);
                            pos += 1;
                        }
                    }
                }
                // Closing bracket, if the class is terminated at all.
                if pos < total {
                    out.push(src[pos]);
                    pos += 1;
                }
            }

            // Group opening: remember whether it is a lookaround.
            '(' => {
                let opens_lookaround = src.get(pos + 1) == Some(&'?')
                    && match src.get(pos + 2) {
                        // (?= or (?!
                        Some(&'=') | Some(&'!') => true,
                        // (?<= or (?<!
                        Some(&'<') => matches!(src.get(pos + 3), Some(&'=') | Some(&'!')),
                        _ => false,
                    };
                open_groups.push(opens_lookaround);
                out.push('(');
                pos += 1;
            }

            // Group closing: an unbalanced `)` is treated as a plain group.
            ')' => {
                let closed_lookaround = open_groups.pop().unwrap_or(false);
                out.push(')');
                pos += 1;

                // Normalization 2: drop the quantifier trailing a lookaround.
                if closed_lookaround && pos < total {
                    pos = skip_quantifier(&src, pos);
                }
            }

            // Anything else is copied through.
            other => {
                out.push(other);
                pos += 1;
            }
        }
    }

    out
}

/// Step over one regex quantifier starting at index `i`.
///
/// Recognized forms are `+`, `*`, `?`, `{n}`, `{n,}` and `{n,m}`, each
/// optionally followed by a lazy `?`. Returns the index just past the
/// quantifier, or `i` unchanged when the text at `i` is not a complete
/// quantifier (including when `i` is out of bounds).
fn skip_quantifier(chars: &[char], mut i: usize) -> usize {
    let origin = i;
    // Length of the run of ASCII digits beginning at `from` (`from <= chars.len()`).
    let digit_run = |from: usize| {
        chars[from..]
            .iter()
            .take_while(|c| c.is_ascii_digit())
            .count()
    };

    match chars.get(i).copied() {
        Some('+') | Some('*') | Some('?') => i += 1,
        Some('{') => {
            // At least one digit must follow the brace.
            let lower_len = digit_run(i + 1);
            if lower_len == 0 {
                return origin; // Not a valid quantifier
            }
            i += 1 + lower_len;

            // Optional `,` with an optional upper bound.
            if chars.get(i) == Some(&',') {
                i += 1;
                i += digit_run(i);
            }

            if chars.get(i) != Some(&'}') {
                return origin; // Not a valid quantifier
            }
            i += 1;
        }
        // Not a quantifier (or past the end): nothing to skip.
        _ => return origin,
    }

    // A trailing `?` makes the quantifier lazy; skip it as well.
    if chars.get(i) == Some(&'?') {
        i += 1;
    }
    i
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `line` as template line 1, panicking if it is rejected.
    fn parse_ok(line: &str) -> ValueDef {
        ValueDef::parse(line, 1).unwrap()
    }

    /// Check every `(input, expected)` pair against `normalize_pattern`.
    fn assert_normalized(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_pattern(input), expected, "input: {}", input);
        }
    }

    #[test]
    fn test_parse_simple_value() {
        let value = parse_ok(r"Value Interface (\S+)");
        assert_eq!(value.name, "Interface");
        assert_eq!(value.pattern, r"(\S+)");
        assert!(value.options.is_empty());
        assert_eq!(value.template_pattern, r"(?P<Interface>\S+)");
    }

    #[test]
    fn test_parse_value_with_options() {
        let value = parse_ok(r"Value Required,Filldown Hostname (\S+)");
        assert_eq!(value.name, "Hostname");
        // Both listed options are present, an unlisted one is not.
        assert!(value.has_option(ValueOption::Required));
        assert!(value.has_option(ValueOption::Filldown));
        assert!(!value.has_option(ValueOption::List));
    }

    #[test]
    fn test_parse_value_with_spaces_in_regex() {
        let value = parse_ok("Value Status (up|down|administratively down)");
        assert_eq!(value.name, "Status");
        assert_eq!(value.pattern, "(up|down|administratively down)");
    }

    #[test]
    fn test_invalid_regex() {
        // Unterminated character class: well-formed Value line, bad regex.
        let outcome = ValueDef::parse("Value Bad ([invalid)", 1);
        assert!(matches!(outcome, Err(TemplateError::InvalidRegex { .. })));
    }

    #[test]
    fn test_missing_parens() {
        let outcome = ValueDef::parse(r"Value Name \S+", 1);
        assert!(matches!(outcome, Err(TemplateError::InvalidValue { .. })));
    }

    #[test]
    fn test_normalize_angle_brackets() {
        // \< and \> should be converted to < and >
        let value = parse_ok(r"Value DateTime (\S+\s+\d+\s+\d+|\<no date\>)");
        // The stored pattern carries literal angle brackets after normalization
        let stored = &value.pattern;
        assert!(stored.contains("<no date>"));
        assert!(!stored.contains(r"\<"));
    }

    #[test]
    fn test_normalize_pattern_angle_brackets() {
        assert_normalized(&[
            (r"^\s*\<\S+", r"^\s*<\S+"),
            (r"\<omited\>", "<omited>"),
            // Regular escapes should not be affected
            (r"\s+\d+", r"\s+\d+"),
            // Only \< and \> are affected, not < and > alone
            ("<already>", "<already>"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_lookaround_quantifiers() {
        assert_normalized(&[
            // Lookbehind with +
            (r"(?<=[^()\s])+", r"(?<=[^()\s])"),
            // Lookahead with *
            (r"(?=foo)*", r"(?=foo)"),
            // Negative lookbehind with ?
            (r"(?<!bar)?", r"(?<!bar)"),
            // Negative lookahead with {2,3}
            (r"(?!baz){2,3}", r"(?!baz)"),
            // Lazy quantifier
            (r"(?<=x)+?", r"(?<=x)"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_preserves_normal_groups() {
        // Regular groups should keep their quantifiers
        for unchanged in [r"(foo)+", r"(?:bar)*", r"(?P<name>baz){2}"].iter() {
            assert_eq!(normalize_pattern(unchanged), *unchanged);
        }
    }

    #[test]
    fn test_normalize_pattern_combined() {
        // Both normalizations in one pattern (like a real template rule)
        assert_normalized(&[(
            r"^\s+\<omited\s+output\>(?<=[^()\s])+",
            r"^\s+<omited\s+output>(?<=[^()\s])",
        )]);
    }

    #[test]
    fn test_normalize_pattern_char_class_with_parens() {
        // Parens inside character classes are literal, not group delimiters
        assert_normalized(&[(r"(?<=[^()\s])+(\s+foo)", r"(?<=[^()\s])(\s+foo)")]);
    }
}