// textfsm-core 0.3.1
//
// Core parsing library for the TextFSM template-based state machine.
// Documentation
//! Rule and State definitions.

use fancy_regex::Regex;
use std::collections::HashMap;
use std::sync::LazyLock;

use crate::error::TemplateError;
use crate::types::{LineOp, RecordOp, Transition};

use super::value::normalize_pattern;

// Splits a rule line into match pattern and action by finding the last `\s->` delimiter.
// Note that this pattern enforces a space before the arrow. I suspect this is because
// the arrow could be part of the regex. A space is used as a delimmter.
static MATCH_ACTION: LazyLock<fancy_regex::Regex> =
    LazyLock::new(|| fancy_regex::Regex::new(r"(?P<match>.*)(\s->(?P<action>.*))").unwrap());

/// Matches `LineOp[.RecordOp] [NewState]`.
///
/// The pattern is anchored with `^` because `Regex::captures` searches for a
/// match anywhere in the input. Without the anchor, trailing text such as
/// ` garbage Next` would be accepted as the action `Next`. The reference Python
/// implementation applies this pattern with `re.match`, which only matches at
/// the start of the string.
static ACTION_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
    fancy_regex::Regex::new(
        r#"^\s+(?P<ln_op>Continue|Next|Error)(\.(?P<rec_op>Clear|Clearall|Record|NoRecord))?(\s+(?P<new_state>\w+|".*"))?$"#,
    )
    .unwrap()
});

/// Matches `RecordOp [NewState]`.
///
/// Anchored with `^` so the whole action string must have this shape.
static ACTION2_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
    fancy_regex::Regex::new(
        r#"^\s+(?P<rec_op>Clear|Clearall|Record|NoRecord)(\s+(?P<new_state>\w+|".*"))?$"#,
    )
    .unwrap()
});

/// Matches an optional `[NewState]` only.
///
/// Anchored with `^`: an unanchored search would always succeed by matching
/// the empty string at the end of the input, so malformed actions could never
/// be rejected.
static ACTION3_RE: LazyLock<fancy_regex::Regex> =
    LazyLock::new(|| fancy_regex::Regex::new(r#"^(\s+(?P<new_state>\w+|".*"))?$"#).unwrap());

/// A single rule within a state: a regex to match against an input line plus
/// the action (line operator, record operator, state transition) attached to it.
#[derive(Debug, Clone)]
pub struct Rule {
    /// Original match pattern from the template, before `${var}` substitution.
    /// This is the trimmed rule text with any ` -> action` suffix removed.
    pub match_pattern: String,

    /// Regex source after `${var}` substitution and pattern normalization.
    pub regex_pattern: String,

    /// Regex compiled from `regex_pattern`, used for matching.
    pub(crate) regex: Regex,

    /// Line operator; `LineOp::default()` when the rule does not specify one.
    pub line_op: LineOp,

    /// Record operator; `RecordOp::default()` when the rule does not specify one.
    pub record_op: RecordOp,

    /// State transition; `Transition::default()` when the rule names no new state.
    pub transition: Transition,

    /// Line number in the template (for error reporting).
    pub line_num: usize,
}

/// Line-operator keywords. These are reserved words and cannot be state names.
const RESERVED_LINE_OPS: &[&str] = &["Continue", "Next", "Error"];
/// Record-operator keywords. These are reserved words and cannot be state names.
const RESERVED_RECORD_OPS: &[&str] = &["Clear", "Clearall", "Record", "NoRecord"];

impl Rule {
    /// Parse a rule line: `  ^pattern -> LineOp.RecordOp NewState`
    ///
    /// `line` is the raw template line (indentation included), `line_num` is its
    /// position in the template (used only for error reporting), and
    /// `value_templates` maps variable names to the regex text substituted for
    /// `${name}` references in the pattern.
    ///
    /// # Errors
    ///
    /// * `TemplateError::InvalidValue` if the line does not start with one or two
    ///   spaces, or a tab, immediately followed by `^`.
    /// * Any error from variable substitution (unclosed `${` or unknown variable).
    /// * `TemplateError::InvalidRegex` if the substituted pattern does not compile.
    /// * Any error from parsing the action part.
    /// * `TemplateError::ContinueWithTransition` if `Continue` is combined with a
    ///   state transition.
    pub fn parse(
        line: &str,
        line_num: usize,
        value_templates: &HashMap<String, String>,
    ) -> Result<Self, TemplateError> {
        // Google's implementation enforces the indentation: one or two spaces,
        // or a single tab, directly followed by the '^' that starts the pattern.
        // NOTE(review): `InvalidValue` is reported here although this is a rule
        // error; the variant is kept unchanged so existing callers still match it.
        const RULE_PREFIXES: [&str; 3] = [" ^", "  ^", "\t^"];
        if !RULE_PREFIXES.iter().any(|&prefix| line.starts_with(prefix)) {
            return Err(TemplateError::InvalidValue {
                line: line_num,
                message: "Rule must be indented with 1 or 2 spaces or a tab and start with '^'"
                    .into(),
            });
        }
        let trimmed = line.trim();

        // Split "pattern -> action". When there is no ` ->` delimiter the whole
        // trimmed line is the pattern and the rule has no explicit action.
        let split = MATCH_ACTION
            .captures(trimmed)
            .ok()
            .flatten()
            .and_then(|caps| Some((caps.name("match")?.as_str(), caps.name("action")?.as_str())));
        let (match_pattern, action_str) = match split {
            Some((pattern, action)) => (pattern, Some(action)),
            None => (trimmed, None),
        };

        // Substitute ${var} with named capture patterns
        let regex_pattern = Self::substitute_variables(match_pattern, value_templates, line_num)?;

        // Normalize regex for Python-to-Rust compatibility.
        //
        // Two normalizations are applied:
        //
        // 1. `\<` and `\>` → literal `<` and `>`. Python's `re` treats these as
        //    literal characters (unrecognized escapes). fancy-regex treats them as
        //    word boundary assertions, which silently changes matching behavior.
        //
        // 2. Quantifiers on lookaround groups (e.g. `(?<=x)+`) are stripped.
        //    Python ignores them silently; fancy-regex rejects them as invalid.
        let regex_pattern = normalize_pattern(&regex_pattern);

        let regex = Regex::new(&regex_pattern).map_err(|e| TemplateError::InvalidRegex {
            pattern: regex_pattern.clone(),
            message: e.to_string(),
        })?;

        // Parse the action if present; otherwise every part takes its default.
        let (line_op, record_op, transition) = match action_str {
            Some(action) => Self::parse_action(action, line_num)?,
            None => (
                LineOp::default(),
                RecordOp::default(),
                Transition::default(),
            ),
        };

        // Continue cannot have state transition
        if line_op == LineOp::Continue && !matches!(transition, Transition::Stay) {
            return Err(TemplateError::ContinueWithTransition(line_num));
        }

        Ok(Self {
            match_pattern: match_pattern.to_string(),
            regex_pattern,
            regex,
            line_op,
            record_op,
            transition,
            line_num,
        })
    }

    /// Expands every `${name}` reference in `pattern` with the text stored for
    /// `name` in `templates`, copying everything else through unchanged.
    ///
    /// Python also supports bare `$VarName` via `string.Template`, but all
    /// real-world templates use `${VarName}`, so only the braced form is
    /// supported. A bare `$` is ambiguous in practice because in a regex `$`
    /// means end-of-line.
    ///
    /// # Errors
    ///
    /// * `TemplateError::InvalidRule` if a `${` has no closing `}`.
    /// * `TemplateError::InvalidSubstitution` if the referenced name is not a
    ///   key of `templates`.
    fn substitute_variables(
        pattern: &str,
        templates: &HashMap<String, String>,
        line_num: usize,
    ) -> Result<String, TemplateError> {
        let mut expanded = String::with_capacity(pattern.len());
        let mut remaining = pattern;

        loop {
            // No further reference: flush the tail and finish.
            let Some((literal, tail)) = remaining.split_once("${") else {
                expanded.push_str(remaining);
                return Ok(expanded);
            };
            expanded.push_str(literal);

            // The name runs up to the first closing brace after `${`.
            let Some((var_name, after_brace)) = tail.split_once('}') else {
                return Err(TemplateError::InvalidRule {
                    line: line_num,
                    message: "unclosed variable substitution".into(),
                });
            };

            match templates.get(var_name) {
                Some(replacement) => expanded.push_str(replacement),
                None => {
                    return Err(TemplateError::InvalidSubstitution {
                        line: line_num,
                        message: format!("unknown variable '{}'", var_name),
                    });
                }
            }

            remaining = after_brace;
        }
    }

    fn parse_action(
        action: &str,
        line_num: usize,
    ) -> Result<(LineOp, RecordOp, Transition), TemplateError> {
        if action.is_empty() {
            return Ok((
                LineOp::default(),
                RecordOp::default(),
                Transition::default(),
            ));
        }

        // Try ACTION_RE: LineOp[.RecordOp] [NewState]
        // Then ACTION2_RE: RecordOp [NewState]
        // Then ACTION3_RE: [NewState]
        let caps = ACTION_RE
            .captures(action)
            .ok()
            .flatten()
            .or_else(|| ACTION2_RE.captures(action).ok().flatten())
            .or_else(|| ACTION3_RE.captures(action).ok().flatten())
            .ok_or_else(|| TemplateError::InvalidRule {
                line: line_num,
                message: format!("badly formatted action '{}'", action),
            })?;

        let line_op = match caps.name("ln_op").map(|m| m.as_str()) {
            Some(s) => Self::parse_line_op(s, line_num)?,
            None => LineOp::default(),
        };

        let record_op = match caps.name("rec_op").map(|m| m.as_str()) {
            Some(s) => Self::parse_record_op(s, line_num)?,
            None => RecordOp::default(),
        };

        let transition = match caps.name("new_state").map(|m| m.as_str()) {
            Some(s) => Self::parse_transition(s),
            None => Transition::default(),
        };

        Ok((line_op, record_op, transition))
    }

    fn try_parse_line_op(s: &str) -> Option<LineOp> {
        match s {
            "Next" => Some(LineOp::Next),
            "Continue" => Some(LineOp::Continue),
            "Error" => Some(LineOp::Error),
            _ => None,
        }
    }

    /// Like `try_parse_line_op`, but reports an unknown keyword as
    /// `TemplateError::InvalidRule` tagged with `line_num`.
    fn parse_line_op(s: &str, line_num: usize) -> Result<LineOp, TemplateError> {
        match Self::try_parse_line_op(s) {
            Some(op) => Ok(op),
            None => Err(TemplateError::InvalidRule {
                line: line_num,
                message: format!("invalid line operator '{}'", s),
            }),
        }
    }

    /// Maps a record-operator keyword to its `RecordOp`, or `None` if `s` is
    /// not one of `Clear`, `Clearall`, `NoRecord` or `Record`.
    fn try_parse_record_op(s: &str) -> Option<RecordOp> {
        let op = match s {
            "Clear" => RecordOp::Clear,
            "Clearall" => RecordOp::ClearAll,
            "NoRecord" => RecordOp::NoRecord,
            "Record" => RecordOp::Record,
            _ => return None,
        };
        Some(op)
    }

    /// Like `try_parse_record_op`, but reports an unknown keyword as
    /// `TemplateError::InvalidRule` tagged with `line_num`.
    fn parse_record_op(s: &str, line_num: usize) -> Result<RecordOp, TemplateError> {
        match Self::try_parse_record_op(s) {
            Some(op) => Ok(op),
            None => Err(TemplateError::InvalidRule {
                line: line_num,
                message: format!("invalid record operator '{}'", s),
            }),
        }
    }

    /// Converts the `new_state` text of an action into a `Transition`.
    ///
    /// `End` and `EOF` map to their dedicated variants. Any other text becomes
    /// `Transition::State`; if it is wrapped in double quotes (a quoted error
    /// message), the surrounding quotes are removed first.
    fn parse_transition(s: &str) -> Transition {
        match s {
            "End" => Transition::End,
            "EOF" => Transition::Eof,
            _ => {
                // Strip one leading and one trailing quote only when both are
                // present. `strip_prefix`/`strip_suffix` keep this panic-free for
                // the one-character input `"`, where slicing `s[1..s.len() - 1]`
                // would hit an inverted range.
                let unquoted = s
                    .strip_prefix('"')
                    .and_then(|rest| rest.strip_suffix('"'))
                    .unwrap_or(s);
                Transition::State(unquoted.to_string())
            }
        }
    }
}

/// A named state of the template state machine, holding its rules.
#[derive(Debug, Clone)]
pub struct State {
    /// Name of this state.
    pub name: String,

    /// Rules in this state, checked in order.
    pub rules: Vec<Rule>,
}

impl State {
    /// Maximum number of characters allowed in a state name.
    const MAX_NAME_LEN: usize = 48;

    /// Create a new empty state with the given name and no rules.
    pub fn new(name: String) -> Self {
        Self {
            name,
            rules: Vec::new(),
        }
    }

    /// Check if a name is valid for a state.
    ///
    /// A valid name is between 1 and 48 characters long, consists only of
    /// alphanumeric characters and underscores, and is not one of the reserved
    /// line-operator or record-operator keywords.
    pub fn is_valid_name(name: &str) -> bool {
        // Measure the limit in characters, not bytes: Unicode alphanumerics are
        // accepted below, and a byte count would reject non-ASCII names that
        // are within the character limit.
        let char_count = name.chars().count();
        if char_count == 0 || char_count > Self::MAX_NAME_LEN {
            return false;
        }

        // Must be alphanumeric/underscore
        if !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
            return false;
        }

        // Cannot be a reserved word
        !RESERVED_LINE_OPS.contains(&name) && !RESERVED_RECORD_OPS.contains(&name)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `line` as template line 1 with no value templates defined.
    fn parse_without_values(line: &str) -> Result<Rule, TemplateError> {
        Rule::parse(line, 1, &HashMap::new())
    }

    /// Value templates for the `Interface` and `Status` variables.
    fn interface_status_templates() -> HashMap<String, String> {
        HashMap::from([
            ("Interface".to_string(), "(?P<Interface>\\S+)".to_string()),
            ("Status".to_string(), "(?P<Status>up|down)".to_string()),
        ])
    }

    #[test]
    fn test_parse_simple_rule() {
        // A rule without an action keeps its pattern and takes every default.
        let rule = parse_without_values(" ^Interface: (\\S+)").unwrap();

        assert_eq!(rule.match_pattern, "^Interface: (\\S+)");
        assert_eq!(rule.line_op, LineOp::Next);
        assert_eq!(rule.record_op, RecordOp::NoRecord);
        assert!(matches!(rule.transition, Transition::Stay));
    }

    #[test]
    fn test_parse_rule_with_record() {
        // A bare record operator leaves the line operator at its default.
        let rule = parse_without_values(" ^End -> Record").unwrap();

        assert_eq!(rule.line_op, LineOp::Next);
        assert_eq!(rule.record_op, RecordOp::Record);
    }

    #[test]
    fn test_parse_rule_with_compound_action() {
        let rule = parse_without_values(" ^Line -> Next.Record").unwrap();

        assert_eq!(rule.line_op, LineOp::Next);
        assert_eq!(rule.record_op, RecordOp::Record);
    }

    #[test]
    fn test_parse_rule_with_state_transition() {
        // Continue with state transition should fail
        let outcome = parse_without_values(" ^Start -> Continue.Record NextState");

        assert!(matches!(
            outcome,
            Err(TemplateError::ContinueWithTransition(_))
        ));
    }

    #[test]
    fn test_parse_rule_with_variable_substitution() {
        let rule = Rule::parse(
            " ^Interface: ${Interface} is ${Status}",
            1,
            &interface_status_templates(),
        )
        .unwrap();

        for expected in ["(?P<Interface>", "(?P<Status>"] {
            assert!(rule.regex_pattern.contains(expected));
        }
    }

    #[test]
    fn test_state_valid_names() {
        for accepted in ["Start", "State1", "my_state"] {
            assert!(State::is_valid_name(accepted));
        }

        // Reserved operator keywords and the empty string are rejected.
        for rejected in ["Continue", "Record", ""] {
            assert!(!State::is_valid_name(rejected));
        }
    }
}