textfsm-core 0.3.1

//! Value definition parsing and representation.

use fancy_regex::Regex;
use std::collections::HashSet;

use crate::error::TemplateError;
use crate::types::{ValueOption, ValueOptions};

/// A Value definition from the template header.
///
/// Instances are built by [`ValueDef::parse`] from a single
/// `Value [Options] Name (regex)` line.
#[derive(Debug, Clone)]
pub struct ValueDef {
    /// Name of the value (used as column header).
    ///
    /// `parse` only accepts names made of alphanumeric characters and `_`
    /// whose length does not exceed `MAX_NAME_LEN`.
    pub name: String,

    /// Regex pattern from the template, including its enclosing parentheses.
    ///
    /// `parse` stores the pattern after passing it through
    /// `normalize_pattern`, so it can differ from the literal template text
    /// (for example `\<` is stored as `<`).
    pub pattern: String,

    /// Options applied to this value.
    pub options: ValueOptions,

    /// Regex pattern transformed for named capture: `(...)` -> `(?P<name>...)`.
    pub(crate) template_pattern: String,

    /// Compiled form of `pattern`, kept only for some `List` values.
    ///
    /// `parse` leaves this as `None` unless the value carries the `List`
    /// option and the compiled pattern passes the capture-group count check
    /// performed there.
    pub(crate) compiled_regex: Option<Regex>,
}

impl ValueDef {
    /// Maximum allowed length for a value name, counted in characters.
    pub const MAX_NAME_LEN: usize = 48;

    /// Parse a Value line: `Value [Options] Name (regex)`
    ///
    /// `line` is the raw template line (surrounding whitespace is ignored) and
    /// `line_num` is only used to annotate errors.
    ///
    /// The regex is normalized with `normalize_pattern` before it is compiled,
    /// and the normalized text is what ends up in `pattern`.
    ///
    /// # Errors
    ///
    /// * `TemplateError::InvalidValue` when the line does not start with
    ///   `Value `, has no parenthesized regex, has a missing/over-long/invalid
    ///   name, has an unrecognized token in the options position, or has too
    ///   many tokens before the regex.
    /// * `TemplateError::UnknownOption` / `TemplateError::DuplicateOption` for
    ///   a bad option list.
    /// * `TemplateError::InvalidRegex` when the normalized pattern does not
    ///   compile.
    pub fn parse(line: &str, line_num: usize) -> Result<Self, TemplateError> {
        // Every structural problem is reported the same way; only the text differs.
        let invalid = |message: &str| TemplateError::InvalidValue {
            line: line_num,
            message: message.to_string(),
        };

        // Remove the "Value " prefix (and reject lines that lack it).
        let rest = line
            .trim()
            .strip_prefix("Value ")
            .ok_or_else(|| invalid("line must start with 'Value '"))?;

        // The regex starts at the first '(' on the line.
        let regex_start = rest
            .find('(')
            .ok_or_else(|| invalid("regex pattern must be wrapped in parentheses"))?;

        let before_regex = rest[..regex_start].trim();
        let pattern = rest[regex_start..].trim();

        // Everything before the regex is either `Name` or `Options Name`.
        let mut parts = before_regex.split_whitespace();
        let (options, name) = match (parts.next(), parts.next(), parts.next()) {
            // missing name
            (None, _, _) => return Err(invalid("missing value name")),
            // just name, no options
            (Some(name), None, _) => (HashSet::new(), name.to_string()),
            // options + name
            (Some(opts), Some(name), None) => {
                // The first token must look like an option list: either it
                // contains a comma or it is a single known option name.
                if !(opts.contains(',') || ValueOption::parse(opts).is_some()) {
                    return Err(invalid(&format!(
                        "invalid format - expected 'Value [Options] Name (regex)', got unknown token '{}'",
                        opts
                    )));
                }
                (Self::parse_options(opts, line_num)?, name.to_string())
            }
            // too many tokens
            (Some(_), Some(_), Some(_)) => {
                return Err(invalid("too many tokens before regex pattern"));
            }
        };

        // Count characters, not bytes: the character check below admits
        // non-ASCII alphanumerics, which occupy several bytes each.
        if name.chars().count() > Self::MAX_NAME_LEN {
            return Err(invalid(&format!(
                "name '{}' exceeds maximum length of {}",
                name,
                Self::MAX_NAME_LEN
            )));
        }

        if !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
            return Err(invalid(&format!(
                "name '{}' contains invalid characters",
                name
            )));
        }

        if !pattern.starts_with('(') || !pattern.ends_with(')') {
            return Err(invalid("regex must be wrapped in parentheses"));
        }

        if pattern.ends_with("\\)") {
            return Err(invalid("regex cannot end with escaped parenthesis"));
        }

        // Normalize pattern for Python-to-Rust regex compatibility
        let pattern = normalize_pattern(pattern);

        // Compile once: this both validates the pattern and provides the
        // regex that List values may keep.
        let regex = Regex::new(&pattern).map_err(|e| TemplateError::InvalidRegex {
            pattern: pattern.clone(),
            message: e.to_string(),
        })?;

        // Create the named capture group version: (pattern) -> (?P<name>pattern).
        // The pattern is known to start with '(' and end with ')' (checked
        // above; normalization never removes parentheses), so the slice is in
        // bounds and falls on character boundaries.
        let inner_pattern = &pattern[1..pattern.len() - 1];
        let template_pattern = format!("(?P<{}>{})", name, inner_pattern);

        // For List values with nested groups, keep the compiled regex.
        // `captures_len` counts the implicit whole-match group as well as the
        // outer `(...)` every value pattern is wrapped in, so a nested group
        // is only present when the count exceeds 2.
        let compiled_regex = if options.contains(&ValueOption::List) && regex.captures_len() > 2 {
            Some(regex)
        } else {
            None
        };

        Ok(Self {
            name,
            pattern,
            options,
            template_pattern,
            compiled_regex,
        })
    }

    /// Parse a comma-separated option list such as `Required,Filldown`.
    ///
    /// Returns `TemplateError::UnknownOption` for a name `ValueOption::parse`
    /// does not recognize and `TemplateError::DuplicateOption` when the same
    /// option appears twice. `_line_num` is currently unused.
    fn parse_options(opts_str: &str, _line_num: usize) -> Result<ValueOptions, TemplateError> {
        let mut options = HashSet::new();

        // Note that the python implementation strictly requires no spaces between
        // commas in the options. It must be "Required,Filldown" not "Required, Filldown"
        for opt_name in opts_str.split(',').map(str::trim) {
            let opt = ValueOption::parse(opt_name)
                .ok_or_else(|| TemplateError::UnknownOption(opt_name.into()))?;

            if !options.insert(opt) {
                return Err(TemplateError::DuplicateOption(opt_name.into()));
            }
        }

        Ok(options)
    }

    /// Check if this value has a specific option.
    pub fn has_option(&self, opt: ValueOption) -> bool {
        self.options.contains(&opt)
    }
}

/// Normalize a regex pattern for Python-to-Rust compatibility.
///
/// Python's `re` module is more lenient than Rust's `fancy-regex` in two
/// specific ways that affect real-world TextFSM templates:
///
/// ## 1. Backslash angle brackets
///
/// Python treats `\<` and `\>` as literal `<` and `>` because they are not
/// recognized escape sequences. Rust's fancy-regex treats them as word
/// boundary assertions (GNU-style `\<` = start-of-word, `\>` = end-of-word).
/// The backslash is dropped both outside and inside character classes, so
/// `[^\>]` becomes `[^>]`.
///
/// ## 2. Quantifiers on lookaround assertions
///
/// Lookaround assertions (`(?<=...)`, `(?<!...)`, `(?=...)`, `(?!...)`) are
/// zero-width — they match a position, not characters. Quantifying a
/// zero-width match (`(?<=x)+`) is semantically meaningless.
/// Python silently ignores the quantifier. fancy-regex rejects it as invalid.
///
/// Handled quantifiers: `+`, `*`, `?`, `{n}`, `{n,}`, `{n,m}` (and lazy variants).
///
/// All other text is copied through unchanged. Parentheses inside `[...]` are
/// treated as literals and do not affect group tracking; an unterminated
/// class simply copies the remainder of the pattern.
pub(crate) fn normalize_pattern(pattern: &str) -> String {
    /// Append the escape sequence `\<escaped>`, dropping the backslash when
    /// the escaped character is `<` or `>` (normalization 1).
    fn push_escape(out: &mut String, escaped: char) {
        if !matches!(escaped, '<' | '>') {
            out.push('\\');
        }
        out.push(escaped);
    }

    let chars: Vec<char> = pattern.chars().collect();
    let len = chars.len();
    let mut result = String::with_capacity(pattern.len());
    let mut i = 0;

    // Checked lookahead so group-prefix inspection never indexes out of bounds.
    let at = |idx: usize| chars.get(idx).copied();

    // Stack to track group types. Each entry is true if the group is a lookaround.
    let mut group_stack: Vec<bool> = Vec::new();

    while i < len {
        match chars[i] {
            // Escape sequence: consume the backslash and the escaped character
            // together so an escaped paren or bracket is never misread as
            // structure. A lone trailing backslash falls through to the
            // default arm and is copied as-is.
            '\\' if i + 1 < len => {
                push_escape(&mut result, chars[i + 1]);
                i += 2;
            }

            // Character class: copy its contents without group tracking
            // (parens inside [...] are literal).
            '[' => {
                result.push('[');
                i += 1;
                // Handle negation
                if i < len && chars[i] == '^' {
                    result.push('^');
                    i += 1;
                }
                // Handle literal ] at start of class
                if i < len && chars[i] == ']' {
                    result.push(']');
                    i += 1;
                }
                while i < len && chars[i] != ']' {
                    if chars[i] == '\\' && i + 1 < len {
                        // `\<` / `\>` are literal angle brackets in Python
                        // inside a class too, so normalize them here as well.
                        push_escape(&mut result, chars[i + 1]);
                        i += 2;
                    } else {
                        result.push(chars[i]);
                        i += 1;
                    }
                }
                if i < len {
                    result.push(']'); // the closing ']'
                    i += 1;
                }
            }

            // Group opening: remember whether it is a lookaround.
            '(' => {
                let is_lookaround = at(i + 1) == Some('?')
                    && match at(i + 2) {
                        // (?= or (?!
                        Some('=') | Some('!') => true,
                        // (?<= or (?<!
                        Some('<') => matches!(at(i + 3), Some('=') | Some('!')),
                        _ => false,
                    };
                group_stack.push(is_lookaround);
                result.push('(');
                i += 1;
            }

            // Group closing: an unmatched ')' is treated as a non-lookaround.
            ')' => {
                let closes_lookaround = group_stack.pop().unwrap_or(false);
                result.push(')');
                i += 1;

                // Normalization 2: strip quantifiers after lookaround close
                if closes_lookaround && i < len {
                    i = skip_quantifier(&chars, i);
                }
            }

            other => {
                result.push(other);
                i += 1;
            }
        }
    }

    result
}

/// Step over one quantifier starting at index `i`, if there is one.
///
/// Recognized forms are `+`, `*`, `?`, `{n}`, `{n,}` and `{n,m}`, each
/// optionally followed by a lazy `?`. The return value is the index of the
/// first character after the quantifier; when the text at `i` is not a
/// complete quantifier (or `i` is past the end) `i` is returned unchanged.
fn skip_quantifier(chars: &[char], mut i: usize) -> usize {
    let origin = i;
    let peek = |idx: usize| chars.get(idx).copied();
    let digit_at = |idx: usize| peek(idx).map_or(false, |c| c.is_ascii_digit());

    match peek(i) {
        Some('+') | Some('*') | Some('?') => i += 1,
        Some('{') => {
            i += 1;
            // A counted quantifier must open with at least one digit.
            if !digit_at(i) {
                return origin;
            }
            while digit_at(i) {
                i += 1;
            }
            // Optional ",m" / "," part.
            if peek(i) == Some(',') {
                i += 1;
                while digit_at(i) {
                    i += 1;
                }
            }
            // Without the closing brace this was never a quantifier.
            if peek(i) != Some('}') {
                return origin;
            }
            i += 1;
        }
        // Anything else (including end of input) is not a quantifier.
        _ => return origin,
    }

    // A single trailing '?' makes the quantifier lazy; consume it too.
    if peek(i) == Some('?') {
        i += 1;
    }
    i
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Check that each `(input, expected)` pair normalizes as stated.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_pattern(input), expected);
        }
    }

    #[test]
    fn test_parse_simple_value() {
        let parsed = ValueDef::parse("Value Interface (\\S+)", 1).unwrap();

        assert_eq!(parsed.name, "Interface");
        assert_eq!(parsed.pattern, "(\\S+)");
        assert_eq!(parsed.template_pattern, "(?P<Interface>\\S+)");
        assert!(parsed.options.is_empty());
    }

    #[test]
    fn test_parse_value_with_options() {
        let parsed = ValueDef::parse("Value Required,Filldown Hostname (\\S+)", 1).unwrap();

        assert_eq!(parsed.name, "Hostname");
        // Both listed options are present...
        for expected in [ValueOption::Required, ValueOption::Filldown] {
            assert!(parsed.has_option(expected));
        }
        // ...and an unlisted one is not.
        assert!(!parsed.has_option(ValueOption::List));
    }

    #[test]
    fn test_parse_value_with_spaces_in_regex() {
        let parsed = ValueDef::parse("Value Status (up|down|administratively down)", 1).unwrap();

        assert_eq!(parsed.name, "Status");
        assert_eq!(parsed.pattern, "(up|down|administratively down)");
    }

    #[test]
    fn test_invalid_regex() {
        // The unterminated character class must surface as a regex error.
        match ValueDef::parse("Value Bad ([invalid)", 1) {
            Err(TemplateError::InvalidRegex { .. }) => {}
            other => panic!("expected InvalidRegex, got {:?}", other),
        }
    }

    #[test]
    fn test_missing_parens() {
        // A regex without enclosing parentheses is a malformed Value line.
        match ValueDef::parse("Value Name \\S+", 1) {
            Err(TemplateError::InvalidValue { .. }) => {}
            other => panic!("expected InvalidValue, got {:?}", other),
        }
    }

    #[test]
    fn test_normalize_angle_brackets() {
        // Parsing stores the normalized pattern: \< and \> become < and >.
        let parsed =
            ValueDef::parse(r"Value DateTime (\S+\s+\d+\s+\d+|\<no date\>)", 1).unwrap();

        assert!(parsed.pattern.contains("<no date>"));
        assert!(!parsed.pattern.contains(r"\<"));
    }

    #[test]
    fn test_normalize_pattern_angle_brackets() {
        assert_normalizes(&[
            (r"^\s*\<\S+", r"^\s*<\S+"),
            (r"\<omited\>", "<omited>"),
            // Ordinary escapes pass through untouched.
            (r"\s+\d+", r"\s+\d+"),
            // Bare angle brackets are already literal.
            ("<already>", "<already>"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_lookaround_quantifiers() {
        assert_normalizes(&[
            // Lookbehind with +
            (r"(?<=[^()\s])+", r"(?<=[^()\s])"),
            // Lookahead with *
            (r"(?=foo)*", r"(?=foo)"),
            // Negative lookbehind with ?
            (r"(?<!bar)?", r"(?<!bar)"),
            // Negative lookahead with {2,3}
            (r"(?!baz){2,3}", r"(?!baz)"),
            // Lazy quantifier
            (r"(?<=x)+?", r"(?<=x)"),
        ]);
    }

    #[test]
    fn test_normalize_pattern_preserves_normal_groups() {
        // Quantifiers on ordinary groups must survive normalization.
        for pattern in [r"(foo)+", r"(?:bar)*", r"(?P<name>baz){2}"] {
            assert_eq!(normalize_pattern(pattern), pattern);
        }
    }

    #[test]
    fn test_normalize_pattern_combined() {
        // Both normalizations applied to one pattern (like a real template rule).
        assert_normalizes(&[(
            r"^\s+\<omited\s+output\>(?<=[^()\s])+",
            r"^\s+<omited\s+output>(?<=[^()\s])",
        )]);
    }

    #[test]
    fn test_normalize_pattern_char_class_with_parens() {
        // Parens inside a character class are literal, not group delimiters.
        assert_normalizes(&[(r"(?<=[^()\s])+(\s+foo)", r"(?<=[^()\s])(\s+foo)")]);
    }
}