// tastty-driver 0.1.0

// Terminal automation driver built on tastty
//! Compiled search patterns.

use std::sync::Arc;

use regex::Regex;

use super::SearchError;

/// Default upper bound, in bytes, on the compiled regex's NFA size
/// (`256 * 1024` = 262,144 bytes).
///
/// A Unicode-aware character class like `\w` already compiles to
/// roughly 50 KB, so 256 KB leaves comfortable headroom for ordinary
/// patterns while still rejecting the kinds of nested-quantifier
/// blow-ups an attacker could craft to exhaust memory at compile
/// time.
///
/// [`SearchPattern::regex`] applies this cap when it compiles a
/// pattern; to compile a specific pattern under a different ceiling,
/// call [`SearchPattern::regex_with_limit`] instead.
pub const DEFAULT_NFA_SIZE_LIMIT: usize = 256 * 1024;

/// A compiled search pattern.
///
/// Construct via [`SearchPattern::regex`] or [`SearchPattern::literal`].
///
/// Cloning a [`SearchPattern::Regex`] is inexpensive: the compiled
/// regex state is reference-counted internally, so a caller building
/// several searches over the same pattern can clone the value rather
/// than recompile. Cloning a [`SearchPattern::Literal`] copies the
/// needle string.
///
/// The enum is `#[non_exhaustive]`: code outside this crate that
/// matches on it must include a wildcard arm, which leaves room for
/// further pattern kinds to be added later.
#[non_exhaustive]
#[derive(Clone, Debug)]
pub enum SearchPattern {
    /// A compiled regular-expression pattern. Construct via
    /// [`SearchPattern::regex`].
    Regex(RegexPattern),
    /// A literal substring pattern. The wrapped string is the verbatim
    /// needle as supplied to [`SearchPattern::literal`]; case folding
    /// is governed by
    /// [`SearchOptions::case_insensitive`](crate::SearchOptions::case_insensitive)
    /// at find time.
    Literal(String),
}

/// Compiled regex carrier for [`SearchPattern::Regex`]. Construct
/// via [`SearchPattern::regex`].
///
/// Both fields sit behind an [`Arc`], so cloning a `RegexPattern`
/// only bumps reference counts; neither the compiled regex nor the
/// pattern text is copied.
#[derive(Clone, Debug)]
pub struct RegexPattern {
    /// The compiled regular expression, shared between clones.
    pub(super) re: Arc<Regex>,
    /// The pattern source text exactly as it was handed to the
    /// constructor, shared between clones.
    pub(super) src: Arc<str>,
}

impl SearchPattern {
    /// Compile `pattern` as a regular expression under the default
    /// NFA size cap.
    ///
    /// This is [`SearchPattern::regex_with_limit`] called with
    /// [`DEFAULT_NFA_SIZE_LIMIT`](self::DEFAULT_NFA_SIZE_LIMIT)
    /// (256 KB), which bounds compile-time memory when the pattern
    /// comes from untrusted input. Reach for
    /// [`SearchPattern::regex_with_limit`] when a larger ceiling is
    /// genuinely required.
    ///
    /// Multi-line mode is switched on internally, so `^` and `$`
    /// anchor to logical-line boundaries without the caller writing
    /// `(?m)`. For case-insensitive regex matching, put `(?i)` in the
    /// pattern itself:
    /// [`SearchOptions::case_insensitive`](crate::SearchOptions::case_insensitive)
    /// is honored only for literals and has no effect on regex
    /// patterns.
    ///
    /// # Errors
    ///
    /// * [`SearchError::EmptyPattern`] if `pattern` is empty. Empty
    ///   patterns are rejected because they would match at every cell
    ///   boundary, which is rarely useful and trivially
    ///   distinguishable from "no match".
    /// * [`SearchError::InvalidRegex`] if the pattern fails to
    ///   compile. This includes the regex crate's `CompiledTooBig`
    ///   error, raised when the pattern exceeds the default NFA size
    ///   cap.
    pub fn regex(pattern: &str) -> Result<Self, SearchError> {
        Self::regex_with_limit(pattern, DEFAULT_NFA_SIZE_LIMIT)
    }

    /// Compile `pattern` as a regular expression with a
    /// caller-chosen NFA size ceiling.
    ///
    /// Behaves exactly like [`SearchPattern::regex`], except that
    /// `size_limit` (in bytes) is handed to
    /// [`regex::RegexBuilder::size_limit`] in place of the 256 KB
    /// security-by-default cap. Use it when the pattern source has
    /// already been vetted and the automaton is legitimately large
    /// (e.g. Unicode-class-heavy patterns over ~256 KB).
    ///
    /// # Errors
    ///
    /// Same surface as [`SearchPattern::regex`].
    pub fn regex_with_limit(pattern: &str, size_limit: usize) -> Result<Self, SearchError> {
        // Reject the empty pattern up front, before touching the
        // regex compiler at all.
        if pattern.is_empty() {
            return Err(SearchError::EmptyPattern);
        }

        let mut builder = regex::RegexBuilder::new(pattern);
        builder.multi_line(true).size_limit(size_limit);

        match builder.build() {
            Ok(compiled) => Ok(Self::Regex(RegexPattern {
                re: Arc::new(compiled),
                src: Arc::from(pattern),
            })),
            // Keep the offending pattern text alongside the underlying
            // compile error so the caller can report both.
            Err(err) => Err(SearchError::InvalidRegex {
                pattern: pattern.to_owned(),
                source: err.into(),
            }),
        }
    }

    /// Build a literal-substring search pattern.
    ///
    /// The needle is stored verbatim and none of its characters are
    /// treated as regex metacharacters. Whether matching folds case
    /// is decided at find time by
    /// [`SearchOptions::case_insensitive`](crate::SearchOptions::case_insensitive).
    ///
    /// An empty needle is accepted here; it is
    /// [`Session::find`](crate::Session::find) that rejects it.
    #[must_use]
    pub fn literal(needle: impl Into<String>) -> Self {
        Self::Literal(needle.into())
    }
}