rustik-highlight 0.1.0

Rustik code highlighter.
Documentation
//! End-pattern resolution for begin/end rules.
//!
//! TextMate end patterns can be static (a regex known at compile time) or
//! dynamic (numeric backrefs into the begin match resolved per occurrence).
//! Resolved dynamic end regexes are cached so that a multi-line construct does
//! not recompile its end regex on every line.

use std::collections::BTreeMap;
use std::sync::Arc;

use onig::Regex;

use super::pattern::RegexMatch;

/// Cache of compiled regexes for resolved dynamic end patterns.
///
/// Keys are the end-pattern source strings that get compiled (the resolved
/// source, after backref substitution); values are shared handles, so a cache
/// hit only clones the `Arc` instead of recompiling the regex.
pub(super) type EndRegexCache = BTreeMap<String, Arc<Regex>>;

/// End-pattern definition stored on a compiled begin/end rule.
///
/// [`EndPattern::compile`] picks the variant: a source containing an unescaped
/// numeric backreference becomes [`Self::Dynamic`], anything else is compiled
/// up front as [`Self::Static`].
#[derive(Debug)]
pub(super) enum EndPattern {
    /// End regex compiled at grammar-compile time.
    Static {
        /// Original regex source, kept alongside the compiled form.
        source: String,
        /// Compiled regex.
        regex: Regex,
    },
    /// End regex deferred until begin-match captures are known.
    Dynamic {
        /// Uncompiled regex source containing numeric backrefs; it is expanded
        /// against the begin match when the rule opens.
        source: String,
    },
}

/// End regex resolved against a specific begin match.
///
/// Static end patterns borrow their regex from the compiled grammar, while
/// dynamic patterns own a shared handle into the [`EndRegexCache`]. Both
/// variants expose the same [`Self::source`] and [`Self::regex`] accessors so
/// the tokenizer can treat them uniformly.
///
/// The `'pat` lifetime ties the static variant to the [`EndPattern`] it was
/// resolved from; the dynamic variant holds only owned data.
pub(super) enum EndRegex<'pat> {
    /// End regex compiled with the grammar.
    Static {
        /// Source string for tokenizer heuristics (e.g. escaped-quote skip).
        source: &'pat str,
        /// Compiled regex.
        regex: &'pat Regex,
    },
    /// End regex resolved from begin captures and shared via the cache.
    Dynamic {
        /// Resolved source string (after backref substitution). This is the
        /// value [`Self::dynamic_source`] returns for storage in line state.
        source: String,
        /// Compiled regex held jointly with the cache.
        regex: Arc<Regex>,
    },
}

impl EndPattern {
    /// Builds the end pattern for `source`.
    ///
    /// A source with an unescaped numeric backreference is stored uncompiled
    /// as [`Self::Dynamic`]; any other source is compiled immediately into
    /// [`Self::Static`]. Returns `None` when a static source fails to compile.
    pub(super) fn compile(source: &str) -> Option<Self> {
        let pattern = if has_numeric_backrefs(source) {
            Self::Dynamic {
                source: source.to_owned(),
            }
        } else {
            Self::Static {
                // Compile first so an invalid regex bails out before copying.
                regex: Regex::new(source).ok()?,
                source: source.to_owned(),
            }
        };

        Some(pattern)
    }

    /// Produces the concrete end regex at the moment the begin/end rule opens.
    ///
    /// Static patterns are handed back as borrows. Dynamic patterns have their
    /// backrefs expanded from `begin` and `line`, and the expanded source is
    /// then compiled (or fetched) through `cache`. Returns `None` when the
    /// expanded source cannot be compiled.
    pub(super) fn resolve_for_begin<'pat>(
        &'pat self,
        begin: &RegexMatch,
        line: &str,
        cache: &mut EndRegexCache,
    ) -> Option<EndRegex<'pat>> {
        let template = match self {
            Self::Static { source, regex } => return Some(EndRegex::Static { source, regex }),
            Self::Dynamic { source } => source,
        };

        let expanded = begin.expand_backrefs(template, line);
        let compiled = lookup_or_compile(cache, &expanded);

        compiled.map(|regex| EndRegex::Dynamic {
            source: expanded,
            regex,
        })
    }

    /// Rebuilds the end regex from a dynamic source saved in line state.
    ///
    /// Static patterns ignore `dynamic_source` and return their borrowed
    /// regex. Dynamic patterns return `None` when no stored source is supplied
    /// or when that source cannot be compiled.
    pub(super) fn resume<'pat>(
        &'pat self,
        dynamic_source: Option<&str>,
        cache: &mut EndRegexCache,
    ) -> Option<EndRegex<'pat>> {
        match self {
            Self::Static { source, regex } => Some(EndRegex::Static { source, regex }),
            Self::Dynamic { .. } => dynamic_source.and_then(|stored| {
                let regex = lookup_or_compile(cache, stored)?;

                Some(EndRegex::Dynamic {
                    source: stored.to_owned(),
                    regex,
                })
            }),
        }
    }

    /// Reports whether resolving this end pattern requires begin-match captures.
    pub(super) fn is_dynamic(&self) -> bool {
        match self {
            Self::Dynamic { .. } => true,
            Self::Static { .. } => false,
        }
    }
}

impl EndRegex<'_> {
    /// Source text of the end regex, whichever variant holds it.
    pub(super) fn source(&self) -> &str {
        match self {
            Self::Dynamic { source: resolved, .. } => resolved.as_str(),
            Self::Static { source: original, .. } => original,
        }
    }

    /// Compiled regex to search with, whichever variant holds it.
    pub(super) fn regex(&self) -> &Regex {
        match self {
            Self::Dynamic { regex: shared, .. } => shared.as_ref(),
            Self::Static { regex: borrowed, .. } => borrowed,
        }
    }

    /// Owned copy of the resolved source to persist in line state.
    ///
    /// Only dynamic end regexes carry one; static ones yield `None`.
    pub(super) fn dynamic_source(&self) -> Option<String> {
        if let Self::Dynamic { source, .. } = self {
            Some(source.clone())
        } else {
            None
        }
    }
}

/// Fetches the compiled regex for `source` from `cache`, compiling it on a miss.
///
/// A freshly compiled regex is inserted under `source` before being returned,
/// so later calls with the same source share it. Returns `None` when `source`
/// is not a valid regex; nothing is inserted in that case.
fn lookup_or_compile(cache: &mut EndRegexCache, source: &str) -> Option<Arc<Regex>> {
    match cache.get(source).cloned() {
        hit @ Some(_) => hit,
        None => {
            let compiled = Arc::new(Regex::new(source).ok()?);
            cache.insert(source.to_owned(), Arc::clone(&compiled));
            Some(compiled)
        }
    }
}

/// Reports whether `source` contains an unescaped numeric backreference.
///
/// A backreference here is an ASCII digit that directly follows a run of
/// backslashes of odd length: `\1` qualifies, while `\\1` (an escaped
/// backslash followed by a literal digit) does not.
fn has_numeric_backrefs(source: &str) -> bool {
    // True exactly when the backslash run ending just before the current byte
    // has odd length, i.e. the current byte is escaped.
    let mut escaped = false;

    source.bytes().any(|byte| {
        let is_backref = escaped && byte.is_ascii_digit();
        // A backslash flips the parity; any other byte ends the run.
        escaped = byte == b'\\' && !escaped;
        is_backref
    })
}