// tyrx 0.1.2
//
// Typed, ergonomic regular expression library
// Documentation
#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
#![doc(html_logo_url = "https://narnium.com/tyrx_icons/tyrx_icon-512.png")]

use std::iter::FusedIterator;
use std::hash::{DefaultHasher, BuildHasherDefault};
use std::fmt::{self, Formatter};
use std::collections::HashMap;
use std::sync::Mutex;
use std::any::{type_name, TypeId};

use thiserror::Error;

use crate::util::PatternDisplay;

pub use regex::{Regex, Match, Captures, CaptureMatches};
pub use crate::util::{Spanned, MatchFromStr, ErasedLifetime};
pub use crate::error::{Error, Result};

// re-export derive macros
#[cfg(feature = "derive")]
pub use tyrx_macros::*;


pub mod builder;
pub mod util;
#[doc(hidden)]
pub mod impls;
pub mod error;


/// The main entry point. This trait is automatically implemented by
/// types that implement [`RegexPattern`] + [`FromMatch`] + [`ErasedLifetime`],
/// each of which can be `#[derive]`d.
pub trait TyRx<'h>: Sized + RegexPattern + FromMatch<'h> + ErasedLifetime {
    /// Given a match (and a set of captures), constructs an instance of `Self`.
    fn from_captures(captures: &Captures<'h>) -> Result<Self, Error> {
        Self::from_match("$root", captures.get_match(), captures)
    }

    /// Extracts the first occurrence of `Self` from the given string.
    fn from_str(haystack: &'h str) -> Result<Self, Error> {
        let regex = build_regex::<Self>();
        let captures = regex.captures(haystack).ok_or(Error::NoMatch)?;
        Self::from_captures(&captures)
    }

    /// Extracts all non-overlapping occurrences of `Self` from the given string.
    fn iter_from_str(haystack: &'h str) -> IterFromStr<'h, Self> {
        let regex = build_regex::<Self>();
        let iter = regex.captures_iter(haystack);
        let phantom = std::marker::PhantomData;

        IterFromStr { iter, phantom }
    }
}

// Blanket implementation: every type satisfying the three supertraits gets
// `TyRx` (and its provided methods) for free.
impl<'h, T: RegexPattern + FromMatch<'h> + ErasedLifetime> TyRx<'h> for T {}

/// A type that corresponds to a regular expression pattern.
///
/// This trait can be `#[derive]`d on struct and enum types.
pub trait RegexPattern {
    /// Writes the regex pattern of this type into the formatter `f`.
    fn fmt_pattern(f: &mut Formatter<'_>) -> fmt::Result;

    /// Convenience constructor for a [`PatternDisplay`] of `Self`.
    ///
    /// [`build_regex`] converts this value to a string and compiles that
    /// string as the regex for the type.
    fn pattern_display() -> PatternDisplay<Self> {
        PatternDisplay::<Self>::default()
    }
}

/// Denotes a type which can be parsed from a matched substring.
///
/// This trait can be `#[derive]`d on struct and enum types.
pub trait FromMatch<'h>: Sized {
    /// Builds `Self` from the match `m`.
    ///
    /// * `name` - the name under which `m` was obtained. [`TyRx::from_captures`]
    ///   passes `"$root"` together with the overall match, while
    ///   [`CapturesExt::group`] passes the name of the requested capture group.
    /// * `m` - the matched substring to convert.
    /// * `captures` - the full set of captures that `m` was taken from.
    fn from_match(name: &'static str, m: Match<'h>, captures: &Captures<'h>) -> Result<Self, Error>;
}

/// Global cache of compiled regexes, keyed by type ID and filled lazily by
/// [`build_regex`]. The values are leaked, `'static` regex objects.
///
/// It is OK to use `DefaultHasher` without any random state, because the
/// keys are not controlled by an attacker, as we always insert type IDs.
/// Using the default non-random hasher is _necessary_ for `const`-ness.
static RX_CACHE: Mutex<HashMap<TypeId, &'static Regex, BuildHasherDefault<DefaultHasher>>> = Mutex::new(
    HashMap::with_hasher(BuildHasherDefault::new())
);

/// Compiles and globally caches a regex object for the pattern of the specified type.
///
/// The cache is keyed by `TypeId::of::<T::Erased>()`, so the pattern of a type
/// is compiled at most once; later calls return the same `&'static Regex`.
///
/// # Panics
///
/// Panics if the pattern of `T` is not a valid regular expression. Such a
/// panic does not affect later calls for other types: the cache recovers
/// from mutex poisoning (see the comment in the body).
pub fn build_regex<T>() -> &'static Regex
where
    T: RegexPattern + ErasedLifetime
{
    // The closure below may panic (invalid pattern) while the lock is held,
    // which poisons the mutex. That panic happens before anything is
    // inserted, so the map is never left in a half-updated state and it is
    // sound to keep using it. Without this recovery, a single bad pattern
    // would make every subsequent call, for any type, panic with an
    // unrelated "mutex poisoned" message.
    let mut cache = RX_CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    // Copy the `&'static Regex` out of the map slot so that the returned
    // reference does not borrow from the mutex guard.
    let regex: &'static Regex = *cache
        .entry(TypeId::of::<T::Erased>())
        .or_insert_with(|| {
            let pattern = T::pattern_display().to_string();
            let rx = match Regex::new(&pattern) {
                Ok(rx) => rx,
                Err(err) => panic!("syntax error in regex pattern for `{ty}`: {err}", ty = type_name::<T>()),
            };

            // just leak the regex object, as there is at most one per type,
            // so this uses memory that is basically bounded at compile time.
            Box::leak(Box::new(rx))
        });

    regex
}

/// An iterator over all non-overlapping occurrences of `T` in a string.
///
/// Created by [`TyRx::iter_from_str`]. Each item is the result of converting
/// one set of captures into a `T`.
#[derive(Debug)]
pub struct IterFromStr<'h, T> {
    /// The underlying capture iterator. The regex lifetime is `'static`
    /// because [`build_regex`] hands out `&'static Regex` references; `'h`
    /// is the lifetime of the haystack.
    iter: CaptureMatches<'static, 'h>,
    /// Ties the iterator to its output type `T` without storing a `T`.
    phantom: std::marker::PhantomData<fn() -> T>,
}

impl<'h, T> Iterator for IterFromStr<'h, T>
where
    T: TyRx<'h>,
{
    type Item = Result<T, Error>;

    /// Advances the underlying capture iterator and converts the next set of
    /// captures (if any) into a `T`.
    fn next(&mut self) -> Option<Self::Item> {
        let captures = self.iter.next()?;
        Some(T::from_captures(&captures))
    }

    /// Forwards the size hint of the underlying capture iterator; every set
    /// of captures produces exactly one item.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }

    /// Forwarded explicitly, for more speed; see the docs for `CaptureMatches::count()`.
    /// No conversion into `T` is attempted while counting.
    fn count(self) -> usize {
        let Self { iter, .. } = self;
        iter.count()
    }
}

// Marks `IterFromStr` as fused, i.e. it keeps returning `None` once exhausted.
impl<'h, T> FusedIterator for IterFromStr<'h, T>
where
    T: TyRx<'h>,
{
}

/// Convenience extensions for extracting typed capture groups from a [`regex::Captures`].
pub trait CapturesExt<'h> {
    /// Looks up the capture group called `group` and converts it into a `T`
    /// via [`FromMatch::from_match`].
    ///
    /// The implementation for [`regex::Captures`] returns [`Error::NoGroup`]
    /// when no group with that name took part in the match.
    fn group<T: FromMatch<'h>>(&self, group: &'static str) -> Result<T, Error>;
}

impl<'h> CapturesExt<'h> for Captures<'h> {
    fn group<T: FromMatch<'h>>(&self, name: &'static str) -> Result<T, Error> {
        let m = self.name(name).ok_or(Error::NoGroup(name))?;
        T::from_match(name, m, self)
    }
}