// rec 0.10.0
//
// Regular Expression Constructor
// Documentation
//! Implements character classes.
use crate::prelude::*;
use core::ops::{Add, BitOr};

/// An enumeration of predefined single character matches.
///
/// Each variant renders to a fixed regular expression fragment; the example on each variant
/// shows the exact pattern that the variant compares equal to.
#[derive(Clone, Copy, Debug)]
pub enum Class {
    /// Matches any alphabetic character.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Alpha, Rec::from("[[:alpha:]]"));
    /// ```
    Alpha,
    /// Matches any alphabetic or numerical digit character.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::AlphaNum, Rec::from("[[:alnum:]]"));
    /// ```
    AlphaNum,
    /// Matches any numerical digit character.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Digit, Rec::from(r"\d"));
    /// ```
    Digit,
    /// Matches any whitespace character.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Whitespace, Rec::from(r"\s"));
    /// ```
    Whitespace,
    /// Matches any character other than a newline.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Any, Rec::from("."));
    /// ```
    Any,
    /// Matches the start of the text.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Start, Rec::from("^"));
    /// ```
    Start,
    /// Matches the end of the text.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::End, Rec::from("$"));
    /// ```
    End,
    /// Matches the sign character of a number (`+` or `-`).
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Sign, Rec::from(r"[+\-]"));
    /// ```
    Sign,
    /// Matches any digit that is not `0`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::NonZeroDigit, Rec::from(r"[1-9]"));
    /// ```
    NonZeroDigit,
    /// Matches any hexadecimal digit.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::HexDigit, Rec::from("[[:xdigit:]]"));
    /// ```
    HexDigit,
}

// Allows a `Class` on the left-hand side of `+` with any `Element` on the right.
impl<Rhs: Element> Add<Rhs> for Class {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    fn add(self, rhs: Rhs) -> Self::Output {
        self.concatenate(&rhs)
    }
}

impl Atom for Class {
    /// Returns the fragment that represents this class inside a larger character set.
    ///
    /// The fragments for `Alpha`, `AlphaNum`, `HexDigit`, `Sign` and `NonZeroDigit` are only
    /// meaningful between brackets; `Element::to_regex` for `Class` adds those brackets when
    /// the class is used on its own.
    fn to_part(&self) -> String {
        let fragment = match *self {
            Class::Alpha => "[:alpha:]",
            Class::AlphaNum => "[:alnum:]",
            Class::HexDigit => "[:xdigit:]",
            Class::Digit => r"\d",
            Class::Whitespace => r"\s",
            Class::Sign => r"+\-",
            Class::NonZeroDigit => "1-9",
            Class::Any => ".",
            Class::Start => "^",
            Class::End => "$",
        };

        fragment.to_owned()
    }
}

impl BitOr<char> for Class {
    type Output = Ch;

    /// Produces a `Ch::Union` holding the part of this class followed by the part of `rhs`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Alpha | '0', Rec::from("[[:alpha:]0]"));
    /// ```
    fn bitor(self, rhs: char) -> Self::Output {
        // The class part comes first so the rendered set keeps the operand order.
        let class_part = self.to_part();
        let char_part = rhs.to_part();

        Ch::Union(vec![class_part, char_part])
    }
}

// `Class | Class` should not produce a `Rec`, because the result may still be combined with
// another `Atom` via `|`. For that reason `BitOr<Class>` is implemented explicitly here, and
// `BitOr<T>` has to be implemented separately for each `T: Element`.
impl BitOr<Class> for Class {
    // A union of 2 classes cannot in general be expressed as a single `Class`, so the output is
    // always a `Ch`, even in the cases where a `Class` would be ideal.
    type Output = Ch;

    /// Produces the union of the two classes.
    ///
    /// `Alpha` combined with `Digit` (in either order) collapses to the single `AlphaNum`
    /// part; every other pair keeps both parts in operand order.
    ///
    /// NOTE(review): `Start`, `End` and `Any` are accepted too; their parts (`^`, `$`, `.`)
    /// presumably do not keep their usual meaning inside a bracketed set — confirm intent.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Alpha | Class::Whitespace, Rec::from(r"[[:alpha:]\s]"));
    /// ```
    ///
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Alpha | Class::Digit, Rec::from("[[:alnum:]]"));
    /// ```
    fn bitor(self, rhs: Self) -> Self::Output {
        let parts = match (self, rhs) {
            (Class::Alpha, Class::Digit) | (Class::Digit, Class::Alpha) => {
                vec![Class::AlphaNum.to_part()]
            }
            (left, right) => vec![left.to_part(), right.to_part()],
        };

        Ch::Union(parts)
    }
}

// Alternation of a `Class` with a string; unlike `Class | char`, the result is a `Rec`.
impl BitOr<&str> for Class {
    type Output = Rec;

    /// Builds a `Rec` from `self` and `rhs` by delegating to `alternate`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!(Class::Alpha | "12", Rec::from("[[:alpha:]]|12"));
    /// ```
    fn bitor(self, rhs: &str) -> Self::Output {
        self.alternate(&rhs)
    }
}

// Alternation of a `Class` with an existing `Rec`.
impl BitOr<Rec> for Class {
    type Output = Rec;

    /// Builds a `Rec` from `self` and `rhs` by delegating to `alternate`.
    fn bitor(self, rhs: Rec) -> Self::Output {
        self.alternate(&rhs)
    }
}

impl Element for Class {
    /// Returns the complete regular expression for this class.
    ///
    /// Classes whose part is only valid inside a character set are wrapped in `[` and `]`;
    /// the remaining classes are returned exactly as `to_part` renders them.
    fn to_regex(&self) -> String {
        let needs_brackets = match *self {
            Class::Alpha
            | Class::AlphaNum
            | Class::HexDigit
            | Class::Sign
            | Class::NonZeroDigit => true,
            Class::Digit | Class::Whitespace | Class::Any | Class::Start | Class::End => false,
        };
        let part = self.to_part();

        if needs_brackets {
            format!("[{}]", part)
        } else {
            part
        }
    }

    /// Always `true` for a `Class`.
    fn is_atom(&self) -> bool {
        true
    }
}

// Allows a `Class` to be compared with any `Element`, as the examples in this file do with
// `Rec`.
impl<T: Element> PartialEq<T> for Class {
    /// Compares `self` with `other` by delegating to `is_equal`.
    fn eq(&self, other: &T) -> bool {
        self.is_equal(other)
    }
}

/// Represents a match of one character.
///
/// Every variant is rendered as a single bracketed set, i.e. `[...]`.
#[derive(Debug)]
pub enum Ch {
    /// Matches any of the chars in the given &str.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::AnyOf("abc"), Rec::from("[abc]"));
    /// ```
    ///
    /// ## `-` is not interpreted as range
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::AnyOf("a-c"), Rec::from(r"[a\-c]"));
    /// ```
    AnyOf(&'static str),
    /// Matches any of the given parts.
    ///
    /// Each part is an already rendered set fragment (for example `a-c` or `[:alpha:]`); the
    /// parts are concatenated in order inside one pair of brackets.
    Union(Vec<String>),
    /// Matches any character between (inclusive) the 2 given chars.
    ///
    /// The first char is the start of the range and the second char is its end.
    Range(char, char),
}

impl Ch {
    /// Creates a `Ch::Range` from `start` to `end`.
    ///
    /// Both bounds are converted to `char` with `Into`, so any type that converts into a
    /// `char` may be given.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::spread(32, 45), Ch::Range(char::from(32), char::from(45)));
    /// ```
    pub fn spread<T: Into<char>>(start: T, end: T) -> Self {
        let first: char = start.into();
        let last: char = end.into();

        Ch::Range(first, last)
    }

    /// Creates a `Ch` that matches the character with the given numeric value.
    ///
    /// The converted character is stored as the only part of a `Ch::Union`.
    ///
    /// NOTE(review): the character is inserted as-is, without any escaping.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::value(0x20), Ch::AnyOf(" "));
    /// ```
    pub fn value<T: Into<char>>(value: T) -> Self {
        let character: char = value.into();

        Ch::Union(vec![character.to_string()])
    }
}

// Allows a `Ch` on the left-hand side of `+` with any `Element` on the right.
impl<Rhs: Element> Add<Rhs> for Ch {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    fn add(self, rhs: Rhs) -> Self::Output {
        self.concatenate(&rhs)
    }
}

impl Atom for Ch {
    /// Returns the contents of the bracketed set that this `Ch` represents, without the
    /// surrounding `[` and `]`.
    ///
    /// * `AnyOf`: every char is taken literally; chars that are special inside a set (`\`,
    ///   `[`, `]`, `^`, `-`, `&`, `~`) are escaped with a backslash.
    /// * `Union`: the parts are already rendered fragments and are concatenated unchanged.
    /// * `Range`: rendered as `start-end`, with each bound escaped like an `AnyOf` char.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::Range('a', 'c'), Rec::from("[a-c]"));
    /// ```
    ///
    /// ## Special characters are matched literally
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::AnyOf("^a"), Rec::from(r"[\^a]"));
    /// ```
    fn to_part(&self) -> String {
        // Appends `c` to `out` as a literal set member. Without the escape a `]` would close
        // the set early, a leading `^` would negate it, and a `-` range bound would not be
        // read as a bound.
        fn push_literal(out: &mut String, c: char) {
            match c {
                '\\' | '[' | ']' | '^' | '-' | '&' | '~' => out.push('\\'),
                _ => {}
            }
            out.push(c);
        }

        match self {
            Ch::AnyOf(chars) => {
                let mut part = String::with_capacity(chars.len());

                for c in chars.chars() {
                    push_literal(&mut part, c);
                }

                part
            }
            Ch::Union(parts) => parts.concat(),
            Ch::Range(start, end) => {
                let mut part = String::new();

                push_literal(&mut part, *start);
                // This `-` is the range operator itself, so it must stay unescaped.
                part.push('-');
                push_literal(&mut part, *end);
                part
            }
        }
    }
}

// `Ch | Ch` should not produce a `Rec`, because the result may still be combined with another
// `Atom` via `|`. For that reason `BitOr<Ch>` is implemented explicitly here, and `BitOr<T>`
// has to be implemented separately for each `T: Element`.
impl BitOr for Ch {
    type Output = Self;

    /// Produces a `Ch::Union` holding the part of `self` followed by the part of `rhs`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::AnyOf("ab") | Ch::AnyOf("cd"), Rec::from("[abcd]"));
    /// ```
    ///
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::Range('a', 'c') | Ch::AnyOf("xyz"), Rec::from("[a-cxyz]"));
    /// ```
    fn bitor(self, rhs: Self) -> Self::Output {
        let parts: Vec<String> = [&self, &rhs].iter().map(|side| side.to_part()).collect();

        Ch::Union(parts)
    }
}

impl BitOr<char> for Ch {
    type Output = Self;

    /// Produces a `Ch::Union` holding the part of `self` followed by the part of `rhs`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!(Ch::AnyOf("ab") | 'c', Rec::from("[abc]"));
    /// ```
    fn bitor(self, rhs: char) -> Self::Output {
        // The existing set comes first so the rendered union keeps the operand order.
        let set_part = self.to_part();
        let char_part = rhs.to_part();

        Ch::Union(vec![set_part, char_part])
    }
}

// Alternation of a `Ch` with an existing `Rec`.
impl BitOr<Rec> for Ch {
    type Output = Rec;

    /// Builds a `Rec` from `self` and `rhs` by delegating to `alternate`.
    fn bitor(self, rhs: Rec) -> Self::Output {
        self.alternate(&rhs)
    }
}

// Alternation of a `Ch` with a string; unlike `Ch | char`, the result is a `Rec`.
impl BitOr<&str> for Ch {
    type Output = Rec;

    /// Builds a `Rec` from `self` and `rhs` by delegating to `alternate`.
    fn bitor(self, rhs: &str) -> Self::Output {
        self.alternate(&rhs)
    }
}

impl Element for Ch {
    /// Returns the complete regular expression for this `Ch`: its part wrapped in `[` and `]`.
    fn to_regex(&self) -> String {
        let inner = self.to_part();
        // Two extra bytes for the surrounding brackets.
        let mut regex = String::with_capacity(inner.len() + 2);

        regex.push('[');
        regex.push_str(&inner);
        regex.push(']');
        regex
    }

    /// Always `true` for a `Ch`.
    fn is_atom(&self) -> bool {
        true
    }
}

// Allows a `Ch` to be compared with any `Element`, as the examples in this file do with `Rec`
// and with other `Ch` values.
impl<T: Element> PartialEq<T> for Ch {
    /// Compares `self` with `other` by delegating to `is_equal`.
    fn eq(&self, other: &T) -> bool {
        self.is_equal(other)
    }
}

// Allows a `char` on the left-hand side of `+` with a `Ch` on the right.
impl Add<Ch> for char {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    fn add(self, rhs: Ch) -> Self::Output {
        self.concatenate(&rhs)
    }
}

// Required because `Add<T: Element>` cannot be implemented for `char`.
impl Add<Class> for char {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!('a' + Class::Digit, Rec::from(r"a\d"));
    /// ```
    fn add(self, rhs: Class) -> Self::Output {
        self.concatenate(&rhs)
    }
}

impl BitOr<Ch> for Rec {
    type Output = Self;

    /// Adds the regular expression of `rhs` as one more alternative of this `Rec`.
    ///
    /// The elements of `self` are taken over, the rendered `rhs` is appended to them, and the
    /// resulting list is handed to `Rec::alternation`.
    fn bitor(self, rhs: Ch) -> Self::Output {
        let mut alternatives = self.elements;

        alternatives.push(rhs.to_regex());
        Self::alternation(alternatives)
    }
}

// Allows a string on the left-hand side of `+` with a `Ch` on the right.
impl Add<Ch> for &str {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Ch, prelude::*};
    ///
    /// assert_eq!("25" + Ch::Range('0', '5'), Rec::from("25[0-5]"));
    /// ```
    fn add(self, rhs: Ch) -> Self::Output {
        self.concatenate(&rhs)
    }
}

// Required because `Add<T: Element>` cannot be implemented for `&str`.
impl Add<Class> for &str {
    type Output = Rec;

    /// Builds a `Rec` from `self` followed by `rhs` by delegating to `concatenate`.
    ///
    /// # Examples
    /// ```
    /// use rec::{Class, prelude::*};
    ///
    /// assert_eq!("hello" + Class::Digit, Rec::from(r"hello\d"));
    /// ```
    fn add(self, rhs: Class) -> Self::Output {
        self.concatenate(&rhs)
    }
}