1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter};
use std::ops::{Range, RangeInclusive};
/// Represent a class of characters like in a regex
/// such as [a-z] or [^0-9]
#[derive(Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)]
pub enum CharacterClass {
    /// Inclusive range. Both `from` and `to` are inclusive
    RangeInclusive {
        from: char,
        // inclusive!
        to: char, // inclusive!
    },
    /// Exclusive range. `from` is inclusive but `to` is exclusive
    Range {
        from: char,
        // inclusive!
        to: char, // exclusive!
    },
    /// all characters in the vec are in the character class.
    Contained(Vec<char>),
    /// True when one of the character class parts is true
    Choice(Vec<CharacterClass>),
    /// inverts the outcome of the embedded character class
    Not(Box<CharacterClass>),
    /// Always false. Use Not(Nothing) for always true.
    Nothing,
}
/// This display trait is very heavily improvised, should be improved in the future!
impl Display for CharacterClass {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            CharacterClass::RangeInclusive { from, to } if from == to => {
                write!(f, "{}", from)
            }
            CharacterClass::RangeInclusive { from, to } => {
                write!(f, "[{}-{}]", from, to)
            }
            CharacterClass::Range { from, to } => {
                write!(f, "[{}-{}] (exclusive)", from, to)
            }
            CharacterClass::Contained(list) => {
                write!(f, "{}", list.iter().join(""))
            }
            CharacterClass::Choice(ccs) => {
                write!(f, "{}", ccs.iter().map(|cc| cc.to_string()).join(" or "))
            }
            CharacterClass::Not(not) => {
                write!(f, "not {}", not)
            }
            CharacterClass::Nothing => {
                write!(f, "")
            }
        }
    }
}
impl CharacterClass {
    /// Contains returns true when a character is
    /// included in this character class.
    ///
    /// ```
    /// # use lwb_parser::sources::character_class::CharacterClass;
    ///
    /// let c = CharacterClass::from('a'..='z');
    /// assert!(c.contains('a'));
    /// assert!(c.contains('z'));
    /// assert!(!c.contains('0'));
    /// ```
    ///
    /// ```
    /// # use lwb_parser::sources::character_class::CharacterClass;
    ///
    /// // exclusive range so does not contain 'z'
    /// let c = CharacterClass::from('a'..'z');
    /// assert!(c.contains('a'));
    /// assert!(c.contains('y'));
    /// assert!(!c.contains('z'));
    /// assert!(!c.contains('0'));
    /// ```
    ///
    /// ```
    /// # use lwb_parser::sources::character_class::CharacterClass;
    ///
    /// // always return false
    /// let c = CharacterClass::Nothing;
    /// assert!(!c.contains('a'));
    /// assert!(!c.contains('0'));
    /// ```
    ///
    /// ```
    /// # use lwb_parser::sources::character_class::CharacterClass;
    ///
    /// // always return true
    /// let c = CharacterClass::Nothing.invert();
    /// assert!(c.contains('a'));
    /// assert!(c.contains('0'));
    /// ```
    pub fn contains(&self, c: char) -> bool {
        match self {
            CharacterClass::RangeInclusive { from, to } => {
                c as u32 >= *from as u32 && c as u32 <= *to as u32
            }
            CharacterClass::Range { from, to } => {
                (c as u32) >= *from as u32 && (c as u32) < *to as u32
            }
            CharacterClass::Choice(parts) => parts.iter().map(|i| i.contains(c)).any(|i| i),
            CharacterClass::Not(cls) => !cls.contains(c),
            CharacterClass::Nothing => false,
            CharacterClass::Contained(chars) => chars.contains(&c),
        }
    }
    /// returns a character class that contains all elements
    /// of the slice.
    pub const fn all_in_vec(chars: Vec<char>) -> Self {
        Self::Contained(chars)
    }
    /// Invert this character class. The new class accepts any character
    /// not in the original character class
    pub fn invert(self) -> Self {
        Self::Not(Box::new(self))
    }
    /// Combine two character classes such that the result
    /// contains all characters from either of the two character
    /// class sets.
    ///
    /// ```
    /// use lwb_parser::sources::character_class::CharacterClass;
    ///
    /// let a = CharacterClass::from('a'..'z');
    /// let b = CharacterClass::from('0'..'9');
    /// assert!(a.contains('a'));
    /// assert!(!a.contains('0'));
    /// assert!(!b.contains('a'));
    /// assert!(b.contains('0'));
    ///
    /// let c = a.combine(b);
    /// assert!(c.contains('a'));
    /// assert!(c.contains('0'));
    /// ```
    pub fn combine(self, other: CharacterClass) -> CharacterClass {
        CharacterClass::Choice(vec![self, other])
    }
}
impl From<RangeInclusive<char>> for CharacterClass {
    fn from(r: RangeInclusive<char>) -> Self {
        Self::RangeInclusive {
            from: *r.start(),
            to: *r.end(),
        }
    }
}
impl From<Range<char>> for CharacterClass {
    fn from(r: Range<char>) -> Self {
        Self::Range {
            from: r.start,
            to: r.end,
        }
    }
}
impl From<char> for CharacterClass {
    fn from(c: char) -> Self {
        Self::RangeInclusive { from: c, to: c }
    }
}
impl From<&[char]> for CharacterClass {
    fn from(s: &[char]) -> Self {
        Self::Contained(s.to_vec())
    }
}
impl From<Vec<char>> for CharacterClass {
    fn from(s: Vec<char>) -> Self {
        Self::Contained(s)
    }
}
impl From<String> for CharacterClass {
    fn from(s: String) -> Self {
        Self::Contained(s.chars().collect())
    }
}
impl<'a> From<&'a str> for CharacterClass {
    fn from(s: &'a str) -> Self {
        Self::Contained(s.chars().collect())
    }
}