// lwb_parser/sources/character_class.rs

1use itertools::Itertools;
2use serde::{Deserialize, Serialize};
3use std::fmt::{Display, Formatter};
4use std::ops::{Range, RangeInclusive};
5
6/// Represent a class of characters like in a regex
7/// such as [a-z] or [^0-9]
8#[derive(Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)]
9pub enum CharacterClass {
10    /// Inclusive range. Both `from` and `to` are inclusive
11    RangeInclusive {
12        from: char,
13        // inclusive!
14        to: char, // inclusive!
15    },
16    /// Exclusive range. `from` is inclusive but `to` is exclusive
17    Range {
18        from: char,
19        // inclusive!
20        to: char, // exclusive!
21    },
22    /// all characters in the vec are in the character class.
23    Contained(Vec<char>),
24    /// True when one of the character class parts is true
25    Choice(Vec<CharacterClass>),
26    /// inverts the outcome of the embedded character class
27    Not(Box<CharacterClass>),
28    /// Always false. Use Not(Nothing) for always true.
29    Nothing,
30}
31
32/// This display trait is very heavily improvised, should be improved in the future!
33impl Display for CharacterClass {
34    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
35        match self {
36            CharacterClass::RangeInclusive { from, to } if from == to => {
37                write!(f, "{}", from)
38            }
39            CharacterClass::RangeInclusive { from, to } => {
40                write!(f, "[{}-{}]", from, to)
41            }
42            CharacterClass::Range { from, to } => {
43                write!(f, "[{}-{}] (exclusive)", from, to)
44            }
45            CharacterClass::Contained(list) => {
46                write!(f, "{}", list.iter().join(""))
47            }
48            CharacterClass::Choice(ccs) => {
49                write!(f, "{}", ccs.iter().map(|cc| cc.to_string()).join(" or "))
50            }
51            CharacterClass::Not(not) => {
52                write!(f, "not {}", not)
53            }
54            CharacterClass::Nothing => {
55                write!(f, "")
56            }
57        }
58    }
59}
60
61impl CharacterClass {
62    /// Contains returns true when a character is
63    /// included in this character class.
64    ///
65    /// ```
66    /// # use lwb_parser::sources::character_class::CharacterClass;
67    ///
68    /// let c = CharacterClass::from('a'..='z');
69    /// assert!(c.contains('a'));
70    /// assert!(c.contains('z'));
71    /// assert!(!c.contains('0'));
72    /// ```
73    ///
74    /// ```
75    /// # use lwb_parser::sources::character_class::CharacterClass;
76    ///
77    /// // exclusive range so does not contain 'z'
78    /// let c = CharacterClass::from('a'..'z');
79    /// assert!(c.contains('a'));
80    /// assert!(c.contains('y'));
81    /// assert!(!c.contains('z'));
82    /// assert!(!c.contains('0'));
83    /// ```
84    ///
85    /// ```
86    /// # use lwb_parser::sources::character_class::CharacterClass;
87    ///
88    /// // always return false
89    /// let c = CharacterClass::Nothing;
90    /// assert!(!c.contains('a'));
91    /// assert!(!c.contains('0'));
92    /// ```
93    ///
94    /// ```
95    /// # use lwb_parser::sources::character_class::CharacterClass;
96    ///
97    /// // always return true
98    /// let c = CharacterClass::Nothing.invert();
99    /// assert!(c.contains('a'));
100    /// assert!(c.contains('0'));
101    /// ```
102    pub fn contains(&self, c: char) -> bool {
103        match self {
104            CharacterClass::RangeInclusive { from, to } => {
105                c as u32 >= *from as u32 && c as u32 <= *to as u32
106            }
107            CharacterClass::Range { from, to } => {
108                (c as u32) >= *from as u32 && (c as u32) < *to as u32
109            }
110            CharacterClass::Choice(parts) => parts.iter().map(|i| i.contains(c)).any(|i| i),
111            CharacterClass::Not(cls) => !cls.contains(c),
112            CharacterClass::Nothing => false,
113            CharacterClass::Contained(chars) => chars.contains(&c),
114        }
115    }
116
117    /// returns a character class that contains all elements
118    /// of the slice.
119    pub const fn all_in_vec(chars: Vec<char>) -> Self {
120        Self::Contained(chars)
121    }
122
123    /// Invert this character class. The new class accepts any character
124    /// not in the original character class
125    pub fn invert(self) -> Self {
126        Self::Not(Box::new(self))
127    }
128
129    /// Combine two character classes such that the result
130    /// contains all characters from either of the two character
131    /// class sets.
132    ///
133    /// ```
134    /// use lwb_parser::sources::character_class::CharacterClass;
135    ///
136    /// let a = CharacterClass::from('a'..'z');
137    /// let b = CharacterClass::from('0'..'9');
138    /// assert!(a.contains('a'));
139    /// assert!(!a.contains('0'));
140    /// assert!(!b.contains('a'));
141    /// assert!(b.contains('0'));
142    ///
143    /// let c = a.combine(b);
144    /// assert!(c.contains('a'));
145    /// assert!(c.contains('0'));
146    /// ```
147    pub fn combine(self, other: CharacterClass) -> CharacterClass {
148        CharacterClass::Choice(vec![self, other])
149    }
150}
151
152impl From<RangeInclusive<char>> for CharacterClass {
153    fn from(r: RangeInclusive<char>) -> Self {
154        Self::RangeInclusive {
155            from: *r.start(),
156            to: *r.end(),
157        }
158    }
159}
160
161impl From<Range<char>> for CharacterClass {
162    fn from(r: Range<char>) -> Self {
163        Self::Range {
164            from: r.start,
165            to: r.end,
166        }
167    }
168}
169
170impl From<char> for CharacterClass {
171    fn from(c: char) -> Self {
172        Self::RangeInclusive { from: c, to: c }
173    }
174}
175
176impl From<&[char]> for CharacterClass {
177    fn from(s: &[char]) -> Self {
178        Self::Contained(s.to_vec())
179    }
180}
181
182impl From<Vec<char>> for CharacterClass {
183    fn from(s: Vec<char>) -> Self {
184        Self::Contained(s)
185    }
186}
187
188impl From<String> for CharacterClass {
189    fn from(s: String) -> Self {
190        Self::Contained(s.chars().collect())
191    }
192}
193
194impl<'a> From<&'a str> for CharacterClass {
195    fn from(s: &'a str) -> Self {
196        Self::Contained(s.chars().collect())
197    }
198}