lwb_parser/sources/character_class.rs
1use itertools::Itertools;
2use serde::{Deserialize, Serialize};
3use std::fmt::{Display, Formatter};
4use std::ops::{Range, RangeInclusive};
5
6/// Represent a class of characters like in a regex
7/// such as [a-z] or [^0-9]
8#[derive(Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)]
9pub enum CharacterClass {
10 /// Inclusive range. Both `from` and `to` are inclusive
11 RangeInclusive {
12 from: char,
13 // inclusive!
14 to: char, // inclusive!
15 },
16 /// Exclusive range. `from` is inclusive but `to` is exclusive
17 Range {
18 from: char,
19 // inclusive!
20 to: char, // exclusive!
21 },
22 /// all characters in the vec are in the character class.
23 Contained(Vec<char>),
24 /// True when one of the character class parts is true
25 Choice(Vec<CharacterClass>),
26 /// inverts the outcome of the embedded character class
27 Not(Box<CharacterClass>),
28 /// Always false. Use Not(Nothing) for always true.
29 Nothing,
30}
31
32/// This display trait is very heavily improvised, should be improved in the future!
33impl Display for CharacterClass {
34 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
35 match self {
36 CharacterClass::RangeInclusive { from, to } if from == to => {
37 write!(f, "{}", from)
38 }
39 CharacterClass::RangeInclusive { from, to } => {
40 write!(f, "[{}-{}]", from, to)
41 }
42 CharacterClass::Range { from, to } => {
43 write!(f, "[{}-{}] (exclusive)", from, to)
44 }
45 CharacterClass::Contained(list) => {
46 write!(f, "{}", list.iter().join(""))
47 }
48 CharacterClass::Choice(ccs) => {
49 write!(f, "{}", ccs.iter().map(|cc| cc.to_string()).join(" or "))
50 }
51 CharacterClass::Not(not) => {
52 write!(f, "not {}", not)
53 }
54 CharacterClass::Nothing => {
55 write!(f, "")
56 }
57 }
58 }
59}
60
61impl CharacterClass {
62 /// Contains returns true when a character is
63 /// included in this character class.
64 ///
65 /// ```
66 /// # use lwb_parser::sources::character_class::CharacterClass;
67 ///
68 /// let c = CharacterClass::from('a'..='z');
69 /// assert!(c.contains('a'));
70 /// assert!(c.contains('z'));
71 /// assert!(!c.contains('0'));
72 /// ```
73 ///
74 /// ```
75 /// # use lwb_parser::sources::character_class::CharacterClass;
76 ///
77 /// // exclusive range so does not contain 'z'
78 /// let c = CharacterClass::from('a'..'z');
79 /// assert!(c.contains('a'));
80 /// assert!(c.contains('y'));
81 /// assert!(!c.contains('z'));
82 /// assert!(!c.contains('0'));
83 /// ```
84 ///
85 /// ```
86 /// # use lwb_parser::sources::character_class::CharacterClass;
87 ///
88 /// // always return false
89 /// let c = CharacterClass::Nothing;
90 /// assert!(!c.contains('a'));
91 /// assert!(!c.contains('0'));
92 /// ```
93 ///
94 /// ```
95 /// # use lwb_parser::sources::character_class::CharacterClass;
96 ///
97 /// // always return true
98 /// let c = CharacterClass::Nothing.invert();
99 /// assert!(c.contains('a'));
100 /// assert!(c.contains('0'));
101 /// ```
102 pub fn contains(&self, c: char) -> bool {
103 match self {
104 CharacterClass::RangeInclusive { from, to } => {
105 c as u32 >= *from as u32 && c as u32 <= *to as u32
106 }
107 CharacterClass::Range { from, to } => {
108 (c as u32) >= *from as u32 && (c as u32) < *to as u32
109 }
110 CharacterClass::Choice(parts) => parts.iter().map(|i| i.contains(c)).any(|i| i),
111 CharacterClass::Not(cls) => !cls.contains(c),
112 CharacterClass::Nothing => false,
113 CharacterClass::Contained(chars) => chars.contains(&c),
114 }
115 }
116
117 /// returns a character class that contains all elements
118 /// of the slice.
119 pub const fn all_in_vec(chars: Vec<char>) -> Self {
120 Self::Contained(chars)
121 }
122
123 /// Invert this character class. The new class accepts any character
124 /// not in the original character class
125 pub fn invert(self) -> Self {
126 Self::Not(Box::new(self))
127 }
128
129 /// Combine two character classes such that the result
130 /// contains all characters from either of the two character
131 /// class sets.
132 ///
133 /// ```
134 /// use lwb_parser::sources::character_class::CharacterClass;
135 ///
136 /// let a = CharacterClass::from('a'..'z');
137 /// let b = CharacterClass::from('0'..'9');
138 /// assert!(a.contains('a'));
139 /// assert!(!a.contains('0'));
140 /// assert!(!b.contains('a'));
141 /// assert!(b.contains('0'));
142 ///
143 /// let c = a.combine(b);
144 /// assert!(c.contains('a'));
145 /// assert!(c.contains('0'));
146 /// ```
147 pub fn combine(self, other: CharacterClass) -> CharacterClass {
148 CharacterClass::Choice(vec![self, other])
149 }
150}
151
152impl From<RangeInclusive<char>> for CharacterClass {
153 fn from(r: RangeInclusive<char>) -> Self {
154 Self::RangeInclusive {
155 from: *r.start(),
156 to: *r.end(),
157 }
158 }
159}
160
161impl From<Range<char>> for CharacterClass {
162 fn from(r: Range<char>) -> Self {
163 Self::Range {
164 from: r.start,
165 to: r.end,
166 }
167 }
168}
169
170impl From<char> for CharacterClass {
171 fn from(c: char) -> Self {
172 Self::RangeInclusive { from: c, to: c }
173 }
174}
175
176impl From<&[char]> for CharacterClass {
177 fn from(s: &[char]) -> Self {
178 Self::Contained(s.to_vec())
179 }
180}
181
182impl From<Vec<char>> for CharacterClass {
183 fn from(s: Vec<char>) -> Self {
184 Self::Contained(s)
185 }
186}
187
188impl From<String> for CharacterClass {
189 fn from(s: String) -> Self {
190 Self::Contained(s.chars().collect())
191 }
192}
193
194impl<'a> From<&'a str> for CharacterClass {
195 fn from(s: &'a str) -> Self {
196 Self::Contained(s.chars().collect())
197 }
198}