1use std::str::FromStr;
12use crate::set::*;
13#[derive(Debug, Clone, Eq, PartialEq)]
14pub enum AstNode {
15 Char(CharRange),
16 Options(Vec<AstNode>),
17 Multiple(Box<AstNode>),
18 EmptyOr(Box<AstNode>),
19 MultipleNonGreedy(Box<AstNode>),
20 Concat(Vec<AstNode>),
21 }
23use thiserror::Error as ThisError;
24
25#[derive(Debug, ThisError)]
26pub enum Error {
27 #[error("missing expression at {0}")]
28 MissingExpresion(usize),
29 #[error("missing first expr at {0}")]
30 MissingFirstExpr(usize),
31 #[error("'^' is unusable at {0}")]
32 ExceptNotUsable(usize),
33 #[error("unmatched char '{1}' at {0}")]
34 UnmatchedChar(usize, char),
35 #[error("unexpect end at {0}")]
36 UnexpectedEnd(usize),
37 #[error("unexpected char '{1}' at {0}")]
38 UnexpectedChar(usize, char),
39 #[error("found an empty string")]
40 EmptyString,
41}
42
43pub type Result<T> = std::result::Result<T,Error>;
60pub trait CharStream: Iterator<Item=char> {}
61
62
63struct Parser<Iter : CharStream> {
71 first : char,
72 iter : Iter,
73 pos: usize,
74}
75
76
77impl<Iter : CharStream> Parser<Iter> {
78 pub fn new(mut iter: Iter) -> Result<Self> {
80 Ok(Self {
81 first: iter.next().ok_or(Error::EmptyString)?,
82 iter,
83 pos: 0,
84 })
85 }
86
87 fn next(&mut self) -> char {
88 self.first = self.iter.next().unwrap_or('\0');
89 self.pos += 1;
90 self.first
91 }
92
93 fn next_matches(&mut self, _c : char) {
94 self.first = self.iter.next().unwrap_or('\0');
95 self.pos += 1;
96 }
97 pub fn parse_tree(&mut self, inside : bool) -> Result<AstNode> {
101 let mut ret = Vec::<AstNode>::new();
102 loop{
103 let op = self.parse_option()?;
104 ret.push(op);
105
106 if self.first == ')' || self.first == '\0' {
108 if self.first == ')' && !inside {
109 return Err(Error::UnmatchedChar(self.pos, ')'));
110 }
111 break;
112 }
113
114 self.next_matches('|');
115 }
116
117 if ret.len() == 0 {
118 return Err(Error::MissingExpresion(self.pos));
119 }
120
121 if ret.len() == 1 {
122 Ok(ret.pop().unwrap())
123 } else {
124 Ok(AstNode::Options(ret))
125 }
126 }
127
128 pub fn parse_option(&mut self) -> Result<AstNode> {
130
131 let mut ret = Vec::<AstNode>::new();
132 loop {
133 let element = self.parse_element()?;
134 ret.push(element);
135 if self.first == '|' || self.first == '\0' || self.first == ')' {
137 break;
138 }
139
140 }
142
143 if ret.len() == 0 {
144 return Err(Error::MissingExpresion(self.pos));
145 }
146
147 if ret.len() == 1 {
148 Ok(ret.pop().unwrap())
149 } else {
150 Ok(AstNode::Concat(ret))
151 }
152 }
153
154 pub fn parse_element(&mut self) -> Result<AstNode> {
156
157 let mut is_except = false;
158 if self.first == '^'{
159 is_except = true;
160 self.next_matches('^');
161 }
162
163 let mut ret = match self.first {
164 '(' => {
165 if is_except {
166 return Err(Error::ExceptNotUsable(self.pos));
167 }
168 self.next_matches('('); let ret = self.parse_tree(true)?;
170 self.next_matches(')');
171 ret
172 },
173 '[' => {
174 let ret = self.parse_charset(is_except)?;
176 ret
177 },
178 '.' => {
179 if is_except {
180 return Err(Error::ExceptNotUsable(self.pos));
181 }
182 self.next_matches('.');
183 AstNode::Char(CHAR_MIN..CHAR_MAX)
184 }
185 '\0' => { return Err(Error::UnexpectedEnd(self.pos));}
186 ')' | ']' | '|' | '*' | '+' | '?' => {
187 return Err(Error::UnexpectedChar(self.pos, self.first));
188 }
189 '\\' => {
190 let c = self.next();
191 self.next();
192 let c = match c {
193 'n' => '\n',
194 't' => '\t',
195 'r' => '\r',
196 _ => c
197 };
198 AstNode::Char(c..add1(c))
199 }
200 c => {
201 self.next();
202 AstNode::Char(c..add1(c))
203 }
204 };
205
206 if self.first == '*' {
207 self.next_matches('*');
208 if self.first == '?' {
209 self.next_matches('?');
210 ret = AstNode::MultipleNonGreedy(Box::new(ret));
211 }else {
212 ret = AstNode::Multiple(Box::new(ret));
213 }
214 }
215 if self.first == '+' {
216 self.next_matches('+');
217 ret = AstNode::Concat(vec![
218 ret.clone(),
219 AstNode::Multiple(Box::new(ret))
220 ])
221 }
222 if self.first == '?' {
223 self.next_matches('?');
224 ret = AstNode::EmptyOr(Box::new(ret));
225 }
226 Ok(ret)
227 }
228
229 fn parse_charset(&mut self, is_except: bool) -> Result<AstNode> {
230 self.next_matches('[');
231 let mut ret = Vec::<CharRange>::new();
232 loop {
233 match self.first {
234 ']' => {
235 self.next_matches(']');
236 break;
237 },
238 '-' => {
239 if let Some(range) = ret.pop() {
240 if add1(range.start) == range.end {
241 self.next();
242 ret.push(range.start..add1(self.first));
243 } else {
244 return Err(Error::MissingFirstExpr(self.pos));
245 }
246 } else {
247 return Err(Error::MissingFirstExpr(self.pos));
248 }
249 }
250 c => { ret.push(c..add1(c)) },
251 }
252 self.next();
253 }
254
255 if ret.len() == 0{
256 return Err(Error::MissingExpresion(self.pos));
257 }
258
259 if is_except {
260 if ret.len() == 1 {
261 let s = ret.pop().unwrap();
262 return Ok(AstNode::Options(vec![
263 AstNode::Char(
264 CHAR_MIN..s.start
265 ),
266 AstNode::Char(
267 s.end..CHAR_MAX
268 ),
269 ]))
270 }
271 panic!("not implemented!");
272 }
273
274 if ret.len() == 1 {
275 Ok(AstNode::Char(ret.pop().unwrap()))
276 } else {
277 Ok(AstNode::Options(
278 ret.iter().map(|c| AstNode::Char(c.clone())).collect()
279 ))
280 }
281 }
282}
283
284impl CharStream for core::str::Chars<'_> {}
285
286impl FromStr for AstNode {
287 type Err = Error;
288 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
289 let iter = s.chars();
290 Parser::new(iter)?.parse_tree(false)
291 }
292}
293
294
295#[cfg(test)]
296mod test {
297 use super::*;
298 use std::assert;
299 use AstNode::*;
300
301 fn charnode(c : char) -> AstNode { Char(c..add1(c)) }
302 fn charrange(c : char, d : char) -> AstNode {
303 Char(c..add1(d))
304 }
305
306 fn multi(n : AstNode) -> AstNode { Multiple(Box::new(n)) }
307 fn multi_non_greedy(n : AstNode) -> AstNode { MultipleNonGreedy(Box::new(n)) }
308
309
310
311 #[test]
312 fn basics() {
313 let ast : AstNode = r"12".parse::<AstNode>().unwrap();
314 assert_eq!(
315 ast, Concat(vec![
316 charnode('1'),
317 charnode('2'),
318 ])
319 );
320
321 let ast : AstNode = r"1|2".parse::<AstNode>().unwrap();
322 assert_eq!(
323 ast , Options(vec![
324 charnode('1'),
325 charnode('2'),
326 ])
327 );
328
329
330 let ast : AstNode = r"1|2*3(5|4)*".parse::<AstNode>().unwrap();
331 assert_eq!(
332 ast , Options(vec![
333 charnode('1'),
334 Concat(vec![
335 multi(charnode('2')),
336 charnode('3'),
337 multi(
338 Options(vec![
339 charnode('5'),
340 charnode('4'),
341 ])
342 ),
343 ])
344 ])
345 );
346
347 let ast = r"1([1-9][1-9])*?".parse::<AstNode>().unwrap();
348 assert_eq!(
349 ast , Concat(vec![
350 charnode('1'),
351 multi_non_greedy(
352 Concat(vec![
353 charrange('1', '9'),
354 charrange('1', '9'),
355 ])
356 )
357 ])
358 );
359
360 let ast2 = r"1(([1-9]([1-9])))*?".parse::<AstNode>().unwrap();
361 assert!(ast2 == ast);
362 }
363}