1use super::pattern::*;
2use pest::{
3 iterators::{Pair, Pairs},
4 Parser,
5};
6use pest_derive::Parser;
7
// Local shorthands that fix the generic parameter of the pattern types to
// `Parsed`, so the rest of this file can name them without the parameter.
// They take precedence over the same names brought in by the glob import.
type Pattern = crate::pattern::Pattern<Parsed>;
type Group = crate::pattern::Group<Parsed>;
type Segment = crate::pattern::Segment<Parsed>;
type Item = crate::pattern::Item<Parsed>;
12
/// Parser for pattern strings.
///
/// The implementation is derived by `pest_derive` from the grammar file
/// `grammar.pest`. The `Rule` type used throughout this file is presumably
/// the enum generated by that derive.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct PatternParser;
16
/// Errors returned when parsing a pattern string.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// Wraps a `pest::error::Error`. The `#[from]` attribute lets `?`
    /// convert pest errors into this variant automatically.
    #[error("syntax error")]
    Syntax(#[from] pest::error::Error<Rule>),
}
22
/// Result alias whose error type defaults to this module's `Error`.
type Result<T, E = Error> = std::result::Result<T, E>;
24
25impl Pattern {
26 pub fn parse(input: &str) -> Result<Self> {
27 let pairs = PatternParser::parse(Rule::pattern, input)?;
28 Ok(Self::Group(Group::parse_pairs(pairs)?))
29 }
30
31 fn parse_pair(input: Pair<'_, Rule>) -> Result<Self> {
32 let pattern = match input.as_rule() {
33 Rule::literal => Literal::parse_pairs(input.into_inner())?.into(),
34 Rule::group => Group::parse_pairs(input.into_inner())?.into(),
35 Rule::set => Set::parse_pairs(input.into_inner())?.into(),
36 Rule::special => Special::parse_pairs(input.into_inner())?.into(),
37 _ => unreachable!(),
38 };
39 Ok(pattern)
40 }
41}
42
43impl Special {
44 fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
45 let pair = input.next().unwrap();
46 let result = match pair.as_rule() {
47 Rule::wordlist => {
48 let name = pair.into_inner().next().unwrap();
49 Self::Wordlist(name.as_str().to_string())
50 }
51 Rule::markov => {
52 let name = pair.into_inner().next().unwrap();
53 Self::Markov(name.as_str().to_string())
54 }
55 _ => unreachable!(),
56 };
57 Ok(result)
58 }
59}
60
61impl Literal {
62 fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
63 let pair = input.next().unwrap();
64 Self::parse_pair(pair)
65 }
66
67 fn parse_pair(pair: Pair<'_, Rule>) -> Result<Self> {
68 let value = match pair.as_rule() {
69 Rule::unicode => pair.as_str().chars().next().unwrap(),
70 Rule::escaped => pair.as_str().chars().nth(1).unwrap(),
71 _ => unreachable!(),
72 };
73 Ok(Literal { value })
74 }
75}
76
77impl Item {
78 fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
79 let pattern = input.next().unwrap();
80
81 let mut item = Item::new(Pattern::parse_pair(pattern)?);
82
83 for pair in input {
84 match pair.as_rule() {
85 Rule::repeat => {
86 let mut amount = pair.into_inner();
87 let min = amount.next().unwrap().as_str().parse().unwrap();
88 item.repeat = min..=min;
89 if let Some(pair) = amount.next() {
90 let max = pair.as_str().parse().unwrap();
91 item.repeat = min..=max;
92 }
93 }
94 Rule::optional => item.optional = true,
95 _ => unreachable!(),
96 }
97 }
98
99 Ok(item)
100 }
101}
102
103impl Segment {
104 fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
105 let mut segment = Self::default();
106 for pair in input {
107 match pair.as_rule() {
108 Rule::item => {
109 segment.items.push(Item::parse_pairs(pair.into_inner())?);
110 }
111 _ => unreachable!(),
112 }
113 }
114 Ok(segment)
115 }
116}
117
118impl Group {
119 fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
120 let mut group = Self::default();
121 for pair in input {
122 match pair.as_rule() {
123 Rule::segment => {
124 group
125 .segments
126 .push(Segment::parse_pairs(pair.into_inner())?);
127 }
128 Rule::EOI => {}
129 _ => unreachable!("unexpected rule {:?}", pair.as_rule()),
130 }
131 }
132 Ok(group)
133 }
134}
135
136fn get_char(pair: Pair<'_, Rule>) -> Result<char> {
137 match pair.as_rule() {
138 Rule::literal => Literal::parse_pairs(pair.into_inner()).map(|literal| literal.value),
139 _ => unreachable!(),
140 }
141}
142
143impl Set {
144 fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
145 let mut set = Self::default();
146 for range in input {
147 match range.as_rule() {
148 Rule::range => {
149 let mut inner = range.into_inner();
150 let min = get_char(inner.next().unwrap()).unwrap();
151 let mut range = min..=min;
152 if let Some(pair) = inner.next() {
153 let max = get_char(pair).unwrap();
154 range = min.min(max)..=max.max(min);
156 }
157
158 set.insert(range, 1);
159 }
160 _ => unreachable!(),
161 }
162 }
163 Ok(set)
164 }
165}
166
// Unit and property tests for `Pattern::parse`. Each test compares the parsed
// value with an expected pattern built by hand through the macros below.
#[cfg(test)]
mod tests {
    use super::*;
    use test_strategy::*;

    // Builds a `Pattern::Group` whose `segments` are the given expressions.
    macro_rules! group {
        ($($segments:tt)*) => {
            Pattern::Group(Group {
                segments: vec![$($segments)*],
            })
        };
    }

    // Builds a `Segment` whose `items` are the given expressions.
    macro_rules! segment {
        ($($items:tt)*) => {
            Segment {
                items: vec![$($items)*],
            }
        };
    }

    // Builds a `Set` by inserting every given range with the value `1`, the
    // same call the parser makes.
    macro_rules! set {
        ($($range:expr),*) => {
            {
                let mut set = Set::default();
                $(set.insert($range, 1);)*
                set
            }
        };
    }

    // A single character parses to one segment holding one literal item.
    #[test]
    fn parse_literal() {
        assert_eq!(
            Pattern::parse("a").unwrap(),
            group![segment![Item::new(Literal::new('a'))]]
        );
    }

    // A trailing `?` marks the item as optional.
    #[test]
    fn parse_literal_optional() {
        assert_eq!(
            Pattern::parse("a?").unwrap(),
            group![segment![Item::new(Literal::new('a')).optional(true)]]
        );
    }

    // `{min,max}` sets the item's repeat range.
    #[test]
    fn parse_literal_repeat() {
        assert_eq!(
            Pattern::parse("a{1,3}").unwrap(),
            group![segment![Item::new(Literal::new('a')).repeat(1..=3)]]
        );
    }

    // An empty pair of parentheses parses to a nested group with one empty
    // segment.
    #[test]
    fn parse_group() {
        assert_eq!(
            Pattern::parse("()").unwrap(),
            group![segment![Item::new(group![segment![]])]],
        );
    }

    // `|` separates the segments of a group.
    #[test]
    fn parse_group_segments() {
        assert_eq!(
            Pattern::parse("(a|b|c)").unwrap(),
            group![segment![Item::new(group![
                segment![Item::new(Literal::new('a'))],
                segment![Item::new(Literal::new('b'))],
                segment![Item::new(Literal::new('c'))],
            ])]],
        );
    }

    // The optional modifier also applies to groups.
    #[test]
    fn parse_group_optional() {
        assert_eq!(
            Pattern::parse("()?").unwrap(),
            group![segment![Item::new(group![segment![]]).optional(true)]],
        );
    }

    // The repeat modifier also applies to groups.
    #[test]
    fn parse_group_repeat() {
        assert_eq!(
            Pattern::parse("(){1,3}").unwrap(),
            group![segment![Item::new(group![segment![]]).repeat(1..=3)]],
        );
    }

    // Three single characters in brackets are expected to equal a set built
    // from the one range `'a'..='c'`.
    // NOTE(review): this presumably relies on `Set` treating adjacent entries
    // and a covering range as equal; confirm against `Set`.
    #[test]
    fn parse_set() {
        assert_eq!(
            Pattern::parse("[abc]").unwrap(),
            group![segment![Item::new(set!['a'..='c'])]],
        );
    }

    // The optional modifier also applies to sets.
    #[test]
    fn parse_set_optional() {
        assert_eq!(
            Pattern::parse("[abc]?").unwrap(),
            group![segment![Item::new(set!['a'..='c']).optional(true)]],
        );
    }

    // The repeat modifier also applies to sets.
    #[test]
    fn parse_set_repeat() {
        assert_eq!(
            Pattern::parse("[abc]{1,3}").unwrap(),
            group![segment![Item::new(set!['a'..='c']).repeat(1..=3)]],
        );
    }

    // Property: `x{n}` repeats exactly `n` times for any generated `usize`.
    #[proptest]
    fn parse_repeat_exact(amount: usize) {
        assert_eq!(
            Pattern::parse(&format!("x{{{amount}}}")).unwrap(),
            group![segment![
                Item::new(Literal::new('x')).repeat(amount..=amount)
            ]],
        );
    }

    // Property: `x{min,max}` keeps both bounds exactly as written, including
    // when `min` is greater than `max`.
    #[proptest]
    fn parse_repeat_minmax(min: usize, max: usize) {
        assert_eq!(
            Pattern::parse(&format!("x{{{min},{max}}}")).unwrap(),
            group![segment![Item::new(Literal::new('x')).repeat(min..=max)]],
        );
    }

    // Property: the optional flag is set exactly when the `?` suffix is
    // present.
    #[proptest]
    fn parse_optional(optional: bool) {
        let optional_str = match optional {
            true => "?",
            false => "",
        };
        assert_eq!(
            Pattern::parse(&format!("x{optional_str}")).unwrap(),
            group![segment![Item::new(Literal::new('x')).optional(optional)]],
        );
    }

    // Property: parsing an arbitrary string must not panic. The result is
    // only passed through `black_box`, so both `Ok` and `Err` are accepted.
    #[proptest]
    fn parse_arbitrary(input: String) {
        let result = Pattern::parse(&input);
        let _ = std::hint::black_box(result);
    }
}