1use core::str::{self, Utf8Error};
2
3use nom::{
4 Finish, IResult, Parser,
5 branch::alt,
6 bytes::complete::{is_not, tag, take_while_m_n},
7 character::complete::{self, anychar, char, none_of},
8 combinator::{map, map_res, opt, peek, value, verify},
9 error::{self, ErrorKind},
10 multi::{fold, many1},
11 sequence::{delimited, preceded, separated_pair},
12};
13
14use super::{Context, Expr, Node, chars::Chars, generator::Generator};
15
16enum StringFragment<'a> {
17 Verbatim(&'a str),
18 Escaped(char),
19}
20
21enum CharFragment {
22 Single((char, char)),
23 Multi(&'static [(char, char)]),
24}
25
26pub type Error = nom::error::Error<String>;
27
28impl Expr<'_> {
29 pub fn parse(input: &str) -> Result<Self, Error> {
148 Ok(Expr::new(input.parse()?))
149 }
150}
151
152impl<'a> Expr<'a> {
153 pub fn parse_with_context(input: &str, context: &'a Context<'a>) -> Result<Self, Error> {
155 Ok(Expr::with_context(input.parse()?, context))
156 }
157}
158
159pub fn parse_node(input: &str) -> IResult<&str, Node> {
164 map(many1(parse_count), Node::from_iter).parse(input)
165}
166
167impl str::FromStr for Node {
168 type Err = Error;
169
170 fn from_str(s: &str) -> Result<Self, Self::Err> {
171 match parse_node(s).finish() {
172 Ok((remaining, node)) => {
173 if !remaining.is_empty() {
174 return Err(Error::new(s.to_string(), ErrorKind::Complete));
175 }
176 Ok(node)
177 }
178 Err(error::Error { input, code }) => Err(Error {
179 input: input.to_string(),
180 code,
181 }),
182 }
183 }
184}
185
186fn parse_count(input: &str) -> IResult<&str, Node> {
187 let (input, node) = parse_single(input)?;
188 let (remaining, count) = opt(delimited(
189 char('{'),
190 alt((
191 separated_pair(complete::u32, char(','), complete::u32),
192 map(complete::u32, |n| (n, n)),
193 map(preceded(char(','), complete::u32), |n| (0, n)),
194 )),
195 char('}'),
196 ))
197 .parse(input)?;
198 match count {
199 None => Ok((remaining, node)),
200 Some((min, max)) if max >= min => Ok((remaining, Node::Count(Box::new(node), min, max))),
201 _ => Err(nom::Err::Failure(error::Error::new(
202 input,
203 ErrorKind::Verify,
204 ))),
205 }
206}
207
208fn parse_single(input: &str) -> IResult<&str, Node> {
209 alt((
210 map(parse_literal, Node::Literal),
211 map(parse_chars, Node::Chars),
212 map(parse_generator, Node::Generator),
213 parse_list,
214 ))
215 .parse(input)
216}
217
218fn parse_literal(input: &str) -> IResult<&str, Box<str>> {
219 map(
220 fold(
221 1..,
222 parse_literal_fragment,
223 String::new,
224 |mut string, fragment| {
225 match fragment {
226 StringFragment::Escaped(c) => string.push(c),
227 StringFragment::Verbatim(s) => string.push_str(s),
228 }
229 string
230 },
231 ),
232 Into::into,
233 )
234 .parse(input)
235}
236
237fn parse_literal_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
238 alt((
239 map(parse_literal_verbatim, StringFragment::Verbatim),
240 map(parse_literal_escaped, StringFragment::Escaped),
241 ))
242 .parse(input)
243}
244
245fn parse_literal_verbatim(input: &str) -> IResult<&str, &str> {
246 let (input, res) = verify(is_not("\\[](){}|"), |s: &str| !s.is_empty()).parse(input)?;
247 Ok((input, res))
248}
249
250fn parse_literal_escaped(input: &str) -> IResult<&str, char> {
251 alt((
252 preceded(
253 char('\\'),
254 alt((
255 value('\n', char('n')),
256 value('\r', char('r')),
257 value('\t', char('t')),
258 verify(anychar, |&c| !c.is_ascii_alphanumeric()),
259 )),
260 ),
261 parse_hex_char,
262 parse_unicode_char,
263 ))
264 .parse(input)
265}
266
267fn parse_unicode_char(input: &str) -> IResult<&str, char> {
268 map_res(parse_unicode_digits, char::try_from).parse(input)
269}
270
271fn parse_unicode_digits(input: &str) -> IResult<&str, u32> {
272 preceded(
273 tag("\\u"),
274 map_res(
275 alt((
276 take_while_m_n(4, 4, |c: char| c.is_ascii_hexdigit()),
277 delimited(
278 char('{'),
279 take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
280 char('}'),
281 ),
282 )),
283 |s| u32::from_str_radix(s, 16),
284 ),
285 )
286 .parse(input)
287}
288
289fn parse_hex_char(input: &str) -> IResult<&str, char> {
290 let (input, b) = parse_hex_byte(input)?;
291 if b < 0b1000_0000 {
292 return Ok((input, b as char));
293 }
294 if b & 0b1110_0000 == 0b1100_0000 {
295 map_res(parse_hex_byte, |b2| {
296 let bs = [b, b2];
297 str_to_char(&bs)
298 })
299 .parse(input)
300 } else if b & 0b1111_0000 == 0b1110_0000 {
301 map_res((parse_hex_byte, parse_hex_byte), |(b2, b3)| {
302 let bs = [b, b2, b3];
303 str_to_char(&bs)
304 })
305 .parse(input)
306 } else if b & 0b1111_1000 == 0b1111_0000 {
307 map_res(
308 (parse_hex_byte, parse_hex_byte, parse_hex_byte),
309 |(b2, b3, b4)| {
310 let bs = [b, b2, b3, b4];
311 str_to_char(&bs)
312 },
313 )
314 .parse(input)
315 } else {
316 Err(nom::Err::Error(error::Error::new(input, ErrorKind::Verify)))
317 }
318}
319
320fn str_to_char(bs: &[u8]) -> Result<char, Utf8Error> {
321 let s = str::from_utf8(bs)?;
322 let mut iter = s.chars();
323 let c = iter.next().expect(s);
324 assert!(iter.next().is_none());
325 Ok(c)
326}
327
328fn parse_hex_byte(input: &str) -> IResult<&str, u8> {
329 preceded(
330 tag("\\x"),
331 map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
332 u8::from_str_radix(s, 16)
333 }),
334 )
335 .parse(input)
336}
337
338fn parse_chars(input: &str) -> IResult<&str, Chars> {
339 alt((
340 parse_legacy_words_err,
341 parse_chars_brackets,
342 map(parse_chars_special, |ps| {
343 Chars::from_ranges(ps.iter().copied())
344 }),
345 ))
346 .parse(input)
347}
348
349fn parse_legacy_words_err(input: &str) -> IResult<&str, Chars> {
350 let res = alt((tag("[:word:]"), tag("[:Word:]"))).parse(input);
351 match res {
352 Ok(_) => Err(nom::Err::Failure(error::Error::new(
353 input,
354 ErrorKind::Verify,
355 ))),
356 Err(e) => Err(e),
357 }
358}
359
360fn parse_chars_brackets(input: &str) -> IResult<&str, Chars> {
361 delimited(
362 char('['),
363 map(
364 fold(
365 1..,
366 alt((
367 map(parse_chars_posix, CharFragment::Multi),
368 map(parse_chars_special, CharFragment::Multi),
369 map(parse_chars_range, CharFragment::Single),
370 )),
371 Vec::new,
372 |mut chars, fragment| {
373 match fragment {
374 CharFragment::Single(p) => chars.push(p),
375 CharFragment::Multi(ps) => chars.extend(ps),
376 }
377 chars
378 },
379 ),
380 Chars::from_ranges,
381 ),
382 char(']'),
383 )
384 .parse(input)
385}
386
387static LOWER: &[(char, char)] = &[('a', 'z')];
388static UPPER: &[(char, char)] = &[('A', 'Z')];
389static ALPHA: &[(char, char)] = &[('A', 'Z'), ('a', 'z')];
390static ALNUM: &[(char, char)] = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
391static DIGIT: &[(char, char)] = &[('0', '9')];
392static XDIGIT: &[(char, char)] = &[('0', '9'), ('a', 'f')];
393static PUNCT: &[(char, char)] = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
394static PRINT: &[(char, char)] = &[(' ', '~')];
395static WORD: &[(char, char)] = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
396
397fn parse_chars_posix(input: &str) -> IResult<&str, &'static [(char, char)]> {
398 delimited(
399 tag("[:"),
400 alt((
401 value(LOWER, tag("lower")),
402 value(UPPER, tag("upper")),
403 value(ALPHA, tag("alpha")),
404 value(ALNUM, tag("alnum")),
405 value(DIGIT, tag("digit")),
406 value(XDIGIT, tag("xdigit")),
407 value(PUNCT, tag("punct")),
408 value(PRINT, tag("print")),
409 )),
410 tag(":]"),
411 )
412 .parse(input)
413}
414
415fn parse_chars_range(input: &str) -> IResult<&str, (char, char)> {
416 if let (remaining, Some((a, b))) = opt(separated_pair(
417 parse_chars_single,
418 char('-'),
419 parse_chars_single,
420 ))
421 .parse(input)?
422 {
423 if a <= b {
424 return Ok((remaining, (a, b)));
425 }
426 return Err(nom::Err::Failure(error::Error::new(
427 input,
428 ErrorKind::Verify,
429 )));
430 }
431 map(parse_chars_single, |c| (c, c)).parse(input)
432}
433
434fn parse_chars_single(input: &str) -> IResult<&str, char> {
435 alt((none_of("\\]"), parse_literal_escaped)).parse(input)
436}
437
438fn parse_chars_special(input: &str) -> IResult<&str, &'static [(char, char)]> {
439 preceded(
440 char('\\'),
441 alt((value(WORD, char('w')), value(DIGIT, char('d')))),
442 )
443 .parse(input)
444}
445
446fn parse_generator(input: &str) -> IResult<&str, Generator> {
447 let verify_inner = peek(verify(anychar, |c| c.is_ascii_lowercase()));
448 let parse_inner = map(
449 fold(
450 1..,
451 parse_generator_fragment,
452 String::new,
453 |mut string, fragment| {
454 match fragment {
455 StringFragment::Escaped(c) => string.push(c),
456 StringFragment::Verbatim(s) => string.push_str(s),
457 }
458 string
459 },
460 ),
461 Generator::from,
462 );
463 delimited(char('{'), preceded(verify_inner, parse_inner), char('}')).parse(input)
464}
465
466fn parse_generator_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
467 alt((
468 map(parse_generator_verbatim, StringFragment::Verbatim),
469 map(parse_literal_escaped, StringFragment::Escaped),
470 ))
471 .parse(input)
472}
473
474fn parse_generator_verbatim(input: &str) -> IResult<&str, &str> {
475 verify(is_not("\\}"), |s: &str| !s.is_empty()).parse(input)
476}
477
478fn parse_list(input: &str) -> IResult<&str, Node> {
479 delimited(char('('), parse_node, char(')')).parse(input)
480}
481
482#[cfg(test)]
483mod tests {
484 use super::*;
485
486 #[test]
487 fn test_literal() {
488 let node = "cats".parse().unwrap();
489 assert_eq!(Node::Literal("cats".into()), node);
490 let node = r#"\\cats\tand\[dogs\]\{woof\}"#.parse().unwrap();
491 assert_eq!(Node::Literal("\\cats\tand[dogs]{woof}".into()), node);
492 }
493
494 #[test]
495 fn test_chars() {
496 assert_eq!(
497 Node::Chars(unsafe {
498 Chars::from_ranges_unchecked([('0', '9'), ('A', 'Z'), ('a', 'z')])
499 }),
500 "[A-Za-z0123-9]".parse::<Node>().unwrap(),
501 );
502 let res = "[z-a]".parse::<Node>();
503 assert!(res.is_err(), "{res:?}");
504 assert_eq!("error Verify at: z-a]", &format!("{}", res.unwrap_err()));
505 }
506
507 #[test]
508 fn test_chars_table() {
509 let tests = [
510 (vec![('A', 'Z')], "[A-MD-Z]"),
511 (vec![('A', 'Z')], "[D-ZA-M]"),
512 (vec![('a', 'j')], "[a-cb-ea-fb-j]"),
513 (vec![('a', 'a'), ('c', 'c')], "[ac]"),
514 (vec![('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')], "\\w"),
515 (vec![('a', 'z')], "[[:lower:]]"),
516 (
517 vec![('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
518 "[[:punct:]]",
519 ),
520 (vec![('!', '~')], "[[:punct:]\\w]"),
521 ];
522 for (ranges, inp) in tests {
523 assert_eq!(
524 Node::Chars(unsafe { Chars::from_ranges_unchecked(ranges) }),
525 inp.parse().unwrap(),
526 );
527 }
528 }
529
530 #[test]
531 fn test_generators() {
532 assert_eq!(
533 Node::Generator(Generator::new("word\tup}")),
534 "{word\\tup\\}}".parse().unwrap()
535 );
536 }
537
538 #[test]
539 fn test_multi() {
540 assert_eq!(
541 Node::List(
542 vec![
543 Node::Generator(Generator::new("word")),
544 Node::Count(
545 Node::List(
546 vec![
547 Node::Literal("-".into()),
548 Node::Generator(Generator::new("word")),
549 ]
550 .into()
551 )
552 .into(),
553 4,
554 4
555 ),
556 ]
557 .into()
558 ),
559 "{word}(-{word}){4}".parse().unwrap(),
560 );
561 }
562
563 #[test]
564 fn test_legacy_words_err() {
565 let res = "[:word:]".parse::<Node>();
566 assert_eq!(
567 "error Verify at: [:word:]",
568 &format!("{}", res.unwrap_err())
569 );
570 }
571
572 #[test]
573 fn test_literal_digits() {
574 assert_eq!(
575 Node::Literal("—".into()),
576 r#"\xe2\x80\x94"#.parse().unwrap()
577 );
578 assert_eq!(Node::Literal("—".into()), "\\u2014".parse().unwrap());
579 assert_eq!(Node::Literal("—".into()), "\\u{002014}".parse().unwrap());
580 assert_eq!(
581 Err(error::Error {
582 input: "\\x80".into(),
583 code: ErrorKind::Char
584 }),
585 "\\x80".parse::<Node>(),
586 );
587 assert_eq!(
588 Err(error::Error {
589 input: "\\ud800".into(),
590 code: ErrorKind::Char
591 }),
592 "\\ud800".parse::<Node>(),
593 );
594 }
595
596 #[test]
597 fn test_remaining() {
598 assert_eq!(
599 Err(Error::new("a\\".into(), ErrorKind::Complete)),
600 "a\\".parse::<Node>()
601 );
602 }
603}