use super::pattern::*;
use pest::{
iterators::{Pair, Pairs},
Parser,
};
use pest_derive::Parser;
// Local aliases that fix the generic parameter of the pattern types to `Parsed`,
// so the `impl` blocks in this file can name them without the type argument.
/// `crate::pattern::Pattern` with its type parameter set to `Parsed`.
type Pattern = crate::pattern::Pattern<Parsed>;
/// `crate::pattern::Group` with its type parameter set to `Parsed`.
type Group = crate::pattern::Group<Parsed>;
/// `crate::pattern::Segment` with its type parameter set to `Parsed`.
type Segment = crate::pattern::Segment<Parsed>;
/// `crate::pattern::Item` with its type parameter set to `Parsed`.
type Item = crate::pattern::Item<Parsed>;
/// Parser type whose implementation is derived by `pest_derive` from the
/// grammar file named in the `grammar` attribute (`grammar.pest`).
///
/// NOTE(review): the `Rule` enum matched on throughout this file is presumably
/// the one generated by this derive — confirm against the grammar.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct PatternParser;
/// Errors that can be returned while parsing a pattern string.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The input was rejected by the pest parser; wraps the underlying pest error.
///
/// The `#[from]` attribute lets `?` convert a `pest::error::Error<Rule>`
/// into this variant.
#[error("syntax error")]
Syntax(#[from] pest::error::Error<Rule>),
}
/// Result alias for this module; the error type defaults to [`Error`].
type Result<T, E = Error> = std::result::Result<T, E>;
impl Pattern {
    /// Parses `input` with the `pattern` grammar rule and wraps the resulting
    /// top-level group in `Pattern::Group`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Syntax`] when pest rejects the input, and forwards any
    /// error produced while building the group.
    pub fn parse(input: &str) -> Result<Self> {
        let parsed = PatternParser::parse(Rule::pattern, input)?;
        let group = Group::parse_pairs(parsed)?;
        Ok(Self::Group(group))
    }

    /// Converts a single pair into the pattern variant matching its rule.
    ///
    /// Handles `literal`, `group`, `set` and `special` pairs; the pair's inner
    /// pairs are handed to the corresponding type's `parse_pairs`.
    ///
    /// # Panics
    ///
    /// Panics if the pair has any other rule.
    fn parse_pair(input: Pair<'_, Rule>) -> Result<Self> {
        let rule = input.as_rule();
        let inner = input.into_inner();
        Ok(match rule {
            Rule::literal => Literal::parse_pairs(inner)?.into(),
            Rule::group => Group::parse_pairs(inner)?.into(),
            Rule::set => Set::parse_pairs(inner)?.into(),
            Rule::special => Special::parse_pairs(inner)?.into(),
            _ => unreachable!(),
        })
    }
}
impl Special {
    /// Builds a `Special` from the first pair of `input`.
    ///
    /// A `wordlist` pair yields `Self::Wordlist` and a `markov` pair yields
    /// `Self::Markov`; in both cases the payload is the text of the pair's
    /// first inner pair.
    ///
    /// # Panics
    ///
    /// Panics if `input` is empty, if the first pair has another rule, or if
    /// that pair has no inner pair.
    fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
        let pair = input.next().unwrap();
        // Pick the variant first so an unexpected rule is reported before the
        // inner pair is inspected.
        let constructor: fn(String) -> Self = match pair.as_rule() {
            Rule::wordlist => Self::Wordlist,
            Rule::markov => Self::Markov,
            _ => unreachable!(),
        };
        let name = pair.into_inner().next().unwrap();
        Ok(constructor(name.as_str().to_string()))
    }
}
impl Literal {
    /// Builds a literal from the first pair of `input`.
    ///
    /// # Panics
    ///
    /// Panics if `input` is empty; see [`Literal::parse_pair`] for the rest.
    fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
        Self::parse_pair(input.next().unwrap())
    }

    /// Extracts the literal character from a `unicode` or `escaped` pair.
    ///
    /// For `unicode` the first character of the matched text is used; for
    /// `escaped` the second one is used (the first is skipped).
    ///
    /// # Panics
    ///
    /// Panics if the pair has another rule or the matched text is too short.
    fn parse_pair(pair: Pair<'_, Rule>) -> Result<Self> {
        let mut chars = pair.as_str().chars();
        let picked = match pair.as_rule() {
            Rule::unicode => chars.next(),
            Rule::escaped => chars.nth(1),
            _ => unreachable!(),
        };
        let value = picked.unwrap();
        Ok(Self { value })
    }
}
impl Item {
fn parse_pairs(mut input: Pairs<'_, Rule>) -> Result<Self> {
let pattern = input.next().unwrap();
let mut item = Item::new(Pattern::parse_pair(pattern)?);
for pair in input {
match pair.as_rule() {
Rule::repeat => {
let mut amount = pair.into_inner();
let min = amount.next().unwrap().as_str().parse().unwrap();
item.repeat = min..=min;
if let Some(pair) = amount.next() {
let max = pair.as_str().parse().unwrap();
item.repeat = min..=max;
}
}
Rule::optional => item.optional = true,
_ => unreachable!(),
}
}
Ok(item)
}
}
impl Segment {
    /// Builds a segment by parsing every pair of `input` as an item and
    /// appending it, in order, to a default segment.
    ///
    /// Stops at, and returns, the first error produced by an item.
    ///
    /// # Panics
    ///
    /// Panics if a pair is not an `item`.
    fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
        let mut segment = Self::default();
        let mut pairs = input;
        pairs.try_for_each(|pair| -> Result<()> {
            if !matches!(pair.as_rule(), Rule::item) {
                unreachable!();
            }
            let item = Item::parse_pairs(pair.into_inner())?;
            segment.items.push(item);
            Ok(())
        })?;
        Ok(segment)
    }
}
impl Group {
    /// Builds a group by parsing every `segment` pair of `input` and appending
    /// it, in order, to a default group. `EOI` pairs are skipped.
    ///
    /// Stops at, and returns, the first error produced by a segment.
    ///
    /// # Panics
    ///
    /// Panics if a pair is neither a `segment` nor `EOI`.
    fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
        let mut group = Self::default();
        for pair in input {
            let rule = pair.as_rule();
            // The end-of-input marker carries no content.
            if rule == Rule::EOI {
                continue;
            }
            if rule != Rule::segment {
                unreachable!("unexpected rule {:?}", rule);
            }
            let segment = Segment::parse_pairs(pair.into_inner())?;
            group.segments.push(segment);
        }
        Ok(group)
    }
}
/// Returns the character held by a `literal` pair.
///
/// # Panics
///
/// Panics if `pair` is not a `literal`.
fn get_char(pair: Pair<'_, Rule>) -> Result<char> {
    if pair.as_rule() != Rule::literal {
        unreachable!();
    }
    let literal = Literal::parse_pairs(pair.into_inner())?;
    Ok(literal.value)
}
impl Set {
    /// Builds a set from the `range` pairs of `input`.
    ///
    /// Each `range` pair holds one or two literal characters. A single
    /// character `c` becomes the range `c..=c`; two characters become the
    /// inclusive range between them, with the bounds swapped if they were
    /// written in descending order. Every range is inserted into a default set
    /// with the value `1`.
    ///
    /// # Errors
    ///
    /// Forwards any error returned while reading a range's characters.
    ///
    /// # Panics
    ///
    /// Panics if a pair is not a `range` or if a `range` has no inner pair.
    fn parse_pairs(input: Pairs<'_, Rule>) -> Result<Self> {
        let mut set = Self::default();
        for pair in input {
            match pair.as_rule() {
                Rule::range => {
                    let mut bounds = pair.into_inner();
                    let first = get_char(bounds.next().unwrap())?;
                    let range = match bounds.next() {
                        Some(pair) => {
                            let second = get_char(pair)?;
                            // Accept bounds in either order.
                            first.min(second)..=second.max(first)
                        }
                        None => first..=first,
                    };
                    set.insert(range, 1);
                }
                _ => unreachable!(),
            }
        }
        Ok(set)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use test_strategy::*;

    // Expands to a `Pattern::Group` whose segments are the given expressions.
    macro_rules! group {
        ($($body:tt)*) => {
            Pattern::Group(Group {
                segments: vec![$($body)*],
            })
        };
    }

    // Expands to a `Segment` whose items are the given expressions.
    macro_rules! segment {
        ($($body:tt)*) => {
            Segment {
                items: vec![$($body)*],
            }
        };
    }

    // Expands to a `Set` with every given range inserted with the value `1`.
    macro_rules! set {
        ($($entry:expr),*) => {
            {
                let mut ranges = Set::default();
                $(ranges.insert($entry, 1);)*
                ranges
            }
        };
    }

    /// Parses `input`, panicking if the parser reports an error.
    fn parsed(input: &str) -> Pattern {
        Pattern::parse(input).unwrap()
    }

    #[test]
    fn parse_literal() {
        let actual = parsed("a");
        let expected = group![segment![Item::new(Literal::new('a'))]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_literal_optional() {
        let actual = parsed("a?");
        let expected = group![segment![Item::new(Literal::new('a')).optional(true)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_literal_repeat() {
        let actual = parsed("a{1,3}");
        let expected = group![segment![Item::new(Literal::new('a')).repeat(1..=3)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_group() {
        let actual = parsed("()");
        let expected = group![segment![Item::new(group![segment![]])]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_group_segments() {
        let actual = parsed("(a|b|c)");
        let alternatives = group![
            segment![Item::new(Literal::new('a'))],
            segment![Item::new(Literal::new('b'))],
            segment![Item::new(Literal::new('c'))],
        ];
        let expected = group![segment![Item::new(alternatives)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_group_optional() {
        let actual = parsed("()?");
        let expected = group![segment![Item::new(group![segment![]]).optional(true)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_group_repeat() {
        let actual = parsed("(){1,3}");
        let expected = group![segment![Item::new(group![segment![]]).repeat(1..=3)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_set() {
        let actual = parsed("[abc]");
        let expected = group![segment![Item::new(set!['a'..='c'])]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_set_optional() {
        let actual = parsed("[abc]?");
        let expected = group![segment![Item::new(set!['a'..='c']).optional(true)]];
        assert_eq!(actual, expected);
    }

    #[test]
    fn parse_set_repeat() {
        let actual = parsed("[abc]{1,3}");
        let expected = group![segment![Item::new(set!['a'..='c']).repeat(1..=3)]];
        assert_eq!(actual, expected);
    }

    #[proptest]
    fn parse_repeat_exact(amount: usize) {
        let source = format!("x{{{amount}}}");
        let actual = parsed(&source);
        let expected = group![segment![
            Item::new(Literal::new('x')).repeat(amount..=amount)
        ]];
        assert_eq!(actual, expected);
    }

    #[proptest]
    fn parse_repeat_minmax(min: usize, max: usize) {
        let source = format!("x{{{min},{max}}}");
        let actual = parsed(&source);
        let expected = group![segment![Item::new(Literal::new('x')).repeat(min..=max)]];
        assert_eq!(actual, expected);
    }

    #[proptest]
    fn parse_optional(optional: bool) {
        let suffix = if optional { "?" } else { "" };
        let source = format!("x{suffix}");
        let actual = parsed(&source);
        let expected = group![segment![Item::new(Literal::new('x')).optional(optional)]];
        assert_eq!(actual, expected);
    }

    // Arbitrary input must be handled without panicking; the outcome itself is
    // not inspected.
    #[proptest]
    fn parse_arbitrary(input: String) {
        let _ = std::hint::black_box(Pattern::parse(&input));
    }
}