1use crate::error::ErrorRepr;
2use crate::Error;
3use crate::{ir, regex::Regex, reserved::*, Visitor};
4
5use arbitrary::{Arbitrary, Unstructured};
6use std::{collections::HashSet, fmt, str::FromStr};
7
8#[derive(Debug)]
22pub struct Grammar(Vec<Expr>);
23
24impl Grammar {
25 pub fn expression<V: Visitor>(
29 &self,
30 u: &mut Unstructured<'_>,
31 max_depth: Option<usize>,
32 ) -> arbitrary::Result<V> {
33 let mut visitor = V::new();
39 let mut to_write = vec![(&self.0[0], 0)]; while let Some((expr, depth)) = to_write.pop() {
42 if depth > max_depth.unwrap_or(usize::MAX) {
43 return Err(arbitrary::Error::IncorrectFormat);
44 }
45
46 match expr {
47 Expr::Or(v) => {
48 let arb_index = u.int_in_range(0..=(v.len() - 1))?;
49 to_write.push((&v[arb_index], depth + 1));
50 visitor.visit_or(arb_index);
51 }
52 Expr::Concat(v) => {
53 to_write.extend(v.iter().map(|x| (x, depth + 1)));
54 visitor.visit_concat();
55 }
56 Expr::Optional(x) => {
57 let b = bool::arbitrary(u)?;
58 if b {
59 to_write.push((x, depth + 1));
60 }
61 visitor.visit_optional(b);
62 }
63 Expr::Repetition(x, min_rep) => {
64 let mut reps = 0;
65 u.arbitrary_loop(Some(*min_rep), Some(10), |_| {
66 to_write.push((x, depth + 1));
67 reps += 1;
68 Ok(std::ops::ControlFlow::Continue(()))
69 })?;
70 visitor.visit_repetition(reps);
71 }
72 Expr::Reference(index) => {
73 to_write.push((&self.0[*index], depth + 1));
74 visitor.visit_reference(*index);
75 }
76 Expr::Literal(s) => visitor.visit_literal(s.as_str()),
77 Expr::Bytes(b) => visitor.visit_bytes(b),
78 Expr::Regex(regex) => regex.visit(&mut visitor, u)?,
79 Expr::Group(x) => to_write.push((x, depth + 1)),
80 Expr::Predefined(v) => v.visit(&mut visitor, u)?,
81 }
82 }
83 Ok(visitor)
84 }
85}
86
87impl fmt::Display for Grammar {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
94 for (i, rule) in self.0.iter().enumerate() {
95 writeln!(f, "{}: {}", i, rule)?;
96 }
97 Ok(())
98 }
99}
100
101impl FromStr for Grammar {
102 type Err = Error;
103
104 fn from_str(s: &str) -> Result<Self, Self::Err> {
105 let parsed_ir = ir::bnf::expr(s).map_err(|e| Error(ErrorRepr::Grammar(e)))?;
106 Self::try_from(parsed_ir)
107 }
108}
109
110impl TryFrom<Vec<(String, ir::Expr)>> for Grammar {
111 type Error = Error;
112
113 fn try_from(value: Vec<(String, ir::Expr)>) -> Result<Self, Self::Error> {
114 let (mut names, ir_exprs): (Vec<String>, Vec<ir::Expr>) = value.into_iter().unzip();
115
116 if let Some(dups) = find_duplicates(&names) {
117 return Err(Error(ErrorRepr::DuplicateVars(dups)));
118 }
119
120 if let Some(res) = filter_reserved_keywords(&names) {
121 return Err(Error(ErrorRepr::Reserved(res)));
122 }
123
124 names.extend(Predefined::all().map(|s| s.as_str().to_string()));
125
126 let mut exprs = ir_exprs
127 .into_iter()
128 .map(|ir_expr| Expr::try_new(ir_expr, &names))
129 .collect::<Result<Vec<Expr>, _>>()?;
130
131 exprs.extend(Predefined::all().map(Expr::Predefined));
132
133 assert_eq!(names.len(), exprs.len());
134 Ok(Self(exprs))
135 }
136}
137
138fn find_duplicates(names: &[String]) -> Option<HashSet<String>> {
139 let mut set: HashSet<String> = names.iter().cloned().collect();
140 let dups: HashSet<String> = names.iter().filter(|&n| !set.remove(n)).cloned().collect();
141 (!dups.is_empty()).then_some(dups)
142}
143
144#[derive(Debug)]
145enum Expr {
146 Or(Vec<Expr>),
147 Concat(Vec<Expr>),
148 Optional(Box<Expr>),
149 Repetition(Box<Expr>, u32),
150 Reference(usize),
151 Literal(String),
152 Regex(Regex),
153 Bytes(Vec<u8>),
154 Group(Box<Expr>),
155 Predefined(Predefined),
156}
157
158fn fmt_w_name<'a>(
159 name: &'static str,
160 x: impl Iterator<Item = &'a Expr>,
161 f: &mut fmt::Formatter<'_>,
162) -> Result<(), fmt::Error> {
163 write!(
164 f,
165 "{}({})",
166 name,
167 x.into_iter()
168 .map(|x| x.to_string())
169 .collect::<Vec<String>>()
170 .join(", ")
171 )
172}
173
174impl fmt::Display for Expr {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
176 match self {
177 Self::Or(x) => fmt_w_name("or", x.iter(), f)?,
178 Self::Concat(x) => fmt_w_name("concat", x.iter().rev(), f)?,
179 Self::Optional(x) => write!(f, "option({})", x)?,
180 Self::Repetition(x, _) => write!(f, "repeat({})", x)?,
181 Self::Reference(index) => write!(f, "{}", index)?,
182 Self::Literal(l) => write!(f, "{:?}", l)?,
183 Self::Regex(_) => write!(f, "regex")?, Self::Bytes(b) => write!(f, "{:?}", b)?,
185 Self::Group(x) => write!(f, "({})", x)?,
186 Self::Predefined(p) => write!(f, "{}", p.as_str())?,
187 }
188 Ok(())
189 }
190}
191
192impl Expr {
193 fn try_new(ir_expr: ir::Expr, names: &[String]) -> Result<Self, Error> {
199 Ok(match ir_expr {
200 ir::Expr::Or(x) => Self::Or(
201 x.into_iter()
202 .map(|e| Self::try_new(e, names))
203 .collect::<Result<Vec<_>, _>>()?,
204 ),
205 ir::Expr::Concat(x) => {
206 let mut y = x
207 .into_iter()
208 .map(|e| Self::try_new(e, names))
209 .collect::<Result<Vec<_>, _>>()?;
210 y.reverse(); Self::Concat(y)
212 }
213 ir::Expr::Optional(x) => Self::Optional(Box::new(Self::try_new(*x, names)?)),
214 ir::Expr::Repetition(x, min) => {
215 Self::Repetition(Box::new(Self::try_new(*x, names)?), min)
216 }
217 ir::Expr::Group(x) => Self::Group(Box::new(Self::try_new(*x, names)?)),
218 ir::Expr::Reference(name) => match names.iter().position(|n| *n == name) {
219 Some(i) => Self::Reference(i),
220 None => return Err(Error(ErrorRepr::UnkownVar(name))),
221 },
222 ir::Expr::Literal(x) => Self::Literal(x),
223 ir::Expr::Regex(r) => {
224 Self::Regex(Regex::compile(&r, 10).map_err(|e| Error(ErrorRepr::Regex(e)))?)
225 }
226 ir::Expr::Bytes(x) => Self::Bytes(x),
227 })
228 }
229}
230
231#[cfg(test)]
232mod tests {
233 use super::*;
234
235 #[test]
236 fn catches_duplicates() {
237 for x in [
238 r#"x: y; x: z;"#,
239 r#"x: y; x: x;"#,
240 r#"ok: x; x: y; y: x; x: x;"#,
241 ] {
242 let result: Error = x.parse::<Grammar>().unwrap_err();
243 assert_eq!(
244 result,
245 Error(ErrorRepr::DuplicateVars(["x".into()].into_iter().collect()))
246 )
247 }
248
249 for x in [
250 r#"x: y; x: z; y: x; y: y;"#,
251 r#"x: x; y: x; y: y; x: y; x: z;"#,
252 ] {
253 let result: Error = x.parse::<Grammar>().unwrap_err();
254 assert_eq!(
255 result,
256 Error(ErrorRepr::DuplicateVars(
257 ["x".into(), "y".into()].into_iter().collect()
258 ))
259 )
260 }
261 }
262
263 #[test]
264 fn rejects_reserved() {
265 for x in [r#"u16: u16 ;"#, r#"u16: [8, 8] ;"#] {
266 let result: Error = x.parse::<Grammar>().unwrap_err();
267 assert_eq!(
268 result,
269 Error(ErrorRepr::Reserved(["u16".into()].into_iter().collect()))
270 )
271 }
272
273 for x in [r#"String: u16 ;"#, r#"String: [8, 8] ;"#] {
274 let result: Error = x.parse::<Grammar>().unwrap_err();
275 assert_eq!(
276 result,
277 Error(ErrorRepr::Reserved(["String".into()].into_iter().collect()))
278 )
279 }
280 }
281}