uri_parsing_rs/parser/parsers/
basic_parsers.rs

1use itertools::Itertools;
2use nom::AsChar;
3use nom::branch::alt;
4use nom::character::complete::{one_of, satisfy};
5use nom::character::complete;
6use nom::combinator::map;
7use nom::sequence::tuple;
8
9use crate::parser::parsers::{Elms, UResult};
10
/// Returns `true` when `c` is an RFC 3986 `unreserved` character:
/// an ASCII letter, an ASCII digit, or one of `-`, `.`, `_`, `~`.
///
/// Only ASCII is accepted; non-ASCII letters and digits yield `false`.
pub(crate) fn is_unreserved(c: char) -> bool {
  c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | '_' | '~')
}
15
16// reserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
17pub(crate) fn unreserved(i: Elms) -> UResult<Elms, char> {
18  satisfy(move |c| is_unreserved(c))(i)
19}
20
/// Returns `true` when `c` is an RFC 3986 `gen-delims` character:
/// one of `:`, `/`, `?`, `#`, `[`, `]`, `@`.
pub fn is_gen_delims(c: char) -> bool {
  matches!(c, ':' | '/' | '?' | '#' | '[' | ']' | '@')
}
24
25// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
26pub(crate) fn gen_delims(i: Elms) -> UResult<Elms, char> {
27  satisfy(|c| is_gen_delims(c))(i)
28}
29
/// Returns `true` when `c` is a `gen-delims` character other than `:`,
/// i.e. one of `/`, `?`, `#`, `[`, `]`, `@`.
fn is_gen_delims_without_colon(c: char) -> bool {
  matches!(c, '/' | '?' | '#' | '[' | ']' | '@')
}
33
34fn gen_delims_without_colon(i: Elms) -> UResult<Elms, char> {
35  satisfy(|c| is_gen_delims_without_colon(c))(i)
36}
37
/// Returns `true` when `c` is an RFC 3986 `sub-delims` character:
/// one of `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`.
pub(crate) fn is_sub_delims(c: char) -> bool {
  matches!(
    c,
    '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
  )
}
41
42// sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
43pub(crate) fn sub_delims(i: Elms) -> UResult<Elms, char> {
44  satisfy(|c| is_sub_delims(c))(i)
45}
46
/// Returns `true` when `c` is a `sub-delims` character other than `=` and `&`,
/// i.e. one of `!`, `$`, `'`, `(`, `)`, `*`, `+`, `,`, `;`.
pub(crate) fn is_sub_delims_without_eq_and(c: char) -> bool {
  matches!(c, '!' | '$' | '\'' | '(' | ')' | '*' | '+' | ',' | ';')
}
50
51pub(crate) fn sub_delims_without_eq_and(i: Elms) -> UResult<Elms, char> {
52  satisfy(|c| is_sub_delims_without_eq_and(c))(i)
53}
54
55pub(crate) fn reserved(i: Elms) -> UResult<Elms, char> {
56  alt((gen_delims, sub_delims))(i)
57}
58
/// Returns `true` when `c` is an ASCII hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
pub(crate) fn is_hex_digit(c: char) -> bool {
  c.is_ascii_hexdigit()
}
62
63pub(crate) fn hex_digit(i: Elms) -> UResult<Elms, char> {
64  satisfy(|c| is_hex_digit(c))(i)
65}
66
/// Returns `true` when `c` is an ASCII decimal digit (`0-9`).
pub(crate) fn is_digit(c: char) -> bool {
  c.is_ascii_digit()
}
70
71pub(crate) fn digit(i: Elms) -> UResult<Elms, char> {
72  satisfy(|c| is_digit(c))(i)
73}
74
75pub(crate) fn pct_encoded(i: Elms) -> UResult<Elms, String> {
76  map(
77    tuple((complete::char('%'), hex_digit, hex_digit)),
78    |(c1, c2, c3)| [c1, c2, c3].iter().collect(),
79  )(i)
80}
81
82pub(crate) fn pchar(i: Elms) -> UResult<Elms, String> {
83  alt((
84    map(unreserved, |c| c.into()),
85    pct_encoded,
86    map(sub_delims, |c| c.into()),
87    map(one_of(":@"), |c| c.into()),
88  ))(i)
89}
90
91pub(crate) fn pchar_without_eq_and(i: Elms) -> UResult<Elms, String> {
92  alt((
93    map(unreserved, |c| c.into()),
94    pct_encoded,
95    map(sub_delims_without_eq_and, |c| c.into()),
96    map(one_of(":@"), |c| c.into()),
97  ))(i)
98}
99
100pub(crate) fn pchar_without_colon(i: Elms) -> UResult<Elms, String> {
101  alt((
102    map(unreserved, |c| c.into()),
103    pct_encoded,
104    map(gen_delims_without_colon, |c| c.into()),
105    map(one_of(":@"), |c| c.into()),
106  ))(i)
107}
108
/// `prop_check_rs` generators for the character classes handled by the
/// parsers in this file.
#[cfg(test)]
pub mod gens {
  use itertools::Itertools;
  use prop_check_rs::gen::{Gen, Gens};

  /// Draws a boolean; on `true` wraps a value from `gen` in `Some`, on
  /// `false` yields `None`.
  pub fn to_option<F>(mut gen: F) -> Gen<Option<String>>
  where
    F: FnMut() -> Gen<String> + 'static,
  {
    Gens::one_bool().bind(move |present| {
      if !present {
        return Gen::<String>::unit(|| None);
      }
      gen().fmap(Some)
    })
  }

  // Generators

  /// Generator over the lowercase ASCII letters `a..=z`.
  fn low_alpha_gen() -> Gen<char> {
    Gens::one_of_vec(('a'..='z').collect_vec())
  }

  /// Generator over the uppercase ASCII letters `A..=Z`.
  fn high_alpha_gen() -> Gen<char> {
    Gens::one_of_vec(('A'..='Z').collect_vec())
  }

  /// Generator over ASCII letters; a generated boolean selects lower or
  /// upper case.
  pub fn alpha_char_gen() -> Gen<char> {
    Gens::one_bool().bind(|lowercase| {
      if lowercase {
        low_alpha_gen()
      } else {
        high_alpha_gen()
      }
    })
  }

  /// Generator over the characters in the inclusive range `min..=max`.
  pub fn digit_gen(min: char, max: char) -> Gen<char> {
    Gens::one_of_vec((min..=max).collect_vec())
  }

  /// Generator over hexadecimal digits: `0-9`, `A-F` or `a-f`, selected by a
  /// generated value in `1..=3`.
  pub fn hex_digit_char_gen() -> Gen<char> {
    Gens::choose_u8(1, 3).bind(|branch| match branch {
      1 => digit_gen('0', '9'),
      2 => Gens::choose('A', 'F'),
      3 => Gens::choose('a', 'f'),
      other => panic!("x = {}", other),
    })
  }

  /// Builds a string from characters produced by `f`; the character count is
  /// generated with `Gens::choose_u8(1, len)`.
  pub fn rep_char_gen<F>(len: u8, mut f: F) -> Gen<String>
  where
    F: FnMut() -> Gen<char> + 'static,
  {
    let count_gen = Gens::choose_u8(1, len);
    count_gen.bind(move |count| {
      let chars_gen = Gens::list_of_n(usize::from(count), || f());
      chars_gen.fmap(|chars| chars.into_iter().collect())
    })
  }

  /// Concatenates strings produced by `f`; the piece count is generated with
  /// `Gens::choose_u8(min, max)`.
  pub fn rep_str_gen<F>(min: u8, max: u8, mut f: F) -> Gen<String>
  where
    F: FnMut() -> Gen<String> + 'static,
  {
    let count_gen = Gens::choose_u8(min, max);
    count_gen.bind(move |count| {
      let pieces_gen = Gens::list_of_n(usize::from(count), || f());
      pieces_gen.fmap(|pieces| pieces.into_iter().collect())
    })
  }

  /// Generator over `unreserved` characters: a letter, a digit, or one of
  /// `-`, `.`, `_`, `~`, selected by a generated value in `1..=3`.
  pub fn unreserved_char_gen() -> Gen<char> {
    Gens::choose(1u8, 3).bind(|branch| match branch {
      1 => alpha_char_gen(),
      2 => digit_gen('0', '9'),
      3 => Gens::one_of_vec(vec!['-', '.', '_', '~']),
      other => panic!("x = {}", other),
    })
  }

  /// String of `unreserved` characters, built via `rep_char_gen(len, ..)`.
  pub fn unreserved_str_gen(len: u8) -> Gen<String> {
    rep_char_gen(len, unreserved_char_gen)
  }

  /// Generator over the `gen-delims` characters.
  pub fn gen_delims_char_gen() -> Gen<char> {
    let delims = vec![':', '/', '?', '#', '[', ']', '@'];
    Gens::one_of_vec(delims)
  }

  /// String of `gen-delims` characters, built via `rep_char_gen(len, ..)`.
  pub fn gen_delims_str_gen(len: u8) -> Gen<String> {
    rep_char_gen(len, gen_delims_char_gen)
  }

  /// Generator over the `sub-delims` characters.
  pub fn sub_delims_char_gen() -> Gen<char> {
    let delims = vec!['!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='];
    Gens::one_of_vec(delims)
  }

  /// String of `sub-delims` characters, built via `rep_char_gen(len, ..)`.
  pub fn sub_delims_str_gen(len: u8) -> Gen<String> {
    rep_char_gen(len, sub_delims_char_gen)
  }

  /// Generator over `reserved` characters; a generated boolean selects
  /// `gen-delims` or `sub-delims`.
  fn reserved_char_gen() -> Gen<char> {
    Gens::one_bool().bind(|general| {
      if general {
        gen_delims_char_gen()
      } else {
        sub_delims_char_gen()
      }
    })
  }

  /// String of `reserved` characters, built via `rep_char_gen(len, ..)`.
  pub fn reserved_str_gen(len: u8) -> Gen<String> {
    rep_char_gen(len, reserved_char_gen)
  }

  /// Generator for a percent-encoded triplet: `%` followed by two generated
  /// hexadecimal digits.
  pub fn pct_encoded_str_gen() -> Gen<String> {
    Gens::list_of_n(2, hex_digit_char_gen).fmap(|digits| {
      let hex: String = digits.into_iter().collect();
      format!("%{}", hex)
    })
  }

  /// Generator for a string of `pchar` pieces. Each piece is an `unreserved`
  /// character, a percent-encoded triplet, a `sub-delims` character, or one
  /// of `:` / `@`, selected by a generated value in `1..=4`.
  pub fn pchar_str_gen(min: u8, max: u8) -> Gen<String> {
    rep_str_gen(min, max, || {
      Gens::choose_u8(1, 4).bind(|branch| match branch {
        1 => unreserved_char_gen().fmap(String::from),
        2 => pct_encoded_str_gen(),
        3 => sub_delims_char_gen().fmap(String::from),
        4 => Gens::one_of_vec(vec![':', '@']).fmap(String::from),
        other => panic!("x = {}", other),
      })
    })
  }
}
234
/// Property tests: every string produced by a generator in `gens` must be
/// consumed by the matching parser and reproduced unchanged.
#[cfg(test)]
mod tests {
  use anyhow::Result;
  use nom::multi::many1;
  use prop_check_rs::prop;
  use prop_check_rs::prop::TestCases;
  use prop_check_rs::rng::RNG;

  use super::*;
  use super::gens::*;

  /// Number of cases run for most properties.
  const TEST_COUNT: TestCases = 100;
  /// Number of cases run for the `reserved` property.
  const RESERVED_TEST_COUNT: TestCases = 1000;

  /// Installs the test logger at debug level.
  ///
  /// The level is set on the builder instead of writing `RUST_LOG` with
  /// `env::set_var`: tests run on parallel threads, and mutating the
  /// process-wide environment from them is a data-race hazard.
  /// `try_init` fails once a logger is installed; that error is ignored so
  /// every test can call `init`.
  fn init() {
    let _ = env_logger::builder()
      .filter_level(log::LevelFilter::Debug)
      .is_test(true)
      .try_init();
  }

  #[test]
  fn test_unreserved() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || unreserved_str_gen(u8::MAX - 1),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = many1(unreserved)(Elms::new(input.as_bytes())).ok().unwrap();
        parsed.into_iter().collect::<String>() == input
      },
    );
    prop::test_with_prop(prop, 5, TEST_COUNT, RNG::new())
  }

  #[test]
  fn test_gen_delims() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || gen_delims_str_gen(u8::MAX - 1),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = many1(gen_delims)(Elms::new(input.as_bytes())).ok().unwrap();
        parsed.into_iter().collect::<String>() == input
      },
    );
    prop::test_with_prop(prop, 5, TEST_COUNT, RNG::new())
  }

  #[test]
  fn test_sub_delims() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || sub_delims_str_gen(u8::MAX - 1),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = many1(sub_delims)(Elms::new(input.as_bytes())).ok().unwrap();
        parsed.into_iter().collect::<String>() == input
      },
    );
    prop::test_with_prop(prop, 5, TEST_COUNT, RNG::new())
  }

  #[test]
  fn test_reserved() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || reserved_str_gen(u8::MAX - 1),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = many1(reserved)(Elms::new(input.as_bytes())).ok().unwrap();
        parsed.into_iter().collect::<String>() == input
      },
    );
    prop::test_with_prop(prop, 5, RESERVED_TEST_COUNT, RNG::new())
  }

  #[test]
  fn test_pct_encoded() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || pct_encoded_str_gen(),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = pct_encoded(Elms::new(input.as_bytes())).ok().unwrap();
        parsed == input
      },
    );
    prop::test_with_prop(prop, 5, TEST_COUNT, RNG::new())
  }

  #[test]
  fn test_pchar() -> Result<()> {
    init();
    let mut counter = 0;
    let prop = prop::for_all(
      || pchar_str_gen(1, u8::MAX - 1),
      move |input| {
        counter += 1;
        log::debug!("{:>03}, value = {}", counter, input);
        let (_, parsed) = many1(pchar)(Elms::new(input.as_bytes())).ok().unwrap();
        parsed.into_iter().collect::<String>() == input
      },
    );
    prop::test_with_prop(prop, 5, TEST_COUNT, RNG::new())
  }
}
351}