boreal_parser/
types.rs

1use std::ops::Range;
2
3use super::error::Error;
4use nom::{
5    error::{ErrorKind, ParseError as NomParseError},
6    Compare, CompareResult, Err, IResult,
7};
8
9#[derive(Clone, Copy, Debug)]
10pub(crate) struct Input<'a> {
11    /// Whole input being parsed.
12    ///
13    /// This reference is never modified.
14    input: &'a str,
15
16    /// Cursor pointing to the string slice currently being parsed.
17    ///
18    /// This is a reference on the same slice as [`input`], updated
19    /// as we go through the parsing.
20    cursor: &'a str,
21
22    /// Saved position before the last applied rtrim.
23    cursor_before_last_rtrim: &'a str,
24
25    /// Counter on string recursion (regex and hex-string).
26    ///
27    /// This is used in combinators using recursions, but only if no other recursive combinator
28    /// can be present in it.
29    /// For example, recursion to parse hex-strings and regexes uses this counter, but recursion
30    /// to parse expressions do not (as expressions can contain regexes).
31    pub string_recursion_counter: u8,
32
33    /// Counter on expression recursion.
34    pub expr_recursion_counter: u8,
35
36    /// Parameters used during parsing.
37    pub params: Params,
38}
39
/// Tunable limits applied while parsing.
#[derive(Clone, Copy, Debug)]
pub struct Params {
    /// Upper bound for the string recursion counter.
    ///
    /// Reaching it makes the parser return an error right away, so that the
    /// stack cannot overflow.
    pub(crate) string_recursion_limit: u8,

    /// Upper bound for the expression recursion counter.
    ///
    /// Reaching it makes the parser return an error right away, so that the
    /// stack cannot overflow.
    pub(crate) expr_recursion_limit: u8,
}
55
56impl Default for Params {
57    fn default() -> Self {
58        Self {
59            expr_recursion_limit: 50,
60            string_recursion_limit: 30,
61        }
62    }
63}
64
65impl Params {
66    /// Maximum recursion depth allowed when parsing an expression.
67    ///
68    /// This is a defensive limit to prevent the parsing of the rule to
69    /// trigger a stack overflow.
70    ///
71    /// The default value used for this limit should only be reached in
72    /// rules written to try to trigger a stack overflow. However, should
73    /// this limit be too low for real rules, it can be raised.
74    ///
75    /// Default value is `50`.
76    #[must_use]
77    pub fn expression_recursion_limit(mut self, limit: u8) -> Self {
78        self.expr_recursion_limit = limit;
79        self
80    }
81
82    /// Maximum recursion depth allowed when parsing a regex or a hex-string.
83    ///
84    /// This is a defensive limit to prevent the parsing of the rule to
85    /// trigger a stack overflow.
86    ///
87    /// The default value used for this limit should only be reached in
88    /// rules written to try to trigger a stack overflow. However, should
89    /// this limit be too low for real rules, it can be raised.
90    ///
91    /// Default value is `30`.
92    #[must_use]
93    pub fn string_recursion_limit(mut self, limit: u8) -> Self {
94        self.string_recursion_limit = limit;
95        self
96    }
97}
98
/// Location inside the input, recorded as the cursor value at that point.
#[derive(Copy, Clone, Debug)]
pub(crate) struct Position<'a> {
    /// Text that remained to be parsed when the position was taken.
    cursor: &'a str,
}
104
/// Result type of the parsers: a nom [`IResult`] whose input is [`Input`],
/// whose output is `O` and whose error type is [`Error`].
pub(crate) type ParseResult<'a, O> = IResult<Input<'a>, O, Error>;
106
107impl<'a> Input<'a> {
108    pub(crate) fn new(input: &'a str) -> Self {
109        Self::with_params(input, Params::default())
110    }
111
112    pub(crate) fn with_params(input: &'a str, params: Params) -> Self {
113        Self {
114            input,
115            cursor: input,
116            cursor_before_last_rtrim: input,
117            string_recursion_counter: 0,
118            expr_recursion_counter: 0,
119            params,
120        }
121    }
122
123    pub(crate) fn pos(&self) -> Position<'a> {
124        Position {
125            cursor: self.cursor,
126        }
127    }
128
129    pub(crate) fn cursor(&self) -> &'a str {
130        self.cursor
131    }
132
133    pub(crate) fn advance(&mut self, count: usize) {
134        if self.cursor.len() >= count {
135            self.cursor = &self.cursor[count..];
136        } else {
137            self.cursor = &self.cursor[self.cursor.len()..];
138        }
139    }
140
141    pub(crate) fn strip_prefix(&self, prefix: &str) -> Option<Self> {
142        self.cursor
143            .strip_prefix(prefix)
144            .map(|cursor| Self { cursor, ..*self })
145    }
146
147    pub(crate) fn save_cursor_before_rtrim(&mut self) {
148        self.cursor_before_last_rtrim = self.cursor;
149    }
150
151    pub(crate) fn get_position_offset(&self) -> usize {
152        (self.cursor.as_ptr() as usize) - (self.input.as_ptr() as usize)
153    }
154
155    /// Generate a span from a starting position.
156    ///
157    /// The given input is the start of the span.
158    /// The end of the span is the cursor saved before the last rtrim.
159    pub(crate) fn get_span_from(&self, start: Position) -> Range<usize> {
160        let input = self.input.as_ptr() as usize;
161
162        let start = start.cursor.as_ptr() as usize - input;
163        let end = self.cursor_before_last_rtrim.as_ptr() as usize - input;
164        if start <= end {
165            Range { start, end }
166        } else {
167            // Can happen when generating errors when entering a combinator, before any parsing is
168            // done, which is the case for recursion checks
169            Range { start, end: start }
170        }
171    }
172
173    /// Generate a span from a starting position, without considering rtrims.
174    ///
175    /// The given input is the start of the span.
176    /// The end of the span is the current position of the cursor.
177    pub(crate) fn get_span_from_no_rtrim(&self, start: Position) -> Range<usize> {
178        let input = self.input.as_ptr() as usize;
179
180        Range {
181            start: start.cursor.as_ptr() as usize - input,
182            end: self.cursor.as_ptr() as usize - input,
183        }
184    }
185}
186
187impl<'a> nom::Input for Input<'a> {
188    type Item = char;
189    type Iter = std::str::Chars<'a>;
190    type IterIndices = std::str::CharIndices<'a>;
191
192    fn input_len(&self) -> usize {
193        self.cursor.input_len()
194    }
195
196    fn take(&self, count: usize) -> Self {
197        Self {
198            cursor: self.cursor.take(count),
199            ..*self
200        }
201    }
202
203    fn take_from(&self, count: usize) -> Self {
204        Self {
205            cursor: self.cursor.take_from(count),
206            ..*self
207        }
208    }
209
210    fn take_split(&self, count: usize) -> (Self, Self) {
211        let (suffix, prefix) = self.cursor.take_split(count);
212        (
213            Self {
214                cursor: suffix,
215                ..*self
216            },
217            Self {
218                cursor: prefix,
219                ..*self
220            },
221        )
222    }
223
224    fn position<P>(&self, predicate: P) -> Option<usize>
225    where
226        P: Fn(Self::Item) -> bool,
227    {
228        self.cursor.position(predicate)
229    }
230
231    fn iter_elements(&self) -> Self::Iter {
232        self.cursor.iter_elements()
233    }
234
235    fn iter_indices(&self) -> Self::IterIndices {
236        self.cursor.iter_indices()
237    }
238
239    fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
240        self.cursor.slice_index(count)
241    }
242
243    fn split_at_position<P, E: NomParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
244    where
245        P: Fn(Self::Item) -> bool,
246    {
247        match self.position(predicate) {
248            Some(n) => Ok(self.take_split(n)),
249            None => Err(Err::Incomplete(nom::Needed::new(1))),
250        }
251    }
252
253    fn split_at_position1<P, E: NomParseError<Self>>(
254        &self,
255        predicate: P,
256        e: ErrorKind,
257    ) -> IResult<Self, Self, E>
258    where
259        P: Fn(Self::Item) -> bool,
260    {
261        match self.position(predicate) {
262            Some(0) => Err(Err::Error(E::from_error_kind(*self, e))),
263            Some(n) => Ok(self.take_split(n)),
264            None => Err(Err::Incomplete(nom::Needed::new(1))),
265        }
266    }
267
268    fn split_at_position_complete<P, E: NomParseError<Self>>(
269        &self,
270        predicate: P,
271    ) -> IResult<Self, Self, E>
272    where
273        P: Fn(Self::Item) -> bool,
274    {
275        match self.split_at_position(predicate) {
276            Err(Err::Incomplete(_)) => Ok(self.take_split(self.input_len())),
277            res => res,
278        }
279    }
280
281    fn split_at_position1_complete<P, E: NomParseError<Self>>(
282        &self,
283        predicate: P,
284        e: ErrorKind,
285    ) -> IResult<Self, Self, E>
286    where
287        P: Fn(Self::Item) -> bool,
288    {
289        match self.position(predicate) {
290            Some(0) => Err(Err::Error(E::from_error_kind(*self, e))),
291            Some(n) => Ok(self.take_split(n)),
292            None => {
293                if self.input_len() == 0 {
294                    Err(Err::Error(E::from_error_kind(*self, e)))
295                } else {
296                    Ok(self.take_split(self.input_len()))
297                }
298            }
299        }
300    }
301}
302
303impl<'a> nom::FindSubstring<&'a str> for Input<'_> {
304    fn find_substring(&self, substr: &'a str) -> Option<usize> {
305        self.cursor.find_substring(substr)
306    }
307}
308
309impl<'a> Compare<&'a str> for Input<'_> {
310    fn compare(&self, t: &'a str) -> CompareResult {
311        self.cursor.compare(t)
312    }
313
314    fn compare_no_case(&self, t: &'a str) -> CompareResult {
315        self.cursor.compare_no_case(t)
316    }
317}
318
319impl nom::Offset for Input<'_> {
320    fn offset(&self, second: &Self) -> usize {
321        self.cursor.offset(second.cursor())
322    }
323}
324
325impl std::ops::Deref for Input<'_> {
326    type Target = str;
327
328    fn deref(&self) -> &Self::Target {
329        self.cursor
330    }
331}
332
#[cfg(test)]
mod tests {
    use nom::error::ErrorKind;
    use nom::{Compare, CompareResult, Input};

    use crate::error::Error;
    use crate::test_helpers::test_public_type;

    use super::Input as I;

    /// Rule text shared by most of the tests.
    const RULE: &str = "rule a { condition: true }";

    /// Check both the remaining text of an input and its offset in the whole input.
    #[track_caller]
    fn assert_state(input: I<'_>, cursor: &str, offset: usize) {
        assert_eq!(input.cursor(), cursor);
        assert_eq!(input.get_position_offset(), offset);
    }

    #[test]
    fn test_input_advance() {
        let mut input = I::new(RULE);

        // (count given to `advance`, expected remaining text, expected offset)
        let steps = [
            (0, RULE, 0),
            (1, "ule a { condition: true }", 1),
            (20, "rue }", 21),
            // Advancing past the end stops at the end of the input.
            (9, "", 26),
            (9, "", 26),
        ];
        for (count, cursor, offset) in steps {
            input.advance(count);
            assert_state(input, cursor, offset);
        }
    }

    #[test]
    fn test_input_strip_prefix() {
        let input = I::new(RULE);

        let after_keyword = input.strip_prefix("rule").unwrap();
        assert_state(after_keyword, " a { condition: true }", 4);

        assert!(after_keyword.strip_prefix("condition").is_none());

        let after_header = after_keyword.strip_prefix(" a { condition: ").unwrap();
        assert_state(after_header, "true }", 20);
    }

    #[test]
    fn test_input_take_trait() {
        let mut input = I::new(RULE);

        assert_state(input.take(15), "rule a { condit", 0);
        assert_state(input.take_from(15), "ion: true }", 15);

        input.advance(7);
        let (post, pre) = input.take_split(13);
        assert_state(pre, "{ condition: ", 7);
        assert_state(post, "true }", 20);
    }

    #[test]
    fn test_input_take_at_position_trait() {
        let mut input = I::new(RULE);
        input.advance(5);

        // Predicates: never matching, matching in the middle, matching the first char.
        let no_match = |c: char| c == '/';
        let colon = |c: char| c == ':';
        let first_char = |c: char| c == 'a';

        // split_at_position
        assert!(input.split_at_position::<_, Error>(no_match).is_err());

        let (post, pre) = input.split_at_position::<_, Error>(colon).unwrap();
        assert_state(pre, "a { condition", 5);
        assert_state(post, ": true }", 18);

        let (post, pre) = input.split_at_position::<_, Error>(first_char).unwrap();
        assert_state(pre, "", 5);
        assert_state(post, "a { condition: true }", 5);

        // split_at_position1
        assert!(input
            .split_at_position1::<_, Error>(no_match, ErrorKind::Char)
            .is_err());

        let (post, pre) = input
            .split_at_position1::<_, Error>(colon, ErrorKind::Char)
            .unwrap();
        assert_state(pre, "a { condition", 5);
        assert_state(post, ": true }", 18);

        assert!(input
            .split_at_position1::<_, Error>(first_char, ErrorKind::Char)
            .is_err());

        // split_at_position_complete
        let (post, pre) = input
            .split_at_position_complete::<_, Error>(no_match)
            .unwrap();
        assert_state(pre, "a { condition: true }", 5);
        assert_state(post, "", 26);

        let (post, pre) = input
            .split_at_position_complete::<_, Error>(colon)
            .unwrap();
        assert_state(pre, "a { condition", 5);
        assert_state(post, ": true }", 18);

        let (post, pre) = input
            .split_at_position_complete::<_, Error>(first_char)
            .unwrap();
        assert_state(pre, "", 5);
        assert_state(post, "a { condition: true }", 5);

        // split_at_position1_complete
        let (post, pre) = input
            .split_at_position1_complete::<_, Error>(no_match, ErrorKind::Char)
            .unwrap();
        assert_state(pre, "a { condition: true }", 5);
        assert_state(post, "", 26);

        let (post, pre) = input
            .split_at_position1_complete::<_, Error>(colon, ErrorKind::Char)
            .unwrap();
        assert_state(pre, "a { condition", 5);
        assert_state(post, ": true }", 18);

        assert!(input
            .split_at_position1_complete::<_, Error>(first_char, ErrorKind::Char)
            .is_err());
    }

    #[test]
    fn test_input_iter() {
        let (post, pre) = I::new("condition: true").take_split(7);

        let elements: Vec<char> = pre.iter_elements().collect();
        assert_eq!(elements, ['c', 'o', 'n', 'd', 'i', 't', 'i']);

        let indices: Vec<(usize, char)> = post.iter_indices().collect();
        assert_eq!(
            indices,
            [
                (0, 'o'),
                (1, 'n'),
                (2, ':'),
                (3, ' '),
                (4, 't'),
                (5, 'r'),
                (6, 'u'),
                (7, 'e'),
            ]
        );
    }

    #[test]
    fn test_compare_trait() {
        let mut input = I::new(RULE);
        input.advance(9);
        // Only the first five characters of "condition" are available here.
        let truncated = input.take(5);

        // (compared text, result on the full input, result on the truncated input)
        let case_sensitive = [
            ("true", CompareResult::Error, None),
            (
                "condition",
                CompareResult::Ok,
                Some(CompareResult::Incomplete),
            ),
            ("CONDITION", CompareResult::Error, Some(CompareResult::Error)),
        ];
        for (text, on_full, on_truncated) in case_sensitive {
            assert_eq!(input.compare(text), on_full);
            if let Some(expected) = on_truncated {
                assert_eq!(truncated.compare(text), expected);
            }
        }

        let case_insensitive = [
            ("true", CompareResult::Error, None),
            (
                "condition",
                CompareResult::Ok,
                Some(CompareResult::Incomplete),
            ),
            (
                "CONDITION",
                CompareResult::Ok,
                Some(CompareResult::Incomplete),
            ),
        ];
        for (text, on_full, on_truncated) in case_insensitive {
            assert_eq!(input.compare_no_case(text), on_full);
            if let Some(expected) = on_truncated {
                assert_eq!(truncated.compare_no_case(text), expected);
            }
        }
    }

    #[test]
    fn test_public_types() {
        let input = I::new(r"a");
        test_public_type(input);

        let position = I::new(r"a").pos();
        test_public_type(position);
    }
}