parcours/
str.rs

1//! Parsers for [`&str`] input.
2
3use crate::{from_fn, Combinator, Parser};
4
5/// Collect longest prefix of a [`&str`] whose bytes satisfy the given condition.
6pub fn take_while<S, F: FnMut(&u8, &mut S) -> bool>(f: F) -> TakeWhile<F> {
7    TakeWhile(f)
8}
9
10/// A parser returned by [`take_while`].
11#[derive(Clone)]
12pub struct TakeWhile<F>(F);
13
14impl<'a, S, F: FnMut(&u8, &mut S) -> bool> Parser<&'a str, S> for TakeWhile<F> {
15    type O = &'a str;
16
17    fn parse(mut self, input: &'a str, state: &mut S) -> Option<(Self::O, &'a str)> {
18        let len = input.bytes().take_while(|c| self.0(c, state)).count();
19        Some((&input[..len], &input[len..]))
20    }
21}
22
23type TakeWhile1<F> = crate::combinator::Filter<TakeWhile<F>, fn(&&str) -> bool>;
24
25/// Collect longest *non-empty* prefix of a [`&str`] whose bytes satisfy the given condition.
26///
27/// If the prefix is empty, this returns no output, unlike [`take_while`].
28pub fn take_while1<S, F: FnMut(&u8, &mut S) -> bool>(f: F) -> TakeWhile1<F> {
29    take_while(f).filter(|n| !n.is_empty())
30}
31
32/// If the input starts with the given string, return `()` and remove the string from the input.
33pub fn matches<'a, 'i: 'a, S>(x: &'a str) -> impl Parser<&'i str, S, O = ()> + Clone + 'a {
34    from_fn(move |input: &str, _| Some(((), input.strip_prefix(x)?)))
35}
36
/// Subtract one string slice from another.
///
/// The relation between the two slices is determined purely
/// by comparing their start and end addresses.
///
/// Case 1 (reading from the left; most frequent):
///
/// ~~~ text
///        large
/// -------------------
///             small
///        ------------===
/// \-----/
///    | what we want
/// ~~~
///
/// Case 2 (reading from the right):
///
/// ~~~ text
///           large
///    -------------------
///      small
/// ===------------
///                \-----/
///                   | what we want
/// ~~~
///
/// Here, the parts indicated by `===` are anomalies that are not expected to occur,
/// but which are supported nonetheless by this function.
///
/// Returns `None` if neither case applies, in particular if
/// `small` does not touch `large` at all, i.e. it lies
/// completely behind the end or completely before the start of `large`.
fn minus<'a>(large: &'a str, small: &'a str) -> Option<&'a str> {
    let small_start = small.as_ptr() as usize;
    let large_start = large.as_ptr() as usize;
    let small_end = small_start.wrapping_add(small.len());
    let large_end = large_start.wrapping_add(large.len());

    if small_start >= large_start && small_end >= large_end {
        // Case 1: the subtraction cannot underflow thanks to the check above.
        // If `small` starts behind the end of `large`, the index is
        // out of range and `get` yields `None` instead of panicking.
        large.get(..small_start - large_start)
    } else if small_start <= large_start && small_end <= large_end {
        // Case 2: if `small` ends before `large` even starts,
        // there is no overlap and `checked_sub` bails out with `None`.
        large.get(small_end.checked_sub(large_start)?..)
    } else {
        None
    }
}
78
/// Return the byte offset of the start of `inner` relative to the start of `outer`,
/// provided that `inner` starts inside `outer` (or directly at its end).
///
/// Only the start addresses of the two slices and the length of `outer`
/// are taken into account; the length of `inner` is irrelevant.
///
/// Example:
///
/// ~~~
/// let outer = "Hello world!";
/// assert_eq!(parcours::str::offset(outer, &outer[6..]), Some(6));
/// // here, `inner` exceeds `outer`, but it's okay, because
/// // the start of `inner` still lies within `outer`
/// assert_eq!(parcours::str::offset(&outer[..7], &outer[6..]), Some(6));
/// assert_eq!(parcours::str::offset(outer, "something else"), None);
/// ~~~
pub fn offset<'a>(outer: &'a str, inner: &'a str) -> Option<usize> {
    let first = outer.as_ptr() as usize;
    let last = first.wrapping_add(outer.len());
    let target = inner.as_ptr() as usize;

    if (first..=last).contains(&target) {
        Some(target.wrapping_sub(first))
    } else {
        None
    }
}
100
101/// Run the given parser and combine its output with the slice of the input string it consumed.
102///
103/// You can use this to find out via [`offset`] the span of the parsed element.
104///
105/// Example:
106///
107/// ~~~
108/// # use parcours::str::{take_while, with_consumed};
109/// # use parcours::{Parser, Combinator};
110/// let digits = take_while(|c, _| c.is_ascii_digit());
111/// let alphas = take_while(|c, _| c.is_ascii_alphabetic());
112/// let parser = with_consumed(digits.then(alphas));
113/// let input = "123abcäöü";
114/// let result = ("123", "abc");
115/// let consumed = "123abc";
116/// let rest = "äöü";
117/// assert_eq!(parser.parse(input, &mut ()), Some(((result, consumed), rest)));
118///
119/// ~~~
120pub fn with_consumed<'a, S, P: Parser<&'a str, S>>(p: P) -> WithConsumed<P> {
121    WithConsumed(p)
122}
123
124/// A parser returned by [`with_consumed`].
125#[derive(Clone)]
126pub struct WithConsumed<P>(P);
127
128impl<'a, S, P: Parser<&'a str, S>> Parser<&'a str, S> for WithConsumed<P> {
129    type O = (P::O, &'a str);
130
131    fn parse(self, input: &'a str, state: &mut S) -> Option<(Self::O, &'a str)> {
132        let (y, rest) = self.0.parse(input, state)?;
133        Some(((y, minus(input, rest)?), rest))
134    }
135}