winnow_regex/
lib.rs

1#![doc = include_str!("../README.md")]
2pub use winnow;
3
4pub mod bytes;
5pub mod regex_trait;
6
7use core::fmt::Debug;
8use core::ops::{Index, Range};
9use regex_trait::*;
10use winnow::{
11    Parser,
12    error::{Needed, ParserError},
13    stream::{Offset, Stream, StreamIsPartial},
14};
15
16#[derive(Debug, Clone, thiserror::Error)]
17#[error(transparent)]
18pub enum Error {
19    Regex(#[from] regex::Error),
20}
21
/// A trait representing types that can be converted into a compiled regex.
///
/// This is used by the parser constructors to generically accept either pattern text
/// (`&str`, `String`) or an already-compiled regex object. Implementors are converted via
/// [`try_into_regex`](RegexPattern::try_into_regex), or via
/// [`into_regex`](RegexPattern::into_regex) when a panic on failure is acceptable.
///
/// # Associated Types
///
/// - `Error`: The error type returned if the conversion fails.
/// - `Output`: The compiled regex type produced by a successful conversion.
///
/// # Required Methods
///
/// - `try_into_regex(self) -> Result<Self::Output, Self::Error>`: Attempts to compile or
///   convert the input into a `Self::Output` value.
pub trait RegexPattern {
    /// Error returned when the pattern cannot be converted.
    type Error;
    /// Compiled regex type yielded on success.
    type Output;

    /// Attempts to compile or convert `self` into the compiled regex type.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the conversion fails.
    fn try_into_regex(self) -> Result<Self::Output, Self::Error>;

    /// Converts the pattern into a regex, panicking if it fails.
    ///
    /// # Panics
    ///
    /// Panics if [`try_into_regex`](RegexPattern::try_into_regex) returns an error; the
    /// panic message includes the `Debug` rendering of that error.
    fn into_regex(self) -> Self::Output
    where
        Self: Sized,
        Self::Error: Debug,
    {
        // The message is deliberately generic: this default is shared by every pattern
        // type, not only the byte-oriented ones.
        self.try_into_regex()
            .unwrap_or_else(|e| panic!("failed to compile regex: {:?}", e))
    }
}
55
56impl RegexPattern for &str {
57    type Error = Error;
58    type Output = regex::Regex;
59
60    #[inline(always)]
61    fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
62        Ok(Self::Output::new(self)?)
63    }
64}
65
66impl RegexPattern for String {
67    type Error = Error;
68    type Output = regex::Regex;
69
70    #[inline(always)]
71    fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
72        Ok(Self::Output::new(&self)?)
73    }
74}
75
76impl RegexPattern for regex::Regex {
77    type Error = Error;
78    type Output = regex::Regex;
79
80    #[inline(always)]
81    fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
82        Ok(self)
83    }
84}
85
86impl RegexPattern for regex::bytes::Regex {
87    type Error = Error;
88    type Output = regex::bytes::Regex;
89
90    #[inline(always)]
91    fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
92        Ok(self)
93    }
94}
95
96pub struct Captures<Slice, L>
97where
98    L: CaptureLocations,
99{
100    slice: Slice,
101    locs: L,
102}
103
104impl<Slice, L, T: ?Sized> AsRef<T> for Captures<Slice, L>
105where
106    Slice: AsRef<T>,
107    L: CaptureLocations,
108{
109    fn as_ref(&self) -> &T {
110        self.slice.as_ref()
111    }
112}
113
114impl<Slice, L> core::ops::Index<usize> for Captures<Slice, L>
115where
116    Slice: AsRef<L::Input>,
117    L: CaptureLocations,
118    L::Input: Index<Range<usize>, Output = L::Input>,
119{
120    type Output = L::Input;
121
122    fn index(&self, i: usize) -> &Self::Output {
123        let (start, end) = self.locs.get(i).expect("index out of bounds");
124        &self.slice.as_ref()[start..end]
125    }
126}
127
128pub struct RegexParser<'h, I, R, E>
129where
130    I: Stream + StreamIsPartial + Offset + Clone,
131    R: Regex<Haystack<'h> = <I as Stream>::Slice>,
132    E: ParserError<I>,
133{
134    re: R,
135    _marker: core::marker::PhantomData<(&'h (), I, E)>,
136}
137
138impl<'h, I, R, E> Parser<I, <I as Stream>::Slice, E> for RegexParser<'h, I, R, E>
139where
140    I: Stream + StreamIsPartial + Offset + Clone,
141    R: Regex<Haystack<'h> = <I as Stream>::Slice>,
142    E: ParserError<I>,
143{
144    fn parse_next(&mut self, input: &mut I) -> Result<<I as Stream>::Slice, E> {
145        if <I as StreamIsPartial>::is_partial_supported() {
146            captures_impl::<_, _, _, true>(input, &self.re)
147        } else {
148            captures_impl::<_, _, _, false>(input, &self.re)
149        }
150        .map(|caps| caps.slice)
151    }
152}
153
154pub struct CapturesParser<'h, I, R, E>
155where
156    I: Stream,
157    R: Regex,
158    E: ParserError<I>,
159{
160    re: R,
161    _marker: core::marker::PhantomData<(&'h (), I, E)>,
162}
163
164impl<'h, I, R, E> Parser<I, Captures<<I as Stream>::Slice, R::CaptureLocations>, E>
165    for CapturesParser<'h, I, R, E>
166where
167    I: Stream + StreamIsPartial + Offset + Clone,
168    R: Regex<Haystack<'h> = <I as Stream>::Slice>,
169    E: ParserError<I>,
170{
171    fn parse_next(
172        &mut self,
173        input: &mut I,
174    ) -> Result<Captures<<I as Stream>::Slice, R::CaptureLocations>, E> {
175        if <I as StreamIsPartial>::is_partial_supported() {
176            captures_impl::<_, _, _, true>(input, &self.re)
177        } else {
178            captures_impl::<_, _, _, false>(input, &self.re)
179        }
180    }
181}
182
183/// Creates a parser that matches input using a regular expression.
184///
185/// This parser takes a regular expression pattern (implementing [`RegexPattern`])
186/// and returns a parser that attempts to match from the **beginning** of the input.
187/// If the regular expression does not match at position 0, the parser fails.
188///
189/// Internally, this uses a precompiled [`Regex`] from the [`regex`] crate and supports
190/// both complete and partial input modes via the [`StreamIsPartial`] trait.
191///
192/// # Panics
193///
194/// Panics if the regex pattern fails to compile.
195///
196/// # Example
197///
198/// ```
199/// use winnow::prelude::*;
200/// use winnow_regex::regex;
201///
202/// fn digits<'i>(s: &mut &'i str) -> ModalResult<&'i str> {
203///     regex(r"^\d+").parse_next(s)
204/// }
205///
206/// assert_eq!(digits.parse_peek("42abc"), Ok(("abc", "42")));
207/// assert!(digits.parse_peek("abc42").is_err());
208///
209/// // Example with precompiled regex
210/// fn word<'i>(s: &mut &'i str) -> ModalResult<&'i str> {
211///     let re = regex::Regex::new(r"^\w+").unwrap();
212///     regex(re).parse_next(s)
213/// }
214///
215/// assert_eq!(word.parse_peek("hello world"), Ok((" world", "hello")));
216/// assert!(word.parse_peek("!hello").is_err());
217/// ```
218#[inline(always)]
219pub fn regex<'h, Input, Re, Error>(re: Re) -> RegexParser<'h, Input, Re::Output, Error>
220where
221    Input: StreamIsPartial + Stream + Offset + Clone,
222    Re: RegexPattern,
223    Re::Output: Regex<Haystack<'h> = <Input as Stream>::Slice>,
224    Re::Error: Debug,
225    Error: ParserError<Input> + 'static,
226{
227    let re = re.into_regex();
228
229    RegexParser {
230        re,
231        _marker: core::marker::PhantomData,
232    }
233}
234
235/// # Example
236/// ```
237/// use winnow::prelude::*;
238/// use winnow_regex::{captures, Captures};
239///
240/// fn digits<'i>(s: &mut &'i str) -> ModalResult<(i32, i32)> {
241///    captures(r"^(\d+)x(\d+)").map(|c: Captures<&str, _>| (c[1].parse().unwrap(), c[2].parse().unwrap())).parse_next(s)
242/// }
243///
244/// assert_eq!(digits.parse_peek("11x42abc"), Ok(("abc", (11, 42))));
245/// ```
246#[inline(always)]
247pub fn captures<'h, Input, Re, Error>(re: Re) -> CapturesParser<'h, Input, Re::Output, Error>
248where
249    Input: StreamIsPartial + Stream + Offset + Clone,
250    Re: RegexPattern,
251    Re::Output: Regex,
252    Re::Error: Debug,
253    Error: ParserError<Input> + 'static,
254{
255    let re = re.into_regex();
256
257    CapturesParser {
258        re,
259        _marker: core::marker::PhantomData,
260    }
261}
262
263fn captures_impl<'h, I, Re, E, const PARTIAL: bool>(
264    input: &mut I,
265    re: &Re,
266) -> Result<Captures<<I as Stream>::Slice, Re::CaptureLocations>, E>
267where
268    I: Stream + StreamIsPartial + Offset + Clone,
269    Re: Regex<Haystack<'h> = <I as Stream>::Slice>,
270    E: ParserError<I>,
271{
272    let hay = input.peek_finish();
273    let mut locs = re.capture_locations();
274
275    match re.captures_read(&mut locs, hay) {
276        Some((start, end)) if start == 0 => {
277            let len = end;
278            if PARTIAL && input.is_partial() && input.eof_offset() == end {
279                Err(E::incomplete(input, Needed::Unknown))
280            } else {
281                Ok(Captures {
282                    slice: input.next_slice(len),
283                    locs,
284                })
285            }
286        }
287        _ if PARTIAL && input.is_partial() => Err(E::incomplete(input, Needed::Unknown)),
288        _ => Err(ParserError::from_input(input)),
289    }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use winnow::error::{ContextError, EmptyError, ErrMode};
    use winnow::prelude::*;

    /// A hand-assembled `RegexParser` consumes the leading digits of a `&str`.
    #[test]
    fn regex_parser() {
        let digits = regex::Regex::new(r"^\d+").unwrap();
        let mut parser: RegexParser<&str, regex::Regex, EmptyError> = RegexParser {
            re: digits,
            _marker: core::marker::PhantomData,
        };
        let outcome = parser.parse_peek("42abc");
        assert_eq!(outcome, Ok(("abc", "42")));
    }

    /// The `regex` constructor accepts a string literal pattern.
    #[test]
    fn ok_with_literal_pattern() {
        fn digits<'i>(s: &mut &'i str) -> ModalResult<&'i str> {
            regex(r"^\d+").parse_next(s)
        }
        let outcome = digits.parse_peek("42xyz");
        assert_eq!(outcome, Ok(("xyz", "42")));
    }

    /// Multi-byte characters are matched and returned intact.
    #[test]
    fn unicode_partial() {
        let mut input = "あいう123";
        let matched = regex::<_, _, EmptyError>(r"^[ぁ-ん]+")
            .parse_next(&mut input)
            .unwrap();
        assert_eq!(matched, "あいう");
    }

    /// Partial byte input: a match followed by more data succeeds, while a match that
    /// reaches the end of the buffer reports `Incomplete`.
    #[test]
    fn partial() {
        use winnow::stream::Partial;
        fn partial<'i>(i: &mut Partial<&'i [u8]>) -> ModalResult<&'i [u8], ContextError> {
            let digits = regex::bytes::Regex::new(r"^\d+").unwrap();
            regex(digits).parse_next(i)
        }

        let with_tail = Partial::new(&b"123abc"[..]);
        assert_eq!(
            partial.parse_peek(with_tail),
            Ok((Partial::new(&b"abc"[..]), &b"123"[..]))
        );

        let digits_only = Partial::new(&b"123"[..]);
        assert_eq!(
            partial.parse_peek(digits_only),
            Err(ErrMode::Incomplete(Needed::Unknown))
        );
    }

    /// Sanity checks on the underlying `regex` crate's unanchored search behaviour.
    #[test]
    fn test_re() {
        let digits = regex::Regex::new(r"\d+").unwrap();
        assert!(digits.find_at("1abc123", 0).is_some());
        assert!(digits.find_at("1abc123", 1).is_some());
        assert!(digits.find("abc123").is_some());
    }
}