1#![doc = include_str!("../README.md")]
2pub use winnow;
3
4pub mod bytes;
5pub mod regex_trait;
6
7use core::fmt::Debug;
8use core::ops::{Index, Range};
9use regex_trait::*;
10use winnow::{
11 Parser,
12 error::{Needed, ParserError},
13 stream::{Offset, Stream, StreamIsPartial},
14};
15
/// Error type used by the [`RegexPattern`] implementations in this file.
///
/// `#[error(transparent)]` forwards `Display` and `source()` to the wrapped
/// error, so this enum adds no message of its own.
#[derive(Debug, Clone, thiserror::Error)]
#[error(transparent)]
pub enum Error {
    /// Failure reported by the `regex` crate. `#[from]` generates the
    /// `From<regex::Error>` conversion that `?` relies on.
    Regex(#[from] regex::Error),
}
21
/// Conversion from a pattern-like value into a compiled regex.
///
/// The compiled type is chosen through [`RegexPattern::Output`] and the
/// failure type through [`RegexPattern::Error`].
pub trait RegexPattern {
    /// Error produced when the conversion fails.
    type Error;
    /// Compiled regex type produced when the conversion succeeds.
    type Output;

    /// Converts `self` into [`RegexPattern::Output`], reporting failure as `Err`.
    fn try_into_regex(self) -> Result<Self::Output, Self::Error>;

    /// Converts `self` into [`RegexPattern::Output`], panicking on failure.
    ///
    /// # Panics
    ///
    /// Panics with the `Debug` rendering of the error when
    /// [`RegexPattern::try_into_regex`] returns `Err`.
    // NOTE(review): the panic text says "bytes parser" although this trait is
    // also implemented for `&str`/`String` producing `regex::Regex`; wording
    // kept as-is here — confirm whether it should be made generic.
    fn into_regex(self) -> Self::Output
    where
        Self: Sized,
        Self::Error: Debug,
    {
        match self.try_into_regex() {
            Ok(compiled) => compiled,
            Err(err) => panic!("failed to compile regex for bytes parser: {:?}", err),
        }
    }
}
55
56impl RegexPattern for &str {
57 type Error = Error;
58 type Output = regex::Regex;
59
60 #[inline(always)]
61 fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
62 Ok(Self::Output::new(self)?)
63 }
64}
65
66impl RegexPattern for String {
67 type Error = Error;
68 type Output = regex::Regex;
69
70 #[inline(always)]
71 fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
72 Ok(Self::Output::new(&self)?)
73 }
74}
75
76impl RegexPattern for regex::Regex {
77 type Error = Error;
78 type Output = regex::Regex;
79
80 #[inline(always)]
81 fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
82 Ok(self)
83 }
84}
85
86impl RegexPattern for regex::bytes::Regex {
87 type Error = Error;
88 type Output = regex::bytes::Regex;
89
90 #[inline(always)]
91 fn try_into_regex(self) -> Result<Self::Output, Self::Error> {
92 Ok(self)
93 }
94}
95
96pub struct Captures<Slice, L>
97where
98 L: CaptureLocations,
99{
100 slice: Slice,
101 locs: L,
102}
103
104impl<Slice, L, T: ?Sized> AsRef<T> for Captures<Slice, L>
105where
106 Slice: AsRef<T>,
107 L: CaptureLocations,
108{
109 fn as_ref(&self) -> &T {
110 self.slice.as_ref()
111 }
112}
113
114impl<Slice, L> core::ops::Index<usize> for Captures<Slice, L>
115where
116 Slice: AsRef<L::Input>,
117 L: CaptureLocations,
118 L::Input: Index<Range<usize>, Output = L::Input>,
119{
120 type Output = L::Input;
121
122 fn index(&self, i: usize) -> &Self::Output {
123 let (start, end) = self.locs.get(i).expect("index out of bounds");
124 &self.slice.as_ref()[start..end]
125 }
126}
127
/// Parser that applies a regex to the remaining input and yields the matched
/// slice (its `Parser` implementation discards the capture locations).
///
/// `'h` is the haystack lifetime used in the `Regex::Haystack<'h>` bound, `I`
/// the input stream, `R` the regex type and `E` the parser error type.
pub struct RegexParser<'h, I, R, E>
where
    I: Stream + StreamIsPartial + Offset + Clone,
    R: Regex<Haystack<'h> = <I as Stream>::Slice>,
    E: ParserError<I>,
{
    /// Compiled regex run on every `parse_next` call.
    re: R,
    /// Zero-sized marker that ties `'h`, `I` and `E` to the type, since no
    /// field stores them.
    _marker: core::marker::PhantomData<(&'h (), I, E)>,
}
137
138impl<'h, I, R, E> Parser<I, <I as Stream>::Slice, E> for RegexParser<'h, I, R, E>
139where
140 I: Stream + StreamIsPartial + Offset + Clone,
141 R: Regex<Haystack<'h> = <I as Stream>::Slice>,
142 E: ParserError<I>,
143{
144 fn parse_next(&mut self, input: &mut I) -> Result<<I as Stream>::Slice, E> {
145 if <I as StreamIsPartial>::is_partial_supported() {
146 captures_impl::<_, _, _, true>(input, &self.re)
147 } else {
148 captures_impl::<_, _, _, false>(input, &self.re)
149 }
150 .map(|caps| caps.slice)
151 }
152}
153
/// Parser that applies a regex to the remaining input and yields a
/// [`Captures`] value (matched slice plus capture-group locations).
///
/// `'h` is the haystack lifetime used by the `Parser` implementation's
/// `Regex::Haystack<'h>` bound, `I` the input stream, `R` the regex type and
/// `E` the parser error type.
pub struct CapturesParser<'h, I, R, E>
where
    I: Stream,
    R: Regex,
    E: ParserError<I>,
{
    /// Compiled regex run on every `parse_next` call.
    re: R,
    /// Zero-sized marker that ties `'h`, `I` and `E` to the type, since no
    /// field stores them.
    _marker: core::marker::PhantomData<(&'h (), I, E)>,
}
163
164impl<'h, I, R, E> Parser<I, Captures<<I as Stream>::Slice, R::CaptureLocations>, E>
165 for CapturesParser<'h, I, R, E>
166where
167 I: Stream + StreamIsPartial + Offset + Clone,
168 R: Regex<Haystack<'h> = <I as Stream>::Slice>,
169 E: ParserError<I>,
170{
171 fn parse_next(
172 &mut self,
173 input: &mut I,
174 ) -> Result<Captures<<I as Stream>::Slice, R::CaptureLocations>, E> {
175 if <I as StreamIsPartial>::is_partial_supported() {
176 captures_impl::<_, _, _, true>(input, &self.re)
177 } else {
178 captures_impl::<_, _, _, false>(input, &self.re)
179 }
180 }
181}
182
183#[inline(always)]
219pub fn regex<'h, Input, Re, Error>(re: Re) -> RegexParser<'h, Input, Re::Output, Error>
220where
221 Input: StreamIsPartial + Stream + Offset + Clone,
222 Re: RegexPattern,
223 Re::Output: Regex<Haystack<'h> = <Input as Stream>::Slice>,
224 Re::Error: Debug,
225 Error: ParserError<Input> + 'static,
226{
227 let re = re.into_regex();
228
229 RegexParser {
230 re,
231 _marker: core::marker::PhantomData,
232 }
233}
234
235#[inline(always)]
247pub fn captures<'h, Input, Re, Error>(re: Re) -> CapturesParser<'h, Input, Re::Output, Error>
248where
249 Input: StreamIsPartial + Stream + Offset + Clone,
250 Re: RegexPattern,
251 Re::Output: Regex,
252 Re::Error: Debug,
253 Error: ParserError<Input> + 'static,
254{
255 let re = re.into_regex();
256
257 CapturesParser {
258 re,
259 _marker: core::marker::PhantomData,
260 }
261}
262
263fn captures_impl<'h, I, Re, E, const PARTIAL: bool>(
264 input: &mut I,
265 re: &Re,
266) -> Result<Captures<<I as Stream>::Slice, Re::CaptureLocations>, E>
267where
268 I: Stream + StreamIsPartial + Offset + Clone,
269 Re: Regex<Haystack<'h> = <I as Stream>::Slice>,
270 E: ParserError<I>,
271{
272 let hay = input.peek_finish();
273 let mut locs = re.capture_locations();
274
275 match re.captures_read(&mut locs, hay) {
276 Some((start, end)) if start == 0 => {
277 let len = end;
278 if PARTIAL && input.is_partial() && input.eof_offset() == end {
279 Err(E::incomplete(input, Needed::Unknown))
280 } else {
281 Ok(Captures {
282 slice: input.next_slice(len),
283 locs,
284 })
285 }
286 }
287 _ if PARTIAL && input.is_partial() => Err(E::incomplete(input, Needed::Unknown)),
288 _ => Err(ParserError::from_input(input)),
289 }
290}
291
// Unit tests for the regex-backed parsers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use winnow::error::{ContextError, EmptyError, ErrMode};
    use winnow::prelude::*;

    // Builds a `RegexParser` by hand (bypassing `regex()`) and checks that the
    // matched digits are split off the front of the input.
    #[test]
    fn regex_parser() {
        let mut p: RegexParser<&str, regex::Regex, EmptyError> = RegexParser {
            re: regex::Regex::new(r"^\d+").unwrap(),
            _marker: core::marker::PhantomData,
        };
        assert_eq!(p.parse_peek("42abc"), Ok(("abc", "42")));
    }

    // Same scenario through the public `regex()` constructor with a string
    // literal pattern, wrapped in a parser function.
    #[test]
    fn ok_with_literal_pattern() {
        fn digits<'i>(s: &mut &'i str) -> ModalResult<&'i str> {
            regex(r"^\d+").parse_next(s)
        }
        assert_eq!(digits.parse_peek("42xyz"), Ok(("xyz", "42")));
    }

    // Matches a multi-byte (hiragana) prefix. Despite the name, the input is a
    // plain `&str`, not a `Partial` stream.
    #[test]
    fn unicode_partial() {
        let mut s = "あいう123";
        let re = regex::<_, _, EmptyError>(r"^[ぁ-ん]+")
            .parse_next(&mut s)
            .unwrap();
        assert_eq!(re, "あいう");
    }

    // Exercises a `Partial` byte stream with a pre-compiled bytes regex.
    #[test]
    fn partial() {
        use winnow::stream::Partial;
        fn partial<'i>(i: &mut Partial<&'i [u8]>) -> ModalResult<&'i [u8], ContextError> {
            regex(regex::bytes::Regex::new(r"^\d+").unwrap()).parse_next(i)
        }
        // The match ends before the end of the buffer, so it is accepted.
        assert_eq!(
            partial.parse_peek(Partial::new(&b"123abc"[..])),
            Ok((Partial::new(&b"abc"[..]), &b"123"[..]))
        );
        // The match reaches the end of the buffer, so the parser reports
        // `Incomplete` instead of succeeding.
        assert_eq!(
            partial.parse_peek(Partial::new(&b"123"[..])),
            Err(ErrMode::Incomplete(Needed::Unknown))
        );
    }

    // Sanity check of the `regex` crate itself (`find_at` / `find` on an
    // unanchored pattern); no parser from this crate is involved.
    #[test]
    fn test_re() {
        let re = regex::Regex::new(r"\d+").unwrap();
        assert!(re.find_at("1abc123", 0).is_some());
        assert!(re.find_at("1abc123", 1).is_some());
        assert!(re.find("abc123").is_some());
    }
}