async_regex/
lib.rs

1//! # async-regex
2//! 
3//! **Empower regex with streaming capabilities!**
4//! 
5//! This crate provides high-performance streaming pattern search that leverages the power of regex
6//! for pattern matching while adding streaming capabilities for processing large data streams
7//! without loading everything into memory.
8//! 
9//! ## Key Features
10//! 
11//! - **Regex-powered pattern matching**: Uses the robust `regex` crate for pattern matching
12//! - **Streaming support**: Process data as it arrives without loading everything into memory
13//! - **Async and sync APIs**: Both async and synchronous versions available
14//! - **Multi-byte pattern support**: Unlike standard `read_until` which only supports single bytes
15//! - **Memory efficient**: Minimal memory footprint with zero-copy operations where possible
16//! 
17//! ## Why This Crate?
18//! 
//! The standard `std::io::BufRead::read_until` and `tokio::io::AsyncBufReadExt::read_until` methods
//! only support single-byte delimiters. This crate extends that functionality to support
//! multi-byte patterns using regex, making it well suited to parsing protocols, log files,
//! and other structured data streams.
23
24use std::io;
25use std::io::BufRead;
26use std::pin::Pin;
27
28use futures::{AsyncBufRead, AsyncBufReadExt};
29use regex::bytes::Regex;
30
/// The two-byte line terminator `\r\n` (carriage return followed by line feed).
pub const CRLF: &[u8] = b"\r\n";
33
34
35/// Read from a BufRead stream until a regex pattern is found.
36/// 
37/// This function extends the standard `read_until` functionality to support multi-byte patterns
38/// using regex. It reads data from the stream until the specified regex pattern is found,
39/// storing all data up to and including the match in the provided buffer.
40/// 
41/// # Arguments
42/// 
43/// * `reader` - A BufRead stream to read from
44/// * `pattern` - A regex pattern string to search for
45/// * `to` - Buffer to store the read data
46/// 
47/// # Returns
48/// 
49/// Returns a tuple containing:
50/// - The matched substring as bytes
51/// - The total number of bytes read
52/// 
53/// # Examples
54/// 
55/// ```rust
56/// use async_regex::read_until_pattern;
57/// use std::io::Cursor;
58/// 
59/// let mut reader = Cursor::new(b"hello world test pattern");
60/// let mut buffer = Vec::new();
61/// 
62/// let (matched, size) = read_until_pattern(&mut reader, r"\w+", &mut buffer).unwrap();
63/// assert_eq!(matched, b"hello");
64/// assert_eq!(buffer, b"hello");
65/// ```
66pub fn read_until_pattern(
67    reader: &mut impl BufRead,
68    pattern: &str,
69    to: &mut Vec<u8>,
70) -> io::Result<(Vec<u8>, usize)> {
71    if pattern.is_empty() {
72        return Ok((Vec::new(), 0));
73    }
74    let regex = Regex::new(pattern).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
75    let original_len = to.len();
76    let mut search_buffer = Vec::new();
77
78    loop {
79        let (found_pattern, consume_amount, matched_substring) = {
80            let available = reader.fill_buf()?;
81            if available.is_empty() {
82                (false, 0, Vec::new())
83            } else {
84                let available_len = available.len();
85                search_buffer.extend_from_slice(available);
86
87                if let Some((pos, match_len)) = find_pattern(&search_buffer, &regex) {
88                    let end_pos = pos + match_len;
89                    to.extend_from_slice(&search_buffer[..end_pos]);
90                    
91                    let matched = search_buffer[pos..end_pos].to_vec();
92                    let consume_from_current =
93                        end_pos.saturating_sub(search_buffer.len() - available_len);
94                    (true, consume_from_current, matched)
95                } else {
96                    let keep_size = (pattern.len() - 1).min(search_buffer.len());
97
98                    if search_buffer.len() > keep_size {
99                        let move_to_output = search_buffer.len() - keep_size;
100                        to.extend_from_slice(&search_buffer[..move_to_output]);
101                        search_buffer.drain(..move_to_output);
102                    }
103
104                    (false, available_len, Vec::new())
105                }
106            }
107        };
108
109        if found_pattern {
110            reader.consume(consume_amount);
111            return Ok((matched_substring, to.len() - original_len));
112        }
113
114        if consume_amount == 0 {
115            to.extend_from_slice(&search_buffer);
116            break;
117        }
118
119        reader.consume(consume_amount);
120    }
121
122    Ok((Vec::new(), to.len() - original_len))
123}
124
125/// Find a regex pattern in a byte slice.
126/// 
127/// This is a utility function that uses regex to find patterns in byte data.
128/// It's used internally by the streaming functions but can also be used directly
129/// for in-memory pattern matching.
130/// 
131/// # Arguments
132/// 
133/// * `haystack` - The byte slice to search in
134/// * `needle` - The compiled regex pattern to search for
135/// 
136/// # Returns
137/// 
138/// Returns `Some((start, length))` if the pattern is found, `None` otherwise.
139/// 
140/// # Examples
141/// 
142/// ```rust
143/// use async_regex::find_pattern;
144/// use regex::bytes::Regex;
145/// 
146/// let regex = Regex::new(r"\w+").unwrap();
147/// let data = b"hello world";
148/// 
149/// if let Some((start, len)) = find_pattern(data, &regex) {
150///     println!("Found pattern at position {} with length {}", start, len);
151/// }
152/// ```
153#[inline]
154pub fn find_pattern(haystack: &[u8], needle: &Regex) -> Option<(usize, usize)> {
155    needle.find(haystack).map(|m| (m.start(), m.len()))
156}
157
158/// Async version of `read_until_pattern` for streaming data.
159/// 
160/// This function provides the same functionality as `read_until_pattern` but works with
161/// async streams, making it perfect for processing network data, file streams, or any
162/// other async I/O operations.
163/// 
164/// # Arguments
165/// 
166/// * `reader` - An AsyncBufRead stream to read from
167/// * `pattern` - A regex pattern string to search for
168/// * `to` - Buffer to store the read data
169/// 
170/// # Returns
171/// 
172/// Returns a tuple containing:
173/// - The matched substring as bytes
174/// - The total number of bytes read
175/// 
176/// # Examples
177/// 
178/// ```rust
179/// use async_regex::read_until_pattern_async;
180/// use futures::io::Cursor;
181/// use tokio::runtime::Runtime;
182/// 
183/// let rt = Runtime::new().unwrap();
184/// rt.block_on(async {
185///     let mut reader = Cursor::new(b"hello world test pattern");
186///     let mut buffer = Vec::new();
187/// 
188///     let (matched, size) = read_until_pattern_async(&mut reader, r"\w+", &mut buffer).await.unwrap();
189///     assert_eq!(matched, b"hello");
190///     assert_eq!(buffer, b"hello");
191/// });
192/// ```
193pub async fn read_until_pattern_async<R: AsyncBufRead + Unpin>(
194    reader: &mut R,
195    pattern: &str,
196    to: &mut Vec<u8>,
197) -> io::Result<(Vec<u8>, usize)> {
198    if pattern.is_empty() {
199        return Ok((Vec::new(), 0));
200    }
201    let regex = Regex::new(pattern).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
202    let original_len = to.len();
203    let mut search_buffer = Vec::new();
204
205    loop {
206        let (found_pattern, consume_amount, matched_substring) = {
207            let available = reader.fill_buf().await?;
208            if available.is_empty() {
209                (false, 0, Vec::new())
210            } else {
211                let available_len = available.len();
212
213                search_buffer.extend_from_slice(available);
214
215                if let Some((pos, match_len)) = find_pattern(&search_buffer, &regex) {
216                    let end_pos = pos + match_len;
217                    to.extend_from_slice(&search_buffer[..end_pos]);
218                    
219                    // Extract the matched substring
220                    let matched = search_buffer[pos..end_pos].to_vec();
221
222                    let consume_from_current =
223                        end_pos.saturating_sub(search_buffer.len() - available_len);
224                    (true, consume_from_current, matched)
225                } else {
226                    let keep_size = (pattern.len() - 1).min(search_buffer.len());
227
228                    if search_buffer.len() > keep_size {
229                        let move_to_output = search_buffer.len() - keep_size;
230                        to.extend_from_slice(&search_buffer[..move_to_output]);
231                        search_buffer.drain(..move_to_output);
232                    }
233
234                    (false, available_len, Vec::new())
235                }
236            }
237        };
238
239        if found_pattern {
240            Pin::new(&mut *reader).consume(consume_amount);
241            return Ok((matched_substring, to.len() - original_len));
242        }
243
244        if consume_amount == 0 {
245            to.extend_from_slice(&search_buffer);
246            break;
247        }
248
249        Pin::new(&mut *reader).consume(consume_amount);
250    }
251
252    Ok((Vec::new(), to.len() - original_len))
253}
254
255
/// Read from a BufRead stream while the next byte belongs to a set of bytes.
///
/// Bytes are appended to `to` for as long as they are contained in `check_set`.
/// Reading stops at the first byte that is not in the set. That stop byte is
/// returned but is **not** consumed: it stays in the stream and is not written to
/// `to`. This holds for an empty `check_set` as well, in which case the very first
/// byte is the stop byte and nothing is read.
///
/// # Arguments
///
/// * `reader` - A BufRead stream to read from
/// * `check_set` - A slice of bytes to match against
/// * `to` - Buffer to store the read data
///
/// # Returns
///
/// Returns a tuple containing:
/// - The first byte that didn't match (the stop byte)
/// - The number of bytes appended to `to`
///
/// # Errors
///
/// Returns `ErrorKind::UnexpectedEof` if the stream ends before a stop byte is seen;
/// bytes accepted up to that point have already been appended to `to`. Any other I/O
/// error from the reader is forwarded, except `ErrorKind::Interrupted`, which is retried.
///
/// # Examples
///
/// ```rust
/// use async_regex::read_while_any;
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new(b"aaaaabbbccc");
/// let mut buffer = Vec::new();
/// let check_set = b"ab";
///
/// let (stop_byte, count) = read_while_any(&mut reader, check_set, &mut buffer).unwrap();
/// assert_eq!(buffer, b"aaaaabbb");
/// assert_eq!(stop_byte, b'c');
/// ```
pub fn read_while_any(
    reader: &mut impl BufRead,
    check_set: &[u8],
    to: &mut Vec<u8>,
) -> io::Result<(u8, usize)> {
    let original_len = to.len();

    // Membership table indexed by byte value, so each input byte costs one lookup.
    let mut lookup = [false; 256];
    for &byte in check_set {
        lookup[usize::from(byte)] = true;
    }

    loop {
        let available = match reader.fill_buf() {
            Ok(buf) => buf,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if available.is_empty() {
            return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
        }

        let first_rejected = available.iter().position(|&b| !lookup[usize::from(b)]);
        let (stop_byte, consume_amount) = match first_rejected {
            Some(pos) => {
                to.extend_from_slice(&available[..pos]);
                // Consume only the accepted prefix; the stop byte stays in the stream.
                (Some(available[pos]), pos)
            }
            None => {
                to.extend_from_slice(available);
                (None, available.len())
            }
        };

        reader.consume(consume_amount);

        if let Some(byte) = stop_byte {
            return Ok((byte, to.len() - original_len));
        }
    }
}
340
341/// Async version of `read_while_any` for streaming data.
342/// 
343/// This function provides the same functionality as `read_while_any` but works with
344/// async streams, making it perfect for processing network data, file streams, or any
345/// other async I/O operations.
346/// 
347/// # Arguments
348/// 
349/// * `reader` - An AsyncBufRead stream to read from
350/// * `check_set` - A slice of bytes to match against
351/// * `to` - Buffer to store the read data
352/// 
353/// # Returns
354/// 
355/// Returns a tuple containing:
356/// - The first byte that didn't match (the stop byte)
357/// - The number of bytes read
358/// 
359/// # Examples
360/// 
361/// ```rust
362/// use async_regex::read_while_any_async;
363/// use futures::io::Cursor;
364/// use tokio::runtime::Runtime;
365/// 
366/// let rt = Runtime::new().unwrap();
367/// rt.block_on(async {
368///     let mut reader = Cursor::new(b"aaaaabbbccc");
369///     let mut buffer = Vec::new();
370///     let check_set = b"ab";
371/// 
372///     let (stop_byte, count) = read_while_any_async(&mut reader, check_set, &mut buffer).await.unwrap();
373///     assert_eq!(buffer, b"aaaaabbb");
374///     assert_eq!(stop_byte, b'c');
375/// });
376/// ```
377pub async fn read_while_any_async<R: AsyncBufRead + Unpin>(
378    reader: &mut R,
379    check_set: &[u8],
380    to: &mut Vec<u8>,
381) -> io::Result<(u8, usize)> {
382    if check_set.is_empty() {
383        let available = reader.fill_buf().await?;
384        if available.is_empty() {
385            return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
386        }
387        let first_byte = available[0];
388        Pin::new(&mut *reader).consume(1);
389        return Ok((first_byte, 0));
390    }
391
392    let original_len = to.len();
393    let mut lookup = [false; 256];
394    for &byte in check_set {
395        lookup[byte as usize] = true;
396    }
397
398    loop {
399        let (stop_byte, consume_amount) = {
400            let available = reader.fill_buf().await?;
401            if available.is_empty() {
402                return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
403            }
404            let mut pos = 0;
405            while pos < available.len() {
406                let byte = available[pos];
407                if !lookup[byte as usize] {
408                    to.extend_from_slice(&available[..pos]);
409                    break;
410                }
411                pos += 1;
412            }
413
414            if pos < available.len() {
415                (Some(available[pos]), pos)
416            } else {
417                to.extend_from_slice(available);
418                (None, available.len())
419            }
420        };
421
422        Pin::new(&mut *reader).consume(consume_amount);
423
424        if let Some(byte) = stop_byte {
425            return Ok((byte, to.len() - original_len));
426        }
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use futures::executor::block_on;
433
434    use super::*;
435    use std::io;
436    use std::io::Cursor;
437    use std::str;
438    pub const CDATA_TAG: &str = "![CDATA[";
439    pub const COMMENT_TAG: &str = "!--";
440
441    #[test]
442    fn test_read_until_pattern() -> io::Result<()> {
443        let mut string = "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2 d:d2 />".as_bytes();
444        let mut to = Vec::new();
445        let (matched, size) = read_until_pattern(&mut string, "c:c2", &mut to)?;
446        str::from_utf8(&to).unwrap();
447        assert_eq!(
448            str::from_utf8(&to).unwrap(),
449            "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2"
450        );
451        assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2".len());
452        assert_eq!(matched, "c:c2".as_bytes());
453
454        let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
455        let mut to = Vec::new();
456        let (matched, size) = read_until_pattern(&mut string, "<anode", &mut to)?;
457        assert_eq!(str::from_utf8(&to).unwrap(), "<a b:b1 c:c1 d:d1 />\n<anode");
458        assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<anode".len());
459        assert_eq!(matched, "<anode".as_bytes());
460
461        let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
462        let mut to = Vec::new();
463        let (matched, size) = read_until_pattern(&mut string, "<bnode", &mut to)?;
464        assert_eq!(string, "".as_bytes());
465        assert_eq!(
466            to,
467            "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes()
468        );
469        assert_eq!(size, 45);
470        assert_eq!(matched, Vec::<u8>::new());
471
472        let mut string = "1-x-xx2".as_bytes();
473        let mut to = Vec::new();
474        let (matched, size) = read_until_pattern(&mut string, "-xx", &mut to)?;
475        assert_eq!(string, "2".as_bytes());
476        assert_eq!(to, "1-x-xx".as_bytes());
477        assert_eq!(size, 6);
478        assert_eq!(matched, "-xx".as_bytes());
479
480        let mut string = "$1131132$".as_bytes();
481        let mut to = Vec::new();
482        let (matched, size) = read_until_pattern(&mut string, "1132", &mut to)?;
483        assert_eq!(string, "$".as_bytes());
484        assert_eq!(to, "$1131132".as_bytes());
485        assert_eq!(size, 8);
486        assert_eq!(matched, "1132".as_bytes());
487
488        let mut string = "12".as_bytes();
489        let mut to = Vec::new();
490        let (matched, size) = read_until_pattern(&mut string, "13", &mut to)?;
491        assert_eq!(string, "".as_bytes());
492        assert_eq!(to, "12".as_bytes());
493        assert_eq!(size, 2);
494        assert_eq!(matched, Vec::<u8>::new());
495
496        let mut string = "222222".as_bytes();
497        let mut to = Vec::new();
498        let (matched, size) = read_until_pattern(&mut string, "33333333", &mut to)?;
499        assert_eq!(string, "".as_bytes());
500        assert_eq!(to, "222222".as_bytes());
501        assert_eq!(size, 6);
502        assert_eq!(matched, Vec::<u8>::new());
503
504        Ok(())
505    }
506
507    #[test]
508    fn test_read_until_pattern_async() -> io::Result<()> {
509        block_on(async {
510            let mut string = "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2 d:d2 />".as_bytes();
511            let mut to = Vec::new();
512            let (matched, size) = read_until_pattern_async(&mut string, "c:c2", &mut to)
513                .await
514                .unwrap();
515            str::from_utf8(&to).unwrap();
516            assert_eq!(
517                str::from_utf8(&to).unwrap(),
518                "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2"
519            );
520            assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2".len());
521            assert_eq!(matched, "c:c2".as_bytes());
522
523            let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
524            let mut to = Vec::new();
525            let (matched, size) = read_until_pattern_async(&mut string, "<anode", &mut to)
526                .await
527                .unwrap();
528            assert_eq!(str::from_utf8(&to).unwrap(), "<a b:b1 c:c1 d:d1 />\n<anode");
529            assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<anode".len());
530            assert_eq!(matched, "<anode".as_bytes());
531
532            let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
533            let mut to = Vec::new();
534            let (matched, size) = read_until_pattern_async(&mut string, "<bnode", &mut to)
535                .await
536                .unwrap();
537            assert_eq!(string, "".as_bytes());
538            assert_eq!(
539                to,
540                "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes()
541            );
542            assert_eq!(size, 45);
543            assert_eq!(matched, Vec::<u8>::new());
544
545            let mut string = "1-x-xx2".as_bytes();
546            let mut to = Vec::new();
547            let (matched, size) = read_until_pattern_async(&mut string, "-xx", &mut to)
548                .await
549                .unwrap();
550            assert_eq!(string, "2".as_bytes());
551            assert_eq!(to, "1-x-xx".as_bytes());
552            assert_eq!(size, 6);
553            assert_eq!(matched, "-xx".as_bytes());
554
555            let mut string = "$1131132$".as_bytes();
556            let mut to = Vec::new();
557            let (matched, size) = read_until_pattern_async(&mut string, "1132", &mut to)
558                .await
559                .unwrap();
560            assert_eq!(string, "$".as_bytes());
561            assert_eq!(to, "$1131132".as_bytes());
562            assert_eq!(size, 8);
563            assert_eq!(matched, "1132".as_bytes());
564
565            let mut string = "12".as_bytes();
566            let mut to = Vec::new();
567            let (matched, size) = read_until_pattern_async(&mut string, "13", &mut to)
568                .await
569                .unwrap();
570            assert_eq!(string, "".as_bytes());
571            assert_eq!(to, "12".as_bytes());
572            assert_eq!(size, 2);
573            assert_eq!(matched, Vec::<u8>::new());
574
575            let mut string = "222222".as_bytes();
576            let mut to = Vec::new();
577            let (matched, size) = read_until_pattern_async(&mut string, "33333333", &mut to)
578                .await
579                .unwrap();
580            assert_eq!(string, "".as_bytes());
581            assert_eq!(to, "222222".as_bytes());
582            assert_eq!(size, 6);
583            assert_eq!(matched, Vec::<u8>::new());
584        });
585
586        Ok(())
587    }
588
589    #[test]
590    fn test_read_until_pattern_matched_substring() -> io::Result<()> {
591        let mut string = "hello world test pattern".as_bytes();
592        let mut to = Vec::new();
593        let (matched, size) = read_until_pattern(&mut string, r"\w+", &mut to)?;
594        assert_eq!(str::from_utf8(&to).unwrap(), "hello");
595        assert_eq!(size, 5);
596        assert_eq!(matched, "hello".as_bytes());
597
598        let mut string = "email@example.com and more text".as_bytes();
599        let mut to = Vec::new();
600        let (matched, size) = read_until_pattern(&mut string, r"\w+@\w+\.\w+", &mut to)?;
601        assert_eq!(str::from_utf8(&to).unwrap(), "email@example.com");
602        assert_eq!(size, 17);
603        assert_eq!(matched, "email@example.com".as_bytes());
604
605        let mut string = "no match here".as_bytes();
606        let mut to = Vec::new();
607        let (matched, size) = read_until_pattern(&mut string, "xyz", &mut to)?;
608        assert_eq!(str::from_utf8(&to).unwrap(), "no match here");
609        assert_eq!(size, 13);
610        assert_eq!(matched, Vec::<u8>::new());
611
612        let mut string = "some text".as_bytes();
613        let mut to = Vec::new();
614        let (matched, size) = read_until_pattern(&mut string, "", &mut to)?;
615        assert_eq!(size, 0);
616        assert_eq!(matched, Vec::<u8>::new());
617
618        Ok(())
619    }
620
621    #[test]
622    fn test_read_until_pattern_async_matched_substring() -> io::Result<()> {
623        block_on(async {
624            // Test with regex pattern that matches multiple characters
625            let mut string = "hello world test pattern".as_bytes();
626            let mut to = Vec::new();
627            let (matched, size) = read_until_pattern_async(&mut string, r"\w+", &mut to)
628                .await
629                .unwrap();
630            assert_eq!(str::from_utf8(&to).unwrap(), "hello");
631            assert_eq!(size, 5);
632            assert_eq!(matched, "hello".as_bytes());
633
634            // Test with complex regex pattern
635            let mut string = "email@example.com and more text".as_bytes();
636            let mut to = Vec::new();
637            let (matched, size) = read_until_pattern_async(&mut string, r"\w+@\w+\.\w+", &mut to)
638                .await
639                .unwrap();
640            assert_eq!(str::from_utf8(&to).unwrap(), "email@example.com");
641            assert_eq!(size, 17);
642            assert_eq!(matched, "email@example.com".as_bytes());
643
644            // Test with pattern that doesn't match
645            let mut string = "no match here".as_bytes();
646            let mut to = Vec::new();
647            let (matched, size) = read_until_pattern_async(&mut string, "xyz", &mut to)
648                .await
649                .unwrap();
650            assert_eq!(str::from_utf8(&to).unwrap(), "no match here");
651            assert_eq!(size, 13);
652            assert_eq!(matched, Vec::<u8>::new());
653
654            // Test with empty pattern
655            let mut string = "some text".as_bytes();
656            let mut to = Vec::new();
657            let (matched, size) = read_until_pattern_async(&mut string, "", &mut to)
658                .await
659                .unwrap();
660            assert_eq!(size, 0);
661            assert_eq!(matched, Vec::<u8>::new());
662        });
663
664        Ok(())
665    }
666
667    #[test]
668    fn test_read_while_any() -> io::Result<()> {
669        let mut string = b"aaaaabbbccc" as &[u8];
670        let mut to = Vec::new();
671        let up_to = [b"a"[0], b"b"[0]];
672        let (byte, _) = read_while_any(&mut string, &up_to, &mut to)?;
673        assert_eq!(str::from_utf8(&to).unwrap(), "aaaaabbb");
674        assert_eq!(byte, "c".as_bytes()[0]);
675
676        let mut data = b"12345abc" as &[u8];
677        let mut result = Vec::new();
678        let digits = b"0123456789";
679        let (stop_byte, count) = read_while_any(&mut data, digits, &mut result)?;
680        assert_eq!(result, b"12345");
681        assert_eq!(stop_byte, b'a');
682        assert_eq!(count, 5);
683
684        let data = b"   \t\n  text";
685        let mut reader = Cursor::new(data);
686        let mut result = Vec::new();
687        let whitespace = b" \t\n\r";
688        let (stop_byte, count) = read_while_any(&mut reader, whitespace, &mut result)?;
689        assert_eq!(result, b"   \t\n  ");
690        assert_eq!(stop_byte, b't');
691        assert_eq!(count, 7);
692
693        let data = b"hello";
694        let mut reader = Cursor::new(data);
695        let mut result = Vec::new();
696        let empty_set = b"";
697        let (stop_byte, count) = read_while_any(&mut reader, empty_set, &mut result)?;
698        assert_eq!(result.len(), 0);
699        assert_eq!(stop_byte, b'h');
700        assert_eq!(count, 0);
701
702        Ok(())
703    }
704
705    #[test]
706    fn test_read_while_any_async() -> io::Result<()> {
707        block_on(async {
708            let mut string = b"aaaaabbbccc" as &[u8];
709            let mut to = Vec::new();
710            let up_to = [b"a"[0], b"b"[0]];
711            let (byte, _) = read_while_any_async(&mut string, &up_to, &mut to)
712                .await
713                .unwrap();
714            assert_eq!(str::from_utf8(&to).unwrap(), "aaaaabbb");
715            assert_eq!(byte, "c".as_bytes()[0]);
716
717            let mut data = b"12345abc" as &[u8];
718            let mut result = Vec::new();
719            let digits = b"0123456789";
720            let (stop_byte, count) = read_while_any_async(&mut data, digits, &mut result)
721                .await
722                .unwrap();
723            assert_eq!(result, b"12345");
724            assert_eq!(stop_byte, b'a');
725            assert_eq!(count, 5);
726
727            let mut data = b"   \t\n  text" as &[u8];
728            let mut result = Vec::new();
729            let whitespace = b" \t\n\r";
730            let (stop_byte, count) = read_while_any_async(&mut data, whitespace, &mut result)
731                .await
732                .unwrap();
733            assert_eq!(result, b"   \t\n  ");
734            assert_eq!(stop_byte, b't');
735            assert_eq!(count, 7);
736
737            let mut data = b"hello" as &[u8];
738            let mut result = Vec::new();
739            let empty_set = b"";
740            let (stop_byte, count) = read_while_any_async(&mut data, empty_set, &mut result)
741                .await
742                .unwrap();
743            assert_eq!(result.len(), 0);
744            assert_eq!(stop_byte, b'h');
745            assert_eq!(count, 0);
746        });
747
748        Ok(())
749    }
750
751    #[test]
752    fn test_read_until_any() -> io::Result<()> {
753        let mut string = b"123456789" as &[u8];
754        let mut to = Vec::new();
755        let check_set = "[43]";
756        let (sep, _) = read_until_pattern(&mut string, &check_set, &mut to)?;
757        assert_eq!(str::from_utf8(&to).unwrap(), "123");
758        assert_eq!(String::from_utf8(sep).unwrap(), "3");
759
760        let mut data = b"abc123def" as &[u8];
761        let mut result = Vec::new();
762        let digits_pattern = "[0-9]";
763        let (found_pattern, count) = read_until_pattern(&mut data, digits_pattern, &mut result)?;
764        assert_eq!(result, b"abc1");
765        assert_eq!(found_pattern, b"1");
766        assert_eq!(count, 4);
767        assert_eq!(data, b"23def");
768
769        let mut data = b"hello world!" as &[u8];
770        let mut result = Vec::new();
771        let punctuation_pattern = r"[!@#$%^&*(),.?]";
772        let (found_pattern, count) = read_until_pattern(&mut data, punctuation_pattern, &mut result)?;
773        assert_eq!(result, b"hello world!");
774        assert_eq!(found_pattern, b"!");
775        assert_eq!(count, 12);
776        assert_eq!(data, b"");
777
778        let mut data = b"!hello" as &[u8];
779        let mut result = Vec::new();
780        let punctuation_pattern = r"!";
781        let (found_pattern, count) = read_until_pattern(&mut data, punctuation_pattern, &mut result)?;
782        assert_eq!(result, b"!");
783        assert_eq!(found_pattern, b"!");
784        assert_eq!(count, 1);
785        assert_eq!(data, b"hello"); // Remaining data
786
787        let mut data = b"abc,def.ghi!" as &[u8];
788        let punctuation_pattern = r"[,.!]";
789        let mut result1 = Vec::new();
790        let (found1, _count1) = read_until_pattern(&mut data, punctuation_pattern, &mut result1)?;
791        assert_eq!(result1, b"abc,");
792        assert_eq!(found1, b",");
793        assert_eq!(data, b"def.ghi!");
794        let mut result2 = Vec::new();
795        let (found2, _count2) = read_until_pattern(&mut data, punctuation_pattern, &mut result2)?;
796        assert_eq!(result2, b"def.");
797        assert_eq!(found2, b".");
798        assert_eq!(data, b"ghi!");
799        let mut result3 = Vec::new();
800        let (found3, _count3) = read_until_pattern(&mut data, punctuation_pattern, &mut result3)?;
801        assert_eq!(result3, b"ghi!");
802        assert_eq!(found3, b"!");
803        assert_eq!(data, b"");
804
805        let mut data = b"hello world" as &[u8];
806        let mut result = Vec::new();
807        let digits_pattern = "[0-9]";
808        let (found_pattern, count) = read_until_pattern(&mut data, digits_pattern, &mut result)?;
809        assert_eq!(result, b"hello world");
810        assert_eq!(found_pattern, Vec::<u8>::new());
811        assert_eq!(count, 11);
812        assert_eq!(data, b"");
813
814        let mut data = b"hello" as &[u8];
815        let mut result = Vec::new();
816        let empty_pattern = "";
817        let (found_pattern, count) = read_until_pattern(&mut data, empty_pattern, &mut result)?;
818        assert_eq!(count, 0);
819        assert_eq!(found_pattern, Vec::<u8>::new());
820
821        Ok(())
822    }
823
824    #[test]
825    fn test_read_until_any_async() -> io::Result<()> {
826        block_on(async {
827            let mut string = b"123456789" as &[u8];
828            let mut to = Vec::new();
829            let check_set = "[43]";
830            let (sep, _) = read_until_pattern_async(&mut string, &check_set, &mut to)
831                .await
832                .unwrap();
833            assert_eq!(str::from_utf8(&to).unwrap(), "123");
834            assert_eq!(str::from_utf8(&sep).unwrap(), "3");
835
836            let mut data = b"abc123def" as &[u8];
837            let mut result = Vec::new();
838            let digits_pattern = "[0-9]";
839            let (found_pattern, count) = read_until_pattern_async(&mut data, digits_pattern, &mut result)
840                .await
841                .unwrap();
842            assert_eq!(result, b"abc1");
843            assert_eq!(found_pattern, b"1");
844            assert_eq!(count, 4);
845            assert_eq!(data, b"23def");
846
847            let mut data = b"hello world!" as &[u8];
848            let mut result = Vec::new();
849            let punctuation_pattern = r"[!@#$%^&*(),.?]";
850            let (found_pattern, count) = read_until_pattern_async(&mut data, punctuation_pattern, &mut result)
851                .await
852                .unwrap();
853            assert_eq!(result, b"hello world!");
854            assert_eq!(found_pattern, b"!");
855            assert_eq!(count, 12);
856            assert_eq!(data, b"");
857
858            let mut data = b"!hello" as &[u8];
859            let mut result = Vec::new();
860            let punctuation_pattern = r"!";
861            let (found_pattern, count) = read_until_pattern_async(&mut data, punctuation_pattern, &mut result)
862                .await
863                .unwrap();
864            assert_eq!(result, b"!");
865            assert_eq!(found_pattern, b"!");
866            assert_eq!(count, 1);
867            assert_eq!(data, b"hello"); // Remaining data
868
869            let mut data = b"abc,def.ghi!" as &[u8];
870            let punctuation_pattern = r"[,.!]";
871            let mut result1 = Vec::new();
872            let (found1, _count1) = read_until_pattern_async(&mut data, punctuation_pattern, &mut result1)
873                .await
874                .unwrap();
875            assert_eq!(result1, b"abc,");
876            assert_eq!(found1, b",");
877            assert_eq!(data, b"def.ghi!");
878            let mut result2 = Vec::new();
879            let (found2, _count2) = read_until_pattern_async(&mut data, punctuation_pattern, &mut result2)
880                .await
881                .unwrap();
882            assert_eq!(result2, b"def.");
883            assert_eq!(found2, b".");
884            assert_eq!(data, b"ghi!");
885            let mut result3 = Vec::new();
886            let (found3, _count3) = read_until_pattern_async(&mut data, punctuation_pattern, &mut result3)
887                .await
888                .unwrap();
889            assert_eq!(result3, b"ghi!");
890            assert_eq!(found3, b"!");
891            assert_eq!(data, b"");
892
893            let mut data = b"hello world" as &[u8];
894            let mut result = Vec::new();
895            let digits_pattern = "[0-9]";
896            let (found_pattern, count) = read_until_pattern_async(&mut data, digits_pattern, &mut result)
897                .await
898                .unwrap();
899            assert_eq!(result, b"hello world");
900            assert_eq!(found_pattern, Vec::<u8>::new());
901            assert_eq!(count, 11);
902            assert_eq!(data, b"");
903
904            let mut data = b"hello" as &[u8];
905            let mut result = Vec::new();
906            let empty_pattern = "";
907            let (found_pattern, count) = read_until_pattern_async(&mut data, empty_pattern, &mut result)
908                .await
909                .unwrap();
910            assert_eq!(count, 0);
911            assert_eq!(found_pattern, Vec::<u8>::new());
912        });
913
914        Ok(())
915    }
916
917    #[test]
918    fn test_read_until_any_pattern() -> io::Result<()> {
919        let mut string = "![CD!-![CDATA!--?abcd".as_bytes();
920        let mut to = Vec::new();
921        let pattern = format!("{}|{}|\\?", regex::escape(CDATA_TAG), regex::escape(COMMENT_TAG));
922        let (matched, size) = read_until_pattern(&mut string, &pattern, &mut to)?;
923        assert_eq!(str::from_utf8(&matched).unwrap(), COMMENT_TAG);
924        assert_eq!(size, "![CD!-![CDATA!--".len());
925        assert_eq!(str::from_utf8(&to).unwrap(), "![CD!-![CDATA!--");
926
927        let string = r##"<div class="1" >
928            <div class="2">
929                <div class="3">
930                    <div class="4"></div>
931                </div>
932            </div>
933            <div class="5"></div>
934        </div>
935        "##;
936
937        let tag = "div";
938        let mut to = vec![];
939        let open_pattern = format!("<{}", tag);
940        let close_pattern = format!("</{}>", tag);
941        let pattern = format!("{}|{}", regex::escape(&close_pattern), regex::escape(&open_pattern));
942        let reader = &mut string.as_bytes();
943        io::BufRead::read_until(reader, ">".as_bytes()[0], &mut to)?;
944        let (matched, _size) = read_until_pattern(reader, &pattern, &mut to)?;
945        assert_eq!(str::from_utf8(&matched).unwrap(), "<div");
946
947        let mut string = "$1131132$".as_bytes();
948        let mut to = Vec::new();
949        let pattern = "1132|115";
950        let (matched, size) = read_until_pattern(&mut string, pattern, &mut to)?;
951        assert_eq!(string, "$".as_bytes());
952        assert_eq!(to, "$1131132".as_bytes());
953        assert_eq!(size, 8);
954        assert_eq!(str::from_utf8(&matched).unwrap(), "1132");
955
956        let string = "<a />\
957            <b></b>\
958        </link>";
959        let mut to = Vec::new();
960        let pattern = "</link>|<link>";
961        let (matched, size) = read_until_pattern(&mut string.as_bytes(), pattern, &mut to)?;
962        assert_eq!(to, string.as_bytes());
963        assert_eq!(std::str::from_utf8(&matched).unwrap(), "</link>");
964        assert_eq!(size, string.len());
965
966        let mut string = "1</link>".as_bytes();
967        let mut to = Vec::new();
968        let pattern = "123|890";
969        let (matched, size) = read_until_pattern(&mut string, pattern, &mut to)?;
970        assert_eq!(to, "1</link>".as_bytes());
971        assert_eq!(size, 8);
972        assert_eq!(matched, Vec::<u8>::new());
973
974        Ok(())
975    }
976
977    #[test]
978    fn test_read_until_any_pattern_async() -> io::Result<()> {
979        block_on(async {
980            let mut string = "![CD!-![CDATA!--?abcd".as_bytes();
981            let mut to = Vec::new();
982            let pattern = format!("{}|{}|\\?", regex::escape(CDATA_TAG), regex::escape(COMMENT_TAG));
983            let (matched, size) = read_until_pattern_async(&mut string, &pattern, &mut to)
984                .await
985                .unwrap();
986            assert_eq!(str::from_utf8(&matched).unwrap(), COMMENT_TAG);
987            assert_eq!(size, "![CD!-![CDATA!--".len());
988            assert_eq!(str::from_utf8(&to).unwrap(), "![CD!-![CDATA!--");
989
990            let string = r##"<div class="1" >
991                <div class="2">
992                    <div class="3">
993                        <div class="4"></div>
994                    </div>
995                </div>
996                <div class="5"></div>
997            </div>
998            "##;
999
1000            let tag = "div";
1001            let mut to = vec![];
1002            let open_pattern = format!("<{}", tag);
1003            let close_pattern = format!("</{}>", tag);
1004            let pattern = format!("{}|{}", regex::escape(&close_pattern), regex::escape(&open_pattern));
1005            let reader = &mut string.as_bytes();
1006            io::BufRead::read_until(reader, ">".as_bytes()[0], &mut to).unwrap();
1007            let (matched, _size) = read_until_pattern_async(reader, &pattern, &mut to)
1008                .await
1009                .unwrap();
1010            assert_eq!(str::from_utf8(&matched).unwrap(), "<div");
1011
1012            let mut string = "$1131132$".as_bytes();
1013            let mut to = Vec::new();
1014            let pattern = "1132|115";
1015            let (matched, size) = read_until_pattern_async(&mut string, pattern, &mut to)
1016                .await
1017                .unwrap();
1018            assert_eq!(string, "$".as_bytes());
1019            assert_eq!(to, "$1131132".as_bytes());
1020            assert_eq!(size, 8);
1021            assert_eq!(str::from_utf8(&matched).unwrap(), "1132");
1022
1023            let string = "<a />\
1024                <b></b>\
1025            </link>";
1026            let mut to = Vec::new();
1027            let pattern = "</link>|<link>";
1028            let (matched, size) = read_until_pattern_async(&mut string.as_bytes(), pattern, &mut to)
1029                .await
1030                .unwrap();
1031            assert_eq!(to, string.as_bytes());
1032            assert_eq!(std::str::from_utf8(&matched).unwrap(), "</link>");
1033            assert_eq!(size, string.len());
1034
1035            let mut string = "1</link>".as_bytes();
1036            let mut to = Vec::new();
1037            let pattern = "123|890";
1038            let (matched, size) = read_until_pattern_async(&mut string, pattern, &mut to)
1039                .await
1040                .unwrap();
1041            assert_eq!(to, "1</link>".as_bytes());
1042            assert_eq!(size, 8);
1043            assert_eq!(matched, Vec::<u8>::new());
1044        });
1045
1046        Ok(())
1047    }
1048
1049}