1use std::io;
25use std::io::BufRead;
26use std::pin::Pin;
27
28use futures::{AsyncBufRead, AsyncBufReadExt};
29use regex::bytes::Regex;
30
/// Carriage return followed by line feed (`\r\n`) as raw bytes.
pub const CRLF: &[u8] = b"\r\n";
33
34
35pub fn read_until_pattern(
67 reader: &mut impl BufRead,
68 pattern: &str,
69 to: &mut Vec<u8>,
70) -> io::Result<(Vec<u8>, usize)> {
71 if pattern.is_empty() {
72 return Ok((Vec::new(), 0));
73 }
74 let regex = Regex::new(pattern).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
75 let original_len = to.len();
76 let mut search_buffer = Vec::new();
77
78 loop {
79 let (found_pattern, consume_amount, matched_substring) = {
80 let available = reader.fill_buf()?;
81 if available.is_empty() {
82 (false, 0, Vec::new())
83 } else {
84 let available_len = available.len();
85 search_buffer.extend_from_slice(available);
86
87 if let Some((pos, match_len)) = find_pattern(&search_buffer, ®ex) {
88 let end_pos = pos + match_len;
89 to.extend_from_slice(&search_buffer[..end_pos]);
90
91 let matched = search_buffer[pos..end_pos].to_vec();
92 let consume_from_current =
93 end_pos.saturating_sub(search_buffer.len() - available_len);
94 (true, consume_from_current, matched)
95 } else {
96 let keep_size = (pattern.len() - 1).min(search_buffer.len());
97
98 if search_buffer.len() > keep_size {
99 let move_to_output = search_buffer.len() - keep_size;
100 to.extend_from_slice(&search_buffer[..move_to_output]);
101 search_buffer.drain(..move_to_output);
102 }
103
104 (false, available_len, Vec::new())
105 }
106 }
107 };
108
109 if found_pattern {
110 reader.consume(consume_amount);
111 return Ok((matched_substring, to.len() - original_len));
112 }
113
114 if consume_amount == 0 {
115 to.extend_from_slice(&search_buffer);
116 break;
117 }
118
119 reader.consume(consume_amount);
120 }
121
122 Ok((Vec::new(), to.len() - original_len))
123}
124
125#[inline]
154pub fn find_pattern(haystack: &[u8], needle: &Regex) -> Option<(usize, usize)> {
155 needle.find(haystack).map(|m| (m.start(), m.len()))
156}
157
158pub async fn read_until_pattern_async<R: AsyncBufRead + Unpin>(
194 reader: &mut R,
195 pattern: &str,
196 to: &mut Vec<u8>,
197) -> io::Result<(Vec<u8>, usize)> {
198 if pattern.is_empty() {
199 return Ok((Vec::new(), 0));
200 }
201 let regex = Regex::new(pattern).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
202 let original_len = to.len();
203 let mut search_buffer = Vec::new();
204
205 loop {
206 let (found_pattern, consume_amount, matched_substring) = {
207 let available = reader.fill_buf().await?;
208 if available.is_empty() {
209 (false, 0, Vec::new())
210 } else {
211 let available_len = available.len();
212
213 search_buffer.extend_from_slice(available);
214
215 if let Some((pos, match_len)) = find_pattern(&search_buffer, ®ex) {
216 let end_pos = pos + match_len;
217 to.extend_from_slice(&search_buffer[..end_pos]);
218
219 let matched = search_buffer[pos..end_pos].to_vec();
221
222 let consume_from_current =
223 end_pos.saturating_sub(search_buffer.len() - available_len);
224 (true, consume_from_current, matched)
225 } else {
226 let keep_size = (pattern.len() - 1).min(search_buffer.len());
227
228 if search_buffer.len() > keep_size {
229 let move_to_output = search_buffer.len() - keep_size;
230 to.extend_from_slice(&search_buffer[..move_to_output]);
231 search_buffer.drain(..move_to_output);
232 }
233
234 (false, available_len, Vec::new())
235 }
236 }
237 };
238
239 if found_pattern {
240 Pin::new(&mut *reader).consume(consume_amount);
241 return Ok((matched_substring, to.len() - original_len));
242 }
243
244 if consume_amount == 0 {
245 to.extend_from_slice(&search_buffer);
246 break;
247 }
248
249 Pin::new(&mut *reader).consume(consume_amount);
250 }
251
252 Ok((Vec::new(), to.len() - original_len))
253}
254
255
/// Reads bytes from `reader` into `to` for as long as they belong to `check_set`.
///
/// Reading stops at the first byte that is *not* in `check_set`. That byte is returned but is
/// neither appended to `to` nor consumed, so the next read from `reader` starts with it.
///
/// When `check_set` is empty the behaviour is different: the first byte of the input is
/// consumed and returned, and nothing is appended to `to`.
///
/// # Returns
///
/// `(stop_byte, appended)` — the byte that ended the run and the number of bytes this call
/// added to `to`.
///
/// # Errors
///
/// * [`io::ErrorKind::UnexpectedEof`] if the input ends before a stop byte is seen. Bytes
///   accepted before the end of input remain in `to`.
/// * Any error reported by `reader.fill_buf()`, except [`io::ErrorKind::Interrupted`],
///   which is retried (the same policy as `BufRead::read_until`).
pub fn read_while_any(
    reader: &mut impl BufRead,
    check_set: &[u8],
    to: &mut Vec<u8>,
) -> io::Result<(u8, usize)> {
    if check_set.is_empty() {
        let first_byte = loop {
            match reader.fill_buf() {
                Ok(available) => match available.first() {
                    Some(&byte) => break byte,
                    None => {
                        return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"))
                    }
                },
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        };
        reader.consume(1);
        return Ok((first_byte, 0));
    }

    let original_len = to.len();

    // Byte-indexed membership table so each input byte is classified with one lookup.
    let mut lookup = [false; 256];
    for &byte in check_set {
        lookup[usize::from(byte)] = true;
    }

    loop {
        let (stop_byte, consume_amount) = {
            let available = match reader.fill_buf() {
                Ok(bytes) => bytes,
                // Nothing has been consumed for this iteration yet, so retrying is safe.
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if available.is_empty() {
                return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
            }

            match available.iter().position(|&byte| !lookup[usize::from(byte)]) {
                Some(pos) => {
                    // Accept the run before the stop byte; the stop byte itself stays unread.
                    to.extend_from_slice(&available[..pos]);
                    (Some(available[pos]), pos)
                }
                None => {
                    // The whole buffer belongs to the set; keep going with the next refill.
                    to.extend_from_slice(available);
                    (None, available.len())
                }
            }
        };

        reader.consume(consume_amount);

        if let Some(byte) = stop_byte {
            return Ok((byte, to.len() - original_len));
        }
    }
}
340
341pub async fn read_while_any_async<R: AsyncBufRead + Unpin>(
378 reader: &mut R,
379 check_set: &[u8],
380 to: &mut Vec<u8>,
381) -> io::Result<(u8, usize)> {
382 if check_set.is_empty() {
383 let available = reader.fill_buf().await?;
384 if available.is_empty() {
385 return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
386 }
387 let first_byte = available[0];
388 Pin::new(&mut *reader).consume(1);
389 return Ok((first_byte, 0));
390 }
391
392 let original_len = to.len();
393 let mut lookup = [false; 256];
394 for &byte in check_set {
395 lookup[byte as usize] = true;
396 }
397
398 loop {
399 let (stop_byte, consume_amount) = {
400 let available = reader.fill_buf().await?;
401 if available.is_empty() {
402 return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "EOF reached"));
403 }
404 let mut pos = 0;
405 while pos < available.len() {
406 let byte = available[pos];
407 if !lookup[byte as usize] {
408 to.extend_from_slice(&available[..pos]);
409 break;
410 }
411 pos += 1;
412 }
413
414 if pos < available.len() {
415 (Some(available[pos]), pos)
416 } else {
417 to.extend_from_slice(available);
418 (None, available.len())
419 }
420 };
421
422 Pin::new(&mut *reader).consume(consume_amount);
423
424 if let Some(byte) = stop_byte {
425 return Ok((byte, to.len() - original_len));
426 }
427 }
428}
429
#[cfg(test)]
mod tests {
    // Tests for the pattern- and set-driven readers.
    //
    // Most cases read from an in-memory slice, which hands over all of its data in a single
    // `fill_buf` call. The `*_chunked` tests wrap the data in a small `BufReader` so that the
    // functions see the input in several refills and the carry-over logic is exercised too.
    use futures::executor::block_on;

    use super::*;
    use std::io;
    use std::io::Cursor;
    use std::io::{BufReader, Read};
    use std::str;
    pub const CDATA_TAG: &str = "![CDATA[";
    pub const COMMENT_TAG: &str = "!--";

    /// Drains whatever `reader` still holds, so a test can assert where a call stopped.
    fn remaining(reader: &mut impl Read) -> io::Result<Vec<u8>> {
        let mut rest = Vec::new();
        reader.read_to_end(&mut rest)?;
        Ok(rest)
    }

    #[test]
    fn test_read_until_pattern() -> io::Result<()> {
        // Literal pattern that only occurs in the second element.
        let mut string = "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2 d:d2 />".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "c:c2", &mut to)?;
        assert_eq!(
            str::from_utf8(&to).unwrap(),
            "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2"
        );
        assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2".len());
        assert_eq!(matched, "c:c2".as_bytes());

        let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "<anode", &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "<a b:b1 c:c1 d:d1 />\n<anode");
        assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<anode".len());
        assert_eq!(matched, "<anode".as_bytes());

        // No match: the whole input is copied and the reader is exhausted.
        let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "<bnode", &mut to)?;
        assert_eq!(string, "".as_bytes());
        assert_eq!(
            to,
            "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes()
        );
        assert_eq!(size, 45);
        assert_eq!(matched, Vec::<u8>::new());

        // A partial occurrence of the pattern precedes the real one.
        let mut string = "1-x-xx2".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "-xx", &mut to)?;
        assert_eq!(string, "2".as_bytes());
        assert_eq!(to, "1-x-xx".as_bytes());
        assert_eq!(size, 6);
        assert_eq!(matched, "-xx".as_bytes());

        let mut string = "$1131132$".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "1132", &mut to)?;
        assert_eq!(string, "$".as_bytes());
        assert_eq!(to, "$1131132".as_bytes());
        assert_eq!(size, 8);
        assert_eq!(matched, "1132".as_bytes());

        let mut string = "12".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "13", &mut to)?;
        assert_eq!(string, "".as_bytes());
        assert_eq!(to, "12".as_bytes());
        assert_eq!(size, 2);
        assert_eq!(matched, Vec::<u8>::new());

        // Pattern longer than the input.
        let mut string = "222222".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "33333333", &mut to)?;
        assert_eq!(string, "".as_bytes());
        assert_eq!(to, "222222".as_bytes());
        assert_eq!(size, 6);
        assert_eq!(matched, Vec::<u8>::new());

        Ok(())
    }

    #[test]
    fn test_read_until_pattern_chunked() -> io::Result<()> {
        // The match straddles the boundary between the first and the second 4-byte refill.
        let mut reader = BufReader::with_capacity(4, "$1131132$".as_bytes());
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut reader, "1132", &mut to)?;
        assert_eq!(to, "$1131132".as_bytes());
        assert_eq!(size, 8);
        assert_eq!(matched, "1132".as_bytes());
        assert_eq!(remaining(&mut reader)?, "$".as_bytes());

        // A partial occurrence ends the first refill; the real match is in the second.
        let mut reader = BufReader::with_capacity(3, "1-x-xx2".as_bytes());
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut reader, "-xx", &mut to)?;
        assert_eq!(to, "1-x-xx".as_bytes());
        assert_eq!(size, 6);
        assert_eq!(matched, "-xx".as_bytes());
        assert_eq!(remaining(&mut reader)?, "2".as_bytes());

        // The match ends in the middle of a refill: the rest of that refill must stay unread.
        let mut reader = BufReader::with_capacity(4, "abcXYZdefgh".as_bytes());
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut reader, "XYZ", &mut to)?;
        assert_eq!(to, "abcXYZ".as_bytes());
        assert_eq!(size, 6);
        assert_eq!(matched, "XYZ".as_bytes());
        assert_eq!(remaining(&mut reader)?, "defgh".as_bytes());

        // No match over several refills: everything ends up in the output exactly once.
        let mut reader = BufReader::with_capacity(4, "222222".as_bytes());
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut reader, "33", &mut to)?;
        assert_eq!(to, "222222".as_bytes());
        assert_eq!(size, 6);
        assert_eq!(matched, Vec::<u8>::new());
        assert_eq!(remaining(&mut reader)?, "".as_bytes());

        Ok(())
    }

    #[test]
    fn test_read_until_pattern_async() -> io::Result<()> {
        block_on(async {
            let mut string = "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2 d:d2 />".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "c:c2", &mut to)
                .await
                .unwrap();
            assert_eq!(
                str::from_utf8(&to).unwrap(),
                "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2"
            );
            assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<a b:b2 c:c2".len());
            assert_eq!(matched, "c:c2".as_bytes());

            let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "<anode", &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "<a b:b1 c:c1 d:d1 />\n<anode");
            assert_eq!(size, "<a b:b1 c:c1 d:d1 />\n<anode".len());
            assert_eq!(matched, "<anode".as_bytes());

            // No match: the whole input is copied and the reader is exhausted.
            let mut string = "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "<bnode", &mut to)
                .await
                .unwrap();
            assert_eq!(string, "".as_bytes());
            assert_eq!(
                to,
                "<a b:b1 c:c1 d:d1 />\n<anode b:b2 c:c2 d:d2 />".as_bytes()
            );
            assert_eq!(size, 45);
            assert_eq!(matched, Vec::<u8>::new());

            // A partial occurrence of the pattern precedes the real one.
            let mut string = "1-x-xx2".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "-xx", &mut to)
                .await
                .unwrap();
            assert_eq!(string, "2".as_bytes());
            assert_eq!(to, "1-x-xx".as_bytes());
            assert_eq!(size, 6);
            assert_eq!(matched, "-xx".as_bytes());

            let mut string = "$1131132$".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "1132", &mut to)
                .await
                .unwrap();
            assert_eq!(string, "$".as_bytes());
            assert_eq!(to, "$1131132".as_bytes());
            assert_eq!(size, 8);
            assert_eq!(matched, "1132".as_bytes());

            let mut string = "12".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "13", &mut to)
                .await
                .unwrap();
            assert_eq!(string, "".as_bytes());
            assert_eq!(to, "12".as_bytes());
            assert_eq!(size, 2);
            assert_eq!(matched, Vec::<u8>::new());

            // Pattern longer than the input.
            let mut string = "222222".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "33333333", &mut to)
                .await
                .unwrap();
            assert_eq!(string, "".as_bytes());
            assert_eq!(to, "222222".as_bytes());
            assert_eq!(size, 6);
            assert_eq!(matched, Vec::<u8>::new());
        });

        Ok(())
    }

    #[test]
    fn test_read_until_pattern_matched_substring() -> io::Result<()> {
        // The returned bytes are what the regex matched, not the pattern text.
        let mut string = "hello world test pattern".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, r"\w+", &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "hello");
        assert_eq!(size, 5);
        assert_eq!(matched, "hello".as_bytes());

        let mut string = "email@example.com and more text".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, r"\w+@\w+\.\w+", &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "email@example.com");
        assert_eq!(size, 17);
        assert_eq!(matched, "email@example.com".as_bytes());

        let mut string = "no match here".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "xyz", &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "no match here");
        assert_eq!(size, 13);
        assert_eq!(matched, Vec::<u8>::new());

        // An empty pattern is a no-op.
        let mut string = "some text".as_bytes();
        let mut to = Vec::new();
        let (matched, size) = read_until_pattern(&mut string, "", &mut to)?;
        assert_eq!(size, 0);
        assert_eq!(matched, Vec::<u8>::new());

        Ok(())
    }

    #[test]
    fn test_read_until_pattern_async_matched_substring() -> io::Result<()> {
        block_on(async {
            // The returned bytes are what the regex matched, not the pattern text.
            let mut string = "hello world test pattern".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, r"\w+", &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "hello");
            assert_eq!(size, 5);
            assert_eq!(matched, "hello".as_bytes());

            let mut string = "email@example.com and more text".as_bytes();
            let mut to = Vec::new();
            let (matched, size) =
                read_until_pattern_async(&mut string, r"\w+@\w+\.\w+", &mut to)
                    .await
                    .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "email@example.com");
            assert_eq!(size, 17);
            assert_eq!(matched, "email@example.com".as_bytes());

            let mut string = "no match here".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "xyz", &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "no match here");
            assert_eq!(size, 13);
            assert_eq!(matched, Vec::<u8>::new());

            // An empty pattern is a no-op.
            let mut string = "some text".as_bytes();
            let mut to = Vec::new();
            let (matched, size) = read_until_pattern_async(&mut string, "", &mut to)
                .await
                .unwrap();
            assert_eq!(size, 0);
            assert_eq!(matched, Vec::<u8>::new());
        });

        Ok(())
    }

    #[test]
    fn test_read_while_any() -> io::Result<()> {
        let mut string = b"aaaaabbbccc" as &[u8];
        let mut to = Vec::new();
        let up_to = [b"a"[0], b"b"[0]];
        let (byte, _) = read_while_any(&mut string, &up_to, &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "aaaaabbb");
        assert_eq!(byte, "c".as_bytes()[0]);

        let mut data = b"12345abc" as &[u8];
        let mut result = Vec::new();
        let digits = b"0123456789";
        let (stop_byte, count) = read_while_any(&mut data, digits, &mut result)?;
        assert_eq!(result, b"12345");
        assert_eq!(stop_byte, b'a');
        assert_eq!(count, 5);

        let data = b"   \t\n  text";
        let mut reader = Cursor::new(data);
        let mut result = Vec::new();
        let whitespace = b" \t\n\r";
        let (stop_byte, count) = read_while_any(&mut reader, whitespace, &mut result)?;
        assert_eq!(result, b"   \t\n  ");
        assert_eq!(stop_byte, b't');
        assert_eq!(count, 7);

        // An empty set returns the first byte and copies nothing.
        let data = b"hello";
        let mut reader = Cursor::new(data);
        let mut result = Vec::new();
        let empty_set = b"";
        let (stop_byte, count) = read_while_any(&mut reader, empty_set, &mut result)?;
        assert_eq!(result.len(), 0);
        assert_eq!(stop_byte, b'h');
        assert_eq!(count, 0);

        Ok(())
    }

    #[test]
    fn test_read_while_any_chunked() -> io::Result<()> {
        // The accepted run spans two full 4-byte refills; the stop byte opens the third.
        let mut reader = BufReader::with_capacity(4, &b"aaaaabbbccc"[..]);
        let mut to = Vec::new();
        let (byte, count) = read_while_any(&mut reader, b"ab", &mut to)?;
        assert_eq!(to, b"aaaaabbb");
        assert_eq!(byte, b'c');
        assert_eq!(count, 8);
        assert_eq!(remaining(&mut reader)?, b"ccc");

        Ok(())
    }

    #[test]
    fn test_read_while_any_eof() {
        // Input that consists only of accepted bytes ends in an error.
        let mut data = b"aaa" as &[u8];
        let mut result = Vec::new();
        let err = read_while_any(&mut data, b"a", &mut result).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
        assert_eq!(result, b"aaa");
    }

    #[test]
    fn test_read_while_any_async() -> io::Result<()> {
        block_on(async {
            let mut string = b"aaaaabbbccc" as &[u8];
            let mut to = Vec::new();
            let up_to = [b"a"[0], b"b"[0]];
            let (byte, _) = read_while_any_async(&mut string, &up_to, &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "aaaaabbb");
            assert_eq!(byte, "c".as_bytes()[0]);

            let mut data = b"12345abc" as &[u8];
            let mut result = Vec::new();
            let digits = b"0123456789";
            let (stop_byte, count) = read_while_any_async(&mut data, digits, &mut result)
                .await
                .unwrap();
            assert_eq!(result, b"12345");
            assert_eq!(stop_byte, b'a');
            assert_eq!(count, 5);

            let mut data = b"   \t\n  text" as &[u8];
            let mut result = Vec::new();
            let whitespace = b" \t\n\r";
            let (stop_byte, count) = read_while_any_async(&mut data, whitespace, &mut result)
                .await
                .unwrap();
            assert_eq!(result, b"   \t\n  ");
            assert_eq!(stop_byte, b't');
            assert_eq!(count, 7);

            // An empty set returns the first byte and copies nothing.
            let mut data = b"hello" as &[u8];
            let mut result = Vec::new();
            let empty_set = b"";
            let (stop_byte, count) = read_while_any_async(&mut data, empty_set, &mut result)
                .await
                .unwrap();
            assert_eq!(result.len(), 0);
            assert_eq!(stop_byte, b'h');
            assert_eq!(count, 0);
        });

        Ok(())
    }

    #[test]
    fn test_read_until_any() -> io::Result<()> {
        // Character classes: stop at the first byte that is any member of the class.
        let mut string = b"123456789" as &[u8];
        let mut to = Vec::new();
        let check_set = "[43]";
        let (sep, _) = read_until_pattern(&mut string, check_set, &mut to)?;
        assert_eq!(str::from_utf8(&to).unwrap(), "123");
        assert_eq!(String::from_utf8(sep).unwrap(), "3");

        let mut data = b"abc123def" as &[u8];
        let mut result = Vec::new();
        let digits_pattern = "[0-9]";
        let (found_pattern, count) = read_until_pattern(&mut data, digits_pattern, &mut result)?;
        assert_eq!(result, b"abc1");
        assert_eq!(found_pattern, b"1");
        assert_eq!(count, 4);
        assert_eq!(data, b"23def");

        // Match on the very last byte.
        let mut data = b"hello world!" as &[u8];
        let mut result = Vec::new();
        let punctuation_pattern = r"[!@#$%^&*(),.?]";
        let (found_pattern, count) =
            read_until_pattern(&mut data, punctuation_pattern, &mut result)?;
        assert_eq!(result, b"hello world!");
        assert_eq!(found_pattern, b"!");
        assert_eq!(count, 12);
        assert_eq!(data, b"");

        // Match on the very first byte.
        let mut data = b"!hello" as &[u8];
        let mut result = Vec::new();
        let punctuation_pattern = r"!";
        let (found_pattern, count) =
            read_until_pattern(&mut data, punctuation_pattern, &mut result)?;
        assert_eq!(result, b"!");
        assert_eq!(found_pattern, b"!");
        assert_eq!(count, 1);
        assert_eq!(data, b"hello");

        // Repeated calls on the same reader resume right after the previous match.
        let mut data = b"abc,def.ghi!" as &[u8];
        let punctuation_pattern = r"[,.!]";
        let mut result1 = Vec::new();
        let (found1, _count1) = read_until_pattern(&mut data, punctuation_pattern, &mut result1)?;
        assert_eq!(result1, b"abc,");
        assert_eq!(found1, b",");
        assert_eq!(data, b"def.ghi!");
        let mut result2 = Vec::new();
        let (found2, _count2) = read_until_pattern(&mut data, punctuation_pattern, &mut result2)?;
        assert_eq!(result2, b"def.");
        assert_eq!(found2, b".");
        assert_eq!(data, b"ghi!");
        let mut result3 = Vec::new();
        let (found3, _count3) = read_until_pattern(&mut data, punctuation_pattern, &mut result3)?;
        assert_eq!(result3, b"ghi!");
        assert_eq!(found3, b"!");
        assert_eq!(data, b"");

        let mut data = b"hello world" as &[u8];
        let mut result = Vec::new();
        let digits_pattern = "[0-9]";
        let (found_pattern, count) = read_until_pattern(&mut data, digits_pattern, &mut result)?;
        assert_eq!(result, b"hello world");
        assert_eq!(found_pattern, Vec::<u8>::new());
        assert_eq!(count, 11);
        assert_eq!(data, b"");

        let mut data = b"hello" as &[u8];
        let mut result = Vec::new();
        let empty_pattern = "";
        let (found_pattern, count) = read_until_pattern(&mut data, empty_pattern, &mut result)?;
        assert_eq!(count, 0);
        assert_eq!(found_pattern, Vec::<u8>::new());

        Ok(())
    }

    #[test]
    fn test_read_until_any_async() -> io::Result<()> {
        block_on(async {
            // Character classes: stop at the first byte that is any member of the class.
            let mut string = b"123456789" as &[u8];
            let mut to = Vec::new();
            let check_set = "[43]";
            let (sep, _) = read_until_pattern_async(&mut string, check_set, &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&to).unwrap(), "123");
            assert_eq!(str::from_utf8(&sep).unwrap(), "3");

            let mut data = b"abc123def" as &[u8];
            let mut result = Vec::new();
            let digits_pattern = "[0-9]";
            let (found_pattern, count) =
                read_until_pattern_async(&mut data, digits_pattern, &mut result)
                    .await
                    .unwrap();
            assert_eq!(result, b"abc1");
            assert_eq!(found_pattern, b"1");
            assert_eq!(count, 4);
            assert_eq!(data, b"23def");

            // Match on the very last byte.
            let mut data = b"hello world!" as &[u8];
            let mut result = Vec::new();
            let punctuation_pattern = r"[!@#$%^&*(),.?]";
            let (found_pattern, count) =
                read_until_pattern_async(&mut data, punctuation_pattern, &mut result)
                    .await
                    .unwrap();
            assert_eq!(result, b"hello world!");
            assert_eq!(found_pattern, b"!");
            assert_eq!(count, 12);
            assert_eq!(data, b"");

            // Match on the very first byte.
            let mut data = b"!hello" as &[u8];
            let mut result = Vec::new();
            let punctuation_pattern = r"!";
            let (found_pattern, count) =
                read_until_pattern_async(&mut data, punctuation_pattern, &mut result)
                    .await
                    .unwrap();
            assert_eq!(result, b"!");
            assert_eq!(found_pattern, b"!");
            assert_eq!(count, 1);
            assert_eq!(data, b"hello");

            // Repeated calls on the same reader resume right after the previous match.
            let mut data = b"abc,def.ghi!" as &[u8];
            let punctuation_pattern = r"[,.!]";
            let mut result1 = Vec::new();
            let (found1, _count1) =
                read_until_pattern_async(&mut data, punctuation_pattern, &mut result1)
                    .await
                    .unwrap();
            assert_eq!(result1, b"abc,");
            assert_eq!(found1, b",");
            assert_eq!(data, b"def.ghi!");
            let mut result2 = Vec::new();
            let (found2, _count2) =
                read_until_pattern_async(&mut data, punctuation_pattern, &mut result2)
                    .await
                    .unwrap();
            assert_eq!(result2, b"def.");
            assert_eq!(found2, b".");
            assert_eq!(data, b"ghi!");
            let mut result3 = Vec::new();
            let (found3, _count3) =
                read_until_pattern_async(&mut data, punctuation_pattern, &mut result3)
                    .await
                    .unwrap();
            assert_eq!(result3, b"ghi!");
            assert_eq!(found3, b"!");
            assert_eq!(data, b"");

            let mut data = b"hello world" as &[u8];
            let mut result = Vec::new();
            let digits_pattern = "[0-9]";
            let (found_pattern, count) =
                read_until_pattern_async(&mut data, digits_pattern, &mut result)
                    .await
                    .unwrap();
            assert_eq!(result, b"hello world");
            assert_eq!(found_pattern, Vec::<u8>::new());
            assert_eq!(count, 11);
            assert_eq!(data, b"");

            let mut data = b"hello" as &[u8];
            let mut result = Vec::new();
            let empty_pattern = "";
            let (found_pattern, count) =
                read_until_pattern_async(&mut data, empty_pattern, &mut result)
                    .await
                    .unwrap();
            assert_eq!(count, 0);
            assert_eq!(found_pattern, Vec::<u8>::new());
        });

        Ok(())
    }

    #[test]
    fn test_read_until_any_pattern() -> io::Result<()> {
        // Alternation of escaped literals: the earliest complete alternative wins.
        let mut string = "![CD!-![CDATA!--?abcd".as_bytes();
        let mut to = Vec::new();
        let pattern = format!(
            "{}|{}|\\?",
            regex::escape(CDATA_TAG),
            regex::escape(COMMENT_TAG)
        );
        let (matched, size) = read_until_pattern(&mut string, &pattern, &mut to)?;
        assert_eq!(str::from_utf8(&matched).unwrap(), COMMENT_TAG);
        assert_eq!(size, "![CD!-![CDATA!--".len());
        assert_eq!(str::from_utf8(&to).unwrap(), "![CD!-![CDATA!--");

        let string = r##"<div class="1" >
            <div class="2">
                <div class="3">
                    <div class="4"></div>
                </div>
            </div>
            <div class="5"></div>
        </div>
        "##;

        // After skipping the first opening tag, the next tag boundary is a nested `<div`.
        let tag = "div";
        let mut to = vec![];
        let open_pattern = format!("<{}", tag);
        let close_pattern = format!("</{}>", tag);
        let pattern = format!(
            "{}|{}",
            regex::escape(&close_pattern),
            regex::escape(&open_pattern)
        );
        let reader = &mut string.as_bytes();
        io::BufRead::read_until(reader, b'>', &mut to)?;
        let (matched, _size) = read_until_pattern(reader, &pattern, &mut to)?;
        assert_eq!(str::from_utf8(&matched).unwrap(), "<div");

        let mut string = "$1131132$".as_bytes();
        let mut to = Vec::new();
        let pattern = "1132|115";
        let (matched, size) = read_until_pattern(&mut string, pattern, &mut to)?;
        assert_eq!(string, "$".as_bytes());
        assert_eq!(to, "$1131132".as_bytes());
        assert_eq!(size, 8);
        assert_eq!(str::from_utf8(&matched).unwrap(), "1132");

        let string = "<a />\
            <b></b>\
            </link>";
        let mut to = Vec::new();
        let pattern = "</link>|<link>";
        let (matched, size) = read_until_pattern(&mut string.as_bytes(), pattern, &mut to)?;
        assert_eq!(to, string.as_bytes());
        assert_eq!(std::str::from_utf8(&matched).unwrap(), "</link>");
        assert_eq!(size, string.len());

        // None of the alternatives occurs.
        let mut string = "1</link>".as_bytes();
        let mut to = Vec::new();
        let pattern = "123|890";
        let (matched, size) = read_until_pattern(&mut string, pattern, &mut to)?;
        assert_eq!(to, "1</link>".as_bytes());
        assert_eq!(size, 8);
        assert_eq!(matched, Vec::<u8>::new());

        Ok(())
    }

    #[test]
    fn test_read_until_any_pattern_async() -> io::Result<()> {
        block_on(async {
            // Alternation of escaped literals: the earliest complete alternative wins.
            let mut string = "![CD!-![CDATA!--?abcd".as_bytes();
            let mut to = Vec::new();
            let pattern = format!(
                "{}|{}|\\?",
                regex::escape(CDATA_TAG),
                regex::escape(COMMENT_TAG)
            );
            let (matched, size) = read_until_pattern_async(&mut string, &pattern, &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&matched).unwrap(), COMMENT_TAG);
            assert_eq!(size, "![CD!-![CDATA!--".len());
            assert_eq!(str::from_utf8(&to).unwrap(), "![CD!-![CDATA!--");

            let string = r##"<div class="1" >
                <div class="2">
                    <div class="3">
                        <div class="4"></div>
                    </div>
                </div>
                <div class="5"></div>
            </div>
            "##;

            // After skipping the first opening tag, the next tag boundary is a nested `<div`.
            let tag = "div";
            let mut to = vec![];
            let open_pattern = format!("<{}", tag);
            let close_pattern = format!("</{}>", tag);
            let pattern = format!(
                "{}|{}",
                regex::escape(&close_pattern),
                regex::escape(&open_pattern)
            );
            let reader = &mut string.as_bytes();
            io::BufRead::read_until(reader, b'>', &mut to).unwrap();
            let (matched, _size) = read_until_pattern_async(reader, &pattern, &mut to)
                .await
                .unwrap();
            assert_eq!(str::from_utf8(&matched).unwrap(), "<div");

            let mut string = "$1131132$".as_bytes();
            let mut to = Vec::new();
            let pattern = "1132|115";
            let (matched, size) = read_until_pattern_async(&mut string, pattern, &mut to)
                .await
                .unwrap();
            assert_eq!(string, "$".as_bytes());
            assert_eq!(to, "$1131132".as_bytes());
            assert_eq!(size, 8);
            assert_eq!(str::from_utf8(&matched).unwrap(), "1132");

            let string = "<a />\
                <b></b>\
                </link>";
            let mut to = Vec::new();
            let pattern = "</link>|<link>";
            let (matched, size) =
                read_until_pattern_async(&mut string.as_bytes(), pattern, &mut to)
                    .await
                    .unwrap();
            assert_eq!(to, string.as_bytes());
            assert_eq!(std::str::from_utf8(&matched).unwrap(), "</link>");
            assert_eq!(size, string.len());

            // None of the alternatives occurs.
            let mut string = "1</link>".as_bytes();
            let mut to = Vec::new();
            let pattern = "123|890";
            let (matched, size) = read_until_pattern_async(&mut string, pattern, &mut to)
                .await
                .unwrap();
            assert_eq!(to, "1</link>".as_bytes());
            assert_eq!(size, 8);
            assert_eq!(matched, Vec::<u8>::new());
        });

        Ok(())
    }
}