1use crate::buffer::{Buffer, BufferRev};
3use memchr::memmem;
4use std::io::{self, Read, Seek, SeekFrom};
5
6pub fn find<R>(needle: &[u8], rdr: &mut R) -> Option<io::Result<usize>>
23where
24 R: Read,
25{
26 FindIter::new_with_needle(rdr, needle).next()
27}
28
29pub fn rfind<R>(needle: &[u8], rdr: &mut R) -> Option<io::Result<usize>>
46where
47 R: Read + Seek,
48{
49 match FindRevIter::new_with_needle(rdr, needle) {
50 Ok(mut iter) => iter.next(),
51 Err(e) => Some(Err(e)),
52 }
53}
54
55pub fn find_iter<'n, 's, R>(
74 needle: &'n [u8],
75 rdr: &'s mut R,
76) -> FindIter<'n, 's, R>
77where
78 R: Read,
79{
80 FindIter::new_with_needle(rdr, needle)
81}
82
83pub fn rfind_iter<'n, 's, R>(
110 needle: &'n [u8],
111 rdr: &'s mut R,
112) -> io::Result<FindRevIter<'n, 's, R>>
113where
114 R: Read + Seek,
115{
116 FindRevIter::new_with_needle(rdr, needle)
117}
118
119#[derive(Clone, Debug)]
121pub struct StreamFinder<'n> {
122 needle: &'n [u8],
124}
125
126impl<'n> StreamFinder<'n> {
127 pub fn new(needle: &'n [u8]) -> StreamFinder<'n> {
137 StreamFinder { needle }
138 }
139
140 pub fn needle(&self) -> &[u8] {
151 self.needle
152 }
153
154 pub fn find<R: Read>(&self, rdr: &mut R) -> Option<io::Result<usize>> {
173 self.find_iter(rdr).next()
174 }
175
176 pub fn rfind<R: Read + Seek>(
199 &self,
200 rdr: &mut R,
201 ) -> Option<io::Result<usize>> {
202 match self.rfind_iter(rdr) {
203 Ok(mut iter) => iter.next(),
204 Err(e) => Some(Err(e)),
205 }
206 }
207
208 pub fn find_iter<'s, R: Read>(
229 &'n self,
230 rdr: &'s mut R,
231 ) -> FindIter<'n, 's, R> {
232 FindIter::new(rdr, self)
233 }
234
235 pub fn rfind_iter<'s, R: Read + Seek>(
264 &'n self,
265 rdr: &'s mut R,
266 ) -> io::Result<FindRevIter<'n, 's, R>> {
267 FindRevIter::new(rdr, self)
268 }
269}
270
271#[derive(Debug)]
275pub struct FindIter<'n, 's, R: Read> {
276 rdr: &'s mut R,
278 needle: &'n [u8],
280 buf: Buffer,
282 search_pos: usize,
284 stream_pos: usize,
286 report_pos: usize,
288}
289
290#[derive(Debug)]
294pub struct FindRevIter<'n, 's, R: Read + Seek> {
295 rdr: &'s mut R,
297 needle: &'n [u8],
299 buf: BufferRev,
301 search_pos: usize,
303 stream_pos: usize,
305 report_pos: usize,
307 seek_pos: usize,
309 stream_len: usize,
311}
312
313impl<'n, 's, R: Read> FindIter<'n, 's, R> {
314 pub(crate) fn new(rdr: &'s mut R, fdr: &'n StreamFinder<'n>) -> Self {
315 let needle = fdr.needle();
316 let buf = Buffer::new(needle.len());
317 FindIter {
318 rdr,
319 needle,
320 buf,
321 search_pos: 0,
322 stream_pos: 0,
323 report_pos: 0,
324 }
325 }
326
327 pub(crate) fn new_with_needle(rdr: &'s mut R, needle: &'n [u8]) -> Self {
328 let buf = Buffer::new(needle.len());
329 FindIter {
330 rdr,
331 needle,
332 buf,
333 search_pos: 0,
334 stream_pos: 0,
335 report_pos: 0,
336 }
337 }
338}
339
340impl<'n, 's, R: Read + Seek> FindRevIter<'n, 's, R> {
341 pub(crate) fn new(
342 rdr: &'s mut R,
343 fdr: &'n StreamFinder<'n>,
344 ) -> io::Result<Self> {
345 let stream_len = rdr.seek(SeekFrom::End(0))?;
346 assert!(stream_len <= usize::MAX as u64);
347 let stream_len = stream_len as usize;
348
349 let needle = fdr.needle();
350 let buf = BufferRev::new(needle.len());
351 Ok(FindRevIter {
352 rdr,
353 needle,
354 buf,
355 search_pos: 0,
356 stream_pos: stream_len,
357 report_pos: 0,
358 seek_pos: stream_len,
359 stream_len,
360 })
361 }
362
363 pub(crate) fn new_with_needle(
364 rdr: &'s mut R,
365 needle: &'n [u8],
366 ) -> io::Result<Self> {
367 let stream_len = rdr.seek(SeekFrom::End(0))?;
368 assert!(stream_len <= usize::MAX as u64);
369 let stream_len = stream_len as usize;
370
371 let buf = BufferRev::new(needle.len());
372 Ok(FindRevIter {
373 rdr,
374 needle,
375 buf,
376 search_pos: 0,
377 stream_pos: stream_len,
378 report_pos: 0,
379 seek_pos: stream_len,
380 stream_len,
381 })
382 }
383
384 pub fn stream_len(&self) -> usize {
400 self.stream_len
401 }
402
403 pub fn seek_to(&mut self, pos: usize) -> io::Result<()> {
426 self.rdr.seek(SeekFrom::Start(pos as u64)).map(|_| ())
427 }
428}
429
430impl<'n, 's, R: Read> Iterator for FindIter<'n, 's, R> {
431 type Item = io::Result<usize>;
432
433 fn next(&mut self) -> Option<Self::Item> {
434 loop {
435 if self.search_pos < self.buf.len() {
436 if let Some(mat) = memmem::find(
437 &self.buf.buffer()[self.search_pos..],
438 self.needle,
439 ) {
440 self.report_pos = self.stream_pos + mat;
441 self.stream_pos += mat + self.needle.len();
442 self.search_pos += mat + self.needle.len();
443 return Some(Ok(self.report_pos));
444 }
445
446 self.stream_pos += self.buf.len() - self.search_pos;
447 self.search_pos = self.buf.len();
448 }
449
450 if self.buf.len() >= self.buf.min_buffer_len() {
452 self.buf.roll();
453 if &self.buf.buffer()[..self.buf.min_buffer_len()]
454 == self.needle
455 {
456 self.search_pos = self.buf.min_buffer_len();
457 } else {
458 self.stream_pos -= self.buf.min_buffer_len();
459 self.search_pos = 0;
460 }
461 }
462 match self.buf.fill(&mut self.rdr) {
463 Err(err) => return Some(Err(err)),
465 Ok(false) => {
467 return None;
468 }
469 Ok(true) => {}
471 }
472 }
473 }
474}
475
476impl<'n, 's, R: Read + Seek> Iterator for FindRevIter<'n, 's, R> {
477 type Item = io::Result<usize>;
478
479 fn next(&mut self) -> Option<Self::Item> {
480 loop {
481 if self.search_pos < self.buf.len() {
483 if let Some(mat) = memmem::rfind(
484 &self.buf.buffer()[..self.buf.len() - self.search_pos],
485 self.needle,
486 ) {
487 self.report_pos = self.stream_pos
488 - (self.buf.len() - self.search_pos - mat);
489 self.stream_pos -= self.buf.len() - self.search_pos - mat;
490 self.search_pos += self.buf.len() - self.search_pos - mat;
491
492 if self.stream_len > self.buf.capacity()
495 && self.seek_pos == 0
496 {
497 return Some(Ok(self.report_pos + self.needle.len()));
498 }
499
500 return Some(Ok(self.report_pos));
501 }
502
503 self.stream_pos = self
504 .stream_pos
505 .saturating_sub(self.buf.len() - self.search_pos);
506 self.search_pos = self.buf.len();
507 }
508
509 if self.seek_pos == 0 {
511 return None;
512 }
513
514 if self.buf.len() >= self.buf.min_buffer_len() {
516 self.buf.roll_right();
517
518 if &self.buf.buffer()
519 [self.buf.len() - self.buf.min_buffer_len()..]
520 == self.needle
521 {
522 self.search_pos = self.buf.min_buffer_len();
523 } else {
524 self.stream_pos += self.buf.min_buffer_len();
525 self.search_pos = 0;
526 }
527 }
528
529 let free_buffer_len = self.buf.free_buffer().len();
530 let amount = if self.stream_pos > free_buffer_len {
531 self.seek_pos -= free_buffer_len;
532 free_buffer_len
533 } else {
534 self.seek_pos = 0;
535 self.stream_pos
536 };
537 match self.rdr.seek(SeekFrom::Start(self.seek_pos as u64)) {
538 Ok(_) => {}
539 Err(e) => return Some(Err(e)),
540 }
541 match self.buf.fill_exact(&mut self.rdr, amount) {
542 Err(err) => return Some(Err(err)),
544 Ok(false) => {
546 return None;
547 }
548 Ok(true) => {}
550 }
551 }
552 }
553}
554
555#[cfg(test)]
556mod tests {
557 use super::*;
558 use crate::buffer::DEFAULT_BUFFER_CAPACITY;
559 use std::io::Cursor;
560 use std::iter::repeat;
561
562 #[test]
563 fn test_find_iter_n1s1() {
564 let haystack = b"1";
565 let mut haystack = Cursor::new(haystack);
566
567 let finder = StreamFinder::new(b"1");
568 let matches: Vec<usize> =
569 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
570 let expected: Vec<usize> = vec![0];
571 assert_eq!(matches, expected);
572
573 let finder = StreamFinder::new(b"0");
574 let matches: Vec<usize> =
575 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
576 let expected: Vec<usize> = vec![];
577 assert_eq!(matches, expected);
578 }
579
580 #[test]
581 fn test_find_rev_iter_n1s1() {
582 let haystack = b"1";
583 let mut haystack = Cursor::new(haystack);
584
585 let finder = StreamFinder::new(b"1");
586 let matches: Vec<usize> = finder
587 .rfind_iter(&mut haystack)
588 .unwrap()
589 .map(|x| x.unwrap())
590 .collect();
591 let expected: Vec<usize> = vec![0];
592 assert_eq!(matches, expected);
593
594 let finder = StreamFinder::new(b"0");
595 let matches: Vec<usize> = finder
596 .rfind_iter(&mut haystack)
597 .unwrap()
598 .map(|x| x.unwrap())
599 .collect();
600 let expected: Vec<usize> = vec![];
601 assert_eq!(matches, expected);
602 }
603
604 #[test]
605 fn test_find_iter_n2s1() {
606 let haystack = b"1";
607 let mut haystack = Cursor::new(haystack);
608
609 let finder = StreamFinder::new(b"11");
610 let matches: Vec<usize> =
611 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
612 let expected: Vec<usize> = vec![];
613 assert_eq!(matches, expected);
614 }
615
616 #[test]
617 fn test_find_rev_iter_n2s1() {
618 let haystack = b"1";
619 let mut haystack = Cursor::new(haystack);
620
621 let finder = StreamFinder::new(b"11");
622 let matches: Vec<usize> = finder
623 .rfind_iter(&mut haystack)
624 .unwrap()
625 .map(|x| x.unwrap())
626 .collect();
627 let expected: Vec<usize> = vec![];
628 assert_eq!(matches, expected);
629 }
630
631 #[test]
632 fn test_find_iter_n1s21() {
633 let haystack = b"11 - 1 - 11 - 1 - 11";
634 let mut haystack = Cursor::new(haystack);
635
636 let finder = StreamFinder::new(b"1");
637 let matches: Vec<usize> =
638 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
639 let expected: Vec<usize> = vec![0, 1, 5, 9, 10, 15, 19, 20];
640 assert_eq!(matches, expected);
641 }
642
643 #[test]
644 fn test_find_rev_iter_n1s21() {
645 let haystack = b"11 - 1 - 11 - 1 - 11";
646 let mut haystack = Cursor::new(haystack);
647
648 let finder = StreamFinder::new(b"1");
649 let matches: Vec<usize> = finder
650 .rfind_iter(&mut haystack)
651 .unwrap()
652 .map(|x| x.unwrap())
653 .collect();
654 let expected: Vec<usize> =
655 vec![0, 1, 5, 9, 10, 15, 19, 20].into_iter().rev().collect();
656 assert_eq!(matches, expected);
657 }
658
659 #[test]
660 fn test_find_iter_n1s8213() {
661 let haystack: Vec<u8> = repeat(&0u8)
662 .take(DEFAULT_BUFFER_CAPACITY)
663 .chain("42 0 42 42 0 42".as_bytes())
664 .copied()
665 .collect();
666 let mut haystack = Cursor::new(haystack);
667
668 let finder = StreamFinder::new(b"4");
669 let matches: Vec<usize> =
670 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
671 let expected: Vec<usize> = vec![0, 5, 8, 13]
672 .into_iter()
673 .map(|x| x + DEFAULT_BUFFER_CAPACITY)
674 .collect();
675 assert_eq!(matches, expected);
676 }
677
678 #[test]
679 fn test_find_rev_iter_n1s8213() {
680 let haystack: Vec<u8> = repeat(&0u8)
681 .take(DEFAULT_BUFFER_CAPACITY)
682 .chain("42 0 42 42 0 42".as_bytes())
683 .copied()
684 .collect();
685 let mut haystack = Cursor::new(haystack);
686
687 let finder = StreamFinder::new(b"4");
688 let matches: Vec<usize> = finder
689 .rfind_iter(&mut haystack)
690 .unwrap()
691 .map(|x| x.unwrap())
692 .collect();
693 let expected: Vec<usize> = vec![0, 5, 8, 13]
694 .into_iter()
695 .map(|x| x + DEFAULT_BUFFER_CAPACITY)
696 .rev()
697 .collect();
698 assert_eq!(matches, expected);
699 }
700
701 #[test]
702 fn test_find_iter_n2s8213() {
703 let haystack: Vec<u8> = repeat(&0u8)
704 .take(DEFAULT_BUFFER_CAPACITY)
705 .chain("42 0 42 42 0 42".as_bytes())
706 .copied()
707 .collect();
708 let mut haystack = Cursor::new(haystack);
709
710 let finder = StreamFinder::new(b"42");
711 let matches: Vec<usize> =
712 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
713 let expected: Vec<usize> = vec![0, 5, 8, 13]
714 .into_iter()
715 .map(|x| x + DEFAULT_BUFFER_CAPACITY)
716 .collect();
717 assert_eq!(matches, expected);
718 }
719
720 #[test]
721 fn test_find_rev_iter_n2s8213() {
722 let haystack: Vec<u8> = repeat(&0u8)
723 .take(DEFAULT_BUFFER_CAPACITY)
724 .chain("42 0 42 42 0 42".as_bytes())
725 .copied()
726 .collect();
727 let mut haystack = Cursor::new(haystack);
728
729 let finder = StreamFinder::new(b"42");
730 let matches: Vec<usize> = finder
731 .rfind_iter(&mut haystack)
732 .unwrap()
733 .map(|x| x.unwrap())
734 .collect();
735 let expected: Vec<usize> = vec![0, 5, 8, 13]
736 .into_iter()
737 .map(|x| x + DEFAULT_BUFFER_CAPACITY)
738 .rev()
739 .collect();
740 assert_eq!(matches, expected);
741 }
742
743 #[test]
744 fn test_find_iter_n2s8212() {
745 let haystack: Vec<u8> = repeat(&0u8)
746 .take(DEFAULT_BUFFER_CAPACITY - 1)
747 .chain("42 0 42 42 0 42".as_bytes())
748 .copied()
749 .collect();
750 let mut haystack = Cursor::new(haystack);
751
752 let finder = StreamFinder::new(b"42");
753 let matches: Vec<usize> =
754 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
755 let expected: Vec<usize> = vec![0, 5, 8, 13]
756 .into_iter()
757 .map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
758 .collect();
759 assert_eq!(matches, expected);
760 }
761
762 #[test]
763 fn test_find_rev_iter_n2s8212() {
764 let haystack: Vec<u8> = repeat(&0u8)
765 .take(DEFAULT_BUFFER_CAPACITY - 1)
766 .chain("42 0 42 42 0 42".as_bytes())
767 .copied()
768 .collect();
769 let mut haystack = Cursor::new(haystack);
770
771 let finder = StreamFinder::new(b"42");
772 let matches: Vec<usize> = finder
773 .rfind_iter(&mut haystack)
774 .unwrap()
775 .map(|x| x.unwrap())
776 .collect();
777 let expected: Vec<usize> = vec![0, 5, 8, 13]
778 .into_iter()
779 .map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
780 .rev()
781 .collect();
782 assert_eq!(matches, expected);
783 }
784
785 #[test]
786 fn test_find_iter_n3s8212() {
787 let haystack: Vec<u8> = repeat(&0u8)
788 .take(DEFAULT_BUFFER_CAPACITY - 1)
789 .chain("42 0 42 42 0 42".as_bytes())
790 .copied()
791 .collect();
792 let mut haystack = Cursor::new(haystack);
793
794 let finder = StreamFinder::new(b"42 ");
795 let matches: Vec<usize> =
796 finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
797 let expected: Vec<usize> = vec![0, 5, 8]
798 .into_iter()
799 .map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
800 .collect();
801 assert_eq!(matches, expected);
802 }
803
804 #[test]
805 fn test_find_rev_iter_n3s8212() {
806 let haystack: Vec<u8> = repeat(&0u8)
807 .take(DEFAULT_BUFFER_CAPACITY - 1)
808 .chain("42 0 42 42 0 42".as_bytes())
809 .copied()
810 .collect();
811 let mut haystack = Cursor::new(haystack);
812
813 let finder = StreamFinder::new(b"42 ");
814 let matches: Vec<usize> = finder
815 .rfind_iter(&mut haystack)
816 .unwrap()
817 .map(|x| x.unwrap())
818 .collect();
819 let expected: Vec<usize> = vec![0, 5, 8]
820 .into_iter()
821 .map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
822 .rev()
823 .collect();
824 assert_eq!(matches, expected);
825 }
826}