use crate::buffer::{Buffer, BufferRev};
use memchr::memmem;
use std::io::{self, Read, Seek, SeekFrom};
pub fn find<R>(needle: &[u8], rdr: &mut R) -> Option<io::Result<usize>>
where
R: Read,
{
FindIter::new_with_needle(rdr, needle).next()
}
pub fn rfind<R>(needle: &[u8], rdr: &mut R) -> Option<io::Result<usize>>
where
R: Read + Seek,
{
match FindRevIter::new_with_needle(rdr, needle) {
Ok(mut iter) => iter.next(),
Err(e) => Some(Err(e)),
}
}
pub fn find_iter<'n, 's, R>(
needle: &'n [u8],
rdr: &'s mut R,
) -> FindIter<'n, 's, R>
where
R: Read,
{
FindIter::new_with_needle(rdr, needle)
}
pub fn rfind_iter<'n, 's, R>(
needle: &'n [u8],
rdr: &'s mut R,
) -> io::Result<FindRevIter<'n, 's, R>>
where
R: Read + Seek,
{
FindRevIter::new_with_needle(rdr, needle)
}
#[derive(Clone, Debug)]
pub struct StreamFinder<'n> {
needle: &'n [u8],
}
impl<'n> StreamFinder<'n> {
pub fn new(needle: &'n [u8]) -> StreamFinder<'n> {
StreamFinder { needle }
}
pub fn needle(&self) -> &[u8] {
self.needle
}
pub fn find<R: Read>(&self, rdr: &mut R) -> Option<io::Result<usize>> {
self.find_iter(rdr).next()
}
pub fn rfind<R: Read + Seek>(
&self,
rdr: &mut R,
) -> Option<io::Result<usize>> {
match self.rfind_iter(rdr) {
Ok(mut iter) => iter.next(),
Err(e) => Some(Err(e)),
}
}
pub fn find_iter<'s, R: Read>(
&'n self,
rdr: &'s mut R,
) -> FindIter<'n, 's, R> {
FindIter::new(rdr, self)
}
pub fn rfind_iter<'s, R: Read + Seek>(
&'n self,
rdr: &'s mut R,
) -> io::Result<FindRevIter<'n, 's, R>> {
FindRevIter::new(rdr, self)
}
}
#[derive(Debug)]
pub struct FindIter<'n, 's, R: Read> {
rdr: &'s mut R,
needle: &'n [u8],
buf: Buffer,
search_pos: usize,
stream_pos: usize,
report_pos: usize,
}
#[derive(Debug)]
pub struct FindRevIter<'n, 's, R: Read + Seek> {
rdr: &'s mut R,
needle: &'n [u8],
buf: BufferRev,
search_pos: usize,
stream_pos: usize,
report_pos: usize,
seek_pos: usize,
stream_len: usize,
}
impl<'n, 's, R: Read> FindIter<'n, 's, R> {
pub(crate) fn new(rdr: &'s mut R, fdr: &'n StreamFinder<'n>) -> Self {
let needle = fdr.needle();
let buf = Buffer::new(needle.len());
FindIter {
rdr,
needle,
buf,
search_pos: 0,
stream_pos: 0,
report_pos: 0,
}
}
pub(crate) fn new_with_needle(rdr: &'s mut R, needle: &'n [u8]) -> Self {
let buf = Buffer::new(needle.len());
FindIter {
rdr,
needle,
buf,
search_pos: 0,
stream_pos: 0,
report_pos: 0,
}
}
}
impl<'n, 's, R: Read + Seek> FindRevIter<'n, 's, R> {
pub(crate) fn new(
rdr: &'s mut R,
fdr: &'n StreamFinder<'n>,
) -> io::Result<Self> {
let stream_len = rdr.seek(SeekFrom::End(0))?;
assert!(stream_len <= usize::MAX as u64);
let stream_len = stream_len as usize;
let needle = fdr.needle();
let buf = BufferRev::new(needle.len());
Ok(FindRevIter {
rdr,
needle,
buf,
search_pos: 0,
stream_pos: stream_len,
report_pos: 0,
seek_pos: stream_len,
stream_len,
})
}
pub(crate) fn new_with_needle(
rdr: &'s mut R,
needle: &'n [u8],
) -> io::Result<Self> {
let stream_len = rdr.seek(SeekFrom::End(0))?;
assert!(stream_len <= usize::MAX as u64);
let stream_len = stream_len as usize;
let buf = BufferRev::new(needle.len());
Ok(FindRevIter {
rdr,
needle,
buf,
search_pos: 0,
stream_pos: stream_len,
report_pos: 0,
seek_pos: stream_len,
stream_len,
})
}
pub fn stream_len(&self) -> usize {
self.stream_len
}
pub fn seek_to(&mut self, pos: usize) -> io::Result<()> {
self.rdr.seek(SeekFrom::Start(pos as u64)).map(|_| ())
}
}
impl<'n, 's, R: Read> Iterator for FindIter<'n, 's, R> {
type Item = io::Result<usize>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.search_pos < self.buf.len() {
if let Some(mat) = memmem::find(
&self.buf.buffer()[self.search_pos..],
self.needle,
) {
self.report_pos = self.stream_pos + mat;
self.stream_pos += mat + self.needle.len();
self.search_pos += mat + self.needle.len();
return Some(Ok(self.report_pos));
}
self.stream_pos += self.buf.len() - self.search_pos;
self.search_pos = self.buf.len();
}
if self.buf.len() >= self.buf.min_buffer_len() {
self.buf.roll();
if &self.buf.buffer()[..self.buf.min_buffer_len()]
== self.needle
{
self.search_pos = self.buf.min_buffer_len();
} else {
self.stream_pos -= self.buf.min_buffer_len();
self.search_pos = 0;
}
}
match self.buf.fill(&mut self.rdr) {
Err(err) => return Some(Err(err)),
Ok(false) => {
return None;
}
Ok(true) => {}
}
}
}
}
impl<'n, 's, R: Read + Seek> Iterator for FindRevIter<'n, 's, R> {
type Item = io::Result<usize>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.search_pos < self.buf.len() {
if let Some(mat) = memmem::rfind(
&self.buf.buffer()[..self.buf.len() - self.search_pos],
self.needle,
) {
self.report_pos = self.stream_pos
- (self.buf.len() - self.search_pos - mat);
self.stream_pos -= self.buf.len() - self.search_pos - mat;
self.search_pos += self.buf.len() - self.search_pos - mat;
if self.stream_len > self.buf.capacity()
&& self.seek_pos == 0
{
return Some(Ok(self.report_pos + self.needle.len()));
}
return Some(Ok(self.report_pos));
}
self.stream_pos = self
.stream_pos
.saturating_sub(self.buf.len() - self.search_pos);
self.search_pos = self.buf.len();
}
if self.seek_pos == 0 {
return None;
}
if self.buf.len() >= self.buf.min_buffer_len() {
self.buf.roll_right();
if &self.buf.buffer()
[self.buf.len() - self.buf.min_buffer_len()..]
== self.needle
{
self.search_pos = self.buf.min_buffer_len();
} else {
self.stream_pos += self.buf.min_buffer_len();
self.search_pos = 0;
}
}
let free_buffer_len = self.buf.free_buffer().len();
let amount = if self.stream_pos > free_buffer_len {
self.seek_pos -= free_buffer_len;
free_buffer_len
} else {
self.seek_pos = 0;
self.stream_pos
};
match self.rdr.seek(SeekFrom::Start(self.seek_pos as u64)) {
Ok(_) => {}
Err(e) => return Some(Err(e)),
}
match self.buf.fill_exact(&mut self.rdr, amount) {
Err(err) => return Some(Err(err)),
Ok(false) => {
return None;
}
Ok(true) => {}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::buffer::DEFAULT_BUFFER_CAPACITY;
use std::io::Cursor;
use std::iter::repeat;
#[test]
fn test_find_iter_n1s1() {
let haystack = b"1";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"1");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0];
assert_eq!(matches, expected);
let finder = StreamFinder::new(b"0");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![];
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n1s1() {
let haystack = b"1";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"1");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![0];
assert_eq!(matches, expected);
let finder = StreamFinder::new(b"0");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![];
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n2s1() {
let haystack = b"1";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"11");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![];
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n2s1() {
let haystack = b"1";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"11");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![];
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n1s21() {
let haystack = b"11 - 1 - 11 - 1 - 11";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"1");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0, 1, 5, 9, 10, 15, 19, 20];
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n1s21() {
let haystack = b"11 - 1 - 11 - 1 - 11";
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"1");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> =
vec![0, 1, 5, 9, 10, 15, 19, 20].into_iter().rev().collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n1s8213() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"4");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY)
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n1s8213() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"4");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY)
.rev()
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n2s8213() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY)
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n2s8213() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY)
.rev()
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n2s8212() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY - 1)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n2s8212() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY - 1)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![0, 5, 8, 13]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
.rev()
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_iter_n3s8212() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY - 1)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42 ");
let matches: Vec<usize> =
finder.find_iter(&mut haystack).map(|x| x.unwrap()).collect();
let expected: Vec<usize> = vec![0, 5, 8]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
.collect();
assert_eq!(matches, expected);
}
#[test]
fn test_find_rev_iter_n3s8212() {
let haystack: Vec<u8> = repeat(&0u8)
.take(DEFAULT_BUFFER_CAPACITY - 1)
.chain("42 0 42 42 0 42".as_bytes())
.copied()
.collect();
let mut haystack = Cursor::new(haystack);
let finder = StreamFinder::new(b"42 ");
let matches: Vec<usize> = finder
.rfind_iter(&mut haystack)
.unwrap()
.map(|x| x.unwrap())
.collect();
let expected: Vec<usize> = vec![0, 5, 8]
.into_iter()
.map(|x| x + DEFAULT_BUFFER_CAPACITY - 1)
.rev()
.collect();
assert_eq!(matches, expected);
}
}