use crate::result::ZipResult;
use memchr::memmem::{Finder, FinderRev};
use std::io::{Read, Seek, SeekFrom};
const MAGIC_LENGTH: usize = 4;
type MagicLengthByteArray = [u8; MAGIC_LENGTH];
pub(crate) trait FinderDirection<'a, B> {
fn new(needle: &'a B) -> Self;
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);
fn needle(&self) -> &[u8];
fn find(&self, haystack: &[u8]) -> Option<usize>;
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
fn move_scope(&self, offset: usize) -> usize;
}
pub(crate) struct Forward<'a>(Finder<'a>);
impl<'a> FinderDirection<'a, MagicLengthByteArray> for Forward<'a> {
fn new(needle: &'a MagicLengthByteArray) -> Self {
Self(Finder::new(needle))
}
fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
start_inclusive
}
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
(&window[mid_window_offset..], mid_window_offset)
}
fn find(&self, haystack: &[u8]) -> Option<usize> {
self.0.find(haystack)
}
fn needle(&self) -> &[u8] {
self.0.needle()
}
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
let magic_overlap = MAGIC_LENGTH.saturating_sub(1) as u64;
let next = cursor.saturating_add(window_size as u64 - magic_overlap);
if next >= bounds.1 { None } else { Some(next) }
}
fn move_scope(&self, offset: usize) -> usize {
offset + MAGIC_LENGTH
}
}
pub(crate) struct Backwards<'a>(FinderRev<'a>);
impl<'a> FinderDirection<'a, MagicLengthByteArray> for Backwards<'a> {
fn new(needle: &'a MagicLengthByteArray) -> Self {
Self(FinderRev::new(needle))
}
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
bounds
.1
.saturating_sub(window_size as u64)
.clamp(bounds.0, bounds.1)
}
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
(&window[..mid_window_offset], 0)
}
fn find(&self, haystack: &[u8]) -> Option<usize> {
self.0.rfind(haystack)
}
fn needle(&self) -> &[u8] {
self.0.needle()
}
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
let magic_overlap = MAGIC_LENGTH.saturating_sub(1) as u64;
if cursor <= bounds.0 {
None
} else {
Some(
cursor
.saturating_add(magic_overlap)
.saturating_sub(window_size as u64)
.clamp(bounds.0, bounds.1),
)
}
}
fn move_scope(&self, offset: usize) -> usize {
offset
}
}
const BUFFER_SIZE: usize = 1024;
pub(crate) struct MagicFinder<Direction> {
buffer: [u8; BUFFER_SIZE],
pub(self) finder: Direction,
cursor: u64,
mid_buffer_offset: Option<usize>,
bounds: (u64, u64),
}
impl<'a, T: FinderDirection<'a, MagicLengthByteArray>> MagicFinder<T> {
pub(crate) fn new(
magic_bytes: &'a MagicLengthByteArray,
start_inclusive: u64,
end_exclusive: u64,
) -> Self {
Self {
buffer: [0; BUFFER_SIZE],
finder: T::new(magic_bytes),
cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
mid_buffer_offset: None,
bounds: (start_inclusive, end_exclusive),
}
}
pub(crate) fn repurpose(
&mut self,
magic_bytes: &'a MagicLengthByteArray,
bounds: (u64, u64),
) -> &mut Self {
self.finder = T::new(magic_bytes);
self.cursor = T::reset_cursor(bounds, self.buffer.len());
self.bounds = bounds;
self.mid_buffer_offset = None;
self
}
pub(crate) fn next<R: Read + Seek + ?Sized>(
&mut self,
reader: &mut R,
) -> ZipResult<Option<u64>> {
loop {
if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
break;
}
let window_start = self.cursor;
let window_end = self
.cursor
.saturating_add(self.buffer.len() as u64)
.min(self.bounds.1);
if window_end <= window_start {
break;
}
let window = &mut self.buffer[..(window_end - window_start) as usize];
if self.mid_buffer_offset.is_none() {
reader.seek(SeekFrom::Start(window_start))?;
if let Err(e) = reader.read_exact(window) {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
break;
}
return Err(e.into());
}
}
let (window, window_start_offset) = match self.mid_buffer_offset {
Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
None => (&*window, 0usize),
};
if let Some(offset) = self.finder.find(window) {
let magic_pos = window_start + window_start_offset as u64 + offset as u64;
reader.seek(SeekFrom::Start(magic_pos))?;
self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));
return Ok(Some(magic_pos));
}
self.mid_buffer_offset = None;
if let Some(new_cursor) =
self.finder
.move_cursor(self.cursor, self.bounds, self.buffer.len())
{
self.cursor = new_cursor;
} else {
self.bounds.0 = self.bounds.1;
break;
}
}
Ok(None)
}
}
pub(crate) struct OptimisticMagicFinder<Direction> {
inner: MagicFinder<Direction>,
initial_guess: Option<(u64, bool)>,
}
impl<'a, Direction: FinderDirection<'a, MagicLengthByteArray>> OptimisticMagicFinder<Direction> {
pub(crate) fn new_empty() -> Self {
Self {
inner: MagicFinder::new(&[0, 0, 0, 0], 0, 0),
initial_guess: None,
}
}
pub(crate) fn repurpose(
&mut self,
magic_bytes: &'a MagicLengthByteArray,
bounds: (u64, u64),
initial_guess: Option<(u64, bool)>,
) -> &mut Self {
self.inner.repurpose(magic_bytes, bounds);
self.initial_guess = initial_guess;
self
}
pub(crate) fn next<R: Read + Seek + ?Sized>(
&mut self,
reader: &mut R,
) -> ZipResult<Option<u64>> {
if let Some((v, mandatory)) = self.initial_guess {
reader.seek(SeekFrom::Start(v))?;
let mut buffer = MagicLengthByteArray::default();
if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
if let Err(e) = reader.read_exact(&mut buffer) {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
if mandatory {
return Ok(None);
}
} else {
return Err(e.into());
}
} else if self.inner.finder.needle() == buffer {
self.initial_guess.take();
reader.seek(SeekFrom::Start(v))?;
return Ok(Some(v));
}
}
if mandatory {
return Ok(None);
}
self.initial_guess.take();
}
self.inner.next(reader)
}
}