use std::fs::{File, OpenOptions};
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
/// Default spill threshold (10 MiB): callers pass this to
/// `AdaptiveBuffer::append` as the size above which data moves to disk.
/// NOTE(review): the name says "mmap", but the visible code uses plain
/// file I/O, not memory mapping — confirm the name against its history.
pub const MMAP_THRESHOLD: usize = 10 * 1024 * 1024;
/// A byte buffer backed by a temporary file on disk, for data too large
/// to hold comfortably in memory.
///
/// All operations go through explicit seeks on the backing file, so only
/// bookkeeping fields live in memory.
pub struct LargeBuffer {
    // Backing file handle (opened read + write).
    file: File,
    // Location of the backing file; used for removal on drop.
    path: PathBuf,
    // Logical length: total bytes appended so far.
    size: usize,
    // When true, `Drop` deletes the backing file.
    cleanup: bool,
    // Reserved read cursor; written by `with_path`/`clear` but not yet
    // consulted by any read method.
    read_pos: usize,
}

impl LargeBuffer {
    /// Creates a buffer backed by a uniquely named file in the system
    /// temp directory.
    ///
    /// The name combines the process id, a nanosecond timestamp, and a
    /// per-process counter, so buffers created in quick succession (or
    /// on platforms with a coarse clock) cannot truncate each other's
    /// backing file.
    ///
    /// # Errors
    /// Propagates any I/O error from creating the backing file.
    pub fn new() -> io::Result<Self> {
        use std::sync::atomic::{AtomicUsize, Ordering};
        // Disambiguates buffers created within the same clock tick.
        static COUNTER: AtomicUsize = AtomicUsize::new(0);
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_nanos())
            .unwrap_or(0);
        let path = std::env::temp_dir().join(format!(
            "rust_expect_buffer_{}_{}_{}",
            std::process::id(),
            nanos,
            COUNTER.fetch_add(1, Ordering::Relaxed),
        ));
        Self::with_path(&path)
    }

    /// Creates a buffer backed by the file at `path`.
    ///
    /// Any existing file at `path` is truncated. Cleanup-on-drop is
    /// enabled by default; see [`Self::set_cleanup`].
    ///
    /// # Errors
    /// Propagates any I/O error from opening the file.
    pub fn with_path(path: &Path) -> io::Result<Self> {
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)?;
        Ok(Self {
            file,
            path: path.to_path_buf(),
            size: 0,
            cleanup: true,
            read_pos: 0,
        })
    }

    /// Controls whether the backing file is deleted when the buffer is
    /// dropped (enabled by default).
    pub const fn set_cleanup(&mut self, cleanup: bool) {
        self.cleanup = cleanup;
    }

    /// Returns the path of the backing file.
    #[must_use]
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Appends `data` to the end of the buffer.
    ///
    /// # Errors
    /// Propagates seek/write failures on the backing file.
    pub fn append(&mut self, data: &[u8]) -> io::Result<()> {
        self.file.seek(SeekFrom::End(0))?;
        self.file.write_all(data)?;
        self.size += data.len();
        Ok(())
    }

    /// Returns the logical length of the buffer in bytes.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.size
    }

    /// Returns `true` if no bytes have been appended.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.size == 0
    }

    /// Reads up to `len` bytes starting at byte offset `start`.
    ///
    /// The range is clamped to the buffer: a `start` at or past the end
    /// yields an empty vector, and `len` is capped at the bytes that
    /// remain.
    ///
    /// # Errors
    /// Propagates seek/read failures on the backing file.
    pub fn read_range(&mut self, start: usize, len: usize) -> io::Result<Vec<u8>> {
        if start >= self.size {
            return Ok(Vec::new());
        }
        let actual_len = len.min(self.size - start);
        let mut buf = vec![0u8; actual_len];
        self.file.seek(SeekFrom::Start(start as u64))?;
        self.file.read_exact(&mut buf)?;
        Ok(buf)
    }

    /// Reads the entire buffer contents into memory.
    pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
        self.read_range(0, self.size)
    }

    /// Returns the last `n` bytes (or the whole buffer if shorter).
    pub fn tail(&mut self, n: usize) -> io::Result<Vec<u8>> {
        let start = self.size.saturating_sub(n);
        self.read_range(start, n)
    }

    /// Returns the first `n` bytes (or the whole buffer if shorter).
    pub fn head(&mut self, n: usize) -> io::Result<Vec<u8>> {
        self.read_range(0, n)
    }

    /// Discards all contents and truncates the backing file.
    pub fn clear(&mut self) -> io::Result<()> {
        self.file.set_len(0)?;
        self.size = 0;
        self.read_pos = 0;
        Ok(())
    }

    /// Returns the byte offset of the first occurrence of `needle`, or
    /// `None` if it does not occur. An empty needle matches at offset 0.
    ///
    /// The file is scanned in fixed-size chunks so memory use stays
    /// bounded by roughly `CHUNK_SIZE + needle.len()`. The tail of the
    /// searched window is carried between iterations, so matches that
    /// span any number of chunk boundaries are found — including
    /// needles longer than one chunk.
    ///
    /// # Errors
    /// Propagates seek/read failures on the backing file.
    pub fn find(&mut self, needle: &[u8]) -> io::Result<Option<usize>> {
        const CHUNK_SIZE: usize = 64 * 1024;
        if needle.is_empty() {
            return Ok(Some(0));
        }
        if needle.len() > self.size {
            return Ok(None);
        }
        // Bytes that must be carried between chunks so a match
        // straddling a boundary is still visible.
        let keep = needle.len() - 1;
        let mut pos = 0;
        let mut carry: Vec<u8> = Vec::new();
        self.file.seek(SeekFrom::Start(0))?;
        while pos < self.size {
            let read_size = CHUNK_SIZE.min(self.size - pos);
            // File offset of the first byte of the search window.
            let window_start = pos - carry.len();
            // Build the window as carried tail + freshly read chunk.
            let mut window = carry;
            let carried = window.len();
            window.resize(carried + read_size, 0);
            self.file.read_exact(&mut window[carried..])?;
            if let Some(idx) = window.windows(needle.len()).position(|w| w == needle) {
                return Ok(Some(window_start + idx));
            }
            // Keep the tail of the WHOLE window, not just of this chunk:
            // trimming only the chunk would drop bytes of a needle longer
            // than CHUNK_SIZE that spans three or more chunks.
            let tail_start = window.len().saturating_sub(keep);
            window.drain(..tail_start);
            carry = window;
            pos += read_size;
        }
        Ok(None)
    }

    /// Convenience wrapper around [`Self::find`] for string needles.
    pub fn find_str(&mut self, needle: &str) -> io::Result<Option<usize>> {
        self.find(needle.as_bytes())
    }

    /// Reads the whole buffer as a `String`, replacing invalid UTF-8
    /// sequences with U+FFFD.
    pub fn as_str_lossy(&mut self) -> io::Result<String> {
        let data = self.read_all()?;
        Ok(String::from_utf8_lossy(&data).into_owned())
    }

    /// Removes and returns the first `len` bytes (clamped to the buffer
    /// length); the remainder is shifted to the front of the file.
    ///
    /// NOTE: the remainder is staged in memory, so consuming from a very
    /// large buffer costs memory proportional to what is left.
    ///
    /// # Errors
    /// Propagates seek/read/write failures on the backing file.
    pub fn consume(&mut self, len: usize) -> io::Result<Vec<u8>> {
        if len == 0 {
            return Ok(Vec::new());
        }
        let consume_len = len.min(self.size);
        let consumed = self.read_range(0, consume_len)?;
        let remaining = self.read_range(consume_len, self.size - consume_len)?;
        self.file.seek(SeekFrom::Start(0))?;
        self.file.set_len(0)?;
        self.file.write_all(&remaining)?;
        self.size = remaining.len();
        Ok(consumed)
    }

    /// Flushes buffered contents and metadata to disk.
    pub fn sync(&self) -> io::Result<()> {
        self.file.sync_all()
    }
}
impl Drop for LargeBuffer {
    /// Removes the backing file on drop unless cleanup was disabled via
    /// `set_cleanup(false)`.
    fn drop(&mut self) {
        if !self.cleanup {
            return;
        }
        // Best effort: a failed removal must not panic during drop.
        let _ = std::fs::remove_file(&self.path);
    }
}
impl std::fmt::Debug for LargeBuffer {
    // Manual impl: the raw `File` handle and read cursor are omitted;
    // only path, size, and the cleanup flag are shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut dbg = f.debug_struct("LargeBuffer");
        dbg.field("path", &self.path);
        dbg.field("size", &self.size);
        dbg.field("cleanup", &self.cleanup);
        dbg.finish()
    }
}
/// Returns the offset of the first occurrence of `needle` in `haystack`,
/// or `None` if it does not occur.
///
/// An empty needle trivially matches at offset 0; without this guard,
/// `slice::windows(0)` panics.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
/// A buffer that lives in memory until it grows past a caller-supplied
/// threshold, then spills to a disk-backed [`LargeBuffer`].
pub enum AdaptiveBuffer {
    /// Contents held entirely in memory.
    Memory(Vec<u8>),
    /// Contents spilled to a temporary file.
    File(LargeBuffer),
}

impl AdaptiveBuffer {
    /// Creates an empty, memory-backed buffer.
    #[must_use]
    pub const fn new() -> Self {
        Self::Memory(Vec::new())
    }

    /// Creates an empty, memory-backed buffer. The threshold argument is
    /// currently unused; the effective threshold is supplied on each
    /// call to [`Self::append`].
    #[must_use]
    pub const fn with_threshold(_threshold: usize) -> Self {
        Self::Memory(Vec::new())
    }

    /// Appends `data`, spilling to a file-backed buffer once the total
    /// size would exceed `threshold`. Already-spilled buffers stay on
    /// disk regardless of `threshold`.
    ///
    /// # Errors
    /// Propagates I/O errors from creating or writing the spill file.
    pub fn append(&mut self, data: &[u8], threshold: usize) -> io::Result<()> {
        match self {
            Self::File(file_buf) => file_buf.append(data),
            Self::Memory(mem) => {
                if mem.len() + data.len() <= threshold {
                    mem.extend_from_slice(data);
                    return Ok(());
                }
                // Crossing the threshold: move the existing bytes plus
                // the new data into a disk-backed buffer.
                let mut spilled = LargeBuffer::new()?;
                spilled.append(mem)?;
                spilled.append(data)?;
                *self = Self::File(spilled);
                Ok(())
            }
        }
    }

    /// Returns the number of bytes stored, wherever they live.
    #[must_use]
    pub const fn len(&self) -> usize {
        match self {
            Self::Memory(mem) => mem.len(),
            Self::File(file_buf) => file_buf.len(),
        }
    }

    /// Returns `true` if the buffer holds no bytes.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` once contents have spilled to disk.
    #[must_use]
    pub const fn is_file_backed(&self) -> bool {
        matches!(self, Self::File(_))
    }

    /// Returns a copy of the entire contents.
    pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
        match self {
            Self::Memory(mem) => Ok(mem.clone()),
            Self::File(file_buf) => file_buf.read_all(),
        }
    }

    /// Returns the last `n` bytes (or everything, if shorter).
    pub fn tail(&mut self, n: usize) -> io::Result<Vec<u8>> {
        match self {
            Self::Memory(mem) => {
                let skip = mem.len().saturating_sub(n);
                Ok(mem[skip..].to_vec())
            }
            Self::File(file_buf) => file_buf.tail(n),
        }
    }

    /// Discards all contents; a spilled buffer stays file-backed.
    pub fn clear(&mut self) -> io::Result<()> {
        match self {
            Self::File(file_buf) => file_buf.clear(),
            Self::Memory(mem) => {
                mem.clear();
                Ok(())
            }
        }
    }

    /// Returns the contents as a `String`, replacing invalid UTF-8
    /// sequences with U+FFFD.
    pub fn as_str_lossy(&mut self) -> io::Result<String> {
        match self {
            Self::Memory(mem) => Ok(String::from_utf8_lossy(mem).into_owned()),
            Self::File(file_buf) => file_buf.as_str_lossy(),
        }
    }
}
impl Default for AdaptiveBuffer {
    /// Same as [`AdaptiveBuffer::new`]: an empty in-memory buffer.
    fn default() -> Self {
        Self::Memory(Vec::new())
    }
}
impl std::fmt::Debug for AdaptiveBuffer {
    // In-memory contents are elided — only the byte count is printed —
    // so debug output stays small even for big buffers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Memory(bytes) => f.debug_tuple("Memory").field(&bytes.len()).finish(),
            Self::File(file_buf) => f.debug_tuple("File").field(file_buf).finish(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Append + read round-trip on the file-backed buffer.
    #[test]
    fn large_buffer_basic() {
        let mut buffer = LargeBuffer::new().unwrap();
        buffer.append(b"hello world").unwrap();
        assert_eq!(buffer.len(), 11);
        assert_eq!(buffer.read_all().unwrap(), b"hello world");
    }

    // Substring search: both a hit and a miss.
    #[test]
    fn large_buffer_find() {
        let mut buffer = LargeBuffer::new().unwrap();
        buffer.append(b"the quick brown fox").unwrap();
        assert_eq!(buffer.find(b"quick").unwrap(), Some(4));
        assert_eq!(buffer.find(b"lazy").unwrap(), None);
    }

    // `tail` returns the trailing n bytes.
    #[test]
    fn large_buffer_tail() {
        let mut buffer = LargeBuffer::new().unwrap();
        buffer.append(b"hello world").unwrap();
        assert_eq!(buffer.tail(5).unwrap(), b"world");
    }

    // `consume` removes from the front and keeps the remainder.
    #[test]
    fn large_buffer_consume() {
        let mut buffer = LargeBuffer::new().unwrap();
        buffer.append(b"hello world").unwrap();
        assert_eq!(buffer.consume(6).unwrap(), b"hello ");
        assert_eq!(buffer.read_all().unwrap(), b"world");
    }

    // Appends below the threshold stay in memory.
    #[test]
    fn adaptive_buffer_stays_memory() {
        let mut buffer = AdaptiveBuffer::new();
        buffer.append(b"small data", MMAP_THRESHOLD).unwrap();
        assert!(!buffer.is_file_backed());
    }

    // Crossing the threshold spills the buffer to disk.
    #[test]
    fn adaptive_buffer_switches_to_file() {
        let mut buffer = AdaptiveBuffer::new();
        let payload = vec![b'x'; 150];
        buffer.append(&payload, 100).unwrap();
        assert!(buffer.is_file_backed());
        assert_eq!(buffer.len(), 150);
    }
}