use anyhow::{Context, Result, bail};
use memmap2::Mmap;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use crate::config::buffers::{max_source_file_size, mmap_threshold};
/// Strategy used by `FileReader::open_with_policy` to load a file's bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReaderPolicy {
/// Always read the whole file into a heap buffer.
Buffered,
/// Attempt to memory-map the file; `open_with_policy` falls back to a
/// buffered read if the mapping cannot be created.
Mmap,
/// Choose per file: memory-map when the file size is greater than or equal
/// to `threshold`, otherwise use a buffered read.
Auto {
/// File size in bytes (compared against `Metadata::len`) at or above which
/// memory-mapping is attempted.
threshold: u64,
},
}
impl Default for ReaderPolicy {
fn default() -> Self {
Self::Auto {
threshold: mmap_threshold(),
}
}
}
/// The loaded contents of a file, backed either by a memory mapping or by an
/// owned heap buffer. Use `as_slice` (or `AsRef<[u8]>`) to access the bytes
/// uniformly regardless of the backing storage.
pub enum FileReader {
/// Contents exposed through a read-only memory mapping.
Mmap {
// The handle is never read after construction, hence the lint allowance.
// NOTE(review): presumably retained so the descriptor stays open for as
// long as the mapping exists — confirm whether this is still required.
#[allow(dead_code)]
file: File,
/// The mapping over the file's contents.
mmap: Mmap,
},
/// Contents read fully into memory.
Buffered {
/// Every byte read from the file.
data: Vec<u8>,
},
}
impl FileReader {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
Self::open_with_policy(path, ReaderPolicy::default())
}
pub fn open_with_policy<P: AsRef<Path>>(path: P, policy: ReaderPolicy) -> Result<Self> {
let path = path.as_ref();
let file =
File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;
let metadata = file
.metadata()
.with_context(|| format!("Failed to read file metadata: {}", path.display()))?;
let file_size = metadata.len();
let max_size = max_source_file_size();
if file_size > max_size {
bail!(
"File too large to index: {} ({} MB exceeds {} MB limit). \
Adjust SQRY_MAX_SOURCE_FILE_SIZE environment variable if needed.",
path.display(),
file_size / (1024 * 1024),
max_size / (1024 * 1024)
);
}
let use_mmap = match policy {
ReaderPolicy::Buffered => false,
ReaderPolicy::Mmap => true,
ReaderPolicy::Auto { threshold } => file_size >= threshold,
};
if use_mmap {
match Self::try_mmap(file, path) {
Ok(reader) => Ok(reader),
Err(_e) => {
let mut file = File::open(path)?;
Self::read_buffered(&mut file, path)
}
}
} else {
let mut file_for_read = file;
Self::read_buffered(&mut file_for_read, path)
}
}
fn try_mmap(file: File, path: &Path) -> Result<Self> {
let mmap = unsafe {
Mmap::map(&file).with_context(|| format!("Failed to mmap file: {}", path.display()))?
};
Ok(FileReader::Mmap { file, mmap })
}
fn read_buffered(file: &mut File, path: &Path) -> Result<Self> {
let mut data = Vec::new();
file.read_to_end(&mut data)
.with_context(|| format!("Failed to read file: {}", path.display()))?;
Ok(FileReader::Buffered { data })
}
#[must_use]
pub fn as_slice(&self) -> &[u8] {
match self {
FileReader::Mmap { mmap, .. } => &mmap[..],
FileReader::Buffered { data } => &data[..],
}
}
#[must_use]
pub fn len(&self) -> usize {
self.as_slice().len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
self.as_slice().chunks(chunk_size)
}
}
impl AsRef<[u8]> for FileReader {
fn as_ref(&self) -> &[u8] {
self.as_slice()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file holding `size` bytes of the repeating pattern
    /// 0, 1, ..., 255, 0, 1, ... and returns it with the bytes written.
    fn create_temp_file(size: usize) -> (NamedTempFile, Vec<u8>) {
        let pattern: Vec<u8> = (0..=u8::MAX).cycle().take(size).collect();
        let mut tmp = NamedTempFile::new().expect("Failed to create temp file");
        tmp.write_all(&pattern).expect("Failed to write temp file");
        tmp.flush().expect("Failed to flush temp file");
        (tmp, pattern)
    }

    /// A forced buffered read returns the exact bytes and reports their length.
    #[test]
    fn test_buffered_small_file() {
        let (tmp, expected) = create_temp_file(1024);
        let policy = ReaderPolicy::Buffered;
        let reader =
            FileReader::open_with_policy(tmp.path(), policy).expect("Failed to open file");

        assert_eq!(reader.as_slice(), expected.as_slice());
        assert_eq!(reader.len(), 1024);
        assert!(!reader.is_empty());
    }

    /// A forced mmap policy on a 15 MiB file returns the exact bytes.
    #[test]
    fn test_mmap_large_file() {
        let size = 15 * 1024 * 1024;
        let (tmp, expected) = create_temp_file(size);
        let policy = ReaderPolicy::Mmap;
        let reader =
            FileReader::open_with_policy(tmp.path(), policy).expect("Failed to open file");

        assert_eq!(reader.as_slice(), expected.as_slice());
        assert_eq!(reader.len(), size);
    }

    /// Auto policy with a file below the threshold returns the exact bytes.
    #[test]
    fn test_auto_policy_small_file() {
        let (tmp, expected) = create_temp_file(1024);
        let policy = ReaderPolicy::Auto {
            threshold: 10 * 1024 * 1024,
        };
        let reader =
            FileReader::open_with_policy(tmp.path(), policy).expect("Failed to open file");

        assert_eq!(reader.as_slice(), expected.as_slice());
    }

    /// Auto policy with a file above the threshold returns the exact bytes.
    #[test]
    fn test_auto_policy_large_file() {
        let size = 15 * 1024 * 1024;
        let (tmp, expected) = create_temp_file(size);
        let policy = ReaderPolicy::Auto {
            threshold: 10 * 1024 * 1024,
        };
        let reader =
            FileReader::open_with_policy(tmp.path(), policy).expect("Failed to open file");

        assert_eq!(reader.as_slice(), expected.as_slice());
        assert_eq!(reader.len(), size);
    }

    /// 1000 bytes split into 100-byte chunks yields ten full chunks.
    #[test]
    fn test_chunks_iteration() {
        let (tmp, _) = create_temp_file(1000);
        let reader = FileReader::open(tmp.path()).expect("Failed to open file");

        let chunk_lens: Vec<usize> = reader.chunks(100).map(|chunk| chunk.len()).collect();
        assert_eq!(chunk_lens.len(), 10);
        assert_eq!(chunk_lens[0], 100);
        assert_eq!(chunk_lens[9], 100);
    }

    /// An empty file opens successfully and reports zero length.
    #[test]
    fn test_empty_file() {
        let tmp = NamedTempFile::new().expect("Failed to create temp file");
        let reader = FileReader::open(tmp.path()).expect("Failed to open file");

        assert!(reader.is_empty());
        assert_eq!(reader.len(), 0);
    }

    /// Files one byte below, exactly at, and one byte above the Auto
    /// threshold all read back their exact contents.
    #[test]
    fn test_threshold_boundary() {
        let threshold: u64 = 5 * 1024;
        let boundary = usize::try_from(threshold).unwrap_or(usize::MAX);
        let cases = [
            (boundary.saturating_sub(1), "Failed to open small file"),
            (boundary, "Failed to open exact file"),
            (boundary.saturating_add(1), "Failed to open large file"),
        ];

        for (size, failure_msg) in cases {
            let (tmp, expected) = create_temp_file(size);
            let reader =
                FileReader::open_with_policy(tmp.path(), ReaderPolicy::Auto { threshold })
                    .expect(failure_msg);
            assert_eq!(reader.as_slice(), expected.as_slice());
        }
    }
}