use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use crate::index::IndexSnapshot;
/// Resolves `global_id` to a `(path, content)` pair using the given snapshot.
///
/// Lookup order:
/// 1. The snapshot overlay: a hit returns a clone of the overlay document's
///    path together with a shared handle to its content.
/// 2. The delete set: an id listed there yields `None`.
/// 3. The base segments: the owning segment is found by searching
///    `segment_base_ids()`, then the file is read from disk beneath
///    `canonical_root`.
///
/// For the on-disk case, `None` is returned when:
/// - there are no segments, or the id does not map to a document entry;
/// - the joined path cannot be canonicalized or does not start with
///   `canonical_root`;
/// - the file cannot be stat'ed, opened, or read, or (on unix/windows)
///   `verify_fd_matches_stat` rejects the opened handle;
/// - more than `max_file_size` bytes were read.
///
/// On success the bytes are passed through `normalize_encoding` (with
/// `verbose` forwarded) and returned alongside the path as stored in the
/// segment's document entry, not the canonicalized path.
pub(super) fn resolve_doc(
    snap: &IndexSnapshot,
    global_id: u32,
    canonical_root: &Path,
    max_file_size: u64,
    verbose: bool,
) -> Option<(std::path::PathBuf, Arc<[u8]>)> {
    // The overlay is consulted before anything else.
    if let Some(overlay_doc) = snap.overlay.get_doc(global_id) {
        return Some((overlay_doc.path.clone(), Arc::clone(&overlay_doc.content)));
    }
    if snap.delete_set.contains(global_id) {
        return None;
    }

    // Locate the segment whose base id is the greatest one <= `global_id`.
    // NOTE(review): `partition_point` presumes the base ids are in ascending
    // order; confirm against the snapshot builder.
    let base_ids = snap.segment_base_ids();
    if base_ids.is_empty() {
        return None;
    }
    let seg_idx = base_ids
        .partition_point(|&start| start <= global_id)
        .saturating_sub(1);
    if seg_idx >= snap.base_segments().len() {
        return None;
    }
    // `checked_sub` rejects ids that lie below the first segment's base.
    let local_id = global_id.checked_sub(base_ids[seg_idx])?;
    let doc_entry = snap.base_segments()[seg_idx].get_doc(local_id)?;

    // Canonicalize and insist that the result stays under the root.
    let canonical = std::fs::canonicalize(canonical_root.join(&doc_entry.path))
        .ok()
        .filter(|resolved| resolved.starts_with(canonical_root))?;

    // Stat first, open second, then compare the two.
    // NOTE(review): presumably this detects the file being swapped between
    // the stat and the open; confirm in `verify_fd_matches_stat`.
    #[cfg(any(unix, windows))]
    let pre_meta = std::fs::metadata(&canonical).ok()?;
    let file = crate::index::open_readonly_nofollow(&canonical).ok()?;
    #[cfg(any(unix, windows))]
    if !crate::index::verify_fd_matches_stat(&file, &pre_meta) {
        return None;
    }

    // Read one byte past the limit so an oversized file is detectable
    // without reading it in full.
    let mut limited = file.take(max_file_size.saturating_add(1));
    let mut raw = Vec::new();
    limited.read_to_end(&mut raw).ok()?;
    if raw.len() as u64 > max_file_size {
        return None;
    }

    let normalized = crate::index::normalize_encoding(&raw, verbose);
    let bytes: Arc<[u8]> = Arc::from(normalized.as_ref());
    Some((doc_entry.path, bytes))
}
#[cfg(test)]
mod tests {
    use std::io::{Read, Write};

    /// Byte limit shared by both tests.
    const MAX: u64 = 16;

    /// Writes `payload` to a fresh temporary file, reopens it by path, and
    /// reads it back through a `take(max + 1)` cap, returning the bytes read.
    fn read_back_capped(payload: &[u8], max: u64) -> Vec<u8> {
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.write_all(payload).unwrap();
        tmp.flush().unwrap();

        let handle = std::fs::File::open(tmp.path()).unwrap();
        let mut limited = handle.take(max.saturating_add(1));
        let mut content = Vec::new();
        limited.read_to_end(&mut content).unwrap();
        content
    }

    /// A 17-byte file read under a 16-byte limit must come back longer than
    /// the limit, which is the condition used to reject it.
    #[test]
    fn oversized_file_returns_none() {
        let content = read_back_capped(b"12345678901234567", MAX);
        assert!(content.len() as u64 > MAX, "must detect oversized file");
    }

    /// A file of exactly 16 bytes must not exceed the 16-byte limit.
    #[test]
    fn at_limit_file_is_not_skipped() {
        let content = read_back_capped(b"1234567890123456", MAX);
        assert!(
            content.len() as u64 <= MAX,
            "at-limit file must not be skipped"
        );
    }
}