use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use crate::error::{DciError, Result};
/// Tunable caps and switches that travel with a corpus root.
///
/// This file only stores these values and hands them out; the code that
/// enforces them is not visible here, so the per-field notes describe the
/// intent suggested by each name and should be confirmed against the consumers.
#[derive(Debug, Clone)]
pub struct Limits {
    /// Presumably the cap on results returned by one operation (default 200).
    pub max_results: usize,
    /// Presumably the cap on files visited during a walk (default 50 000).
    pub max_files_walked: usize,
    /// Presumably the largest file size, in bytes, that is considered (default 8 MiB).
    pub max_file_bytes: u64,
    /// Presumably the longest line length reported before truncation (default 512).
    pub max_line_len: usize,
    /// Presumably the cap on lines returned by one read (default 400).
    pub max_read_lines: usize,
    /// Time budget, presumably per operation (default 15 seconds).
    pub timeout: Duration,
    /// Presumably whether ignore files are honoured while walking (default `true`).
    pub respect_gitignore: bool,
    /// Presumably whether hidden entries are visited while walking (default `true`).
    pub include_hidden: bool,
}

impl Default for Limits {
    /// Returns the stock limits used by callers that do not supply their own.
    fn default() -> Self {
        // 8 MiB expressed in bytes.
        const DEFAULT_MAX_FILE_BYTES: u64 = 8 << 20;
        const DEFAULT_TIMEOUT: Duration = Duration::from_secs(15);

        Limits {
            max_results: 200,
            max_files_walked: 50_000,
            max_file_bytes: DEFAULT_MAX_FILE_BYTES,
            max_line_len: 512,
            max_read_lines: 400,
            timeout: DEFAULT_TIMEOUT,
            respect_gitignore: true,
            include_hidden: true,
        }
    }
}
/// Handle to a corpus directory together with the [`Limits`] that apply to it.
///
/// The state is held behind an [`Arc`], so the derived `Clone` copies a
/// pointer rather than duplicating the path and limits.
#[derive(Debug, Clone)]
pub struct CorpusRoot {
// Shared state; no method in this file mutates it after construction.
inner: Arc<CorpusRootInner>,
}
/// State shared by every clone of a [`CorpusRoot`].
#[derive(Debug)]
struct CorpusRootInner {
// Canonical directory path, as produced by `CorpusRoot::with_limits`.
root: PathBuf,
// Limits supplied when the root was constructed.
limits: Limits,
}
impl CorpusRoot {
    /// Opens `path` as a corpus root using [`Limits::default`].
    ///
    /// # Errors
    ///
    /// Fails in the same cases as [`CorpusRoot::with_limits`].
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        Self::with_limits(path, Limits::default())
    }

    /// Opens `path` as a corpus root with explicit `limits`.
    ///
    /// The path is canonicalized here, so [`CorpusRoot::root`] always returns
    /// the canonical form that [`CorpusRoot::resolve`] compares against.
    ///
    /// # Errors
    ///
    /// Returns [`DciError::InvalidRoot`] when `path` cannot be canonicalized
    /// (the reason carries the I/O error text) or when the canonical path is
    /// not a directory.
    pub fn with_limits(path: impl AsRef<Path>, limits: Limits) -> Result<Self> {
        let requested = path.as_ref();
        let root = requested
            .canonicalize()
            .map_err(|e| DciError::InvalidRoot {
                path: requested.to_path_buf(),
                reason: e.to_string(),
            })?;
        if !root.is_dir() {
            return Err(DciError::InvalidRoot {
                path: root,
                reason: "not a directory".to_string(),
            });
        }
        Ok(Self {
            inner: Arc::new(CorpusRootInner { root, limits }),
        })
    }

    /// Returns the canonical root directory.
    pub fn root(&self) -> &Path {
        &self.inner.root
    }

    /// Returns the limits this root was constructed with.
    pub fn limits(&self) -> &Limits {
        &self.inner.limits
    }

    /// Maps a caller-supplied path onto a canonical path inside the root.
    ///
    /// `requested` is joined onto the root (rooted or prefixed inputs are
    /// rerooted rather than rejected), canonicalized, and then checked for
    /// containment. Because the check runs on the canonical path, `..`
    /// segments and symlinks that lead outside the root are rejected.
    ///
    /// # Errors
    ///
    /// * [`DciError::NotFound`] when canonicalization reports that the path
    ///   does not exist.
    /// * [`DciError::Io`] for any other canonicalization failure.
    /// * [`DciError::PathEscape`] when the canonical path is not under the root.
    pub fn resolve(&self, requested: &str) -> Result<PathBuf> {
        let candidate = self.join_unchecked(requested);
        // NOTE(review): the NotFound / PathEscape split lets a caller tell
        // whether a path outside the root exists — confirm that is acceptable.
        let canonical = match candidate.canonicalize() {
            Ok(path) => path,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Err(DciError::NotFound {
                    requested: requested.to_string(),
                });
            }
            Err(source) => {
                return Err(DciError::Io {
                    path: candidate,
                    source,
                });
            }
        };
        // `Path::starts_with` compares whole components, so a sibling such as
        // `<root>-other` does not count as being inside `<root>`.
        if !canonical.starts_with(&self.inner.root) {
            return Err(DciError::PathEscape {
                requested: requested.to_string(),
            });
        }
        Ok(canonical)
    }

    /// Renders `path` relative to the root for display.
    ///
    /// Returns `"."` when `path` is the root itself, the root-relative
    /// remainder when `path` lies under the root, and the unmodified `path`
    /// otherwise. Non-UTF-8 sequences are replaced lossily.
    pub fn relativize<'a>(&self, path: &'a Path) -> std::borrow::Cow<'a, str> {
        match path.strip_prefix(&self.inner.root) {
            Ok(rel) if rel.as_os_str().is_empty() => std::borrow::Cow::Borrowed("."),
            Ok(rel) => rel.to_string_lossy(),
            Err(_) => path.to_string_lossy(),
        }
    }

    /// Joins `requested` onto the root without any containment check.
    ///
    /// Leading prefix (drive / UNC) and root-directory components are dropped
    /// first, so a rooted or drive-qualified input is reinterpreted relative
    /// to the corpus root instead of replacing it, which is what `Path::join`
    /// would otherwise do. Inputs with no such leading component are joined
    /// unchanged. Callers must still validate the result (see `resolve`).
    fn join_unchecked(&self, requested: &str) -> PathBuf {
        let path = Path::new(requested);
        let mut rest = path.components();
        let mut anchored = false;
        // Peek via a clone so that only prefix/root components are consumed.
        while matches!(
            rest.clone().next(),
            Some(std::path::Component::Prefix(_)) | Some(std::path::Component::RootDir)
        ) {
            rest.next();
            anchored = true;
        }
        if anchored {
            self.inner.root.join(rest.as_path())
        } else {
            self.inner.root.join(path)
        }
    }
}
#[cfg(test)]
mod tests {
    // Tests are meant to fail loudly, so the panicking helpers that these
    // lints flag elsewhere are permitted inside this module.
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::indexing_slicing,
        clippy::panic
    )]
    use super::*;
    use std::fs;

    /// Creates a temporary corpus containing `sub/a.txt`.
    ///
    /// The `TempDir` guard is returned alongside the root so the directory
    /// stays alive for as long as the caller holds the guard.
    fn temp_corpus() -> (tempfile::TempDir, CorpusRoot) {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::create_dir(dir.path().join("sub")).expect("subdir");
        fs::write(dir.path().join("sub/a.txt"), "hello").expect("write");
        let root = CorpusRoot::new(dir.path()).expect("root");
        (dir, root)
    }

    #[test]
    fn resolves_paths_inside_root() {
        let (_dir, root) = temp_corpus();
        let resolved = root.resolve("sub/a.txt").expect("resolve");
        assert!(resolved.ends_with("sub/a.txt"));
    }

    #[test]
    fn rejects_parent_traversal() {
        let (_dir, root) = temp_corpus();
        let err = root.resolve("../../../etc/passwd").unwrap_err();
        // Either outcome is a rejection: the target may or may not exist on
        // the machine running the test.
        assert!(matches!(
            err,
            DciError::PathEscape { .. } | DciError::NotFound { .. }
        ));
    }

    // Gated as a whole: the symlink API used here is Unix-only, and gating
    // only the body would leave an empty, vacuously passing test with unused
    // bindings on other targets.
    #[cfg(unix)]
    #[test]
    fn rejects_symlink_escape() {
        let (dir, root) = temp_corpus();
        let outside = dir.path().parent().expect("parent");
        let link = dir.path().join("escape");
        std::os::unix::fs::symlink(outside, &link).expect("symlink");
        let err = root.resolve("escape").unwrap_err();
        assert!(matches!(err, DciError::PathEscape { .. }));
    }

    #[test]
    fn absolute_input_is_rerooted() {
        let (_dir, root) = temp_corpus();
        let resolved = root.resolve("/sub/a.txt").expect("resolve");
        assert!(resolved.ends_with("sub/a.txt"));
    }
}