1use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::time::Duration;
17
18use crate::error::{DciError, Result};
19
/// Tunable caps and traversal switches carried by a corpus root.
///
/// NOTE(review): nothing in this part of the file reads these fields, so the
/// per-field notes below are inferred from the field names and types only.
/// Confirm them against the code that actually enforces the limits.
#[derive(Clone, Debug)]
pub struct Limits {
    /// Presumably the cap on how many results one operation may return — TODO confirm.
    pub max_results: usize,
    /// Presumably the cap on how many files a single walk may visit — TODO confirm.
    pub max_files_walked: usize,
    /// Presumably the largest file size, in bytes, that will be processed — TODO confirm.
    pub max_file_bytes: u64,
    /// Presumably the longest line length kept before truncation — TODO confirm
    /// the unit (bytes vs. chars).
    pub max_line_len: usize,
    /// Presumably the cap on lines returned by a single read — TODO confirm.
    pub max_read_lines: usize,
    /// Time budget; presumably applied per operation — TODO confirm.
    pub timeout: Duration,
    /// Presumably whether directory walks honour `.gitignore` rules — TODO confirm.
    pub respect_gitignore: bool,
    /// Presumably whether hidden (dot-prefixed) entries are visited — TODO confirm.
    pub include_hidden: bool,
}
47
48impl Default for Limits {
49 fn default() -> Self {
50 Self {
51 max_results: 200,
52 max_files_walked: 50_000,
53 max_file_bytes: 8 * 1024 * 1024,
54 max_line_len: 512,
55 max_read_lines: 400,
56 timeout: Duration::from_secs(15),
57 respect_gitignore: true,
58 include_hidden: true,
59 }
60 }
61}
62
63#[derive(Debug, Clone)]
67pub struct CorpusRoot {
68 inner: Arc<CorpusRootInner>,
69}
70
71#[derive(Debug)]
72struct CorpusRootInner {
73 root: PathBuf,
74 limits: Limits,
75}
76
77impl CorpusRoot {
78 pub fn new(path: impl AsRef<Path>) -> Result<Self> {
82 Self::with_limits(path, Limits::default())
83 }
84
85 pub fn with_limits(path: impl AsRef<Path>, limits: Limits) -> Result<Self> {
87 let requested = path.as_ref();
88 let root = requested
89 .canonicalize()
90 .map_err(|e| DciError::InvalidRoot {
91 path: requested.to_path_buf(),
92 reason: e.to_string(),
93 })?;
94 if !root.is_dir() {
95 return Err(DciError::InvalidRoot {
96 path: root,
97 reason: "not a directory".to_string(),
98 });
99 }
100 Ok(Self {
101 inner: Arc::new(CorpusRootInner { root, limits }),
102 })
103 }
104
105 pub fn root(&self) -> &Path {
107 &self.inner.root
108 }
109
110 pub fn limits(&self) -> &Limits {
112 &self.inner.limits
113 }
114
115 pub fn resolve(&self, requested: &str) -> Result<PathBuf> {
123 let candidate = self.join_unchecked(requested);
124
125 let canonical = candidate.canonicalize().map_err(|e| {
126 if e.kind() == std::io::ErrorKind::NotFound {
127 DciError::NotFound {
128 requested: requested.to_string(),
129 }
130 } else {
131 DciError::Io {
132 path: candidate.clone(),
133 source: e,
134 }
135 }
136 })?;
137
138 if !canonical.starts_with(&self.inner.root) {
139 return Err(DciError::PathEscape {
140 requested: requested.to_string(),
141 });
142 }
143 Ok(canonical)
144 }
145
146 pub fn relativize<'a>(&self, path: &'a Path) -> std::borrow::Cow<'a, str> {
149 match path.strip_prefix(&self.inner.root) {
150 Ok(rel) if rel.as_os_str().is_empty() => std::borrow::Cow::Borrowed("."),
151 Ok(rel) => rel.to_string_lossy(),
152 Err(_) => path.to_string_lossy(),
153 }
154 }
155
156 fn join_unchecked(&self, requested: &str) -> PathBuf {
159 let p = Path::new(requested);
160 if p.is_absolute() {
161 let stripped = p.strip_prefix("/").unwrap_or(p);
165 self.inner.root.join(stripped)
166 } else {
167 self.inner.root.join(p)
168 }
169 }
170}
171
#[cfg(test)]
mod tests {
    // Tests are allowed to unwrap/expect/index/panic: any such failure should
    // simply fail the test.
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::indexing_slicing,
        clippy::panic
    )]
    use super::*;
    use std::fs;

    /// Creates a temporary corpus containing `sub/a.txt`.
    ///
    /// The `TempDir` guard is returned alongside the root so the directory
    /// outlives the test body.
    fn temp_corpus() -> (tempfile::TempDir, CorpusRoot) {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::create_dir(dir.path().join("sub")).expect("subdir");
        fs::write(dir.path().join("sub/a.txt"), "hello").expect("write");
        let root = CorpusRoot::new(dir.path()).expect("root");
        (dir, root)
    }

    #[test]
    fn resolves_paths_inside_root() {
        let (_dir, root) = temp_corpus();
        let resolved = root.resolve("sub/a.txt").expect("resolve");
        assert!(resolved.ends_with("sub/a.txt"));
    }

    #[test]
    fn rejects_parent_traversal() {
        let (_dir, root) = temp_corpus();
        let err = root.resolve("../../../etc/passwd").unwrap_err();
        // Which error we get depends on whether the traversal target exists on
        // the host: existence is checked before containment.
        assert!(matches!(
            err,
            DciError::PathEscape { .. } | DciError::NotFound { .. }
        ));
    }

    // Gated as a whole: symlink creation below uses the Unix-only API, and
    // gating only an inner block would leave a vacuous test with unused
    // variables on other targets.
    #[cfg(unix)]
    #[test]
    fn rejects_symlink_escape() {
        let (dir, root) = temp_corpus();
        let outside = dir.path().parent().expect("parent");
        let link = dir.path().join("escape");
        std::os::unix::fs::symlink(outside, &link).expect("symlink");
        let err = root.resolve("escape").unwrap_err();
        assert!(matches!(err, DciError::PathEscape { .. }));
    }

    #[test]
    fn absolute_input_is_rerooted() {
        let (_dir, root) = temp_corpus();
        let resolved = root.resolve("/sub/a.txt").expect("resolve");
        // The leading `/` is treated as "relative to the corpus root".
        assert!(resolved.ends_with("sub/a.txt"));
    }
}