1use std::path::{Component, Path, PathBuf};
2
3use sha2::{Digest, Sha256};
4
5use crate::errors::CoreError;
6use crate::models::{FileReadRecord, FileSearchResult, FilesReadInput, FilesSearchInput};
7
/// Payload returned by `search`: the matching entries plus any non-fatal warnings
/// gathered while walking the project tree.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileSearchResponse {
 /// Entries whose names matched the query, in the order the walk found them.
 pub results: Vec<FileSearchResult>,
 /// Human-readable messages for problems that did not abort the search
 /// (for example a directory that could not be read).
 pub warnings: Vec<String>,
}
14
/// Deepest level `walk` recurses to. The directory passed to the initial call
/// (depth 0 in `search`) counts as level 0; `walk` only descends into a
/// sub-directory while the current depth is strictly below this value.
const MAX_DEPTH: usize = 4;
16
/// Entry names that `should_skip_dir` reports as skippable.
///
/// Names are compared exactly (case-sensitive, whole name).
const SKIP_DIR_NAMES: &[&str] = &[
    "node_modules",
    ".git",
    "target",
    ".cursor",
    "dist",
    "build",
    ".next",
    ".svn",
    "__pycache__",
    ".idea",
    ".vscode",
];

/// Returns `true` when `name` is exactly one of the entries in `SKIP_DIR_NAMES`.
fn should_skip_dir(name: &str) -> bool {
    SKIP_DIR_NAMES.iter().any(|skipped| *skipped == name)
}
34
35fn name_matches_query(file_name: &str, query: &str) -> bool {
36 let q = query.trim();
37 if q.is_empty() {
38 return true;
39 }
40 let ql = q.to_lowercase();
41 let nl = file_name.to_lowercase();
42 nl.contains(&ql) || fuzzy_subsequence(&nl, &ql)
43}
44
/// Returns `true` when every character of `query` occurs in `name` in the same
/// relative order (not necessarily adjacent).
///
/// Comparison is exact per `char`; callers that want case-insensitive matching
/// must lower-case both arguments first. An empty `query` always matches.
fn fuzzy_subsequence(name: &str, query: &str) -> bool {
    // One shared cursor over `name`: each `any` call resumes where the previous
    // one stopped, so matched characters are consumed left to right.
    let mut remaining = name.chars();
    query
        .chars()
        .all(|wanted| remaining.any(|candidate| candidate == wanted))
}
61
62async fn resolve_registered_project_root(
63 db: &sqlx::SqlitePool,
64 project_path: &str,
65) -> crate::Result<PathBuf> {
66 let raw_root = PathBuf::from(project_path);
67 if !raw_root.exists() {
68 return Err(CoreError::Validation("project path does not exist".into()));
69 }
70
71 let root = raw_root
72 .canonicalize()
73 .map_err(|e| CoreError::Validation(format!("invalid project path: {e}")))?;
74
75 if !root.is_dir() {
76 return Err(CoreError::Validation(
77 "project path must be a directory".into(),
78 ));
79 }
80
81 let normalized_root = root.to_string_lossy().replace('\\', "/");
82 let exists: i64 = sqlx::query_scalar!(
83 r#"SELECT COUNT(1) as "n!: i64" FROM projects WHERE path = ?1"#,
84 normalized_root
85 )
86 .fetch_one(db)
87 .await
88 .map_err(|e| CoreError::Internal(format!("failed to validate project path: {e}")))?;
89
90 if exists == 0 {
91 return Err(CoreError::Validation(
92 "project path must belong to a registered project".into(),
93 ));
94 }
95
96 Ok(root)
97}
98
99fn walk(
100 root: &Path,
101 rel: &Path,
102 depth: usize,
103 query: &str,
104 out: &mut Vec<FileSearchResult>,
105 warnings: &mut Vec<String>,
106 limit: usize,
107) -> crate::Result<()> {
108 if out.len() >= limit || depth > MAX_DEPTH {
109 return Ok(());
110 }
111
112 let dir = root.join(rel);
113 let read = match std::fs::read_dir(&dir) {
114 Ok(r) => r,
115 Err(e) => {
116 warnings.push(format!(
117 "Could not read directory {}: {}",
118 dir.to_string_lossy(),
119 e
120 ));
121 return Ok(());
122 }
123 };
124
125 for entry in read {
126 let entry = entry?;
127 let name = entry.file_name().to_string_lossy().to_string();
128 if should_skip_dir(&name) {
129 continue;
130 }
131
132 let rel_path: PathBuf = if rel.as_os_str().is_empty() {
133 PathBuf::from(&name)
134 } else {
135 rel.join(&name)
136 };
137 let rel_display = rel_path.to_string_lossy().replace('\\', "/");
138
139 let is_dir = entry.file_type()?.is_dir();
140
141 if name_matches_query(&name, query) {
142 out.push(FileSearchResult {
143 path: rel_display.clone(),
144 relative_path: rel_display.clone(),
145 is_directory: is_dir,
146 });
147 if out.len() >= limit {
148 return Ok(());
149 }
150 }
151
152 if is_dir && depth < MAX_DEPTH {
153 walk(root, &rel_path, depth + 1, query, out, warnings, limit)?;
154 if out.len() >= limit {
155 return Ok(());
156 }
157 }
158 }
159 Ok(())
160}
161
162pub async fn search(
163 db: &sqlx::SqlitePool,
164 input: FilesSearchInput,
165) -> crate::Result<FileSearchResponse> {
166 let root = resolve_registered_project_root(db, &input.project_path).await?;
167 let limit = usize::try_from(input.limit.unwrap_or(100).max(0)).unwrap_or(0);
168 if limit == 0 {
169 return Ok(FileSearchResponse {
170 results: vec![],
171 warnings: vec![],
172 });
173 }
174 let mut out = Vec::new();
175 let mut warnings = Vec::new();
176 walk(
177 &root,
178 Path::new(""),
179 0,
180 &input.query,
181 &mut out,
182 &mut warnings,
183 limit,
184 )?;
185 Ok(FileSearchResponse {
186 results: out,
187 warnings,
188 })
189}
190
191pub async fn read(db: &sqlx::SqlitePool, input: FilesReadInput) -> crate::Result<FileReadRecord> {
192 let root = resolve_registered_project_root(db, &input.project_path).await?;
193
194 let rel = PathBuf::from(&input.relative_path);
195 if rel.is_absolute() {
196 return Err(CoreError::Validation(
197 "relativePath must be relative".into(),
198 ));
199 }
200 if rel
201 .components()
202 .any(|c| matches!(c, Component::ParentDir | Component::Prefix(_)))
203 {
204 return Err(CoreError::Validation(
205 "relativePath contains invalid components".into(),
206 ));
207 }
208
209 let abs = root.join(&rel);
210 let abs = abs
211 .canonicalize()
212 .map_err(|e| CoreError::Validation(format!("file not found: {e}")))?;
213 if !abs.starts_with(&root) {
214 return Err(CoreError::Validation("path escapes project root".into()));
215 }
216 if !abs.is_file() {
217 return Err(CoreError::Validation("path is not a file".into()));
218 }
219
220 let max_bytes = usize::try_from(
221 input
222 .max_bytes
223 .unwrap_or(256 * 1024)
224 .clamp(1, 2 * 1024 * 1024),
225 )
226 .unwrap_or(256 * 1024);
227 let bytes = std::fs::read(&abs)?;
228 let truncated = bytes.len() > max_bytes;
229 let bytes = if truncated {
230 &bytes[..max_bytes]
231 } else {
232 &bytes[..]
233 };
234
235 let content = String::from_utf8_lossy(bytes).to_string();
237
238 let mut hasher = Sha256::new();
239 hasher.update(bytes);
240 let digest = hasher.finalize();
241 let sha256 = {
242 use std::fmt::Write as _;
243 digest
244 .iter()
245 .fold(String::with_capacity(digest.len() * 2), |mut acc, b| {
246 let _ = write!(&mut acc, "{b:02x}");
247 acc
248 })
249 };
250
251 let all_lines: Vec<&str> = content.lines().collect();
252 let total_lines = i32::try_from(all_lines.len()).unwrap_or(i32::MAX);
253
254 let start = input.start_line.unwrap_or(1).max(1);
255 let end = input.end_line.unwrap_or(total_lines.max(1)).max(start);
256
257 let start_idx = (start - 1) as usize;
258 let end_idx_exclusive = end.min(total_lines) as usize;
259
260 let sliced = if start_idx >= all_lines.len() {
261 String::new()
262 } else {
263 all_lines[start_idx..end_idx_exclusive].join("\n")
264 };
265
266 let language = abs
267 .extension()
268 .and_then(|e| e.to_str())
269 .map(ToOwned::to_owned);
270
271 Ok(FileReadRecord {
272 absolute_path: abs.to_string_lossy().to_string(),
273 relative_path: input.relative_path.replace('\\', "/"),
274 content: sliced,
275 language,
276 line_count: total_lines,
277 truncated,
278 sha256: Some(sha256),
279 })
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Names on the skip list are reported; ordinary source directories are not.
    #[test]
    fn should_skip_well_known_build_dirs() {
        for skipped in ["node_modules", "target", ".git"] {
            assert!(should_skip_dir(skipped), "{skipped} should be skipped");
        }
        for kept in ["src", "crates"] {
            assert!(!should_skip_dir(kept), "{kept} should not be skipped");
        }
    }

    /// A blank query matches any file name, including an empty one.
    #[test]
    fn name_matches_empty_query_is_always_true() {
        for file_name in ["anything.rs", ""] {
            assert!(name_matches_query(file_name, ""), "file_name={file_name:?}");
        }
    }

    /// Case-insensitive substring matching.
    #[test]
    fn name_matches_exact_substring() {
        let cases = [("user", true), ("Service", true), ("admin", false)];
        for (query, expected) in cases {
            assert_eq!(
                name_matches_query("UserService.ts", query),
                expected,
                "query={query:?}"
            );
        }
    }

    /// Query characters must appear in order, but not necessarily adjacent.
    #[test]
    fn fuzzy_subsequence_matches_scattered_chars() {
        let cases = [
            ("usrservice", "usrvc", true),
            ("abcde", "ace", true),
            ("abcde", "aec", false),
            ("abc", "abcd", false),
        ];
        for (name, query, expected) in cases {
            assert_eq!(
                fuzzy_subsequence(name, query),
                expected,
                "name={name:?} query={query:?}"
            );
        }
    }

    /// "usi" is not a substring of the name, so only the fuzzy path can match it.
    #[test]
    fn name_matches_falls_back_to_fuzzy_when_substring_fails() {
        let matched = name_matches_query("UserServiceImpl.ts", "usi");
        assert!(matched);
    }
}