Skip to main content

difflore_core/domain/
files.rs

1use std::path::{Component, Path, PathBuf};
2
3use sha2::{Digest, Sha256};
4
5use crate::errors::CoreError;
6use crate::models::{FileReadRecord, FileSearchResult, FilesReadInput, FilesSearchInput};
7
8#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
9#[serde(rename_all = "camelCase")]
10pub struct FileSearchResponse {
11    pub results: Vec<FileSearchResult>,
12    pub warnings: Vec<String>,
13}
14
/// Deepest level `walk` will list. The project root is depth 0; entries found
/// at depth `MAX_DEPTH` are still reported, but sub-directories found there
/// are not entered.
const MAX_DEPTH: usize = 4;

/// Well-known directory names (dependency caches, build output, VCS and
/// editor metadata) that the file search excludes.
const SKIP_DIR_NAMES: &[&str] = &[
    "node_modules",
    ".git",
    "target",
    ".cursor",
    "dist",
    "build",
    ".next",
    ".svn",
    "__pycache__",
    ".idea",
    ".vscode",
];

/// Returns `true` when `name` is exactly (case-sensitively) one of
/// [`SKIP_DIR_NAMES`].
fn should_skip_dir(name: &str) -> bool {
    SKIP_DIR_NAMES.iter().any(|skipped| *skipped == name)
}
34
35fn name_matches_query(file_name: &str, query: &str) -> bool {
36    let q = query.trim();
37    if q.is_empty() {
38        return true;
39    }
40    let ql = q.to_lowercase();
41    let nl = file_name.to_lowercase();
42    nl.contains(&ql) || fuzzy_subsequence(&nl, &ql)
43}
44
/// Returns `true` when every character of `query` occurs in `name` in the
/// same relative order (not necessarily adjacent).
///
/// The comparison is exact per `char`; callers are expected to normalize case
/// beforehand. An empty `query` is a subsequence of any `name`.
fn fuzzy_subsequence(name: &str, query: &str) -> bool {
    // A single cursor over `name` is shared by all query characters: each
    // `any` call consumes up to and including its match, so the next query
    // character can only be found further to the right.
    let mut remaining = name.chars();
    query
        .chars()
        .all(|wanted| remaining.any(|candidate| candidate == wanted))
}
61
62async fn resolve_registered_project_root(
63    db: &sqlx::SqlitePool,
64    project_path: &str,
65) -> crate::Result<PathBuf> {
66    let raw_root = PathBuf::from(project_path);
67    if !raw_root.exists() {
68        return Err(CoreError::Validation("project path does not exist".into()));
69    }
70
71    let root = raw_root
72        .canonicalize()
73        .map_err(|e| CoreError::Validation(format!("invalid project path: {e}")))?;
74
75    if !root.is_dir() {
76        return Err(CoreError::Validation(
77            "project path must be a directory".into(),
78        ));
79    }
80
81    let normalized_root = root.to_string_lossy().replace('\\', "/");
82    let exists: i64 = sqlx::query_scalar!(
83        r#"SELECT COUNT(1) as "n!: i64" FROM projects WHERE path = ?1"#,
84        normalized_root
85    )
86    .fetch_one(db)
87    .await
88    .map_err(|e| CoreError::Internal(format!("failed to validate project path: {e}")))?;
89
90    if exists == 0 {
91        return Err(CoreError::Validation(
92            "project path must belong to a registered project".into(),
93        ));
94    }
95
96    Ok(root)
97}
98
99fn walk(
100    root: &Path,
101    rel: &Path,
102    depth: usize,
103    query: &str,
104    out: &mut Vec<FileSearchResult>,
105    warnings: &mut Vec<String>,
106    limit: usize,
107) -> crate::Result<()> {
108    if out.len() >= limit || depth > MAX_DEPTH {
109        return Ok(());
110    }
111
112    let dir = root.join(rel);
113    let read = match std::fs::read_dir(&dir) {
114        Ok(r) => r,
115        Err(e) => {
116            warnings.push(format!(
117                "Could not read directory {}: {}",
118                dir.to_string_lossy(),
119                e
120            ));
121            return Ok(());
122        }
123    };
124
125    for entry in read {
126        let entry = entry?;
127        let name = entry.file_name().to_string_lossy().to_string();
128        if should_skip_dir(&name) {
129            continue;
130        }
131
132        let rel_path: PathBuf = if rel.as_os_str().is_empty() {
133            PathBuf::from(&name)
134        } else {
135            rel.join(&name)
136        };
137        let rel_display = rel_path.to_string_lossy().replace('\\', "/");
138
139        let is_dir = entry.file_type()?.is_dir();
140
141        if name_matches_query(&name, query) {
142            out.push(FileSearchResult {
143                path: rel_display.clone(),
144                relative_path: rel_display.clone(),
145                is_directory: is_dir,
146            });
147            if out.len() >= limit {
148                return Ok(());
149            }
150        }
151
152        if is_dir && depth < MAX_DEPTH {
153            walk(root, &rel_path, depth + 1, query, out, warnings, limit)?;
154            if out.len() >= limit {
155                return Ok(());
156            }
157        }
158    }
159    Ok(())
160}
161
162pub async fn search(
163    db: &sqlx::SqlitePool,
164    input: FilesSearchInput,
165) -> crate::Result<FileSearchResponse> {
166    let root = resolve_registered_project_root(db, &input.project_path).await?;
167    let limit = usize::try_from(input.limit.unwrap_or(100).max(0)).unwrap_or(0);
168    if limit == 0 {
169        return Ok(FileSearchResponse {
170            results: vec![],
171            warnings: vec![],
172        });
173    }
174    let mut out = Vec::new();
175    let mut warnings = Vec::new();
176    walk(
177        &root,
178        Path::new(""),
179        0,
180        &input.query,
181        &mut out,
182        &mut warnings,
183        limit,
184    )?;
185    Ok(FileSearchResponse {
186        results: out,
187        warnings,
188    })
189}
190
191pub async fn read(db: &sqlx::SqlitePool, input: FilesReadInput) -> crate::Result<FileReadRecord> {
192    let root = resolve_registered_project_root(db, &input.project_path).await?;
193
194    let rel = PathBuf::from(&input.relative_path);
195    if rel.is_absolute() {
196        return Err(CoreError::Validation(
197            "relativePath must be relative".into(),
198        ));
199    }
200    if rel
201        .components()
202        .any(|c| matches!(c, Component::ParentDir | Component::Prefix(_)))
203    {
204        return Err(CoreError::Validation(
205            "relativePath contains invalid components".into(),
206        ));
207    }
208
209    let abs = root.join(&rel);
210    let abs = abs
211        .canonicalize()
212        .map_err(|e| CoreError::Validation(format!("file not found: {e}")))?;
213    if !abs.starts_with(&root) {
214        return Err(CoreError::Validation("path escapes project root".into()));
215    }
216    if !abs.is_file() {
217        return Err(CoreError::Validation("path is not a file".into()));
218    }
219
220    let max_bytes = usize::try_from(
221        input
222            .max_bytes
223            .unwrap_or(256 * 1024)
224            .clamp(1, 2 * 1024 * 1024),
225    )
226    .unwrap_or(256 * 1024);
227    let bytes = std::fs::read(&abs)?;
228    let truncated = bytes.len() > max_bytes;
229    let bytes = if truncated {
230        &bytes[..max_bytes]
231    } else {
232        &bytes[..]
233    };
234
235    // Best-effort UTF-8. If invalid, we still return lossy text so UI can display.
236    let content = String::from_utf8_lossy(bytes).to_string();
237
238    let mut hasher = Sha256::new();
239    hasher.update(bytes);
240    let digest = hasher.finalize();
241    let sha256 = {
242        use std::fmt::Write as _;
243        digest
244            .iter()
245            .fold(String::with_capacity(digest.len() * 2), |mut acc, b| {
246                let _ = write!(&mut acc, "{b:02x}");
247                acc
248            })
249    };
250
251    let all_lines: Vec<&str> = content.lines().collect();
252    let total_lines = i32::try_from(all_lines.len()).unwrap_or(i32::MAX);
253
254    let start = input.start_line.unwrap_or(1).max(1);
255    let end = input.end_line.unwrap_or(total_lines.max(1)).max(start);
256
257    let start_idx = (start - 1) as usize;
258    let end_idx_exclusive = end.min(total_lines) as usize;
259
260    let sliced = if start_idx >= all_lines.len() {
261        String::new()
262    } else {
263        all_lines[start_idx..end_idx_exclusive].join("\n")
264    };
265
266    let language = abs
267        .extension()
268        .and_then(|e| e.to_str())
269        .map(ToOwned::to_owned);
270
271    Ok(FileReadRecord {
272        absolute_path: abs.to_string_lossy().to_string(),
273        relative_path: input.relative_path.replace('\\', "/"),
274        content: sliced,
275        language,
276        line_count: total_lines,
277        truncated,
278        sha256: Some(sha256),
279    })
280}
281
/// Unit tests for the pure name-matching and skip-list helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn should_skip_well_known_build_dirs() {
        assert!(should_skip_dir("node_modules"));
        assert!(should_skip_dir("target"));
        assert!(should_skip_dir(".git"));
        assert!(!should_skip_dir("src"));
        assert!(!should_skip_dir("crates"));
    }

    #[test]
    fn name_matches_empty_query_is_always_true() {
        assert!(name_matches_query("anything.rs", ""));
        assert!(name_matches_query("", ""));
    }

    #[test]
    fn name_matches_exact_substring() {
        assert!(name_matches_query("UserService.ts", "user"));
        assert!(name_matches_query("UserService.ts", "Service"));
        assert!(!name_matches_query("UserService.ts", "admin"));
    }

    #[test]
    fn fuzzy_subsequence_matches_scattered_chars() {
        assert!(fuzzy_subsequence("usrservice", "usrvc"));
        assert!(fuzzy_subsequence("abcde", "ace"));
        assert!(!fuzzy_subsequence("abcde", "aec"));
        assert!(!fuzzy_subsequence("abc", "abcd"));
    }

    #[test]
    fn name_matches_falls_back_to_fuzzy_when_substring_fails() {
        // "usi" never appears contiguously in the lower-cased name
        // "userserviceimpl.ts", so only the subsequence fallback can accept it.
        assert!(!"userserviceimpl.ts".contains("usi"));
        assert!(name_matches_query("UserServiceImpl.ts", "usi"));
    }
}
322}