Skip to main content

rust_memex/tui/indexer/
files.rs

1//! File collection helpers for the data setup step.
2
3use std::path::{Path, PathBuf};
4
5use anyhow::{Result, anyhow};
6use walkdir::WalkDir;
7
/// File extensions (lowercase, without the leading dot) that are eligible for indexing.
///
/// Callers are expected to lowercase a candidate extension before looking it up here.
pub const SUPPORTED_EXTENSIONS: &[&str] = &[
    // Plain text and lightweight markup
    "txt", "md", "markdown", "rst", "org",
    // Structured data and configuration
    "json", "yaml", "yml", "toml", "xml",
    // Programming languages
    "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
    "swift", "kt", "scala",
    // Shell scripts
    "sh", "bash", "zsh", "fish",
    // Query languages
    "sql", "graphql",
    // Web markup and stylesheets
    "html", "css", "scss", "sass", "less",
    // Documents
    "pdf",
];
14
15/// Collect files from a directory for indexing.
16pub fn collect_indexable_files(dir_path: &Path) -> Result<Vec<PathBuf>> {
17    let mut files = Vec::new();
18
19    if !dir_path.exists() {
20        return Err(anyhow!("Directory does not exist: {}", dir_path.display()));
21    }
22
23    if !dir_path.is_dir() {
24        return Err(anyhow!("Path is not a directory: {}", dir_path.display()));
25    }
26
27    for entry in WalkDir::new(dir_path)
28        .follow_links(true)
29        .into_iter()
30        .filter_map(|entry| entry.ok())
31    {
32        let path = entry.path();
33
34        if path
35            .file_name()
36            .and_then(|name| name.to_str())
37            .map(|name| name.starts_with('.'))
38            .unwrap_or(false)
39        {
40            continue;
41        }
42
43        if path.is_dir() {
44            continue;
45        }
46
47        if let Some(ext) = path.extension().and_then(|ext| ext.to_str())
48            && SUPPORTED_EXTENSIONS.contains(&ext.to_lowercase().as_str())
49        {
50            files.push(path.to_path_buf());
51        }
52    }
53
54    files.sort();
55    Ok(files)
56}
57
58/// Validate a user-provided path with the repo's path safety rules.
59pub fn validate_path(path: &str) -> Result<PathBuf> {
60    use crate::path_utils::sanitize_existing_path;
61
62    if path.trim().is_empty() {
63        return Err(anyhow!("Path cannot be empty"));
64    }
65
66    sanitize_existing_path(path)
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the empty string and whitespace-only input must be refused.
    #[test]
    fn validate_path_rejects_empty_values() {
        for blank in ["", "   "] {
            assert!(validate_path(blank).is_err());
        }
    }
}