Skip to main content

search_semantically/
scanner.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
/// Kind of file the scanner recognises, as derived from a path's extension.
///
/// Variants carrying a `#[cfg(feature = "...")]` attribute only exist when the
/// named Cargo feature is enabled; the remaining variants are always compiled.
/// The `Display` and `FromStr` implementations map every variant to and from a
/// lowercase identifier such as `rust` or `markdown`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// Rust source (`.rs`).
    Rust,
    /// TypeScript source (`.ts`).
    #[cfg(feature = "ts-typescript")]
    TypeScript,
    /// TSX source (`.tsx`); gated by the same feature as `TypeScript`.
    #[cfg(feature = "ts-typescript")]
    Tsx,
    /// Python source (`.py`).
    #[cfg(feature = "ts-python")]
    Python,
    /// Go source (`.go`).
    #[cfg(feature = "ts-go")]
    Go,
    /// Java source (`.java`).
    #[cfg(feature = "ts-java")]
    Java,
    /// C source and headers (`.c`, `.h`).
    #[cfg(feature = "ts-c")]
    C,
    /// C++ source and headers (`.cpp`, `.hpp`, `.cc`, `.cxx`, `.hh`, `.hxx`).
    #[cfg(feature = "ts-cpp")]
    Cpp,
    /// Markdown documents (`.md`, `.mdx`).
    Markdown,
    /// YAML documents (`.yml`, `.yaml`).
    Yaml,
    /// JSON documents (`.json`).
    Json,
    /// TOML documents (`.toml`).
    Toml,
    /// Plain-text and configuration files (`.txt`, `.cfg`, `.ini`, `.env`, `.conf`).
    Plaintext,
}
28
29impl std::fmt::Display for FileType {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        match self {
32            FileType::Rust => write!(f, "rust"),
33            #[cfg(feature = "ts-typescript")]
34            FileType::TypeScript => write!(f, "typescript"),
35            #[cfg(feature = "ts-typescript")]
36            FileType::Tsx => write!(f, "tsx"),
37            #[cfg(feature = "ts-python")]
38            FileType::Python => write!(f, "python"),
39            #[cfg(feature = "ts-go")]
40            FileType::Go => write!(f, "go"),
41            #[cfg(feature = "ts-java")]
42            FileType::Java => write!(f, "java"),
43            #[cfg(feature = "ts-c")]
44            FileType::C => write!(f, "c"),
45            #[cfg(feature = "ts-cpp")]
46            FileType::Cpp => write!(f, "cpp"),
47            FileType::Markdown => write!(f, "markdown"),
48            FileType::Yaml => write!(f, "yaml"),
49            FileType::Json => write!(f, "json"),
50            FileType::Toml => write!(f, "toml"),
51            FileType::Plaintext => write!(f, "plaintext"),
52        }
53    }
54}
55
56impl std::str::FromStr for FileType {
57    type Err = String;
58
59    fn from_str(s: &str) -> Result<Self, Self::Err> {
60        match s {
61            "rust" => Ok(FileType::Rust),
62            #[cfg(feature = "ts-typescript")]
63            "typescript" => Ok(FileType::TypeScript),
64            #[cfg(feature = "ts-typescript")]
65            "tsx" => Ok(FileType::Tsx),
66            #[cfg(feature = "ts-python")]
67            "python" => Ok(FileType::Python),
68            #[cfg(feature = "ts-go")]
69            "go" => Ok(FileType::Go),
70            #[cfg(feature = "ts-java")]
71            "java" => Ok(FileType::Java),
72            #[cfg(feature = "ts-c")]
73            "c" => Ok(FileType::C),
74            #[cfg(feature = "ts-cpp")]
75            "cpp" => Ok(FileType::Cpp),
76            "markdown" => Ok(FileType::Markdown),
77            "yaml" => Ok(FileType::Yaml),
78            "json" => Ok(FileType::Json),
79            "toml" => Ok(FileType::Toml),
80            "plaintext" => Ok(FileType::Plaintext),
81            other => Err(format!("Unknown file type: {other}")),
82        }
83    }
84}
85
/// One file selected by `scan_project`.
#[derive(Debug, Clone)]
pub struct ScannedFile {
    /// Path of the file relative to the scanned project root, converted
    /// lossily to a `String` by `scan_project`.
    pub file_path: String,
    /// Type detected from the file's extension.
    pub file_type: FileType,
    /// Last-modified time in seconds since the Unix epoch. `scan_project`
    /// stores `0.0` when the timestamp cannot be read or precedes the epoch.
    pub mtime: f64,
}
92
/// Largest file size, in bytes, that `scan_project` accepts (1 MiB); larger
/// files are skipped.
const MAX_FILE_SIZE: u64 = 1024 * 1024;
94
95fn extension_map() -> HashMap<&'static str, FileType> {
96    let mut m = HashMap::new();
97    m.insert("rs", FileType::Rust);
98    #[cfg(feature = "ts-typescript")]
99    {
100        m.insert("ts", FileType::TypeScript);
101        m.insert("tsx", FileType::Tsx);
102    }
103    #[cfg(feature = "ts-python")]
104    {
105        m.insert("py", FileType::Python);
106    }
107    #[cfg(feature = "ts-go")]
108    {
109        m.insert("go", FileType::Go);
110    }
111    #[cfg(feature = "ts-java")]
112    {
113        m.insert("java", FileType::Java);
114    }
115    #[cfg(feature = "ts-c")]
116    {
117        m.insert("c", FileType::C);
118        m.insert("h", FileType::C);
119    }
120    #[cfg(feature = "ts-cpp")]
121    {
122        m.insert("cpp", FileType::Cpp);
123        m.insert("hpp", FileType::Cpp);
124        m.insert("cc", FileType::Cpp);
125        m.insert("cxx", FileType::Cpp);
126        m.insert("hh", FileType::Cpp);
127        m.insert("hxx", FileType::Cpp);
128    }
129    m.insert("md", FileType::Markdown);
130    m.insert("mdx", FileType::Markdown);
131    m.insert("yml", FileType::Yaml);
132    m.insert("yaml", FileType::Yaml);
133    m.insert("json", FileType::Json);
134    m.insert("toml", FileType::Toml);
135    m.insert("txt", FileType::Plaintext);
136    m.insert("cfg", FileType::Plaintext);
137    m.insert("ini", FileType::Plaintext);
138    m.insert("env", FileType::Plaintext);
139    m.insert("conf", FileType::Plaintext);
140    m
141}
142
143pub fn detect_file_type(file_path: &str) -> Option<FileType> {
144    let path = Path::new(file_path);
145    let ext = path.extension()?.to_str()?.to_lowercase();
146    extension_map().get(&*ext).cloned()
147}
148
149pub fn scan_project(project_root: &Path) -> Vec<ScannedFile> {
150    let mut results = Vec::new();
151    let walker = ignore::WalkBuilder::new(project_root)
152        .hidden(false)
153        .git_ignore(true)
154        .git_global(true)
155        .git_exclude(true)
156        .build();
157
158    for entry in walker.flatten() {
159        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
160            continue;
161        }
162
163        let path = entry.path();
164
165        let file_size = match fs::metadata(path) {
166            Ok(meta) => meta.len(),
167            Err(_) => continue,
168        };
169        if file_size == 0 || file_size > MAX_FILE_SIZE {
170            continue;
171        }
172
173        let relative = match path.strip_prefix(project_root) {
174            Ok(r) => r.to_string_lossy().to_string(),
175            Err(_) => continue,
176        };
177
178        let file_type = match detect_file_type(&relative) {
179            Some(ft) => ft,
180            None => continue,
181        };
182
183        let mtime = fs::metadata(path)
184            .ok()
185            .and_then(|m| m.modified().ok())
186            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
187            .map(|d| d.as_secs_f64())
188            .unwrap_or(0.0);
189
190        results.push(ScannedFile {
191            file_path: relative,
192            file_type,
193            mtime,
194        });
195    }
196
197    results
198}
199
/// Unit tests for extension detection, project scanning and the
/// `FileType` string conversions. Scanning tests run against a fresh
/// temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn detect_rust_file() {
        assert_eq!(detect_file_type("src/main.rs"), Some(FileType::Rust));
    }

    #[test]
    fn detect_markdown_file() {
        assert_eq!(detect_file_type("README.md"), Some(FileType::Markdown));
    }

    #[test]
    fn detect_yaml_file() {
        assert_eq!(detect_file_type("config.yaml"), Some(FileType::Yaml));
        assert_eq!(detect_file_type("config.yml"), Some(FileType::Yaml));
    }

    #[test]
    fn detect_json_file() {
        assert_eq!(detect_file_type("package.json"), Some(FileType::Json));
    }

    #[test]
    fn detect_toml_file() {
        assert_eq!(detect_file_type("Cargo.toml"), Some(FileType::Toml));
    }

    #[test]
    fn detect_is_case_insensitive() {
        assert_eq!(detect_file_type("README.MD"), Some(FileType::Markdown));
        assert_eq!(detect_file_type("src/MAIN.Rs"), Some(FileType::Rust));
    }

    #[test]
    fn detect_unknown_extension_returns_none() {
        assert_eq!(detect_file_type("image.png"), None);
    }

    #[test]
    fn detect_no_extension_returns_none() {
        assert_eq!(detect_file_type("Makefile"), None);
    }

    #[test]
    fn scan_empty_directory() {
        let temp = TempDir::new().expect("temp dir");
        let results = scan_project(temp.path());
        assert!(results.is_empty());
    }

    #[test]
    fn scan_finds_files_with_known_extensions() {
        let temp = TempDir::new().expect("temp dir");
        fs::write(temp.path().join("main.rs"), "fn main() {}").expect("write");
        fs::write(temp.path().join("README.md"), "# Hello").expect("write");

        let results = scan_project(temp.path());
        assert_eq!(results.len(), 2);

        let paths: Vec<&str> = results.iter().map(|f| f.file_path.as_str()).collect();
        assert!(paths.contains(&"main.rs"));
        assert!(paths.contains(&"README.md"));

        // Each entry must carry the type that matches its own extension.
        for file in &results {
            assert_eq!(detect_file_type(&file.file_path), Some(file.file_type.clone()));
        }
    }

    #[test]
    fn scan_skips_empty_files() {
        let temp = TempDir::new().expect("temp dir");
        fs::write(temp.path().join("empty.rs"), "").expect("write");
        fs::write(temp.path().join("main.rs"), "fn main() {}").expect("write");

        let results = scan_project(temp.path());
        assert_eq!(results.len(), 1);
        // The surviving entry must be the non-empty file.
        assert_eq!(results[0].file_path, "main.rs");
    }

    #[test]
    fn scan_enforces_size_limit() {
        let limit = usize::try_from(MAX_FILE_SIZE).expect("limit fits in usize");
        let temp = TempDir::new().expect("temp dir");
        fs::write(temp.path().join("exact.md"), vec![b'x'; limit]).expect("write");
        fs::write(temp.path().join("big.md"), vec![b'x'; limit + 1]).expect("write");

        let results = scan_project(temp.path());
        // A file of exactly the limit is kept; one byte more is skipped.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].file_path, "exact.md");
    }

    #[test]
    fn scan_skips_unknown_extensions() {
        let temp = TempDir::new().expect("temp dir");
        fs::write(temp.path().join("image.png"), "data").expect("write");

        let results = scan_project(temp.path());
        assert!(results.is_empty());
    }

    #[test]
    fn file_type_display_roundtrips() {
        // Cover every variant that exists regardless of enabled features.
        let always_available = [
            FileType::Rust,
            FileType::Markdown,
            FileType::Yaml,
            FileType::Json,
            FileType::Toml,
            FileType::Plaintext,
        ];
        for ft in always_available {
            let s = ft.to_string();
            let parsed: FileType = s.parse().expect("should parse");
            assert_eq!(ft, parsed);
        }
    }

    #[test]
    fn parse_unknown_file_type_is_error() {
        let err = "cobol".parse::<FileType>().unwrap_err();
        assert_eq!(err, "Unknown file type: cobol");
    }
}