1use crate::scanner::FileNode;
5use std::collections::HashMap;
6use std::path::Path;
7
8#[derive(Debug, Clone, PartialEq)]
10pub enum DirectoryType {
11 CodeProject {
13 language: Language,
14 framework: Option<Framework>,
15 has_tests: bool,
16 has_docs: bool,
17 },
18 PhotoCollection {
20 image_count: usize,
21 date_range: Option<(String, String)>,
22 cameras: Vec<String>,
23 },
24 DocumentArchive {
26 categories: HashMap<String, usize>,
27 total_docs: usize,
28 },
29 MediaLibrary {
31 video_count: usize,
32 audio_count: usize,
33 total_duration: Option<String>,
34 quality: Vec<String>, },
36 DataScience {
38 notebooks: usize,
39 datasets: usize,
40 languages: Vec<String>,
41 },
42 MixedContent {
44 dominant_type: Option<String>,
45 file_types: HashMap<String, usize>,
46 total_files: usize,
47 },
48}
49
/// Programming language attributed to a code project.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used
/// as `HashMap` keys or stored in a `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust.
    Rust,
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C++.
    Cpp,
    /// Ruby.
    Ruby,
    /// Any language without a dedicated variant, carried as a free-form label.
    Other(String),
}
62
/// Framework or major library a code project is built on.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used
/// as `HashMap` keys or stored in a `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Framework {
    // Rust ecosystem.
    /// Actix.
    Actix,
    /// Rocket.
    Rocket,
    /// Tokio.
    Tokio,
    // Python ecosystem.
    /// Django.
    Django,
    /// Flask.
    Flask,
    /// FastAPI.
    FastAPI,
    // JavaScript / TypeScript ecosystem.
    /// React.
    React,
    /// Vue.
    Vue,
    /// Angular.
    Angular,
    /// Next.js.
    NextJs,
    /// Express.
    Express,
    /// Any framework without a dedicated variant, carried as a free-form label.
    Other(String),
}
82
/// Stateless classifier that inspects a list of scanned file nodes and
/// decides what kind of directory they describe.
///
/// The type carries no data; its functionality is exposed through associated
/// functions such as `ContentDetector::detect`.
pub struct ContentDetector;
85
86impl ContentDetector {
87 pub fn detect(nodes: &[FileNode], root_path: &Path) -> DirectoryType {
89 let mut ext_counts: HashMap<String, usize> = HashMap::new();
91 let mut total_files = 0;
92
93 for node in nodes {
94 if !node.is_dir {
95 total_files += 1;
96 if let Some(ext) = node.path.extension().and_then(|e| e.to_str()) {
97 *ext_counts.entry(ext.to_lowercase()).or_insert(0) += 1;
98 }
99 }
100 }
101
102 if Self::is_code_project(&ext_counts, nodes, root_path) {
104 return Self::analyze_code_project(nodes, root_path, &ext_counts);
105 }
106
107 if Self::is_photo_collection(&ext_counts) {
109 return Self::analyze_photo_collection(nodes, &ext_counts);
110 }
111
112 if Self::is_document_archive(&ext_counts) {
114 return Self::analyze_document_archive(nodes);
115 }
116
117 if Self::is_media_library(&ext_counts) {
119 return Self::analyze_media_library(nodes, &ext_counts);
120 }
121
122 if Self::is_data_science(&ext_counts) {
124 return Self::analyze_data_science(&ext_counts);
125 }
126
127 DirectoryType::MixedContent {
129 dominant_type: Self::get_dominant_type(&ext_counts),
130 file_types: ext_counts,
131 total_files,
132 }
133 }
134
135 fn is_code_project(
136 ext_counts: &HashMap<String, usize>,
137 nodes: &[FileNode],
138 _root_path: &Path,
139 ) -> bool {
140 let code_extensions = [
142 "rs", "py", "js", "ts", "go", "java", "cpp", "c", "rb", "php",
143 ];
144 let code_files: usize = code_extensions
145 .iter()
146 .filter_map(|ext| ext_counts.get(*ext))
147 .sum();
148
149 let has_project_files = nodes.iter().any(|n| {
151 let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
152 matches!(
153 name,
154 "Cargo.toml"
155 | "package.json"
156 | "requirements.txt"
157 | "go.mod"
158 | "pom.xml"
159 | "Gemfile"
160 )
161 });
162
163 code_files > 5 || has_project_files
164 }
165
166 fn analyze_code_project(
167 nodes: &[FileNode],
168 _root_path: &Path,
169 ext_counts: &HashMap<String, usize>,
170 ) -> DirectoryType {
171 let language = if ext_counts.contains_key("rs") {
173 Language::Rust
174 } else if ext_counts.contains_key("py") {
175 Language::Python
176 } else if ext_counts.contains_key("ts") {
177 Language::TypeScript
178 } else if ext_counts.contains_key("js") {
179 Language::JavaScript
180 } else if ext_counts.contains_key("go") {
181 Language::Go
182 } else if ext_counts.contains_key("java") {
183 Language::Java
184 } else if ext_counts.contains_key("cpp") || ext_counts.contains_key("cc") {
185 Language::Cpp
186 } else if ext_counts.contains_key("rb") {
187 Language::Ruby
188 } else {
189 Language::Other("Unknown".to_string())
190 };
191
192 let framework = Self::detect_framework(nodes, &language);
194
195 let has_tests = nodes.iter().any(|n| {
197 let path_str = n.path.to_string_lossy();
198 path_str.contains("test") || path_str.contains("spec")
199 });
200
201 let has_docs = nodes.iter().any(|n| {
202 let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
203 let path_str = n.path.to_string_lossy();
204 name.ends_with(".md") || path_str.contains("docs/")
205 });
206
207 DirectoryType::CodeProject {
208 language,
209 framework,
210 has_tests,
211 has_docs,
212 }
213 }
214
215 fn detect_framework(nodes: &[FileNode], language: &Language) -> Option<Framework> {
216 for node in nodes {
217 let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
218
219 match language {
220 Language::Rust => {
221 if name == "Cargo.toml" {
223 return None; }
226 }
227 Language::JavaScript | Language::TypeScript => {
228 if name == "package.json" {
229 return None; }
232 }
233 Language::Python => {
234 if name == "requirements.txt" || name == "pyproject.toml" {
235 return None; }
238 }
239 _ => {}
240 }
241 }
242 None
243 }
244
245 fn is_photo_collection(ext_counts: &HashMap<String, usize>) -> bool {
246 let image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"];
247 let image_files: usize = image_extensions
248 .iter()
249 .filter_map(|ext| ext_counts.get(*ext))
250 .sum();
251
252 image_files > 10
253 }
254
255 fn analyze_photo_collection(
256 _nodes: &[FileNode],
257 ext_counts: &HashMap<String, usize>,
258 ) -> DirectoryType {
259 let image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"];
260 let image_count: usize = image_extensions
261 .iter()
262 .filter_map(|ext| ext_counts.get(*ext))
263 .sum();
264
265 DirectoryType::PhotoCollection {
266 image_count,
267 date_range: None, cameras: vec![], }
270 }
271
272 fn is_document_archive(ext_counts: &HashMap<String, usize>) -> bool {
273 let doc_extensions = ["pdf", "doc", "docx", "txt", "odt", "rtf"];
274 let doc_files: usize = doc_extensions
275 .iter()
276 .filter_map(|ext| ext_counts.get(*ext))
277 .sum();
278
279 doc_files > 10
280 }
281
282 fn analyze_document_archive(nodes: &[FileNode]) -> DirectoryType {
283 let mut categories = HashMap::new();
284
285 for node in nodes {
287 if !node.is_dir {
288 let name = node
289 .path
290 .file_name()
291 .and_then(|n| n.to_str())
292 .unwrap_or("")
293 .to_lowercase();
294
295 let category = if name.contains("invoice")
296 || name.contains("receipt")
297 || name.contains("bank")
298 {
299 "Financial"
300 } else if name.contains("homework")
301 || name.contains("assignment")
302 || name.contains("grade")
303 {
304 "School"
305 } else if name.contains("resume") || name.contains("cv") || name.contains("letter")
306 {
307 "Personal"
308 } else {
309 "Other"
310 };
311
312 *categories.entry(category.to_string()).or_insert(0) += 1;
313 }
314 }
315
316 let total_docs = categories.values().sum();
317
318 DirectoryType::DocumentArchive {
319 categories,
320 total_docs,
321 }
322 }
323
324 fn is_media_library(ext_counts: &HashMap<String, usize>) -> bool {
325 let video_extensions = ["mp4", "avi", "mkv", "mov", "wmv", "flv"];
326 let audio_extensions = ["mp3", "wav", "flac", "aac", "ogg", "m4a"];
327
328 let video_files: usize = video_extensions
329 .iter()
330 .filter_map(|ext| ext_counts.get(*ext))
331 .sum();
332 let audio_files: usize = audio_extensions
333 .iter()
334 .filter_map(|ext| ext_counts.get(*ext))
335 .sum();
336
337 video_files + audio_files > 10
338 }
339
340 fn analyze_media_library(
341 _nodes: &[FileNode],
342 ext_counts: &HashMap<String, usize>,
343 ) -> DirectoryType {
344 let video_extensions = ["mp4", "avi", "mkv", "mov", "wmv", "flv"];
345 let audio_extensions = ["mp3", "wav", "flac", "aac", "ogg", "m4a"];
346
347 let video_count: usize = video_extensions
348 .iter()
349 .filter_map(|ext| ext_counts.get(*ext))
350 .sum();
351 let audio_count: usize = audio_extensions
352 .iter()
353 .filter_map(|ext| ext_counts.get(*ext))
354 .sum();
355
356 DirectoryType::MediaLibrary {
357 video_count,
358 audio_count,
359 total_duration: None, quality: vec![], }
362 }
363
364 fn is_data_science(ext_counts: &HashMap<String, usize>) -> bool {
365 ext_counts.contains_key("ipynb")
366 || (ext_counts.contains_key("csv") && ext_counts["csv"] > 5)
367 || (ext_counts.contains_key("parquet") || ext_counts.contains_key("feather"))
368 }
369
370 fn analyze_data_science(ext_counts: &HashMap<String, usize>) -> DirectoryType {
371 let notebooks = ext_counts.get("ipynb").copied().unwrap_or(0);
372 let datasets = ext_counts.get("csv").copied().unwrap_or(0)
373 + ext_counts.get("parquet").copied().unwrap_or(0)
374 + ext_counts.get("feather").copied().unwrap_or(0);
375
376 let mut languages = vec![];
377 if ext_counts.contains_key("py") {
378 languages.push("Python".to_string());
379 }
380 if ext_counts.contains_key("r") {
381 languages.push("R".to_string());
382 }
383 if ext_counts.contains_key("jl") {
384 languages.push("Julia".to_string());
385 }
386
387 DirectoryType::DataScience {
388 notebooks,
389 datasets,
390 languages,
391 }
392 }
393
394 fn get_dominant_type(ext_counts: &HashMap<String, usize>) -> Option<String> {
395 ext_counts
396 .iter()
397 .max_by_key(|(_, count)| *count)
398 .map(|(ext, _)| ext.clone())
399 }
400}