Skip to main content

st/
content_detector.rs

//! Content detection engine - "Understanding what's in your directories" - Omni
//! Analyzes directory contents to determine the type of project/collection
3
4use crate::scanner::FileNode;
5use std::collections::HashMap;
6use std::path::Path;
7
8/// Types of content that can be detected in a directory
9#[derive(Debug, Clone, PartialEq)]
10pub enum DirectoryType {
11    /// Software project with language and framework info
12    CodeProject {
13        language: Language,
14        framework: Option<Framework>,
15        has_tests: bool,
16        has_docs: bool,
17    },
18    /// Photo/image collection
19    PhotoCollection {
20        image_count: usize,
21        date_range: Option<(String, String)>,
22        cameras: Vec<String>,
23    },
24    /// Document archive (PDFs, docs, etc.)
25    DocumentArchive {
26        categories: HashMap<String, usize>,
27        total_docs: usize,
28    },
29    /// Media library (videos, audio)
30    MediaLibrary {
31        video_count: usize,
32        audio_count: usize,
33        total_duration: Option<String>,
34        quality: Vec<String>, // e.g., ["1080p", "4K", "720p"]
35    },
36    /// Data science workspace
37    DataScience {
38        notebooks: usize,
39        datasets: usize,
40        languages: Vec<String>,
41    },
42    /// Mixed content or unknown
43    MixedContent {
44        dominant_type: Option<String>,
45        file_types: HashMap<String, usize>,
46        total_files: usize,
47    },
48}
49
/// Programming languages a code project can be attributed to.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that values can be
/// used as `HashMap`/`HashSet` keys (for example to tally projects per
/// language). Every variant payload is a `String`, so both derives are sound.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust.
    Rust,
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C++.
    Cpp,
    /// Ruby.
    Ruby,
    /// Any language without a dedicated variant, identified by name.
    Other(String),
}
62
/// Frameworks that can be reported for a code project.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that values can be
/// used as `HashMap`/`HashSet` keys. Every variant payload is a `String`, so
/// both derives are sound.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Framework {
    // Rust
    /// Actix.
    Actix,
    /// Rocket.
    Rocket,
    /// Tokio.
    Tokio,
    // Python
    /// Django.
    Django,
    /// Flask.
    Flask,
    /// FastAPI.
    FastAPI,
    // JavaScript/TypeScript
    /// React.
    React,
    /// Vue.
    Vue,
    /// Angular.
    Angular,
    /// Next.js.
    NextJs,
    /// Express.
    Express,
    // Other
    /// Any framework without a dedicated variant, identified by name.
    Other(String),
}
82
/// Stateless entry point for directory content detection.
///
/// The type carries no data; it only namespaces the detection functions.
pub struct ContentDetector;
85
86impl ContentDetector {
87    /// Analyze nodes and detect directory type
88    pub fn detect(nodes: &[FileNode], root_path: &Path) -> DirectoryType {
89        // Count file extensions
90        let mut ext_counts: HashMap<String, usize> = HashMap::new();
91        let mut total_files = 0;
92
93        for node in nodes {
94            if !node.is_dir {
95                total_files += 1;
96                if let Some(ext) = node.path.extension().and_then(|e| e.to_str()) {
97                    *ext_counts.entry(ext.to_lowercase()).or_insert(0) += 1;
98                }
99            }
100        }
101
102        // Check for code project indicators
103        if Self::is_code_project(&ext_counts, nodes, root_path) {
104            return Self::analyze_code_project(nodes, root_path, &ext_counts);
105        }
106
107        // Check for photo collection
108        if Self::is_photo_collection(&ext_counts) {
109            return Self::analyze_photo_collection(nodes, &ext_counts);
110        }
111
112        // Check for document archive
113        if Self::is_document_archive(&ext_counts) {
114            return Self::analyze_document_archive(nodes);
115        }
116
117        // Check for media library
118        if Self::is_media_library(&ext_counts) {
119            return Self::analyze_media_library(nodes, &ext_counts);
120        }
121
122        // Check for data science
123        if Self::is_data_science(&ext_counts) {
124            return Self::analyze_data_science(&ext_counts);
125        }
126
127        // Default to mixed content
128        DirectoryType::MixedContent {
129            dominant_type: Self::get_dominant_type(&ext_counts),
130            file_types: ext_counts,
131            total_files,
132        }
133    }
134
135    fn is_code_project(
136        ext_counts: &HashMap<String, usize>,
137        nodes: &[FileNode],
138        _root_path: &Path,
139    ) -> bool {
140        // Check for common code file extensions
141        let code_extensions = [
142            "rs", "py", "js", "ts", "go", "java", "cpp", "c", "rb", "php",
143        ];
144        let code_files: usize = code_extensions
145            .iter()
146            .filter_map(|ext| ext_counts.get(*ext))
147            .sum();
148
149        // Check for project files
150        let has_project_files = nodes.iter().any(|n| {
151            let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
152            matches!(
153                name,
154                "Cargo.toml"
155                    | "package.json"
156                    | "requirements.txt"
157                    | "go.mod"
158                    | "pom.xml"
159                    | "Gemfile"
160            )
161        });
162
163        code_files > 5 || has_project_files
164    }
165
166    fn analyze_code_project(
167        nodes: &[FileNode],
168        _root_path: &Path,
169        ext_counts: &HashMap<String, usize>,
170    ) -> DirectoryType {
171        // Detect primary language
172        let language = if ext_counts.contains_key("rs") {
173            Language::Rust
174        } else if ext_counts.contains_key("py") {
175            Language::Python
176        } else if ext_counts.contains_key("ts") {
177            Language::TypeScript
178        } else if ext_counts.contains_key("js") {
179            Language::JavaScript
180        } else if ext_counts.contains_key("go") {
181            Language::Go
182        } else if ext_counts.contains_key("java") {
183            Language::Java
184        } else if ext_counts.contains_key("cpp") || ext_counts.contains_key("cc") {
185            Language::Cpp
186        } else if ext_counts.contains_key("rb") {
187            Language::Ruby
188        } else {
189            Language::Other("Unknown".to_string())
190        };
191
192        // Detect framework
193        let framework = Self::detect_framework(nodes, &language);
194
195        // Check for tests and docs
196        let has_tests = nodes.iter().any(|n| {
197            let path_str = n.path.to_string_lossy();
198            path_str.contains("test") || path_str.contains("spec")
199        });
200
201        let has_docs = nodes.iter().any(|n| {
202            let name = n.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
203            let path_str = n.path.to_string_lossy();
204            name.ends_with(".md") || path_str.contains("docs/")
205        });
206
207        DirectoryType::CodeProject {
208            language,
209            framework,
210            has_tests,
211            has_docs,
212        }
213    }
214
215    fn detect_framework(nodes: &[FileNode], language: &Language) -> Option<Framework> {
216        for node in nodes {
217            let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
218
219            match language {
220                Language::Rust => {
221                    // Check Cargo.toml for dependencies
222                    if name == "Cargo.toml" {
223                        // In real implementation, would read file and check deps
224                        return None; // Placeholder
225                    }
226                }
227                Language::JavaScript | Language::TypeScript => {
228                    if name == "package.json" {
229                        // Would check for React, Vue, etc. in dependencies
230                        return None; // Placeholder
231                    }
232                }
233                Language::Python => {
234                    if name == "requirements.txt" || name == "pyproject.toml" {
235                        // Would check for Django, Flask, etc.
236                        return None; // Placeholder
237                    }
238                }
239                _ => {}
240            }
241        }
242        None
243    }
244
/// Report whether the extension tally describes a photo collection.
///
/// Adds up the counts of the known image extensions in `ext_counts` (keys are
/// expected to be lower-case) and returns `true` when the total exceeds ten.
fn is_photo_collection(ext_counts: &HashMap<String, usize>) -> bool {
    const IMAGE_EXTENSIONS: [&str; 8] = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"];

    let mut image_total: usize = 0;
    for ext in IMAGE_EXTENSIONS.iter() {
        image_total += ext_counts.get(*ext).copied().unwrap_or(0);
    }
    image_total > 10
}
254
255    fn analyze_photo_collection(
256        _nodes: &[FileNode],
257        ext_counts: &HashMap<String, usize>,
258    ) -> DirectoryType {
259        let image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "raw", "dng", "heic"];
260        let image_count: usize = image_extensions
261            .iter()
262            .filter_map(|ext| ext_counts.get(*ext))
263            .sum();
264
265        DirectoryType::PhotoCollection {
266            image_count,
267            date_range: None, // Would need EXIF parsing
268            cameras: vec![],  // Would need EXIF parsing
269        }
270    }
271
/// Report whether the extension tally describes a document archive.
///
/// Adds up the counts of the known document extensions in `ext_counts` (keys
/// are expected to be lower-case) and returns `true` when the total exceeds
/// ten.
fn is_document_archive(ext_counts: &HashMap<String, usize>) -> bool {
    const DOC_EXTENSIONS: [&str; 6] = ["pdf", "doc", "docx", "txt", "odt", "rtf"];

    let document_total = DOC_EXTENSIONS
        .iter()
        .fold(0usize, |acc, ext| acc + ext_counts.get(*ext).copied().unwrap_or(0));
    document_total > 10
}
281
282    fn analyze_document_archive(nodes: &[FileNode]) -> DirectoryType {
283        let mut categories = HashMap::new();
284
285        // Simple categorization based on filename patterns
286        for node in nodes {
287            if !node.is_dir {
288                let name = node
289                    .path
290                    .file_name()
291                    .and_then(|n| n.to_str())
292                    .unwrap_or("")
293                    .to_lowercase();
294
295                let category = if name.contains("invoice")
296                    || name.contains("receipt")
297                    || name.contains("bank")
298                {
299                    "Financial"
300                } else if name.contains("homework")
301                    || name.contains("assignment")
302                    || name.contains("grade")
303                {
304                    "School"
305                } else if name.contains("resume") || name.contains("cv") || name.contains("letter")
306                {
307                    "Personal"
308                } else {
309                    "Other"
310                };
311
312                *categories.entry(category.to_string()).or_insert(0) += 1;
313            }
314        }
315
316        let total_docs = categories.values().sum();
317
318        DirectoryType::DocumentArchive {
319            categories,
320            total_docs,
321        }
322    }
323
/// Report whether the extension tally describes a media library.
///
/// Adds up the counts of the known video and audio extensions in `ext_counts`
/// (keys are expected to be lower-case) and returns `true` when the combined
/// total exceeds ten.
fn is_media_library(ext_counts: &HashMap<String, usize>) -> bool {
    const VIDEO_EXTENSIONS: [&str; 6] = ["mp4", "avi", "mkv", "mov", "wmv", "flv"];
    const AUDIO_EXTENSIONS: [&str; 6] = ["mp3", "wav", "flac", "aac", "ogg", "m4a"];

    let count_of = |extensions: &[&str]| -> usize {
        extensions
            .iter()
            .filter_map(|ext| ext_counts.get(*ext))
            .sum()
    };

    let videos = count_of(&VIDEO_EXTENSIONS);
    let audios = count_of(&AUDIO_EXTENSIONS);
    videos + audios > 10
}
339
340    fn analyze_media_library(
341        _nodes: &[FileNode],
342        ext_counts: &HashMap<String, usize>,
343    ) -> DirectoryType {
344        let video_extensions = ["mp4", "avi", "mkv", "mov", "wmv", "flv"];
345        let audio_extensions = ["mp3", "wav", "flac", "aac", "ogg", "m4a"];
346
347        let video_count: usize = video_extensions
348            .iter()
349            .filter_map(|ext| ext_counts.get(*ext))
350            .sum();
351        let audio_count: usize = audio_extensions
352            .iter()
353            .filter_map(|ext| ext_counts.get(*ext))
354            .sum();
355
356        DirectoryType::MediaLibrary {
357            video_count,
358            audio_count,
359            total_duration: None, // Would need media parsing
360            quality: vec![],      // TODO: Extract quality from filenames (e.g., "movie_1080p.mp4")
361        }
362    }
363
/// Report whether the extension tally describes a data science workspace.
///
/// True when at least one notebook (`ipynb`), Parquet or Feather file is
/// present, or when there are more than five CSV files.
fn is_data_science(ext_counts: &HashMap<String, usize>) -> bool {
    let present = |ext: &str| ext_counts.contains_key(ext);
    let csv_files = ext_counts.get("csv").copied().unwrap_or(0);

    present("ipynb") || csv_files > 5 || present("parquet") || present("feather")
}
369
370    fn analyze_data_science(ext_counts: &HashMap<String, usize>) -> DirectoryType {
371        let notebooks = ext_counts.get("ipynb").copied().unwrap_or(0);
372        let datasets = ext_counts.get("csv").copied().unwrap_or(0)
373            + ext_counts.get("parquet").copied().unwrap_or(0)
374            + ext_counts.get("feather").copied().unwrap_or(0);
375
376        let mut languages = vec![];
377        if ext_counts.contains_key("py") {
378            languages.push("Python".to_string());
379        }
380        if ext_counts.contains_key("r") {
381            languages.push("R".to_string());
382        }
383        if ext_counts.contains_key("jl") {
384            languages.push("Julia".to_string());
385        }
386
387        DirectoryType::DataScience {
388            notebooks,
389            datasets,
390            languages,
391        }
392    }
393
/// Return the most common extension in `ext_counts`, or `None` when the map
/// is empty.
///
/// When several extensions share the highest count, the lexicographically
/// smallest one is returned. Without this tie-break the answer would depend
/// on `HashMap` iteration order and could change from run to run.
fn get_dominant_type(ext_counts: &HashMap<String, usize>) -> Option<String> {
    ext_counts
        .iter()
        .max_by(|(ext_a, count_a), (ext_b, count_b)| {
            // Higher count wins; on equal counts the smaller extension ranks
            // higher, hence the reversed comparison.
            count_a.cmp(count_b).then_with(|| ext_b.cmp(ext_a))
        })
        .map(|(ext, _)| ext.clone())
}
400}