Skip to main content

st/smart/
context.rs

1//! 🧠 Context Analysis Engine
2//!
3//! This module provides intelligent context understanding for Smart Tools.
4//! It analyzes user tasks, file content, and project structure to determine
5//! relevance and priority for maximum token efficiency.
6
7use super::{FocusArea, RelevanceScore, TaskContext};
8use crate::scanner::{FileCategory, FileNode};
9use std::collections::HashMap;
10
11/// 🎯 Context analyzer that understands user intent and task focus
12pub struct ContextAnalyzer {
13    /// Keyword mappings for different focus areas
14    focus_keywords: HashMap<FocusArea, Vec<String>>,
15    /// File type relevance scores
16    file_type_scores: HashMap<FileCategory, f32>,
17}
18
19impl ContextAnalyzer {
20    /// Create a new context analyzer with default mappings
21    pub fn new() -> Self {
22        let mut analyzer = Self {
23            focus_keywords: HashMap::new(),
24            file_type_scores: HashMap::new(),
25        };
26
27        analyzer.initialize_focus_keywords();
28        analyzer.initialize_file_type_scores();
29        analyzer
30    }
31
32    /// 🔍 Analyze task context from natural language description
33    pub fn analyze_task(&self, task_description: &str) -> TaskContext {
34        let task_lower = task_description.to_lowercase();
35        let mut focus_areas = Vec::new();
36        let mut relevance_threshold = 0.6;
37
38        // Detect focus areas from task description
39        for (focus_area, keywords) in &self.focus_keywords {
40            for keyword in keywords {
41                if task_lower.contains(keyword) && !focus_areas.contains(focus_area) {
42                    focus_areas.push(focus_area.clone());
43                }
44            }
45        }
46
47        // Adjust relevance threshold based on task specificity
48        if focus_areas.len() == 1 {
49            relevance_threshold = 0.8; // Very specific task
50        } else if focus_areas.len() > 4 {
51            relevance_threshold = 0.5; // Broad task
52        }
53
54        // Default focus areas if none detected
55        if focus_areas.is_empty() {
56            focus_areas = vec![FocusArea::API, FocusArea::Configuration];
57        }
58
59        TaskContext {
60            task: task_description.to_string(),
61            focus_areas,
62            relevance_threshold,
63            max_results: Some(50),
64        }
65    }
66
67    /// 📊 Score file relevance based on task context
68    pub fn score_file_relevance(
69        &self,
70        file_node: &FileNode,
71        context: &TaskContext,
72    ) -> RelevanceScore {
73        let mut score: f32 = 0.0;
74        let mut reasons = Vec::new();
75        let mut focus_matches = Vec::new();
76
77        // Base score from file category
78        if let Some(type_score) = self.file_type_scores.get(&file_node.category) {
79            score += type_score;
80            reasons.push(format!("File category {:?} relevance", file_node.category));
81        }
82
83        // Score based on filename and path
84        let file_path = file_node.path.to_string_lossy().to_lowercase();
85        let file_name = file_node
86            .path
87            .file_name()
88            .and_then(|name| name.to_str())
89            .unwrap_or("")
90            .to_lowercase();
91
92        for focus_area in &context.focus_areas {
93            let empty_vec = Vec::new();
94            let keywords = self.focus_keywords.get(focus_area).unwrap_or(&empty_vec);
95            let mut area_score = 0.0;
96
97            for keyword in keywords {
98                // Check filename
99                if file_name.contains(keyword) {
100                    area_score += 0.3;
101                    reasons.push(format!("Filename contains '{}'", keyword));
102                }
103
104                // Check full path
105                if file_path.contains(keyword) {
106                    area_score += 0.2;
107                    reasons.push(format!("Path contains '{}'", keyword));
108                }
109            }
110
111            if area_score > 0.0 {
112                score += area_score;
113                focus_matches.push(focus_area.clone());
114            }
115        }
116
117        // Boost score for common important files
118        if self.is_important_file(&file_name) {
119            score += 0.4;
120            reasons.push("Important project file".to_string());
121        }
122
123        // Normalize score to 0.0-1.0 range
124        score = score.min(1.0);
125
126        RelevanceScore {
127            score,
128            reasons,
129            focus_matches,
130        }
131    }
132
133    /// 📁 Score directory relevance based on contents and context
134    pub fn score_directory_relevance(
135        &self,
136        dir_node: &FileNode,
137        context: &TaskContext,
138    ) -> RelevanceScore {
139        let mut score: f32 = 0.0;
140        let mut reasons = Vec::new();
141        let mut focus_matches = Vec::new();
142
143        let dir_name = dir_node
144            .path
145            .file_name()
146            .and_then(|name| name.to_str())
147            .unwrap_or("")
148            .to_lowercase();
149        let dir_path = dir_node.path.to_string_lossy().to_lowercase();
150
151        // Score based on directory name and common patterns
152        for focus_area in &context.focus_areas {
153            let empty_vec = Vec::new();
154            let keywords = self.focus_keywords.get(focus_area).unwrap_or(&empty_vec);
155
156            for keyword in keywords {
157                if dir_name.contains(keyword) || dir_path.contains(keyword) {
158                    score += 0.4;
159                    reasons.push(format!("Directory name/path contains '{}'", keyword));
160                    if !focus_matches.contains(focus_area) {
161                        focus_matches.push(focus_area.clone());
162                    }
163                }
164            }
165        }
166
167        // Boost score for important directories
168        if self.is_important_directory(&dir_name) {
169            score += 0.3;
170            reasons.push("Important project directory".to_string());
171        }
172
173        // Penalize common unimportant directories
174        if self.is_unimportant_directory(&dir_name) {
175            score *= 0.2;
176            reasons.push("Low-priority directory".to_string());
177        }
178
179        score = score.min(1.0);
180
181        RelevanceScore {
182            score,
183            reasons,
184            focus_matches,
185        }
186    }
187
188    /// Initialize focus area keyword mappings
189    fn initialize_focus_keywords(&mut self) {
190        // Authentication keywords
191        self.focus_keywords.insert(
192            FocusArea::Authentication,
193            vec![
194                "auth",
195                "login",
196                "password",
197                "token",
198                "session",
199                "jwt",
200                "oauth",
201                "signin",
202                "signup",
203                "credential",
204                "authenticate",
205                "authorize",
206            ]
207            .into_iter()
208            .map(String::from)
209            .collect(),
210        );
211
212        // API keywords
213        self.focus_keywords.insert(
214            FocusArea::API,
215            vec![
216                "api",
217                "endpoint",
218                "route",
219                "handler",
220                "request",
221                "response",
222                "http",
223                "rest",
224                "graphql",
225                "controller",
226                "service",
227                "client",
228            ]
229            .into_iter()
230            .map(String::from)
231            .collect(),
232        );
233
234        // Database keywords
235        self.focus_keywords.insert(
236            FocusArea::Database,
237            vec![
238                "db",
239                "database",
240                "sql",
241                "query",
242                "table",
243                "schema",
244                "migration",
245                "model",
246                "entity",
247                "repository",
248                "dao",
249                "orm",
250            ]
251            .into_iter()
252            .map(String::from)
253            .collect(),
254        );
255
256        // Frontend keywords
257        self.focus_keywords.insert(
258            FocusArea::Frontend,
259            vec![
260                "ui",
261                "component",
262                "react",
263                "vue",
264                "angular",
265                "html",
266                "css",
267                "js",
268                "frontend",
269                "client",
270                "view",
271                "template",
272                "style",
273            ]
274            .into_iter()
275            .map(String::from)
276            .collect(),
277        );
278
279        // Backend keywords
280        self.focus_keywords.insert(
281            FocusArea::Backend,
282            vec![
283                "server",
284                "service",
285                "controller",
286                "model",
287                "business",
288                "logic",
289                "backend",
290                "core",
291                "engine",
292                "processor",
293            ]
294            .into_iter()
295            .map(String::from)
296            .collect(),
297        );
298
299        // Testing keywords
300        self.focus_keywords.insert(
301            FocusArea::Testing,
302            vec![
303                "test",
304                "spec",
305                "mock",
306                "assert",
307                "expect",
308                "unit",
309                "integration",
310                "e2e",
311                "fixture",
312                "stub",
313                "spy",
314            ]
315            .into_iter()
316            .map(String::from)
317            .collect(),
318        );
319
320        // Configuration keywords
321        self.focus_keywords.insert(
322            FocusArea::Configuration,
323            vec![
324                "config",
325                "env",
326                "settings",
327                "properties",
328                "yaml",
329                "json",
330                "toml",
331                "ini",
332                "conf",
333                "cfg",
334                "setup",
335            ]
336            .into_iter()
337            .map(String::from)
338            .collect(),
339        );
340
341        // Security keywords
342        self.focus_keywords.insert(
343            FocusArea::Security,
344            vec![
345                "security",
346                "vulnerability",
347                "sanitize",
348                "validate",
349                "encrypt",
350                "hash",
351                "secure",
352                "crypto",
353                "ssl",
354                "tls",
355                "cert",
356            ]
357            .into_iter()
358            .map(String::from)
359            .collect(),
360        );
361
362        // Performance keywords
363        self.focus_keywords.insert(
364            FocusArea::Performance,
365            vec![
366                "performance",
367                "optimize",
368                "cache",
369                "memory",
370                "cpu",
371                "benchmark",
372                "perf",
373                "speed",
374                "fast",
375                "efficient",
376            ]
377            .into_iter()
378            .map(String::from)
379            .collect(),
380        );
381
382        // Documentation keywords
383        self.focus_keywords.insert(
384            FocusArea::Documentation,
385            vec![
386                "doc",
387                "readme",
388                "comment",
389                "documentation",
390                "guide",
391                "manual",
392                "help",
393                "tutorial",
394                "example",
395            ]
396            .into_iter()
397            .map(String::from)
398            .collect(),
399        );
400    }
401
402    /// Initialize file category relevance scores
403    fn initialize_file_type_scores(&mut self) {
404        // High relevance programming languages
405        self.file_type_scores.insert(FileCategory::Rust, 0.9);
406        self.file_type_scores.insert(FileCategory::Python, 0.8);
407        self.file_type_scores.insert(FileCategory::JavaScript, 0.8);
408        self.file_type_scores.insert(FileCategory::TypeScript, 0.8);
409        self.file_type_scores.insert(FileCategory::Go, 0.8);
410        self.file_type_scores.insert(FileCategory::Java, 0.8);
411        self.file_type_scores.insert(FileCategory::C, 0.7);
412        self.file_type_scores.insert(FileCategory::Cpp, 0.7);
413
414        // Configuration and markup files
415        self.file_type_scores.insert(FileCategory::Json, 0.6);
416        self.file_type_scores.insert(FileCategory::Yaml, 0.6);
417        self.file_type_scores.insert(FileCategory::Toml, 0.6);
418        self.file_type_scores.insert(FileCategory::Markdown, 0.6);
419        self.file_type_scores.insert(FileCategory::Html, 0.5);
420        self.file_type_scores.insert(FileCategory::Css, 0.5);
421
422        // Build and system files
423        self.file_type_scores.insert(FileCategory::Makefile, 0.5);
424        self.file_type_scores.insert(FileCategory::Dockerfile, 0.5);
425        self.file_type_scores.insert(FileCategory::GitConfig, 0.4);
426
427        // Lower relevance files
428        self.file_type_scores.insert(FileCategory::Archive, 0.2);
429        self.file_type_scores.insert(FileCategory::Image, 0.2);
430        self.file_type_scores.insert(FileCategory::Video, 0.1);
431        self.file_type_scores.insert(FileCategory::Audio, 0.1);
432        self.file_type_scores.insert(FileCategory::Binary, 0.2);
433        self.file_type_scores.insert(FileCategory::Unknown, 0.3);
434    }
435
436    /// Check if file is commonly important
437    fn is_important_file(&self, filename: &str) -> bool {
438        matches!(
439            filename,
440            "readme.md"
441                | "cargo.toml"
442                | "package.json"
443                | "requirements.txt"
444                | "dockerfile"
445                | "docker-compose.yml"
446                | "makefile"
447                | ".gitignore"
448                | "main.rs"
449                | "lib.rs"
450                | "mod.rs"
451                | "index.js"
452                | "app.py"
453                | "main.py"
454        )
455    }
456
457    /// Check if directory is commonly important
458    fn is_important_directory(&self, dirname: &str) -> bool {
459        matches!(
460            dirname,
461            "src"
462                | "lib"
463                | "api"
464                | "server"
465                | "client"
466                | "frontend"
467                | "backend"
468                | "components"
469                | "services"
470                | "controllers"
471                | "models"
472                | "routes"
473                | "config"
474                | "configs"
475                | "auth"
476                | "authentication"
477        )
478    }
479
480    /// Check if directory is commonly unimportant
481    fn is_unimportant_directory(&self, dirname: &str) -> bool {
482        matches!(
483            dirname,
484            "node_modules"
485                | "target"
486                | "dist"
487                | "build"
488                | ".git"
489                | ".vscode"
490                | "vendor"
491                | "__pycache__"
492                | ".pytest_cache"
493                | "coverage"
494                | "logs"
495        )
496    }
497}
498
499impl Default for ContextAnalyzer {
500    fn default() -> Self {
501        Self::new()
502    }
503}
504
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::{FileCategory, FileType, FilesystemType};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Build a regular, visible Rust source file node located at `path`.
    fn rust_source_node(path: &str) -> FileNode {
        FileNode {
            path: PathBuf::from(path),
            is_dir: false,
            size: 1024,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth: 1,
            file_type: FileType::RegularFile,
            category: FileCategory::Rust,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    #[test]
    fn test_task_analysis() {
        let analyzer = ContextAnalyzer::new();
        let context = analyzer.analyze_task("debugging authentication issues in the API");

        // Two areas detected, so the default threshold stays in place.
        assert!(context.focus_areas.contains(&FocusArea::Authentication));
        assert!(context.focus_areas.contains(&FocusArea::API));
        assert_eq!(context.relevance_threshold, 0.6);
    }

    #[test]
    fn test_file_relevance_scoring() {
        let analyzer = ContextAnalyzer::new();
        let context = TaskContext {
            task: "API debugging".to_string(),
            focus_areas: vec![FocusArea::API],
            relevance_threshold: 0.6,
            max_results: Some(50),
        };
        let file_node = rust_source_node("src/api/api_handler.rs");

        let relevance = analyzer.score_file_relevance(&file_node, &context);

        assert!(relevance.score > 0.5); // Should be highly relevant
        assert!(!relevance.reasons.is_empty());
    }
}
561}