1use super::{FocusArea, RelevanceScore, TaskContext};
8use crate::scanner::{FileCategory, FileNode};
9use std::collections::HashMap;
10
/// Keyword-driven analyzer that turns a free-text task description into a
/// `TaskContext` and scores files and directories against that context.
pub struct ContextAnalyzer {
    /// Lowercase keywords associated with each focus area. They are matched as
    /// plain substrings against lowercased task text, file names and paths.
    focus_keywords: HashMap<FocusArea, Vec<String>>,
    /// Base relevance contributed by a file's category; added to the score in
    /// `score_file_relevance` when the category has an entry.
    file_type_scores: HashMap<FileCategory, f32>,
}
18
19impl ContextAnalyzer {
20 pub fn new() -> Self {
22 let mut analyzer = Self {
23 focus_keywords: HashMap::new(),
24 file_type_scores: HashMap::new(),
25 };
26
27 analyzer.initialize_focus_keywords();
28 analyzer.initialize_file_type_scores();
29 analyzer
30 }
31
32 pub fn analyze_task(&self, task_description: &str) -> TaskContext {
34 let task_lower = task_description.to_lowercase();
35 let mut focus_areas = Vec::new();
36 let mut relevance_threshold = 0.6;
37
38 for (focus_area, keywords) in &self.focus_keywords {
40 for keyword in keywords {
41 if task_lower.contains(keyword) && !focus_areas.contains(focus_area) {
42 focus_areas.push(focus_area.clone());
43 }
44 }
45 }
46
47 if focus_areas.len() == 1 {
49 relevance_threshold = 0.8; } else if focus_areas.len() > 4 {
51 relevance_threshold = 0.5; }
53
54 if focus_areas.is_empty() {
56 focus_areas = vec![FocusArea::API, FocusArea::Configuration];
57 }
58
59 TaskContext {
60 task: task_description.to_string(),
61 focus_areas,
62 relevance_threshold,
63 max_results: Some(50),
64 }
65 }
66
67 pub fn score_file_relevance(
69 &self,
70 file_node: &FileNode,
71 context: &TaskContext,
72 ) -> RelevanceScore {
73 let mut score: f32 = 0.0;
74 let mut reasons = Vec::new();
75 let mut focus_matches = Vec::new();
76
77 if let Some(type_score) = self.file_type_scores.get(&file_node.category) {
79 score += type_score;
80 reasons.push(format!("File category {:?} relevance", file_node.category));
81 }
82
83 let file_path = file_node.path.to_string_lossy().to_lowercase();
85 let file_name = file_node
86 .path
87 .file_name()
88 .and_then(|name| name.to_str())
89 .unwrap_or("")
90 .to_lowercase();
91
92 for focus_area in &context.focus_areas {
93 let empty_vec = Vec::new();
94 let keywords = self.focus_keywords.get(focus_area).unwrap_or(&empty_vec);
95 let mut area_score = 0.0;
96
97 for keyword in keywords {
98 if file_name.contains(keyword) {
100 area_score += 0.3;
101 reasons.push(format!("Filename contains '{}'", keyword));
102 }
103
104 if file_path.contains(keyword) {
106 area_score += 0.2;
107 reasons.push(format!("Path contains '{}'", keyword));
108 }
109 }
110
111 if area_score > 0.0 {
112 score += area_score;
113 focus_matches.push(focus_area.clone());
114 }
115 }
116
117 if self.is_important_file(&file_name) {
119 score += 0.4;
120 reasons.push("Important project file".to_string());
121 }
122
123 score = score.min(1.0);
125
126 RelevanceScore {
127 score,
128 reasons,
129 focus_matches,
130 }
131 }
132
133 pub fn score_directory_relevance(
135 &self,
136 dir_node: &FileNode,
137 context: &TaskContext,
138 ) -> RelevanceScore {
139 let mut score: f32 = 0.0;
140 let mut reasons = Vec::new();
141 let mut focus_matches = Vec::new();
142
143 let dir_name = dir_node
144 .path
145 .file_name()
146 .and_then(|name| name.to_str())
147 .unwrap_or("")
148 .to_lowercase();
149 let dir_path = dir_node.path.to_string_lossy().to_lowercase();
150
151 for focus_area in &context.focus_areas {
153 let empty_vec = Vec::new();
154 let keywords = self.focus_keywords.get(focus_area).unwrap_or(&empty_vec);
155
156 for keyword in keywords {
157 if dir_name.contains(keyword) || dir_path.contains(keyword) {
158 score += 0.4;
159 reasons.push(format!("Directory name/path contains '{}'", keyword));
160 if !focus_matches.contains(focus_area) {
161 focus_matches.push(focus_area.clone());
162 }
163 }
164 }
165 }
166
167 if self.is_important_directory(&dir_name) {
169 score += 0.3;
170 reasons.push("Important project directory".to_string());
171 }
172
173 if self.is_unimportant_directory(&dir_name) {
175 score *= 0.2;
176 reasons.push("Low-priority directory".to_string());
177 }
178
179 score = score.min(1.0);
180
181 RelevanceScore {
182 score,
183 reasons,
184 focus_matches,
185 }
186 }
187
188 fn initialize_focus_keywords(&mut self) {
190 self.focus_keywords.insert(
192 FocusArea::Authentication,
193 vec![
194 "auth",
195 "login",
196 "password",
197 "token",
198 "session",
199 "jwt",
200 "oauth",
201 "signin",
202 "signup",
203 "credential",
204 "authenticate",
205 "authorize",
206 ]
207 .into_iter()
208 .map(String::from)
209 .collect(),
210 );
211
212 self.focus_keywords.insert(
214 FocusArea::API,
215 vec![
216 "api",
217 "endpoint",
218 "route",
219 "handler",
220 "request",
221 "response",
222 "http",
223 "rest",
224 "graphql",
225 "controller",
226 "service",
227 "client",
228 ]
229 .into_iter()
230 .map(String::from)
231 .collect(),
232 );
233
234 self.focus_keywords.insert(
236 FocusArea::Database,
237 vec![
238 "db",
239 "database",
240 "sql",
241 "query",
242 "table",
243 "schema",
244 "migration",
245 "model",
246 "entity",
247 "repository",
248 "dao",
249 "orm",
250 ]
251 .into_iter()
252 .map(String::from)
253 .collect(),
254 );
255
256 self.focus_keywords.insert(
258 FocusArea::Frontend,
259 vec![
260 "ui",
261 "component",
262 "react",
263 "vue",
264 "angular",
265 "html",
266 "css",
267 "js",
268 "frontend",
269 "client",
270 "view",
271 "template",
272 "style",
273 ]
274 .into_iter()
275 .map(String::from)
276 .collect(),
277 );
278
279 self.focus_keywords.insert(
281 FocusArea::Backend,
282 vec![
283 "server",
284 "service",
285 "controller",
286 "model",
287 "business",
288 "logic",
289 "backend",
290 "core",
291 "engine",
292 "processor",
293 ]
294 .into_iter()
295 .map(String::from)
296 .collect(),
297 );
298
299 self.focus_keywords.insert(
301 FocusArea::Testing,
302 vec![
303 "test",
304 "spec",
305 "mock",
306 "assert",
307 "expect",
308 "unit",
309 "integration",
310 "e2e",
311 "fixture",
312 "stub",
313 "spy",
314 ]
315 .into_iter()
316 .map(String::from)
317 .collect(),
318 );
319
320 self.focus_keywords.insert(
322 FocusArea::Configuration,
323 vec![
324 "config",
325 "env",
326 "settings",
327 "properties",
328 "yaml",
329 "json",
330 "toml",
331 "ini",
332 "conf",
333 "cfg",
334 "setup",
335 ]
336 .into_iter()
337 .map(String::from)
338 .collect(),
339 );
340
341 self.focus_keywords.insert(
343 FocusArea::Security,
344 vec![
345 "security",
346 "vulnerability",
347 "sanitize",
348 "validate",
349 "encrypt",
350 "hash",
351 "secure",
352 "crypto",
353 "ssl",
354 "tls",
355 "cert",
356 ]
357 .into_iter()
358 .map(String::from)
359 .collect(),
360 );
361
362 self.focus_keywords.insert(
364 FocusArea::Performance,
365 vec![
366 "performance",
367 "optimize",
368 "cache",
369 "memory",
370 "cpu",
371 "benchmark",
372 "perf",
373 "speed",
374 "fast",
375 "efficient",
376 ]
377 .into_iter()
378 .map(String::from)
379 .collect(),
380 );
381
382 self.focus_keywords.insert(
384 FocusArea::Documentation,
385 vec![
386 "doc",
387 "readme",
388 "comment",
389 "documentation",
390 "guide",
391 "manual",
392 "help",
393 "tutorial",
394 "example",
395 ]
396 .into_iter()
397 .map(String::from)
398 .collect(),
399 );
400 }
401
402 fn initialize_file_type_scores(&mut self) {
404 self.file_type_scores.insert(FileCategory::Rust, 0.9);
406 self.file_type_scores.insert(FileCategory::Python, 0.8);
407 self.file_type_scores.insert(FileCategory::JavaScript, 0.8);
408 self.file_type_scores.insert(FileCategory::TypeScript, 0.8);
409 self.file_type_scores.insert(FileCategory::Go, 0.8);
410 self.file_type_scores.insert(FileCategory::Java, 0.8);
411 self.file_type_scores.insert(FileCategory::C, 0.7);
412 self.file_type_scores.insert(FileCategory::Cpp, 0.7);
413
414 self.file_type_scores.insert(FileCategory::Json, 0.6);
416 self.file_type_scores.insert(FileCategory::Yaml, 0.6);
417 self.file_type_scores.insert(FileCategory::Toml, 0.6);
418 self.file_type_scores.insert(FileCategory::Markdown, 0.6);
419 self.file_type_scores.insert(FileCategory::Html, 0.5);
420 self.file_type_scores.insert(FileCategory::Css, 0.5);
421
422 self.file_type_scores.insert(FileCategory::Makefile, 0.5);
424 self.file_type_scores.insert(FileCategory::Dockerfile, 0.5);
425 self.file_type_scores.insert(FileCategory::GitConfig, 0.4);
426
427 self.file_type_scores.insert(FileCategory::Archive, 0.2);
429 self.file_type_scores.insert(FileCategory::Image, 0.2);
430 self.file_type_scores.insert(FileCategory::Video, 0.1);
431 self.file_type_scores.insert(FileCategory::Audio, 0.1);
432 self.file_type_scores.insert(FileCategory::Binary, 0.2);
433 self.file_type_scores.insert(FileCategory::Unknown, 0.3);
434 }
435
436 fn is_important_file(&self, filename: &str) -> bool {
438 matches!(
439 filename,
440 "readme.md"
441 | "cargo.toml"
442 | "package.json"
443 | "requirements.txt"
444 | "dockerfile"
445 | "docker-compose.yml"
446 | "makefile"
447 | ".gitignore"
448 | "main.rs"
449 | "lib.rs"
450 | "mod.rs"
451 | "index.js"
452 | "app.py"
453 | "main.py"
454 )
455 }
456
457 fn is_important_directory(&self, dirname: &str) -> bool {
459 matches!(
460 dirname,
461 "src"
462 | "lib"
463 | "api"
464 | "server"
465 | "client"
466 | "frontend"
467 | "backend"
468 | "components"
469 | "services"
470 | "controllers"
471 | "models"
472 | "routes"
473 | "config"
474 | "configs"
475 | "auth"
476 | "authentication"
477 )
478 }
479
480 fn is_unimportant_directory(&self, dirname: &str) -> bool {
482 matches!(
483 dirname,
484 "node_modules"
485 | "target"
486 | "dist"
487 | "build"
488 | ".git"
489 | ".vscode"
490 | "vendor"
491 | "__pycache__"
492 | ".pytest_cache"
493 | "coverage"
494 | "logs"
495 )
496 }
497}
498
499impl Default for ContextAnalyzer {
500 fn default() -> Self {
501 Self::new()
502 }
503}
504
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::{FileCategory, FileType, FilesystemType};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Builds a plain, visible, regular-file node at `path` with the given category.
    fn regular_file(path: &str, category: FileCategory) -> FileNode {
        FileNode {
            path: PathBuf::from(path),
            is_dir: false,
            size: 1024,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth: 1,
            file_type: FileType::RegularFile,
            category,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    /// A description mentioning authentication and the API selects both focus
    /// areas and keeps the default threshold.
    #[test]
    fn test_task_analysis() {
        let analyzer = ContextAnalyzer::new();
        let task_context = analyzer.analyze_task("debugging authentication issues in the API");

        let areas = &task_context.focus_areas;
        assert!(areas.contains(&FocusArea::Authentication));
        assert!(areas.contains(&FocusArea::API));
        assert_eq!(task_context.relevance_threshold, 0.6);
    }

    /// A Rust handler under an `api` directory scores well for an API-focused
    /// task and reports at least one reason.
    #[test]
    fn test_file_relevance_scoring() {
        let analyzer = ContextAnalyzer::new();
        let api_context = TaskContext {
            task: "API debugging".to_string(),
            focus_areas: vec![FocusArea::API],
            relevance_threshold: 0.6,
            max_results: Some(50),
        };
        let handler = regular_file("src/api/api_handler.rs", FileCategory::Rust);

        let relevance = analyzer.score_file_relevance(&handler, &api_context);

        assert!(relevance.score > 0.5);
        assert!(!relevance.reasons.is_empty());
    }
}