1use crate::scanner::{FileCategory, FileNode, FilesystemType};
20use crate::scanner_interest::{
21 ChangeType, DependencyManager, InterestFactor, InterestLevel, InterestScore,
22 InterestWeights, KeyFileType, RiskLevel, TraversalContext, TraversalPath,
23};
24use crate::scanner_state::{FileSignature, ScanState};
25use crate::security_scan::{SecurityFinding, SecurityScanner};
26use std::collections::HashSet;
27use std::path::Path;
28use std::time::SystemTime;
29
/// Computes interest scores for scanned filesystem nodes.
///
/// A calculator is created with `new` or `with_weights` and then tuned through
/// the builder-style `with_*` / `without_*` methods.
pub struct InterestCalculator {
    /// Weights applied to the individual interest factors.
    weights: InterestWeights,

    /// State from an earlier scan, if any; consulted when detecting changes.
    previous_state: Option<ScanState>,

    /// Directories treated as "hot"; a node below any of them gets a
    /// hot-directory factor.
    hot_dirs: HashSet<std::path::PathBuf>,

    /// Scanner used on file contents; `None` disables security factors.
    security_scanner: Option<SecurityScanner>,

    /// Reference time captured at construction, used to compute file age.
    now: SystemTime,
}
47
48impl InterestCalculator {
49 pub fn new() -> Self {
51 Self {
52 weights: InterestWeights::default(),
53 previous_state: None,
54 hot_dirs: HashSet::new(),
55 security_scanner: Some(SecurityScanner::new()),
56 now: SystemTime::now(),
57 }
58 }
59
60 pub fn with_weights(weights: InterestWeights) -> Self {
62 Self {
63 weights,
64 previous_state: None,
65 hot_dirs: HashSet::new(),
66 security_scanner: Some(SecurityScanner::new()),
67 now: SystemTime::now(),
68 }
69 }
70
71 pub fn with_previous_state(mut self, state: ScanState) -> Self {
73 self.previous_state = Some(state);
74 self
75 }
76
77 pub fn with_hot_dirs(mut self, dirs: HashSet<std::path::PathBuf>) -> Self {
79 self.hot_dirs = dirs;
80 self
81 }
82
83 pub fn without_security(mut self) -> Self {
85 self.security_scanner = None;
86 self
87 }
88
89 pub fn calculate(&self, node: &FileNode) -> InterestScore {
91 let mut factors = Vec::new();
92
93 if let Some(factor) = self.check_recency(node) {
95 factors.push(factor);
96 }
97
98 if let Some(factor) = self.check_key_file(node) {
100 factors.push(factor);
101 }
102
103 if let Some(factor) = self.check_changed(node) {
105 factors.push(factor);
106 }
107
108 if let Some(factor) = self.check_hot_dir(node) {
110 factors.push(factor);
111 }
112
113 if let Some(factor) = self.check_dependency_context(node) {
115 factors.push(factor);
116 }
117
118 if let Some(factor) = self.check_filesystem_type(node) {
120 factors.push(factor);
121 }
122
123 if let Some(factor) = self.check_category_boost(node) {
125 factors.push(factor);
126 }
127
128 InterestScore::from_factors(factors)
129 }
130
131 pub fn calculate_with_security(
133 &self,
134 node: &FileNode,
135 content: Option<&str>,
136 ) -> (InterestScore, Vec<SecurityFinding>) {
137 let mut factors = Vec::new();
138 let mut findings = Vec::new();
139
140 if let (Some(scanner), Some(content)) = (&self.security_scanner, content) {
142 let file_findings = scanner.scan_file_content(&node.path, content);
143 for finding in &file_findings {
144 let risk_level = match finding.risk_level {
145 crate::security_scan::RiskLevel::Critical => RiskLevel::Critical,
146 crate::security_scan::RiskLevel::High => RiskLevel::High,
147 crate::security_scan::RiskLevel::Medium => RiskLevel::Medium,
148 crate::security_scan::RiskLevel::Low => RiskLevel::Low,
149 };
150
151 factors.push(InterestFactor::SecurityPattern {
152 risk: risk_level,
153 description: finding.description.clone(),
154 weight: match finding.risk_level {
155 crate::security_scan::RiskLevel::Critical => 1.0,
156 crate::security_scan::RiskLevel::High => 0.8,
157 crate::security_scan::RiskLevel::Medium => 0.5,
158 crate::security_scan::RiskLevel::Low => 0.2,
159 },
160 });
161 }
162 findings = file_findings;
163 }
164
165 if let Some(factor) = self.check_recency(node) {
167 factors.push(factor);
168 }
169 if let Some(factor) = self.check_key_file(node) {
170 factors.push(factor);
171 }
172 if let Some(factor) = self.check_changed(node) {
173 factors.push(factor);
174 }
175 if let Some(factor) = self.check_hot_dir(node) {
176 factors.push(factor);
177 }
178 if let Some(factor) = self.check_dependency_context(node) {
179 factors.push(factor);
180 }
181 if let Some(factor) = self.check_filesystem_type(node) {
182 factors.push(factor);
183 }
184 if let Some(factor) = self.check_category_boost(node) {
185 factors.push(factor);
186 }
187
188 (InterestScore::from_factors(factors), findings)
189 }
190
191 fn check_recency(&self, node: &FileNode) -> Option<InterestFactor> {
193 let duration = self.now.duration_since(node.modified).ok()?;
194 let hours = duration.as_secs_f32() / 3600.0;
195
196 let weight = if hours < 1.0 {
198 self.weights.recent_modification * 1.5 } else if hours < 24.0 {
200 self.weights.recent_modification * (1.0 - hours / 48.0)
201 } else if hours < 168.0 {
202 self.weights.recent_modification * 0.3 * (1.0 - hours / 336.0)
204 } else {
205 return None; };
207
208 if weight > 0.05 {
209 Some(InterestFactor::RecentlyModified {
210 hours_ago: hours,
211 weight,
212 })
213 } else {
214 None
215 }
216 }
217
218 fn check_key_file(&self, node: &FileNode) -> Option<InterestFactor> {
220 if node.is_dir {
221 return None;
222 }
223
224 let file_name = node.path.file_name()?.to_str()?;
225 let file_name_lower = file_name.to_lowercase();
226
227 let key_type = match file_name_lower.as_str() {
228 "readme.md" | "readme" | "readme.txt" | "changelog.md" | "changelog" | "history.md" => {
230 Some(KeyFileType::Documentation)
231 }
232
233 "cargo.toml" | "package.json" | "pyproject.toml" | "go.mod" | "gemfile"
235 | "build.gradle" | "pom.xml" | "makefile" | "cmakelists.txt" => {
236 Some(KeyFileType::BuildConfig)
237 }
238
239 ".env" | ".env.local" | ".env.example" | "config.toml" | "config.yaml"
241 | "config.json" | "settings.toml" | "settings.yaml" => Some(KeyFileType::Configuration),
242
243 "main.rs" | "lib.rs" | "mod.rs" | "index.js" | "index.ts" | "main.py" | "__init__.py"
245 | "app.py" | "main.go" | "main.java" => Some(KeyFileType::EntryPoint),
246
247 "license" | "license.md" | "license.txt" | "copying" => Some(KeyFileType::License),
249
250 ".gitlab-ci.yml" | "jenkinsfile" | ".travis.yml" | "azure-pipelines.yml" => {
252 Some(KeyFileType::CiConfig)
253 }
254
255 "dockerfile" | "docker-compose.yml" | "docker-compose.yaml" | "containerfile" => {
257 Some(KeyFileType::Container)
258 }
259
260 "claude.md" | ".cursorrules" | ".aider" | "copilot.md" => Some(KeyFileType::AiConfig),
262
263 _ => None,
264 };
265
266 let key_type = key_type.or_else(|| {
268 if node.path.to_string_lossy().contains(".github/workflows") {
269 Some(KeyFileType::CiConfig)
270 } else {
271 None
272 }
273 });
274
275 key_type.map(|file_type| InterestFactor::KeyProjectFile {
276 file_type,
277 weight: self.weights.key_file,
278 })
279 }
280
281 fn check_changed(&self, node: &FileNode) -> Option<InterestFactor> {
283 let prev_state = self.previous_state.as_ref()?;
284 let prev_sig = prev_state.signatures.get(&node.path);
285
286 match prev_sig {
287 None => {
288 Some(InterestFactor::ChangedSinceLastScan {
290 change: ChangeType::Added,
291 weight: self.weights.changed_since_scan,
292 })
293 }
294 Some(old_sig) => {
295 let new_sig = FileSignature::from_path(&node.path).ok()?;
297
298 if new_sig.changed(old_sig) {
299 let change_type = if old_sig.permissions != new_sig.permissions {
300 ChangeType::PermissionChanged
301 } else {
302 ChangeType::Modified
303 };
304
305 Some(InterestFactor::ChangedSinceLastScan {
306 change: change_type,
307 weight: self.weights.changed_since_scan,
308 })
309 } else {
310 None
311 }
312 }
313 }
314 }
315
316 fn check_hot_dir(&self, node: &FileNode) -> Option<InterestFactor> {
318 for ancestor in node.path.ancestors() {
320 if self.hot_dirs.contains(ancestor) {
321 return Some(InterestFactor::HotDirectory {
322 change_count: 0, weight: self.weights.hot_directory,
324 });
325 }
326 }
327 None
328 }
329
330 fn check_dependency_context(&self, node: &FileNode) -> Option<InterestFactor> {
332 let path_str = node.path.to_string_lossy();
333
334 let dep_indicators = [
336 ("node_modules", DependencyManager::Npm),
337 ("target/debug", DependencyManager::Cargo),
338 ("target/release", DependencyManager::Cargo),
339 (".venv", DependencyManager::Python),
340 ("venv", DependencyManager::Python),
341 ("__pycache__", DependencyManager::Python),
342 ("vendor", DependencyManager::Go), (".m2", DependencyManager::Java),
344 ("build/classes", DependencyManager::Java),
345 ];
346
347 for (indicator, _manager) in &dep_indicators {
348 if path_str.contains(indicator) {
349 let depth = path_str
351 .split(indicator)
352 .nth(1)
353 .map(|s| s.matches('/').count())
354 .unwrap_or(0);
355
356 return Some(InterestFactor::InDependencyTree {
357 depth,
358 weight: self.weights.dependency_depth_penalty * (depth as f32 + 1.0),
359 });
360 }
361 }
362
363 None
364 }
365
366 fn check_filesystem_type(&self, node: &FileNode) -> Option<InterestFactor> {
368 match node.filesystem_type {
369 FilesystemType::Procfs | FilesystemType::Sysfs | FilesystemType::Devfs => {
370 Some(InterestFactor::InDependencyTree {
371 depth: 0,
372 weight: -0.5, })
374 }
375 FilesystemType::Tmpfs => Some(InterestFactor::InDependencyTree {
376 depth: 0,
377 weight: -0.2, }),
379 _ => None,
380 }
381 }
382
383 fn check_category_boost(&self, node: &FileNode) -> Option<InterestFactor> {
385 if node.is_dir {
386 return None;
387 }
388
389 let boost: f32 = match node.category {
391 FileCategory::Rust
392 | FileCategory::Python
393 | FileCategory::JavaScript
394 | FileCategory::TypeScript
395 | FileCategory::Go
396 | FileCategory::Java
397 | FileCategory::Cpp
398 | FileCategory::C => 0.1,
399
400 FileCategory::Toml
402 | FileCategory::Yaml
403 | FileCategory::Json
404 | FileCategory::Makefile
405 | FileCategory::Dockerfile => 0.15,
406
407 FileCategory::Markdown | FileCategory::Readme => 0.1,
409
410 FileCategory::Test => 0.1,
412
413 FileCategory::Archive | FileCategory::Binary | FileCategory::DiskImage => -0.1,
415
416 FileCategory::Temp | FileCategory::Backup => -0.2,
418
419 _ => 0.0,
420 };
421
422 if boost.abs() > 0.01 {
423 Some(InterestFactor::Custom {
424 name: format!("Category: {:?}", node.category),
425 weight: boost,
426 })
427 } else {
428 None
429 }
430 }
431
432 pub fn build_traversal_context(
434 &self,
435 node: &FileNode,
436 parent_interest: Option<InterestLevel>,
437 ) -> TraversalContext {
438 let path_str = node.path.to_string_lossy();
439
440 let traversal_path = if node.is_symlink {
442 TraversalPath::Symlink {
443 target: std::fs::read_link(&node.path).unwrap_or_default(),
444 target_exists: node.path.exists(),
445 }
446 } else if let Some((indicator, manager)) = self.find_dependency_indicator(&path_str) {
447 TraversalPath::Dependency {
448 manager,
449 dep_root: node
450 .path
451 .to_string_lossy()
452 .split(indicator)
453 .next()
454 .map(|s| std::path::PathBuf::from(format!("{}{}", s, indicator)))
455 .unwrap_or_default(),
456 }
457 } else {
458 TraversalPath::Direct
459 };
460
461 let in_git_worktree = node.path.join(".git").exists()
463 || node
464 .path
465 .ancestors()
466 .any(|p| p.join(".git").exists());
467
468 let in_submodule = node
470 .path
471 .ancestors()
472 .any(|p| p.join(".git").is_file()); TraversalContext {
475 path: traversal_path,
476 depth_from_root: node.depth,
477 in_git_worktree,
478 in_submodule,
479 parent_interest,
480 }
481 }
482
483 fn find_dependency_indicator(&self, path: &str) -> Option<(&'static str, DependencyManager)> {
485 let indicators = [
486 ("node_modules", DependencyManager::Npm),
487 ("target/debug", DependencyManager::Cargo),
488 ("target/release", DependencyManager::Cargo),
489 (".venv", DependencyManager::Python),
490 ("venv", DependencyManager::Python),
491 ("vendor", DependencyManager::Go),
492 (".m2", DependencyManager::Java),
493 ];
494
495 for (indicator, manager) in indicators {
496 if path.contains(indicator) {
497 return Some((indicator, manager));
498 }
499 }
500 None
501 }
502}
503
504impl Default for InterestCalculator {
505 fn default() -> Self {
506 Self::new()
507 }
508}
509
510pub fn quick_interest_check(path: &Path) -> InterestLevel {
512 let path_str = path.to_string_lossy();
513
514 if path_str.contains(".env") && !path_str.contains(".env.example") {
516 return InterestLevel::Critical;
517 }
518
519 let boring_patterns = [
521 "node_modules",
522 "target/debug",
523 "target/release",
524 "__pycache__",
525 ".git/objects",
526 ".venv",
527 "venv/lib",
528 ];
529
530 for pattern in boring_patterns {
531 if path_str.contains(pattern) {
532 return InterestLevel::Boring;
533 }
534 }
535
536 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
538 let name_lower = name.to_lowercase();
539 if matches!(
540 name_lower.as_str(),
541 "readme.md"
542 | "cargo.toml"
543 | "package.json"
544 | "main.rs"
545 | "lib.rs"
546 | "index.js"
547 | "index.ts"
548 ) {
549 return InterestLevel::Important;
550 }
551 }
552
553 InterestLevel::Background
554}
555
556#[cfg(test)]
557mod tests {
558 use super::*;
559 use crate::scanner::{FileCategory, FileType, FilesystemType};
560 use std::path::PathBuf;
561 use std::time::Duration;
562
563 fn make_test_node(path: &str, is_dir: bool, hours_old: f32) -> FileNode {
564 let modified = SystemTime::now() - Duration::from_secs_f32(hours_old * 3600.0);
565
566 FileNode {
567 path: PathBuf::from(path),
568 is_dir,
569 size: 1000,
570 permissions: 0o644,
571 uid: 1000,
572 gid: 1000,
573 modified,
574 is_symlink: false,
575 is_hidden: false,
576 permission_denied: false,
577 is_ignored: false,
578 depth: path.matches('/').count(),
579 file_type: if is_dir {
580 FileType::Directory
581 } else {
582 FileType::RegularFile
583 },
584 category: FileCategory::Unknown,
585 search_matches: None,
586 filesystem_type: FilesystemType::Unknown,
587 git_branch: None,
588 traversal_context: None,
589 interest: None,
590 security_findings: Vec::new(),
591 change_status: None,
592 content_hash: None,
593 }
594 }
595
/// A file touched half an hour ago must outscore the 0.3 bar, while one last
/// touched 200 hours ago (beyond the one-week window) must stay below 0.2.
#[test]
fn test_recency_scoring() {
    let calc = InterestCalculator::new();

    let fresh_score = calc
        .calculate(&make_test_node("src/main.rs", false, 0.5))
        .score;
    assert!(fresh_score > 0.3, "Recent file should have high score");

    let stale_score = calc
        .calculate(&make_test_node("src/old.rs", false, 200.0))
        .score;
    assert!(stale_score < 0.2, "Old file should have low score");
}
610
/// Well-known project files score at least 0.4 even when they are 100 hours
/// old.
#[test]
fn test_key_file_detection() {
    let calc = InterestCalculator::new();

    let readme_score = calc
        .calculate(&make_test_node("README.md", false, 100.0))
        .score;
    assert!(
        readme_score >= 0.4,
        "README should be important: {}",
        readme_score
    );

    let manifest_score = calc
        .calculate(&make_test_node("Cargo.toml", false, 100.0))
        .score;
    assert!(
        manifest_score >= 0.4,
        "Cargo.toml should be important: {}",
        manifest_score
    );
}
631
/// A file inside `node_modules` scores below 0.3 and below an equally old
/// file with the same name in the project's own source tree.
#[test]
fn test_dependency_penalty() {
    let calc = InterestCalculator::new();

    let node_mod_score = calc
        .calculate(&make_test_node("node_modules/lodash/index.js", false, 200.0))
        .score;
    assert!(
        node_mod_score < 0.3,
        "node_modules file should have reduced interest: {}",
        node_mod_score
    );

    let source_score = calc
        .calculate(&make_test_node("src/utils/index.js", false, 200.0))
        .score;
    assert!(
        source_score > node_mod_score,
        "Normal source file ({}) should have higher interest than node_modules ({})",
        source_score,
        node_mod_score
    );
}
660
/// One representative path for each level `quick_interest_check` can return.
#[test]
fn test_quick_interest_check() {
    let level_of = |raw: &str| quick_interest_check(Path::new(raw));

    assert_eq!(level_of(".env"), InterestLevel::Critical);
    assert_eq!(level_of("node_modules/foo/bar.js"), InterestLevel::Boring);
    assert_eq!(level_of("README.md"), InterestLevel::Important);
    assert_eq!(level_of("src/utils.rs"), InterestLevel::Background);
}
680}