Skip to main content

st/
scanner_interest.rs

1//
2// -----------------------------------------------------------------------------
3//  INTEREST SCORING: Surfacing What Matters
4//
5//  This module is the heart of Smart Tree's intelligent scanning. Instead of
6//  listing everything, we score each file/folder by "interest" - how relevant
7//  is this to the developer or AI right now?
8//
9//  Key concepts:
10//  - TraversalPath: How did we reach this location? (direct, symlink, mount, etc.)
11//  - InterestScore: A 0.0-1.0 score with breakdown of contributing factors
12//  - InterestLevel: Human-friendly categorization (Boring → Critical)
13//
14//  "The goal is signal, not noise." - Omni
15// -----------------------------------------------------------------------------
16//
17
18use crate::scanner::FilesystemType;
19use serde::{Deserialize, Serialize};
20use std::path::PathBuf;
21use std::time::SystemTime;
22
23/// How we reached this location during traversal.
24/// This context helps understand if a path is "real" or indirect.
25#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
26pub enum TraversalPath {
27    /// Directly under the scan root via normal directory traversal
28    Direct,
29
30    /// Reached via symbolic link
31    Symlink {
32        /// The actual target of the symlink
33        target: PathBuf,
34        /// Whether the target exists
35        target_exists: bool,
36    },
37
38    /// Crossed a mount point boundary
39    Mount {
40        /// The filesystem type we're now on
41        filesystem: FilesystemType,
42        /// Mount point path
43        mount_point: PathBuf,
44    },
45
46    /// Reached via recursive traversal into a nested structure
47    Recursive {
48        /// How deep we are from the original interesting location
49        depth: usize,
50        /// The original path that led us here
51        original: PathBuf,
52    },
53
54    /// Inside a dependency/vendor directory (node_modules, vendor, etc.)
55    Dependency {
56        /// Type of dependency manager
57        manager: DependencyManager,
58        /// Root of the dependency tree
59        dep_root: PathBuf,
60    },
61}
62
63/// Types of dependency managers we recognize
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65pub enum DependencyManager {
66    /// npm/yarn/pnpm (node_modules)
67    Npm,
68    /// Cargo (target/debug, target/release)
69    Cargo,
70    /// pip/venv/virtualenv
71    Python,
72    /// Go modules (vendor)
73    Go,
74    /// Ruby gems (vendor/bundle)
75    Ruby,
76    /// Composer (vendor)
77    Composer,
78    /// Maven/Gradle (.m2, build)
79    Java,
80    /// Unknown dependency manager
81    Unknown,
82}
83
84impl DependencyManager {
85    /// Get the typical directory name for this dependency manager
86    pub fn dir_name(&self) -> &'static str {
87        match self {
88            Self::Npm => "node_modules",
89            Self::Cargo => "target",
90            Self::Python => ".venv",
91            Self::Go => "vendor",
92            Self::Ruby => "vendor",
93            Self::Composer => "vendor",
94            Self::Java => "build",
95            Self::Unknown => "",
96        }
97    }
98
99    /// Detect dependency manager from a directory name
100    pub fn from_dir_name(name: &str) -> Option<Self> {
101        match name {
102            "node_modules" => Some(Self::Npm),
103            "target" => Some(Self::Cargo),
104            ".venv" | "venv" | ".virtualenv" | "virtualenv" => Some(Self::Python),
105            "vendor" => Some(Self::Go), // Could also be Ruby/Composer - context needed
106            ".m2" | "build" | "out" => Some(Self::Java),
107            _ => None,
108        }
109    }
110}
111
112/// Full traversal context for a node
113#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct TraversalContext {
115    /// How we reached this location
116    pub path: TraversalPath,
117
118    /// Depth from scan root
119    pub depth_from_root: usize,
120
121    /// Is this path inside a git worktree?
122    pub in_git_worktree: bool,
123
124    /// Is this inside a submodule?
125    pub in_submodule: bool,
126
127    /// Parent directory interest level (for inheritance)
128    pub parent_interest: Option<InterestLevel>,
129}
130
131impl Default for TraversalContext {
132    fn default() -> Self {
133        Self {
134            path: TraversalPath::Direct,
135            depth_from_root: 0,
136            in_git_worktree: false,
137            in_submodule: false,
138            parent_interest: None,
139        }
140    }
141}
142
143/// Interest level - human-readable categorization of importance
144#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
145pub enum InterestLevel {
146    /// Not worth showing (generated files, caches, etc.)
147    Boring = 0,
148
149    /// Exists but rarely relevant (most dependencies, build artifacts)
150    #[default]
151    Background = 1,
152
153    /// Worth knowing about but not urgent
154    Notable = 2,
155
156    /// Should be surfaced to the user
157    Important = 3,
158
159    /// Must be shown - security issues, breaking changes, critical files
160    Critical = 4,
161}
162
163impl InterestLevel {
164    /// Get emoji representation
165    pub fn emoji(&self) -> &'static str {
166        match self {
167            Self::Boring => "💤",
168            Self::Background => "đŸ“Ļ",
169            Self::Notable => "📝",
170            Self::Important => "đŸ”Ĩ",
171            Self::Critical => "âš ī¸",
172        }
173    }
174
175    /// Get color name for terminal output
176    pub fn color(&self) -> &'static str {
177        match self {
178            Self::Boring => "bright_black",
179            Self::Background => "white",
180            Self::Notable => "cyan",
181            Self::Important => "yellow",
182            Self::Critical => "red",
183        }
184    }
185
186    /// Convert from float score to level
187    pub fn from_score(score: f32) -> Self {
188        match score {
189            s if s >= 0.8 => Self::Critical,
190            s if s >= 0.6 => Self::Important,
191            s if s >= 0.4 => Self::Notable,
192            s if s >= 0.2 => Self::Background,
193            _ => Self::Boring,
194        }
195    }
196}
197
198/// Risk level for security-related factors
199#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
200pub enum RiskLevel {
201    /// No risk detected
202    None = 0,
203    /// Informational finding
204    Info = 1,
205    /// Low risk - worth noting
206    Low = 2,
207    /// Medium risk - should be reviewed
208    Medium = 3,
209    /// High risk - needs attention
210    High = 4,
211    /// Critical risk - immediate action needed
212    Critical = 5,
213}
214
215impl RiskLevel {
216    pub fn emoji(&self) -> &'static str {
217        match self {
218            Self::None => "",
219            Self::Info => "â„šī¸",
220            Self::Low => "đŸ”ĩ",
221            Self::Medium => "🟡",
222            Self::High => "🟠",
223            Self::Critical => "🔴",
224        }
225    }
226}
227
228/// Factors that contribute to a node's interest score
229#[derive(Debug, Clone, Serialize, Deserialize)]
230pub enum InterestFactor {
231    /// File was recently modified
232    RecentlyModified {
233        /// Hours since last modification
234        hours_ago: f32,
235        /// Contribution to score (0.0-1.0)
236        weight: f32,
237    },
238
239    /// Security pattern detected
240    SecurityPattern {
241        /// Risk level of the finding
242        risk: RiskLevel,
243        /// Brief description
244        description: String,
245        /// Contribution to score
246        weight: f32,
247    },
248
249    /// This is a key project file (README, Cargo.toml, package.json, etc.)
250    KeyProjectFile {
251        /// Type of key file
252        file_type: KeyFileType,
253        /// Contribution to score
254        weight: f32,
255    },
256
257    /// Changed since last scan
258    ChangedSinceLastScan {
259        /// Type of change
260        change: ChangeType,
261        /// Contribution to score
262        weight: f32,
263    },
264
265    /// In a "hot" directory with frequent changes
266    HotDirectory {
267        /// Number of changes in recent period
268        change_count: u32,
269        /// Contribution to score
270        weight: f32,
271    },
272
273    /// Suspicious dependency detected
274    SuspiciousDependency {
275        /// Reason for suspicion
276        reason: String,
277        /// Contribution to score
278        weight: f32,
279    },
280
281    /// Git-related interest (uncommitted changes, conflicts, etc.)
282    GitStatus {
283        /// Type of git status
284        status: GitStatusType,
285        /// Contribution to score
286        weight: f32,
287    },
288
289    /// Code complexity or size concern
290    Complexity {
291        /// Description of the complexity factor
292        description: String,
293        /// Contribution to score
294        weight: f32,
295    },
296
297    /// Inside a dependency tree (usually reduces interest)
298    InDependencyTree {
299        /// Depth inside dependency tree
300        depth: usize,
301        /// Negative contribution to score
302        weight: f32,
303    },
304
305    /// Custom user-defined interest factor
306    Custom {
307        /// Name of the custom factor
308        name: String,
309        /// Contribution to score
310        weight: f32,
311    },
312}
313
314impl InterestFactor {
315    /// Get the weight contribution of this factor
316    pub fn weight(&self) -> f32 {
317        match self {
318            Self::RecentlyModified { weight, .. } => *weight,
319            Self::SecurityPattern { weight, .. } => *weight,
320            Self::KeyProjectFile { weight, .. } => *weight,
321            Self::ChangedSinceLastScan { weight, .. } => *weight,
322            Self::HotDirectory { weight, .. } => *weight,
323            Self::SuspiciousDependency { weight, .. } => *weight,
324            Self::GitStatus { weight, .. } => *weight,
325            Self::Complexity { weight, .. } => *weight,
326            Self::InDependencyTree { weight, .. } => *weight,
327            Self::Custom { weight, .. } => *weight,
328        }
329    }
330
331    /// Get a short description of this factor
332    pub fn description(&self) -> String {
333        match self {
334            Self::RecentlyModified { hours_ago, .. } => {
335                format!("Modified {:.1}h ago", hours_ago)
336            }
337            Self::SecurityPattern { description, .. } => description.clone(),
338            Self::KeyProjectFile { file_type, .. } => {
339                format!("Key file: {:?}", file_type)
340            }
341            Self::ChangedSinceLastScan { change, .. } => {
342                format!("Changed: {:?}", change)
343            }
344            Self::HotDirectory { change_count, .. } => {
345                format!("{} recent changes", change_count)
346            }
347            Self::SuspiciousDependency { reason, .. } => reason.clone(),
348            Self::GitStatus { status, .. } => format!("Git: {:?}", status),
349            Self::Complexity { description, .. } => description.clone(),
350            Self::InDependencyTree { depth, .. } => {
351                format!("Dependency depth: {}", depth)
352            }
353            Self::Custom { name, .. } => name.clone(),
354        }
355    }
356}
357
358/// Types of key project files
359#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
360pub enum KeyFileType {
361    /// README, CHANGELOG, etc.
362    Documentation,
363    /// Cargo.toml, package.json, etc.
364    BuildConfig,
365    /// .env, config.toml, etc.
366    Configuration,
367    /// main.rs, index.js, etc.
368    EntryPoint,
369    /// LICENSE, COPYING
370    License,
371    /// .github/workflows, .gitlab-ci.yml
372    CiConfig,
373    /// Dockerfile, docker-compose.yml
374    Container,
375    /// CLAUDE.md, .cursorrules, etc.
376    AiConfig,
377}
378
379/// Types of changes detected between scans
380#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
381pub enum ChangeType {
382    /// File was added
383    Added,
384    /// File content was modified
385    Modified,
386    /// File was deleted
387    Deleted,
388    /// File permissions changed
389    PermissionChanged,
390    /// File was renamed/moved
391    Renamed,
392    /// File type changed (e.g., regular file to symlink)
393    TypeChanged,
394}
395
396impl ChangeType {
397    pub fn emoji(&self) -> &'static str {
398        match self {
399            Self::Added => "+",
400            Self::Modified => "~",
401            Self::Deleted => "-",
402            Self::PermissionChanged => "🔐",
403            Self::Renamed => "→",
404            Self::TypeChanged => "⚡",
405        }
406    }
407}
408
409/// Git status types that affect interest
410#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
411pub enum GitStatusType {
412    /// Uncommitted changes
413    Uncommitted,
414    /// Merge conflict
415    Conflict,
416    /// Staged for commit
417    Staged,
418    /// Untracked file
419    Untracked,
420    /// Ahead of remote
421    Ahead,
422    /// Behind remote
423    Behind,
424}
425
426/// Interest score with breakdown of contributing factors
427#[derive(Debug, Clone, Serialize, Deserialize)]
428pub struct InterestScore {
429    /// Overall interest score (0.0 = boring, 1.0 = critical)
430    pub score: f32,
431
432    /// Factors that contributed to this score
433    pub factors: Vec<InterestFactor>,
434
435    /// Human-friendly interest level
436    pub level: InterestLevel,
437
438    /// When this score was calculated
439    pub calculated_at: SystemTime,
440}
441
442impl InterestScore {
443    /// Create a new interest score from factors
444    pub fn from_factors(factors: Vec<InterestFactor>) -> Self {
445        let score = factors.iter().map(|f| f.weight()).sum::<f32>().clamp(0.0, 1.0);
446        let level = InterestLevel::from_score(score);
447
448        Self {
449            score,
450            factors,
451            level,
452            calculated_at: SystemTime::now(),
453        }
454    }
455
456    /// Create a default "boring" score
457    pub fn boring() -> Self {
458        Self {
459            score: 0.0,
460            factors: vec![],
461            level: InterestLevel::Boring,
462            calculated_at: SystemTime::now(),
463        }
464    }
465
466    /// Create a critical score with a single reason
467    pub fn critical(reason: String) -> Self {
468        Self {
469            score: 1.0,
470            factors: vec![InterestFactor::SecurityPattern {
471                risk: RiskLevel::Critical,
472                description: reason,
473                weight: 1.0,
474            }],
475            level: InterestLevel::Critical,
476            calculated_at: SystemTime::now(),
477        }
478    }
479
480    /// Check if this score indicates the node should be shown by default
481    pub fn should_show(&self) -> bool {
482        self.level >= InterestLevel::Notable
483    }
484
485    /// Get a summary of why this is interesting
486    pub fn summary(&self) -> String {
487        if self.factors.is_empty() {
488            return String::from("No notable factors");
489        }
490
491        self.factors
492            .iter()
493            .map(|f| f.description())
494            .collect::<Vec<_>>()
495            .join(", ")
496    }
497}
498
499impl Default for InterestScore {
500    fn default() -> Self {
501        Self {
502            score: 0.1,
503            factors: vec![],
504            level: InterestLevel::Background,
505            calculated_at: SystemTime::now(),
506        }
507    }
508}
509
510/// Default weights for interest calculation
511#[derive(Debug, Clone, Serialize, Deserialize)]
512pub struct InterestWeights {
513    /// Weight for recently modified files (within 24h)
514    pub recent_modification: f32,
515
516    /// Weight for critical security findings
517    pub security_critical: f32,
518
519    /// Weight for key project files
520    pub key_file: f32,
521
522    /// Weight for files changed since last scan
523    pub changed_since_scan: f32,
524
525    /// Weight for files in hot directories
526    pub hot_directory: f32,
527
528    /// Negative weight for dependency tree depth
529    pub dependency_depth_penalty: f32,
530
531    /// Base interest for files with git changes
532    pub git_changes: f32,
533}
534
535impl Default for InterestWeights {
536    fn default() -> Self {
537        Self {
538            recent_modification: 0.3,
539            security_critical: 1.0,
540            key_file: 0.5,
541            changed_since_scan: 0.4,
542            hot_directory: 0.3,
543            dependency_depth_penalty: -0.1,
544            git_changes: 0.35,
545        }
546    }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sample score must land in the expected level bucket.
    #[test]
    fn test_interest_level_from_score() {
        let cases = [
            (0.0, InterestLevel::Boring),
            (0.1, InterestLevel::Boring),
            (0.3, InterestLevel::Background),
            (0.5, InterestLevel::Notable),
            (0.7, InterestLevel::Important),
            (0.9, InterestLevel::Critical),
            (1.0, InterestLevel::Critical),
        ];

        for (input, expected) in cases {
            assert_eq!(InterestLevel::from_score(input), expected);
        }
    }

    /// Factor weights are summed, and the level follows from the total.
    #[test]
    fn test_interest_score_from_factors() {
        let recently_modified = InterestFactor::RecentlyModified {
            hours_ago: 2.0,
            weight: 0.3,
        };
        let build_config = InterestFactor::KeyProjectFile {
            file_type: KeyFileType::BuildConfig,
            weight: 0.5,
        };

        let result = InterestScore::from_factors(vec![recently_modified, build_config]);

        assert!((result.score - 0.8).abs() < 0.01);
        assert_eq!(result.level, InterestLevel::Critical);
    }

    /// A weight total above 1.0 is clamped to exactly 1.0.
    #[test]
    fn test_interest_score_clamping() {
        let security = InterestFactor::SecurityPattern {
            risk: RiskLevel::Critical,
            description: "Bad thing".to_string(),
            weight: 1.0,
        };
        let hot = InterestFactor::HotDirectory {
            change_count: 100,
            weight: 0.5,
        };

        let result = InterestScore::from_factors(vec![security, hot]);

        assert_eq!(result.score, 1.0);
    }

    /// Known directory names map to their manager; unknown names map to `None`.
    #[test]
    fn test_dependency_manager_detection() {
        let cases = [
            ("node_modules", Some(DependencyManager::Npm)),
            ("target", Some(DependencyManager::Cargo)),
            (".venv", Some(DependencyManager::Python)),
            ("src", None),
        ];

        for (dir, expected) in cases {
            assert_eq!(DependencyManager::from_dir_name(dir), expected);
        }
    }
}
613            Some(DependencyManager::Python)
614        );
615        assert_eq!(DependencyManager::from_dir_name("src"), None);
616    }
617}