Skip to main content

codelens_engine/
vfs.rs

//! Virtual File System event normalization layer.
//!
//! Transforms raw OS watcher events into semantic file events:
//! - Coalesces rapid create+modify into a single Modified
//! - Detects renames heuristically: a removed path and a changed path in the
//!   same batch that share a file name (no content hashing is performed)
//! - Filters unsupported file types and excluded paths
7
8use crate::project::is_excluded_within;
9use crate::symbols::language_for_path;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13
14/// Semantic file event after normalization.
15#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
16pub enum FileEvent {
17    Created(PathBuf),
18    Modified(PathBuf),
19    Deleted(PathBuf),
20    Renamed { from: PathBuf, to: PathBuf },
21}
22
23/// Normalize raw watcher events into semantic FileEvents.
24///
25/// Takes the project root plus lists of changed (created/modified) and
26/// removed paths, and produces a deduplicated list of FileEvents with
27/// rename detection. Exclusion is evaluated root-relative so watcher
28/// events for a project rooted under an excluded-name ancestor (e.g.
29/// `~/.claude/...`) are not silently dropped (#358).
30pub fn normalize_events(root: &Path, changed: &[PathBuf], removed: &[PathBuf]) -> Vec<FileEvent> {
31    // Filter to supported files only
32    let changed: Vec<&PathBuf> = changed
33        .iter()
34        .filter(|p| !is_excluded_within(root, p) && language_for_path(p).is_some())
35        .collect();
36    let removed: Vec<&PathBuf> = removed
37        .iter()
38        .filter(|p| !is_excluded_within(root, p) && language_for_path(p).is_some())
39        .collect();
40
41    if removed.is_empty() && changed.is_empty() {
42        return Vec::new();
43    }
44
45    // Try to detect renames: a delete + create with the same content hash
46    // within the same batch is likely a rename.
47    let mut events = Vec::new();
48    let mut matched_renames: HashMap<usize, usize> = HashMap::new(); // removed_idx → changed_idx
49
50    if !removed.is_empty() && !changed.is_empty() {
51        // Hash deleted files from DB would be ideal, but we don't have that here.
52        // Instead, hash the newly created files and see if any match recently deleted files.
53        // Since deleted files no longer exist, we can only do this if we have
54        // the new file's hash and compare with known sizes/names.
55        //
56        // Simple heuristic: same filename (basename) in different directory
57        // within the same batch → likely rename.
58        let removed_basenames: Vec<(&PathBuf, Option<&str>)> = removed
59            .iter()
60            .map(|p| (*p, p.file_name().and_then(|n| n.to_str())))
61            .collect();
62
63        for (ci, cp) in changed.iter().enumerate() {
64            let Some(changed_name) = cp.file_name().and_then(|n| n.to_str()) else {
65                continue;
66            };
67            for (ri, (rp, rname)) in removed_basenames.iter().enumerate() {
68                if matched_renames.contains_key(&ri) {
69                    continue;
70                }
71                if *rname == Some(changed_name) && rp != cp {
72                    matched_renames.insert(ri, ci);
73                    break;
74                }
75            }
76        }
77    }
78
79    // Emit rename events for matched pairs
80    let matched_changed: std::collections::HashSet<usize> =
81        matched_renames.values().copied().collect();
82    let matched_removed: std::collections::HashSet<usize> =
83        matched_renames.keys().copied().collect();
84
85    for (ri, ci) in &matched_renames {
86        events.push(FileEvent::Renamed {
87            from: removed[*ri].clone(),
88            to: changed[*ci].clone(),
89        });
90    }
91
92    // Emit delete events for unmatched removals
93    for (ri, rp) in removed.iter().enumerate() {
94        if !matched_removed.contains(&ri) {
95            events.push(FileEvent::Deleted((*rp).clone()));
96        }
97    }
98
99    // Emit created/modified events for unmatched changes
100    for (ci, cp) in changed.iter().enumerate() {
101        if !matched_changed.contains(&ci) {
102            // We can't distinguish create vs modify from watcher events alone,
103            // so treat all as Modified (the index pipeline handles both the same way).
104            events.push(FileEvent::Modified((*cp).clone()));
105        }
106    }
107
108    events
109}
110
111/// Convenience: extract paths by event type for the index pipeline.
112pub fn partition_events(
113    events: &[FileEvent],
114) -> (Vec<PathBuf>, Vec<PathBuf>, Vec<(PathBuf, PathBuf)>) {
115    let mut changed = Vec::new();
116    let mut removed = Vec::new();
117    let mut renamed = Vec::new();
118
119    for event in events {
120        match event {
121            FileEvent::Created(p) | FileEvent::Modified(p) => changed.push(p.clone()),
122            FileEvent::Deleted(p) => removed.push(p.clone()),
123            FileEvent::Renamed { from, to } => {
124                renamed.push((from.clone(), to.clone()));
125                // Also index the new path
126                changed.push(to.clone());
127                // And remove the old path
128                removed.push(from.clone());
129            }
130        }
131    }
132
133    (changed, removed, renamed)
134}
135
#[cfg(test)]
mod tests {
    //! Unit tests for event normalization and partitioning.
    //!
    //! Assertions compare whole event lists rather than lengths or
    //! substrings, so a wrong path or a wrong ordering fails the test.

    use super::*;
    use std::path::PathBuf;

    /// Project root shared by the tests that do not care about its location.
    const ROOT: &str = "/project";

    /// Build owned paths from string literals.
    fn paths(raw: &[&str]) -> Vec<PathBuf> {
        raw.iter().copied().map(PathBuf::from).collect()
    }

    #[test]
    fn dot_directory_rooted_project_events_are_not_dropped() {
        // #358 regression: watcher events for a project rooted under an
        // excluded-name ancestor must survive normalization.
        let root = Path::new("/Users/u/.claude/worktrees/proj");
        let inside = paths(&["/Users/u/.claude/worktrees/proj/src/main.py"]);
        assert_eq!(
            normalize_events(root, &inside, &[]),
            vec![FileEvent::Modified(inside[0].clone())],
            "event under dot-dir root must not be filtered"
        );

        // In-project exclusions still apply.
        let vendored = paths(&["/Users/u/.claude/worktrees/proj/node_modules/dep/x.py"]);
        assert!(normalize_events(root, &vendored, &[]).is_empty());
    }

    #[test]
    fn empty_events() {
        assert!(normalize_events(Path::new(ROOT), &[], &[]).is_empty());
    }

    #[test]
    fn simple_modified() {
        let changed = paths(&["/project/src/main.py"]);
        assert_eq!(
            normalize_events(Path::new(ROOT), &changed, &[]),
            vec![FileEvent::Modified(changed[0].clone())]
        );
    }

    #[test]
    fn simple_deleted() {
        let removed = paths(&["/project/src/old.py"]);
        assert_eq!(
            normalize_events(Path::new(ROOT), &[], &removed),
            vec![FileEvent::Deleted(removed[0].clone())]
        );
    }

    #[test]
    fn rename_detection_same_basename() {
        let removed = paths(&["/project/src/service.py"]);
        let changed = paths(&["/project/lib/service.py"]);
        assert_eq!(
            normalize_events(Path::new(ROOT), &changed, &removed),
            vec![FileEvent::Renamed {
                from: removed[0].clone(),
                to: changed[0].clone(),
            }]
        );
    }

    #[test]
    fn same_path_removed_and_changed_is_not_a_rename() {
        // A path cannot be renamed onto itself; delete + re-create in place
        // must not be reported as a rename.
        let both = paths(&["/project/src/main.py"]);
        let events = normalize_events(Path::new(ROOT), &both, &both);
        assert!(!events
            .iter()
            .any(|event| matches!(event, FileEvent::Renamed { .. })));
    }

    #[test]
    fn partition_handles_renames() {
        let events = vec![
            FileEvent::Modified(PathBuf::from("a.py")),
            FileEvent::Renamed {
                from: PathBuf::from("old.py"),
                to: PathBuf::from("new.py"),
            },
            FileEvent::Deleted(PathBuf::from("gone.py")),
        ];
        let (changed, removed, renamed) = partition_events(&events);
        assert_eq!(changed, paths(&["a.py", "new.py"]));
        assert_eq!(removed, paths(&["old.py", "gone.py"]));
        assert_eq!(
            renamed,
            vec![(PathBuf::from("old.py"), PathBuf::from("new.py"))]
        );
    }
}