Skip to main content

codelens_engine/
vfs.rs

//! Virtual File System event normalization layer.
//!
//! Transforms raw OS watcher events into semantic file events:
//! - Coalesces rapid create+modify into a single Modified
//! - Detects renames by pairing a delete and a create that share a file name
//!   (a basename heuristic; no content hashing is performed)
//! - Filters unsupported file types and excluded paths
7
8use crate::project::is_excluded;
9use crate::symbols::language_for_path;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::path::PathBuf;
13
14/// Semantic file event after normalization.
15#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
16pub enum FileEvent {
17    Created(PathBuf),
18    Modified(PathBuf),
19    Deleted(PathBuf),
20    Renamed { from: PathBuf, to: PathBuf },
21}
22
23/// Normalize raw watcher events into semantic FileEvents.
24///
25/// Takes lists of changed (created/modified) and removed paths,
26/// and produces a deduplicated list of FileEvents with rename detection.
27pub fn normalize_events(changed: &[PathBuf], removed: &[PathBuf]) -> Vec<FileEvent> {
28    // Filter to supported files only
29    let changed: Vec<&PathBuf> = changed
30        .iter()
31        .filter(|p| !is_excluded(p) && language_for_path(p).is_some())
32        .collect();
33    let removed: Vec<&PathBuf> = removed
34        .iter()
35        .filter(|p| !is_excluded(p) && language_for_path(p).is_some())
36        .collect();
37
38    if removed.is_empty() && changed.is_empty() {
39        return Vec::new();
40    }
41
42    // Try to detect renames: a delete + create with the same content hash
43    // within the same batch is likely a rename.
44    let mut events = Vec::new();
45    let mut matched_renames: HashMap<usize, usize> = HashMap::new(); // removed_idx → changed_idx
46
47    if !removed.is_empty() && !changed.is_empty() {
48        // Hash deleted files from DB would be ideal, but we don't have that here.
49        // Instead, hash the newly created files and see if any match recently deleted files.
50        // Since deleted files no longer exist, we can only do this if we have
51        // the new file's hash and compare with known sizes/names.
52        //
53        // Simple heuristic: same filename (basename) in different directory
54        // within the same batch → likely rename.
55        let removed_basenames: Vec<(&PathBuf, Option<&str>)> = removed
56            .iter()
57            .map(|p| (*p, p.file_name().and_then(|n| n.to_str())))
58            .collect();
59
60        for (ci, cp) in changed.iter().enumerate() {
61            let Some(changed_name) = cp.file_name().and_then(|n| n.to_str()) else {
62                continue;
63            };
64            for (ri, (rp, rname)) in removed_basenames.iter().enumerate() {
65                if matched_renames.contains_key(&ri) {
66                    continue;
67                }
68                if *rname == Some(changed_name) && rp != cp {
69                    matched_renames.insert(ri, ci);
70                    break;
71                }
72            }
73        }
74    }
75
76    // Emit rename events for matched pairs
77    let matched_changed: std::collections::HashSet<usize> =
78        matched_renames.values().copied().collect();
79    let matched_removed: std::collections::HashSet<usize> =
80        matched_renames.keys().copied().collect();
81
82    for (ri, ci) in &matched_renames {
83        events.push(FileEvent::Renamed {
84            from: removed[*ri].clone(),
85            to: changed[*ci].clone(),
86        });
87    }
88
89    // Emit delete events for unmatched removals
90    for (ri, rp) in removed.iter().enumerate() {
91        if !matched_removed.contains(&ri) {
92            events.push(FileEvent::Deleted((*rp).clone()));
93        }
94    }
95
96    // Emit created/modified events for unmatched changes
97    for (ci, cp) in changed.iter().enumerate() {
98        if !matched_changed.contains(&ci) {
99            // We can't distinguish create vs modify from watcher events alone,
100            // so treat all as Modified (the index pipeline handles both the same way).
101            events.push(FileEvent::Modified((*cp).clone()));
102        }
103    }
104
105    events
106}
107
108/// Convenience: extract paths by event type for the index pipeline.
109pub fn partition_events(
110    events: &[FileEvent],
111) -> (Vec<PathBuf>, Vec<PathBuf>, Vec<(PathBuf, PathBuf)>) {
112    let mut changed = Vec::new();
113    let mut removed = Vec::new();
114    let mut renamed = Vec::new();
115
116    for event in events {
117        match event {
118            FileEvent::Created(p) | FileEvent::Modified(p) => changed.push(p.clone()),
119            FileEvent::Deleted(p) => removed.push(p.clone()),
120            FileEvent::Renamed { from, to } => {
121                renamed.push((from.clone(), to.clone()));
122                // Also index the new path
123                changed.push(to.clone());
124                // And remove the old path
125                removed.push(from.clone());
126            }
127        }
128    }
129
130    (changed, removed, renamed)
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build an owned path from a literal.
    fn path(raw: &str) -> PathBuf {
        PathBuf::from(raw)
    }

    /// No input paths means no events.
    #[test]
    fn empty_events() {
        assert!(normalize_events(&[], &[]).is_empty());
    }

    /// A lone changed path is reported as a single `Modified`.
    #[test]
    fn simple_modified() {
        let events = normalize_events(&[path("/project/src/main.py")], &[]);
        assert!(matches!(
            events.as_slice(),
            [FileEvent::Modified(p)] if p.to_str().unwrap().contains("main.py")
        ));
    }

    /// A lone removed path is reported as a single `Deleted`.
    #[test]
    fn simple_deleted() {
        let events = normalize_events(&[], &[path("/project/src/old.py")]);
        assert!(matches!(events.as_slice(), [FileEvent::Deleted(_)]));
    }

    /// Same basename removed in one directory and added in another is a rename.
    #[test]
    fn rename_detection_same_basename() {
        let gone = [path("/project/src/service.py")];
        let appeared = [path("/project/lib/service.py")];
        let events = normalize_events(&appeared, &gone);
        assert!(matches!(
            events.as_slice(),
            [FileEvent::Renamed { from, to }]
                if from.to_str().unwrap().contains("src/service.py")
                    && to.to_str().unwrap().contains("lib/service.py")
        ));
    }

    /// A rename contributes to all three partitions.
    #[test]
    fn partition_handles_renames() {
        let events = [
            FileEvent::Modified(path("a.py")),
            FileEvent::Renamed {
                from: path("old.py"),
                to: path("new.py"),
            },
            FileEvent::Deleted(path("gone.py")),
        ];
        let (changed, removed, renamed) = partition_events(&events);
        // changed: a.py + new.py; removed: old.py + gone.py; renamed: the one pair.
        assert_eq!(
            (changed.len(), removed.len(), renamed.len()),
            (2, 2, 1)
        );
    }
}