dirty_tracker/
lib.rs

1//! Opportunistic dirty file tracker
2//!
3//! This library provides a simple way to track dirty files in a directory.
4//! It uses the `notify` crate to watch for file system events and keep track
5//! of the files that have been modified.
6//!
7//! If the underlying file system does not support watching for file system events, or if there are
8//! too many files to watch, the tracker will simply give up and return `State::Unknown`.
9//!
10//! # Example
11//! ```rust
12//! use dirty_tracker::{State, DirtyTracker};
13//!
14//! let td = tempfile::tempdir().unwrap();
15//!
16//! let mut tracker = DirtyTracker::new(td.path()).unwrap();
17//! assert_eq!(tracker.state(), State::Clean);
18//! assert!(tracker.paths().unwrap().is_empty());
19//!
20//! // Modify a file in the directory.
21//! std::fs::write(td.path().join("file"), b"hello").unwrap();
22//!
23//! assert_eq!(tracker.state(), State::Dirty);
24//! assert_eq!(tracker.paths(), Some(&maplit::hashset![td.path().join("file")]));
25//! ```
26
27use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
28use std::collections::HashSet;
29use std::path::{Path, PathBuf};
30use std::sync::mpsc::{channel, Receiver, RecvError, RecvTimeoutError};
31
32/// The tracker object.
33///
34/// This object keeps track of the dirty files in a directory.
35///
36/// The tracker is created with a path to a directory. It will watch for file
37/// system events in that directory and keep track of the files that have been
38/// modified.
39///
40/// The tracker can be in one of three states:
41/// - Clean: No files have been modified.
42/// - Dirty: Some files have been modified.
43/// - Unknown: The tracker is in an unknown state. This can happen if the
44///  tracker has missed some events, or if the underlying file system is
45///  behaving in an unexpected way.
46pub struct DirtyTracker {
47    path: PathBuf,
48    rx: Receiver<notify::Result<Event>>,
49    paths: HashSet<PathBuf>,
50    created: HashSet<PathBuf>,
51    need_rescan: bool,
52    #[allow(dead_code)]
53    watcher: RecommendedWatcher,
54}
55
/// What the tracker currently knows about the watched directory.
///
/// The type is a plain tag, so it is `Copy` and can be hashed or stored
/// freely by callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum State {
    /// No changes have been observed.
    Clean,
    /// At least one path has been observed to change.
    Dirty,
    /// The tracker cannot tell: pending events could not be processed, or an
    /// event indicated that a rescan is needed.
    Unknown,
}
62
/// Reasons why draining the pending events can fail.
#[derive(Debug)]
pub enum ProcessError {
    /// No event arrived within the given duration.
    Timeout(std::time::Duration),
    /// Events can no longer be received, e.g. because the sending side of
    /// the event channel is gone.
    Disconnected,
}

impl std::fmt::Display for ProcessError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Disconnected => f.write_str("Disconnected"),
            Self::Timeout(limit) => f.write_fmt(format_args!("Timeout: {:?}", limit)),
        }
    }
}

impl std::error::Error for ProcessError {}
79
80impl DirtyTracker {
81    /// Create a new tracker object.
82    ///
83    /// # Arguments
84    /// * `path` - The path to the directory to watch.
85    ///
86    /// # Returns
87    /// A new `DirtyTracker` object.
88    pub fn new(path: &Path) -> notify::Result<Self> {
89        // Create a channel to receive the events.
90        let (tx, rx) = channel();
91
92        let config = notify::Config::default();
93
94        // Create a watcher object.
95        let mut watcher: RecommendedWatcher = notify::RecommendedWatcher::new(tx, config)?;
96
97        // TODO: Refuse to work with watchers that are low-performance.
98
99        // Add a path to be watched. All files and directories at that path and below will be monitored for changes.
100        watcher.watch(path, RecursiveMode::Recursive)?;
101
102        Ok(DirtyTracker {
103            path: path.to_path_buf(),
104            rx,
105            paths: HashSet::new(),
106            created: HashSet::new(),
107            need_rescan: false,
108            watcher,
109        })
110    }
111
112    /// Mark all files as clean.
113    ///
114    /// Note that this can race with file modifications, so it's only safe
115    /// if you're sure that no modifications are happening.
116    pub fn mark_clean(&mut self) {
117        let _ = self.process_pending(None);
118        self.need_rescan = false;
119        self.paths.clear();
120        self.created.clear();
121    }
122
123    /// Returns true if there are dirty files.
124    #[deprecated(since = "0.2.0", note = "Use state() instead")]
125    pub fn is_dirty(&mut self) -> bool {
126        self.state() == State::Dirty
127    }
128
129    /// Returns the state of the tracker.
130    pub fn state(&mut self) -> State {
131        if self.process_pending(None).is_err() {
132            return State::Unknown;
133        }
134        if self.need_rescan {
135            State::Unknown
136        } else if self.paths.is_empty() {
137            State::Clean
138        } else {
139            State::Dirty
140        }
141    }
142
143    /// Returns the paths of the dirty files.
144    ///
145    /// If the tracker is in an unknown state, this will return None.
146    pub fn paths(&mut self) -> Option<&HashSet<PathBuf>> {
147        if self.process_pending(None).is_err() {
148            return None;
149        }
150        if self.need_rescan {
151            None
152        } else {
153            Some(&self.paths)
154        }
155    }
156
157    /// Returns the relative paths of the dirty files.
158    ///
159    /// If the tracker is in an unknown state, this will return None.
160    pub fn relpaths(&mut self) -> Option<HashSet<&Path>> {
161        let path = self.path.clone();
162        self.paths().as_mut().map(|paths| {
163            paths
164                .iter()
165                .map(|p| p.strip_prefix(&path).unwrap())
166                .collect()
167        })
168    }
169
170    fn process_pending_event(&mut self, event: Event) {
171        if event.need_rescan() {
172            self.need_rescan = true;
173        }
174        match event {
175            Event {
176                kind: EventKind::Create(_),
177                paths,
178                ..
179            } => {
180                for path in paths {
181                    self.created.insert(path.clone());
182                    self.paths.insert(path);
183                }
184            }
185            Event {
186                kind: EventKind::Modify(_),
187                paths,
188                ..
189            } => {
190                for path in paths {
191                    self.paths.insert(path);
192                }
193            }
194            Event {
195                kind: EventKind::Remove(_),
196                paths,
197                ..
198            } => {
199                for path in paths {
200                    if self.created.contains(&path) {
201                        self.paths.remove(&path);
202                        self.created.remove(&path);
203                    } else {
204                        self.paths.insert(path.clone());
205                    }
206                }
207            }
208            _ => {}
209        }
210    }
211
212    fn process_pending(
213        &mut self,
214        timeout: Option<std::time::Duration>,
215    ) -> Result<(), ProcessError> {
216        // Make a sentinel change to ensure that we process all pending events.
217
218        // We do this by creating a dummy file and then deleting it
219        // immediately.
220        //
221        // This is a bit of a hack, but it's the simplest way to ensure
222        // that we process all pending events.
223        //
224        // We can't just wait for a timeout, because we might miss events - and it would be
225        // difficult to determine the correct timeout value. Performance is one of the main
226        // reasons for using this library, so we don't want to wait for a long time.
227        let mut dummy = tempfile::NamedTempFile::new_in(&self.path).unwrap();
228        use std::io::Write;
229        dummy.write_all(b"dummy").unwrap();
230        let dummy_path = dummy.path().to_path_buf();
231        std::mem::drop(dummy);
232
233        let is_sentinel_delete_event = |event: &notify::Event| {
234            matches!(
235                event.kind,
236                EventKind::Remove(_) if event.paths.iter().any(|p| p == &dummy_path)
237            )
238        };
239
240        // Process all pending events.
241        loop {
242            if let Some(timeout) = timeout {
243                match self.rx.recv_timeout(timeout) {
244                    Ok(Ok(event)) => {
245                        if is_sentinel_delete_event(&event) {
246                            self.process_pending_event(event);
247                            break;
248                        } else {
249                            self.process_pending_event(event)
250                        }
251                    }
252                    Ok(Err(e)) => {
253                        panic!("Error receiving event: {:?}", e);
254                    }
255                    Err(RecvTimeoutError::Timeout) => {
256                        return Err(ProcessError::Timeout(timeout));
257                    }
258                    Err(RecvTimeoutError::Disconnected) => {
259                        return Err(ProcessError::Disconnected);
260                    }
261                }
262            } else {
263                match self.rx.recv() {
264                    Ok(Ok(event)) => {
265                        if is_sentinel_delete_event(&event) {
266                            self.process_pending_event(event);
267                            break;
268                        } else {
269                            self.process_pending_event(event)
270                        }
271                    }
272                    Ok(Err(e)) => {
273                        panic!("Error receiving event: {:?}", e);
274                    }
275                    Err(RecvError) => {
276                        return Err(ProcessError::Disconnected);
277                    }
278                }
279            }
280        }
281
282        Ok(())
283    }
284}
285
286#[cfg(test)]
287mod tests {
288    use super::*;
289    use std::fs::File;
290    use std::io::Write;
291    use tempfile::tempdir;
292
293    fn wait_for(
294        tracker: &mut DirtyTracker,
295        expected_paths: &HashSet<PathBuf>,
296        expected_state: State,
297    ) {
298        let state = tracker.state();
299        let paths = tracker.paths().unwrap().clone();
300        if state == State::Unknown {
301            panic!("Unexpected unknown state");
302        }
303        assert_eq!(state, expected_state);
304        assert_eq!(paths, *expected_paths);
305    }
306
307    #[test]
308    fn test_no_changes() {
309        let dir = tempdir().unwrap();
310        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
311        assert_eq!(tracker.state(), State::Clean);
312        assert!(tracker.paths().unwrap().is_empty());
313
314        wait_for(&mut tracker, &maplit::hashset![], State::Clean);
315
316        assert_eq!(tracker.paths(), Some(&maplit::hashset![]));
317        assert_eq!(tracker.state(), State::Clean);
318    }
319
320    #[test]
321    fn test_simple_create() {
322        let dir = tempdir().unwrap();
323        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
324        assert_eq!(tracker.state(), State::Clean);
325        assert!(tracker.paths().unwrap().is_empty());
326
327        let file = dir.path().join("file");
328        let mut f = File::create(&file).unwrap();
329        f.write_all(b"hello").unwrap();
330        f.sync_all().unwrap();
331        wait_for(&mut tracker, &maplit::hashset![file.clone()], State::Dirty);
332        assert_eq!(tracker.paths(), Some(&maplit::hashset![file.clone()]));
333        assert_eq!(
334            tracker.relpaths(),
335            Some(maplit::hashset![Path::new("file")])
336        );
337        assert_eq!(tracker.state(), State::Dirty);
338    }
339
340    #[test]
341    fn test_simple_modify() {
342        let dir = tempdir().unwrap();
343
344        let file = dir.path().join("file");
345        std::fs::write(&file, b"hello").unwrap();
346
347        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
348        assert_eq!(tracker.state(), State::Clean);
349        assert!(tracker.paths().unwrap().is_empty());
350
351        std::fs::write(&file, b"world").unwrap();
352
353        wait_for(&mut tracker, &maplit::hashset![file.clone()], State::Dirty);
354        assert_eq!(tracker.paths(), Some(&maplit::hashset![file.clone()]));
355        assert_eq!(
356            tracker.relpaths(),
357            Some(maplit::hashset![Path::new("file")])
358        );
359        assert_eq!(tracker.state(), State::Dirty);
360    }
361
362    #[test]
363    fn test_delete() {
364        let dir = tempdir().unwrap();
365
366        let file = dir.path().join("file");
367        std::fs::write(&file, b"hello").unwrap();
368
369        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
370        assert_eq!(tracker.state(), State::Clean);
371        assert!(tracker.paths().unwrap().is_empty());
372
373        std::fs::remove_file(&file).unwrap();
374
375        wait_for(&mut tracker, &maplit::hashset![file.clone()], State::Dirty);
376        assert_eq!(tracker.paths(), Some(&maplit::hashset![file.clone()]));
377        assert_eq!(
378            tracker.relpaths(),
379            Some(maplit::hashset![Path::new("file")])
380        );
381        assert_eq!(tracker.state(), State::Dirty);
382    }
383
384    #[test]
385    fn test_rename() {
386        let dir = tempdir().unwrap();
387
388        let file = dir.path().join("file");
389        std::fs::write(&file, b"hello").unwrap();
390
391        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
392        assert_eq!(tracker.state(), State::Clean);
393        assert!(tracker.paths().unwrap().is_empty());
394
395        let new_file = dir.path().join("new_file");
396        std::fs::rename(&file, &new_file).unwrap();
397
398        wait_for(
399            &mut tracker,
400            &maplit::hashset![new_file.clone(), file.clone()],
401            State::Dirty,
402        );
403
404        assert_eq!(
405            tracker.paths(),
406            Some(&maplit::hashset![file.clone(), new_file.clone()])
407        );
408        assert_eq!(tracker.state(), State::Dirty);
409    }
410
411    #[test]
412    fn test_mark_clean() {
413        let dir = tempdir().unwrap();
414
415        let file = dir.path().join("file");
416        std::fs::write(&file, b"hello").unwrap();
417
418        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
419        assert_eq!(tracker.state(), State::Clean);
420        assert!(tracker.paths().unwrap().is_empty());
421
422        std::fs::write(&file, b"world").unwrap();
423
424        wait_for(&mut tracker, &maplit::hashset![file.clone()], State::Dirty);
425        assert_eq!(tracker.paths(), Some(&maplit::hashset![file.clone()]));
426        assert_eq!(tracker.state(), State::Dirty);
427
428        tracker.mark_clean();
429        assert_eq!(tracker.state(), State::Clean);
430        assert!(tracker.paths().unwrap().is_empty());
431    }
432
433    #[test]
434    fn test_add_and_remove() {
435        let dir = tempdir().unwrap();
436
437        let file = dir.path().join("file");
438        std::fs::write(file, b"hello").unwrap();
439
440        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
441        assert_eq!(tracker.state(), State::Clean);
442        assert!(tracker.paths().unwrap().is_empty());
443
444        let file2 = dir.path().join("file2");
445        std::fs::write(&file2, b"world").unwrap();
446
447        wait_for(&mut tracker, &maplit::hashset![file2.clone()], State::Dirty);
448        assert_eq!(tracker.paths(), Some(&maplit::hashset![file2.clone()]));
449        assert_eq!(tracker.state(), State::Dirty);
450
451        std::fs::remove_file(&file2).unwrap();
452
453        wait_for(&mut tracker, &maplit::hashset![], State::Clean);
454        assert_eq!(tracker.paths(), Some(&maplit::hashset![]));
455        assert_eq!(tracker.state(), State::Clean);
456    }
457
458    #[test]
459    fn test_follow_subdir() {
460        let dir = tempdir().unwrap();
461
462        let subdir = dir.path().join("subdir");
463        std::fs::create_dir(&subdir).unwrap();
464
465        let file = subdir.join("file");
466        std::fs::write(&file, b"hello").unwrap();
467
468        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
469        assert_eq!(tracker.state(), State::Clean);
470        assert!(tracker.paths().unwrap().is_empty());
471
472        std::fs::write(&file, b"world").unwrap();
473
474        wait_for(&mut tracker, &maplit::hashset![file.clone()], State::Dirty);
475        assert_eq!(tracker.paths(), Some(&maplit::hashset![file.clone()]));
476        assert_eq!(tracker.state(), State::Dirty);
477    }
478
479    #[test]
480    fn test_create_and_follow_subdir() {
481        let dir = tempdir().unwrap();
482
483        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
484        assert_eq!(tracker.state(), State::Clean);
485        assert!(tracker.paths().unwrap().is_empty());
486
487        let subdir = dir.path().join("subdir");
488        std::fs::create_dir(&subdir).unwrap();
489
490        wait_for(
491            &mut tracker,
492            &maplit::hashset![subdir.clone()],
493            State::Dirty,
494        );
495        assert_eq!(tracker.paths(), Some(&maplit::hashset![subdir.clone()]));
496
497        let file = subdir.join("file");
498        std::fs::write(&file, b"hello").unwrap();
499
500        wait_for(
501            &mut tracker,
502            &maplit::hashset![subdir.clone(), file.clone()],
503            State::Dirty,
504        );
505        assert_eq!(
506            tracker.paths(),
507            Some(&maplit::hashset![subdir.clone(), file.clone()])
508        );
509        assert_eq!(tracker.state(), State::Dirty);
510    }
511
512    #[test]
513    fn test_many_added() {
514        let dir = tempdir().unwrap();
515
516        let mut tracker = DirtyTracker::new(dir.path()).unwrap();
517        assert_eq!(tracker.state(), State::Clean);
518        assert!(tracker.paths().unwrap().is_empty());
519
520        let mut expected_paths = HashSet::new();
521
522        for i in 0..100 {
523            let file = dir.path().join(format!("file{}", i));
524            std::fs::write(&file, b"hello").unwrap();
525            expected_paths.insert(file.clone());
526        }
527
528        wait_for(&mut tracker, &expected_paths, State::Dirty);
529        assert_eq!(tracker.paths(), Some(&expected_paths));
530        assert_eq!(tracker.state(), State::Dirty);
531    }
532}