Skip to main content

ralph_workflow/files/protection/
monitoring.rs

1//! Real-time file system monitoring for PROMPT.md protection.
2//!
3//! This module provides proactive monitoring to detect deletion attempts
4//! on PROMPT.md immediately, rather than waiting for periodic checks.
5//! It uses the `notify` crate for cross-platform file system events.
6//!
7//! # Effect System Exception
8//!
9//! This module uses `std::fs` directly rather than the `Workspace` trait.
10//! This is a documented exception to the effect system architecture because:
11//!
12//! 1. **Real-time filesystem monitoring**: The `notify` crate requires watching
13//!    the actual filesystem for events (inotify, FSEvents, ReadDirectoryChangesW).
14//! 2. **Background thread operation**: The monitor runs in a separate thread
15//!    that cannot share `PhaseContext` or workspace references.
16//! 3. **OS-level event handling**: File system events are inherently tied to
17//!    the real filesystem, not an abstraction layer.
18//!
19//! This exception is documented in `docs/architecture/effect-system.md`.
20//!
21//! # Design
22//!
23//! The monitor runs in a background thread and watches for deletion events
24//! on PROMPT.md. When a deletion is detected, it's automatically restored
25//! from backup. The main thread can poll the monitor to check if any
26//! restoration events occurred.
27//!
28//! # Platform Support
29//!
30//! - **Unix/Linux**: inotify via `notify` crate
31//! - **macOS**: `FSEvents` via `notify` crate
32//! - **Windows**: `ReadDirectoryChangesW` via `notify` crate
33
34use std::fs;
35use std::path::Path;
36use std::sync::atomic::{AtomicBool, Ordering};
37use std::sync::Arc;
38use std::thread;
39use std::time::Duration;
40
/// Background watcher that restores PROMPT.md when it is deleted.
///
/// [`PromptMonitor::start`] spawns a thread that watches for deletion of
/// PROMPT.md and rewrites the file from a backup when that happens. The
/// owning thread is never blocked by the monitoring; it learns about a
/// restoration by polling [`PromptMonitor::check_and_restore`].
///
/// # Example
///
/// ```no_run
/// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
/// let mut monitor = PromptMonitor::new()?;
/// monitor.start()?;
///
/// // ... run pipeline phases ...
///
/// // Check if any restoration occurred
/// if monitor.check_and_restore() {
///     println!("PROMPT.md was restored!");
/// }
///
/// monitor.stop();
/// # Ok::<(), std::io::Error>(())
/// ```
#[derive(Debug)]
pub struct PromptMonitor {
    /// Set by the monitor thread once PROMPT.md has been deleted and restored.
    restoration_detected: Arc<AtomicBool>,
    /// Set by the owner to ask the monitor thread to exit its loop.
    stop_signal: Arc<AtomicBool>,
    /// Join handle of the monitor thread (`None` until `start` has run).
    monitor_thread: Option<thread::JoinHandle<()>>,
}
72
73impl PromptMonitor {
74    /// Create a new file system monitor for PROMPT.md.
75    ///
76    /// Returns an error if the current directory cannot be accessed or
77    /// if PROMPT.md doesn't exist (we need to know what to watch for).
78    pub fn new() -> std::io::Result<Self> {
79        // Verify we're in a valid directory with PROMPT.md
80        let prompt_path = Path::new("PROMPT.md");
81        if !prompt_path.exists() {
82            return Err(std::io::Error::new(
83                std::io::ErrorKind::NotFound,
84                "PROMPT.md does not exist - cannot monitor",
85            ));
86        }
87
88        Ok(Self {
89            restoration_detected: Arc::new(AtomicBool::new(false)),
90            stop_signal: Arc::new(AtomicBool::new(false)),
91            monitor_thread: None,
92        })
93    }
94
95    /// Start monitoring PROMPT.md for deletion events.
96    ///
97    /// This spawns a background thread that watches for file system events.
98    /// Returns immediately; monitoring happens asynchronously.
99    ///
100    /// The monitor will automatically restore PROMPT.md from backup if
101    /// deletion is detected.
102    pub fn start(&mut self) -> std::io::Result<()> {
103        if self.monitor_thread.is_some() {
104            return Err(std::io::Error::new(
105                std::io::ErrorKind::AlreadyExists,
106                "Monitor is already running",
107            ));
108        }
109
110        let restoration_flag = Arc::clone(&self.restoration_detected);
111        let stop_signal = Arc::clone(&self.stop_signal);
112
113        let handle = thread::spawn(move || {
114            Self::monitor_thread_main(&restoration_flag, &stop_signal);
115        });
116
117        self.monitor_thread = Some(handle);
118        Ok(())
119    }
120
121    /// Background thread entry point for file system monitoring.
122    ///
123    /// This thread watches the current directory for deletion events on
124    /// PROMPT.md and restores from backup when detected.
125    fn monitor_thread_main(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
126        use notify::Watcher;
127
128        // Create a channel to receive file system events
129        let (tx, rx) = std::sync::mpsc::channel();
130
131        // Create a watcher for the current directory
132        let mut watcher = match notify::recommended_watcher(tx) {
133            Ok(w) => w,
134            Err(e) => {
135                eprintln!("Warning: Failed to create file system watcher: {e}");
136                eprintln!("Falling back to periodic polling for PROMPT.md protection");
137                // Fallback to polling if watcher creation fails
138                Self::polling_monitor(restoration_detected, stop_signal);
139                return;
140            }
141        };
142
143        // Watch the current directory for events
144        if let Err(e) = watcher.watch(Path::new("."), notify::RecursiveMode::NonRecursive) {
145            eprintln!("Warning: Failed to watch current directory: {e}");
146            eprintln!("Falling back to periodic polling for PROMPT.md protection");
147            Self::polling_monitor(restoration_detected, stop_signal);
148            return;
149        }
150
151        // Process events until stop signal is received
152        let mut prompt_existed_last_check = true;
153
154        while !stop_signal.load(Ordering::Relaxed) {
155            // Check for events with a short timeout
156            match rx.recv_timeout(Duration::from_millis(100)) {
157                Ok(Ok(event)) => {
158                    Self::handle_fs_event(
159                        &event,
160                        restoration_detected,
161                        &mut prompt_existed_last_check,
162                    );
163                }
164                Ok(Err(_)) | Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
165                    // Error in watcher or timeout - continue anyway
166                }
167                Err(_) => {
168                    // Channel disconnected - stop monitoring
169                    break;
170                }
171            }
172        }
173    }
174
175    /// Handle a file system event from the watcher.
176    fn handle_fs_event(
177        event: &notify::Event,
178        restoration_detected: &Arc<AtomicBool>,
179        _prompt_existed_last_check: &mut bool,
180    ) {
181        for path in &event.paths {
182            if path.as_os_str() == "PROMPT.md" {
183                // Check for remove event
184                if matches!(event.kind, notify::EventKind::Remove(_)) {
185                    // PROMPT.md was removed - restore it
186                    if Self::restore_from_backup() {
187                        restoration_detected.store(true, Ordering::Release);
188                    }
189                }
190            }
191        }
192    }
193
194    /// Fallback polling-based monitor when file system watcher fails.
195    ///
196    /// Some filesystems (NFS, network drives) don't support file system
197    /// events. This fallback polls every 100ms to check if PROMPT.md exists.
198    fn polling_monitor(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
199        let mut prompt_existed = Path::new("PROMPT.md").exists();
200
201        while !stop_signal.load(Ordering::Relaxed) {
202            thread::sleep(Duration::from_millis(100));
203
204            let prompt_exists_now = Path::new("PROMPT.md").exists();
205
206            // Detect deletion (transition from exists to not exists)
207            if prompt_existed && !prompt_exists_now && Self::restore_from_backup() {
208                restoration_detected.store(true, Ordering::Release);
209            }
210
211            prompt_existed = prompt_exists_now;
212        }
213    }
214
215    /// Restore PROMPT.md from backup.
216    ///
217    /// Tries backups in order:
218    /// - .agent/PROMPT.md.backup
219    /// - .agent/PROMPT.md.backup.1
220    /// - .agent/PROMPT.md.backup.2
221    ///
222    /// Returns true if restoration succeeded, false otherwise.
223    ///
224    /// Uses atomic open to avoid TOCTOU race conditions - opens and reads
225    /// the file in one operation rather than checking existence separately.
226    fn restore_from_backup() -> bool {
227        let backup_paths = [
228            Path::new(".agent/PROMPT.md.backup"),
229            Path::new(".agent/PROMPT.md.backup.1"),
230            Path::new(".agent/PROMPT.md.backup.2"),
231        ];
232
233        for backup_path in &backup_paths {
234            // Use std::fs::File::open to atomically open the file, avoiding TOCTOU
235            // race conditions where the file could be replaced between exists() check
236            // and read operation
237            let backup_content = match std::fs::File::open(backup_path) {
238                Ok(mut file) => {
239                    // Verify it's a regular file, not a symlink or special file
240                    match file.metadata() {
241                        Ok(metadata) if metadata.is_file() => {
242                            // Read the content
243                            let mut buffer = String::new();
244                            match std::io::Read::read_to_string(&mut file, &mut buffer) {
245                                Ok(_) => buffer,
246                                Err(_) => continue,
247                            }
248                        }
249                        _ => continue, // Not a regular file, skip
250                    }
251                }
252                Err(_) => continue, // File doesn't exist or can't be opened
253            };
254
255            if backup_content.trim().is_empty() {
256                continue;
257            }
258
259            // Restore from backup - ensure parent directory exists
260            let prompt_path = Path::new("PROMPT.md");
261            if let Some(parent) = prompt_path.parent() {
262                if let Err(e) = fs::create_dir_all(parent) {
263                    eprintln!("Failed to create parent directory for PROMPT.md: {e}");
264                    continue;
265                }
266            }
267
268            if fs::write(prompt_path, backup_content).is_err() {
269                eprintln!("Failed to write PROMPT.md from backup");
270                continue;
271            }
272
273            // Set read-only permissions
274            #[cfg(unix)]
275            {
276                use std::os::unix::fs::PermissionsExt;
277                if let Ok(metadata) = fs::metadata(prompt_path) {
278                    let mut perms = metadata.permissions();
279                    perms.set_mode(0o444);
280                    let _ = fs::set_permissions(prompt_path, perms);
281                }
282            }
283
284            #[cfg(windows)]
285            {
286                if let Ok(metadata) = fs::metadata(prompt_path) {
287                    let mut perms = metadata.permissions();
288                    perms.set_readonly(true);
289                    let _ = fs::set_permissions(prompt_path, perms);
290                }
291            }
292
293            return true;
294        }
295
296        false
297    }
298
299    /// Check if any restoration events were detected and reset the flag.
300    ///
301    /// Returns true if PROMPT.md was deleted and restored since the last
302    /// check. This is a one-time check - the flag is reset after reading.
303    ///
304    /// # Example
305    ///
306    /// ```no_run
307    /// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
308    /// # let mut monitor = PromptMonitor::new().unwrap();
309    /// # monitor.start().unwrap();
310    /// // After running some agent code
311    /// if monitor.check_and_restore() {
312    ///     println!("PROMPT.md was restored during this phase!");
313    /// }
314    /// ```
315    pub fn check_and_restore(&self) -> bool {
316        self.restoration_detected.load(Ordering::Acquire)
317    }
318
319    /// Stop monitoring and cleanup resources.
320    ///
321    /// Signals the monitor thread to stop and waits for it to complete.
322    pub fn stop(mut self) {
323        // Signal the thread to stop
324        self.stop_signal.store(true, Ordering::Release);
325
326        // Wait for the thread to finish and check for panics
327        if let Some(handle) = self.monitor_thread.take() {
328            if let Err(panic_payload) = handle.join() {
329                // Thread panicked - extract and log panic message for diagnostics
330                // Try common panic payload types
331                let panic_msg = panic_payload
332                    .downcast_ref::<String>()
333                    .cloned()
334                    .or_else(|| {
335                        panic_payload
336                            .downcast_ref::<&str>()
337                            .map(ToString::to_string)
338                    })
339                    .or_else(|| {
340                        panic_payload
341                            .downcast_ref::<&String>()
342                            .map(|s| (*s).clone())
343                    })
344                    .unwrap_or_else(|| {
345                        // Fallback: Try to get any available information
346                        format!(
347                            "<unknown panic type: {}>",
348                            std::any::type_name_of_val(&panic_payload)
349                        )
350                    });
351                eprintln!("Warning: File monitoring thread panicked: {panic_msg}");
352            }
353        }
354    }
355}
356
357impl Drop for PromptMonitor {
358    fn drop(&mut self) {
359        // Signal the thread to stop when dropped
360        self.stop_signal.store(true, Ordering::Release);
361
362        // Take the handle and let it finish on its own
363        // (we can't wait in Drop because we might be panicking)
364        let _ = self.monitor_thread.take();
365    }
366}
367
368#[cfg(test)]
369mod tests {
370    // Intentionally empty: this module resolves "PROMPT.md" and ".agent/..." against
371    // the current directory, and tests that change directories are problematic in
372    // test suites. Coverage is deferred to integration tests of the pipeline.
373}