ralph_workflow/files/protection/monitoring.rs
//! Real-time file system monitoring for PROMPT.md protection.
//!
//! This module provides proactive monitoring to detect deletion attempts
//! on PROMPT.md immediately, rather than waiting for periodic checks.
//! It uses the `notify` crate for cross-platform file system events.
//!
//! # Effect System Exception
//!
//! This module uses `std::fs` directly rather than the `Workspace` trait.
//! This is a documented exception to the effect system architecture because:
//!
//! 1. **Real-time filesystem monitoring**: The `notify` crate requires watching
//!    the actual filesystem for events (inotify, FSEvents, ReadDirectoryChangesW).
//! 2. **Background thread operation**: The monitor runs in a separate thread
//!    that cannot share `PhaseContext` or workspace references.
//! 3. **OS-level event handling**: File system events are inherently tied to
//!    the real filesystem, not an abstraction layer.
//!
//! This exception is documented in `docs/architecture/effect-system.md`.
//!
//! # Design
//!
//! The monitor runs in a background thread and watches for deletion events
//! on PROMPT.md. When a deletion is detected, the file is automatically
//! restored from backup. The main thread can poll the monitor to check
//! whether any restoration events occurred.
//!
//! # Platform Support
//!
//! - **Unix/Linux**: inotify via `notify` crate
//! - **macOS**: `FSEvents` via `notify` crate
//! - **Windows**: `ReadDirectoryChangesW` via `notify` crate

33
34use std::fs;
35use std::path::Path;
36use std::sync::atomic::{AtomicBool, Ordering};
37use std::sync::Arc;
38use std::thread;
39use std::time::Duration;
40
/// File system monitor for detecting PROMPT.md deletion events.
///
/// The monitor watches for deletion events and automatically restores
/// PROMPT.md from backup when one is detected. Monitoring happens in a
/// background thread, so the main thread is not blocked.
///
/// The two flags are shared with the background thread through `Arc`, which
/// is why they are atomics rather than plain `bool`s.
///
/// # Example
///
/// ```no_run
/// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
/// let mut monitor = PromptMonitor::new()?;
/// monitor.start()?;
///
/// // ... run pipeline phases ...
///
/// // Check if any restoration occurred
/// if monitor.check_and_restore() {
///     println!("PROMPT.md was restored!");
/// }
///
/// monitor.stop();
/// # Ok::<(), std::io::Error>(())
/// ```
#[derive(Debug)]
pub struct PromptMonitor {
    /// Set by the monitor thread once PROMPT.md has been deleted and restored.
    restoration_detected: Arc<AtomicBool>,
    /// Set by the owner to ask the monitor thread to exit its loop.
    stop_signal: Arc<AtomicBool>,
    /// Handle to the monitor thread (`None` if not started).
    monitor_thread: Option<thread::JoinHandle<()>>,
}
72
73impl PromptMonitor {
74 /// Create a new file system monitor for PROMPT.md.
75 ///
76 /// Returns an error if the current directory cannot be accessed or
77 /// if PROMPT.md doesn't exist (we need to know what to watch for).
78 pub fn new() -> std::io::Result<Self> {
79 // Verify we're in a valid directory with PROMPT.md
80 let prompt_path = Path::new("PROMPT.md");
81 if !prompt_path.exists() {
82 return Err(std::io::Error::new(
83 std::io::ErrorKind::NotFound,
84 "PROMPT.md does not exist - cannot monitor",
85 ));
86 }
87
88 Ok(Self {
89 restoration_detected: Arc::new(AtomicBool::new(false)),
90 stop_signal: Arc::new(AtomicBool::new(false)),
91 monitor_thread: None,
92 })
93 }
94
95 /// Start monitoring PROMPT.md for deletion events.
96 ///
97 /// This spawns a background thread that watches for file system events.
98 /// Returns immediately; monitoring happens asynchronously.
99 ///
100 /// The monitor will automatically restore PROMPT.md from backup if
101 /// deletion is detected.
102 pub fn start(&mut self) -> std::io::Result<()> {
103 if self.monitor_thread.is_some() {
104 return Err(std::io::Error::new(
105 std::io::ErrorKind::AlreadyExists,
106 "Monitor is already running",
107 ));
108 }
109
110 let restoration_flag = Arc::clone(&self.restoration_detected);
111 let stop_signal = Arc::clone(&self.stop_signal);
112
113 let handle = thread::spawn(move || {
114 Self::monitor_thread_main(&restoration_flag, &stop_signal);
115 });
116
117 self.monitor_thread = Some(handle);
118 Ok(())
119 }
120
121 /// Background thread entry point for file system monitoring.
122 ///
123 /// This thread watches the current directory for deletion events on
124 /// PROMPT.md and restores from backup when detected.
125 fn monitor_thread_main(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
126 use notify::Watcher;
127
128 // Create a channel to receive file system events
129 let (tx, rx) = std::sync::mpsc::channel();
130
131 // Create a watcher for the current directory
132 let mut watcher = match notify::recommended_watcher(tx) {
133 Ok(w) => w,
134 Err(e) => {
135 eprintln!("Warning: Failed to create file system watcher: {e}");
136 eprintln!("Falling back to periodic polling for PROMPT.md protection");
137 // Fallback to polling if watcher creation fails
138 Self::polling_monitor(restoration_detected, stop_signal);
139 return;
140 }
141 };
142
143 // Watch the current directory for events
144 if let Err(e) = watcher.watch(Path::new("."), notify::RecursiveMode::NonRecursive) {
145 eprintln!("Warning: Failed to watch current directory: {e}");
146 eprintln!("Falling back to periodic polling for PROMPT.md protection");
147 Self::polling_monitor(restoration_detected, stop_signal);
148 return;
149 }
150
151 // Process events until stop signal is received
152 let mut prompt_existed_last_check = true;
153
154 while !stop_signal.load(Ordering::Relaxed) {
155 // Check for events with a short timeout
156 match rx.recv_timeout(Duration::from_millis(100)) {
157 Ok(Ok(event)) => {
158 Self::handle_fs_event(
159 &event,
160 restoration_detected,
161 &mut prompt_existed_last_check,
162 );
163 }
164 Ok(Err(_)) | Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
165 // Error in watcher or timeout - continue anyway
166 }
167 Err(_) => {
168 // Channel disconnected - stop monitoring
169 break;
170 }
171 }
172 }
173 }
174
175 /// Handle a file system event from the watcher.
176 fn handle_fs_event(
177 event: ¬ify::Event,
178 restoration_detected: &Arc<AtomicBool>,
179 _prompt_existed_last_check: &mut bool,
180 ) {
181 for path in &event.paths {
182 if path.as_os_str() == "PROMPT.md" {
183 // Check for remove event
184 if matches!(event.kind, notify::EventKind::Remove(_)) {
185 // PROMPT.md was removed - restore it
186 if Self::restore_from_backup() {
187 restoration_detected.store(true, Ordering::Release);
188 }
189 }
190 }
191 }
192 }
193
194 /// Fallback polling-based monitor when file system watcher fails.
195 ///
196 /// Some filesystems (NFS, network drives) don't support file system
197 /// events. This fallback polls every 100ms to check if PROMPT.md exists.
198 fn polling_monitor(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
199 let mut prompt_existed = Path::new("PROMPT.md").exists();
200
201 while !stop_signal.load(Ordering::Relaxed) {
202 thread::sleep(Duration::from_millis(100));
203
204 let prompt_exists_now = Path::new("PROMPT.md").exists();
205
206 // Detect deletion (transition from exists to not exists)
207 if prompt_existed && !prompt_exists_now && Self::restore_from_backup() {
208 restoration_detected.store(true, Ordering::Release);
209 }
210
211 prompt_existed = prompt_exists_now;
212 }
213 }
214
215 /// Restore PROMPT.md from backup.
216 ///
217 /// Tries backups in order:
218 /// - .agent/PROMPT.md.backup
219 /// - .agent/PROMPT.md.backup.1
220 /// - .agent/PROMPT.md.backup.2
221 ///
222 /// Returns true if restoration succeeded, false otherwise.
223 ///
224 /// Uses atomic open to avoid TOCTOU race conditions - opens and reads
225 /// the file in one operation rather than checking existence separately.
226 fn restore_from_backup() -> bool {
227 let backup_paths = [
228 Path::new(".agent/PROMPT.md.backup"),
229 Path::new(".agent/PROMPT.md.backup.1"),
230 Path::new(".agent/PROMPT.md.backup.2"),
231 ];
232
233 for backup_path in &backup_paths {
234 // Use std::fs::File::open to atomically open the file, avoiding TOCTOU
235 // race conditions where the file could be replaced between exists() check
236 // and read operation
237 let backup_content = match std::fs::File::open(backup_path) {
238 Ok(mut file) => {
239 // Verify it's a regular file, not a symlink or special file
240 match file.metadata() {
241 Ok(metadata) if metadata.is_file() => {
242 // Read the content
243 let mut buffer = String::new();
244 match std::io::Read::read_to_string(&mut file, &mut buffer) {
245 Ok(_) => buffer,
246 Err(_) => continue,
247 }
248 }
249 _ => continue, // Not a regular file, skip
250 }
251 }
252 Err(_) => continue, // File doesn't exist or can't be opened
253 };
254
255 if backup_content.trim().is_empty() {
256 continue;
257 }
258
259 // Restore from backup - ensure parent directory exists
260 let prompt_path = Path::new("PROMPT.md");
261 if let Some(parent) = prompt_path.parent() {
262 if let Err(e) = fs::create_dir_all(parent) {
263 eprintln!("Failed to create parent directory for PROMPT.md: {e}");
264 continue;
265 }
266 }
267
268 if fs::write(prompt_path, backup_content).is_err() {
269 eprintln!("Failed to write PROMPT.md from backup");
270 continue;
271 }
272
273 // Set read-only permissions
274 #[cfg(unix)]
275 {
276 use std::os::unix::fs::PermissionsExt;
277 if let Ok(metadata) = fs::metadata(prompt_path) {
278 let mut perms = metadata.permissions();
279 perms.set_mode(0o444);
280 let _ = fs::set_permissions(prompt_path, perms);
281 }
282 }
283
284 #[cfg(windows)]
285 {
286 if let Ok(metadata) = fs::metadata(prompt_path) {
287 let mut perms = metadata.permissions();
288 perms.set_readonly(true);
289 let _ = fs::set_permissions(prompt_path, perms);
290 }
291 }
292
293 return true;
294 }
295
296 false
297 }
298
299 /// Check if any restoration events were detected and reset the flag.
300 ///
301 /// Returns true if PROMPT.md was deleted and restored since the last
302 /// check. This is a one-time check - the flag is reset after reading.
303 ///
304 /// # Example
305 ///
306 /// ```no_run
307 /// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
308 /// # let mut monitor = PromptMonitor::new().unwrap();
309 /// # monitor.start().unwrap();
310 /// // After running some agent code
311 /// if monitor.check_and_restore() {
312 /// println!("PROMPT.md was restored during this phase!");
313 /// }
314 /// ```
315 pub fn check_and_restore(&self) -> bool {
316 self.restoration_detected.load(Ordering::Acquire)
317 }
318
319 /// Stop monitoring and cleanup resources.
320 ///
321 /// Signals the monitor thread to stop and waits for it to complete.
322 pub fn stop(mut self) {
323 // Signal the thread to stop
324 self.stop_signal.store(true, Ordering::Release);
325
326 // Wait for the thread to finish and check for panics
327 if let Some(handle) = self.monitor_thread.take() {
328 if let Err(panic_payload) = handle.join() {
329 // Thread panicked - extract and log panic message for diagnostics
330 // Try common panic payload types
331 let panic_msg = panic_payload
332 .downcast_ref::<String>()
333 .cloned()
334 .or_else(|| {
335 panic_payload
336 .downcast_ref::<&str>()
337 .map(ToString::to_string)
338 })
339 .or_else(|| {
340 panic_payload
341 .downcast_ref::<&String>()
342 .map(|s| (*s).clone())
343 })
344 .unwrap_or_else(|| {
345 // Fallback: Try to get any available information
346 format!(
347 "<unknown panic type: {}>",
348 std::any::type_name_of_val(&panic_payload)
349 )
350 });
351 eprintln!("Warning: File monitoring thread panicked: {panic_msg}");
352 }
353 }
354 }
355}
356
357impl Drop for PromptMonitor {
358 fn drop(&mut self) {
359 // Signal the thread to stop when dropped
360 self.stop_signal.store(true, Ordering::Release);
361
362 // Take the handle and let it finish on its own
363 // (we can't wait in Drop because we might be panicking)
364 let _ = self.monitor_thread.take();
365 }
366}
367
#[cfg(test)]
mod tests {
    // Intentionally empty. Tests that change the working directory are
    // problematic inside a shared test suite, so the monitoring behavior is
    // left to integration tests once the monitor is wired into the pipeline.
}