ralph_workflow/files/protection/monitoring.rs
1//! Real-time file system monitoring for PROMPT.md protection.
2//!
3//! This module provides proactive monitoring to detect deletion attempts
4//! on PROMPT.md immediately, rather than waiting for periodic checks.
5//! It uses the `notify` crate for cross-platform file system events.
6//!
7//! # Design
8//!
9//! The monitor runs in a background thread and watches for deletion events
10//! on PROMPT.md. When a deletion is detected, it's automatically restored
11//! from backup. The main thread can poll the monitor to check if any
12//! restoration events occurred.
13//!
14//! # Platform Support
15//!
16//! - **Unix/Linux**: inotify via `notify` crate
17//! - **macOS**: `FSEvents` via `notify` crate
18//! - **Windows**: `ReadDirectoryChangesW` via `notify` crate
19
20use std::fs;
21use std::path::Path;
22use std::sync::atomic::{AtomicBool, Ordering};
23use std::sync::Arc;
24use std::thread;
25use std::time::Duration;
26
/// Background watcher that guards `PROMPT.md` against deletion.
///
/// Once started, a dedicated thread observes the working directory and
/// rewrites `PROMPT.md` from a backup copy whenever it disappears. The
/// caller's thread is never blocked; it polls the monitor to learn whether
/// a restoration took place.
///
/// # Example
///
/// ```no_run
/// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
/// let mut monitor = PromptMonitor::new().unwrap();
/// monitor.start().unwrap();
///
/// // ... run pipeline phases ...
///
/// // Ask whether the monitor had to restore the file
/// if monitor.check_and_restore() {
///     println!("PROMPT.md was restored!");
/// }
///
/// monitor.stop();
/// # Ok::<(), std::io::Error>(())
/// ```
pub struct PromptMonitor {
    /// Set by the monitor thread after it has restored `PROMPT.md`.
    restoration_detected: Arc<AtomicBool>,
    /// Raised to ask the monitor thread to exit its loop.
    stop_signal: Arc<AtomicBool>,
    /// Join handle of the monitor thread; `None` until `start` succeeds.
    monitor_thread: Option<thread::JoinHandle<()>>,
}
58
59impl PromptMonitor {
60 /// Create a new file system monitor for PROMPT.md.
61 ///
62 /// Returns an error if the current directory cannot be accessed or
63 /// if PROMPT.md doesn't exist (we need to know what to watch for).
64 pub fn new() -> std::io::Result<Self> {
65 // Verify we're in a valid directory with PROMPT.md
66 let prompt_path = Path::new("PROMPT.md");
67 if !prompt_path.exists() {
68 return Err(std::io::Error::new(
69 std::io::ErrorKind::NotFound,
70 "PROMPT.md does not exist - cannot monitor",
71 ));
72 }
73
74 Ok(Self {
75 restoration_detected: Arc::new(AtomicBool::new(false)),
76 stop_signal: Arc::new(AtomicBool::new(false)),
77 monitor_thread: None,
78 })
79 }
80
81 /// Start monitoring PROMPT.md for deletion events.
82 ///
83 /// This spawns a background thread that watches for file system events.
84 /// Returns immediately; monitoring happens asynchronously.
85 ///
86 /// The monitor will automatically restore PROMPT.md from backup if
87 /// deletion is detected.
88 pub fn start(&mut self) -> std::io::Result<()> {
89 if self.monitor_thread.is_some() {
90 return Err(std::io::Error::new(
91 std::io::ErrorKind::AlreadyExists,
92 "Monitor is already running",
93 ));
94 }
95
96 let restoration_flag = Arc::clone(&self.restoration_detected);
97 let stop_signal = Arc::clone(&self.stop_signal);
98
99 let handle = thread::spawn(move || {
100 Self::monitor_thread_main(&restoration_flag, &stop_signal);
101 });
102
103 self.monitor_thread = Some(handle);
104 Ok(())
105 }
106
107 /// Background thread entry point for file system monitoring.
108 ///
109 /// This thread watches the current directory for deletion events on
110 /// PROMPT.md and restores from backup when detected.
111 fn monitor_thread_main(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
112 use notify::Watcher;
113
114 // Create a channel to receive file system events
115 let (tx, rx) = std::sync::mpsc::channel();
116
117 // Create a watcher for the current directory
118 let mut watcher = match notify::recommended_watcher(tx) {
119 Ok(w) => w,
120 Err(e) => {
121 eprintln!("Warning: Failed to create file system watcher: {e}");
122 eprintln!("Falling back to periodic polling for PROMPT.md protection");
123 // Fallback to polling if watcher creation fails
124 Self::polling_monitor(restoration_detected, stop_signal);
125 return;
126 }
127 };
128
129 // Watch the current directory for events
130 if let Err(e) = watcher.watch(Path::new("."), notify::RecursiveMode::NonRecursive) {
131 eprintln!("Warning: Failed to watch current directory: {e}");
132 eprintln!("Falling back to periodic polling for PROMPT.md protection");
133 Self::polling_monitor(restoration_detected, stop_signal);
134 return;
135 }
136
137 // Process events until stop signal is received
138 let mut prompt_existed_last_check = true;
139
140 while !stop_signal.load(Ordering::Relaxed) {
141 // Check for events with a short timeout
142 match rx.recv_timeout(Duration::from_millis(100)) {
143 Ok(Ok(event)) => {
144 Self::handle_fs_event(
145 &event,
146 restoration_detected,
147 &mut prompt_existed_last_check,
148 );
149 }
150 Ok(Err(_)) | Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
151 // Error in watcher or timeout - continue anyway
152 }
153 Err(_) => {
154 // Channel disconnected - stop monitoring
155 break;
156 }
157 }
158 }
159 }
160
161 /// Handle a file system event from the watcher.
162 fn handle_fs_event(
163 event: ¬ify::Event,
164 restoration_detected: &Arc<AtomicBool>,
165 _prompt_existed_last_check: &mut bool,
166 ) {
167 for path in &event.paths {
168 if path.as_os_str() == "PROMPT.md" {
169 // Check for remove event
170 if matches!(event.kind, notify::EventKind::Remove(_)) {
171 // PROMPT.md was removed - restore it
172 if Self::restore_from_backup() {
173 restoration_detected.store(true, Ordering::Release);
174 }
175 }
176 }
177 }
178 }
179
180 /// Fallback polling-based monitor when file system watcher fails.
181 ///
182 /// Some filesystems (NFS, network drives) don't support file system
183 /// events. This fallback polls every 100ms to check if PROMPT.md exists.
184 fn polling_monitor(restoration_detected: &Arc<AtomicBool>, stop_signal: &Arc<AtomicBool>) {
185 let mut prompt_existed = Path::new("PROMPT.md").exists();
186
187 while !stop_signal.load(Ordering::Relaxed) {
188 thread::sleep(Duration::from_millis(100));
189
190 let prompt_exists_now = Path::new("PROMPT.md").exists();
191
192 // Detect deletion (transition from exists to not exists)
193 if prompt_existed && !prompt_exists_now && Self::restore_from_backup() {
194 restoration_detected.store(true, Ordering::Release);
195 }
196
197 prompt_existed = prompt_exists_now;
198 }
199 }
200
201 /// Restore PROMPT.md from backup.
202 ///
203 /// Tries backups in order:
204 /// - .agent/PROMPT.md.backup
205 /// - .agent/PROMPT.md.backup.1
206 /// - .agent/PROMPT.md.backup.2
207 ///
208 /// Returns true if restoration succeeded, false otherwise.
209 ///
210 /// Uses atomic open to avoid TOCTOU race conditions - opens and reads
211 /// the file in one operation rather than checking existence separately.
212 fn restore_from_backup() -> bool {
213 let backup_paths = [
214 Path::new(".agent/PROMPT.md.backup"),
215 Path::new(".agent/PROMPT.md.backup.1"),
216 Path::new(".agent/PROMPT.md.backup.2"),
217 ];
218
219 for backup_path in &backup_paths {
220 // Use std::fs::File::open to atomically open the file, avoiding TOCTOU
221 // race conditions where the file could be replaced between exists() check
222 // and read operation
223 let backup_content = match std::fs::File::open(backup_path) {
224 Ok(mut file) => {
225 // Verify it's a regular file, not a symlink or special file
226 match file.metadata() {
227 Ok(metadata) if metadata.is_file() => {
228 // Read the content
229 let mut buffer = String::new();
230 match std::io::Read::read_to_string(&mut file, &mut buffer) {
231 Ok(_) => buffer,
232 Err(_) => continue,
233 }
234 }
235 _ => continue, // Not a regular file, skip
236 }
237 }
238 Err(_) => continue, // File doesn't exist or can't be opened
239 };
240
241 if backup_content.trim().is_empty() {
242 continue;
243 }
244
245 // Restore from backup - ensure parent directory exists
246 let prompt_path = Path::new("PROMPT.md");
247 if let Some(parent) = prompt_path.parent() {
248 if let Err(e) = fs::create_dir_all(parent) {
249 eprintln!("Failed to create parent directory for PROMPT.md: {e}");
250 continue;
251 }
252 }
253
254 if fs::write(prompt_path, backup_content).is_err() {
255 eprintln!("Failed to write PROMPT.md from backup");
256 continue;
257 }
258
259 // Set read-only permissions
260 #[cfg(unix)]
261 {
262 use std::os::unix::fs::PermissionsExt;
263 if let Ok(metadata) = fs::metadata(prompt_path) {
264 let mut perms = metadata.permissions();
265 perms.set_mode(0o444);
266 let _ = fs::set_permissions(prompt_path, perms);
267 }
268 }
269
270 #[cfg(windows)]
271 {
272 if let Ok(metadata) = fs::metadata(prompt_path) {
273 let mut perms = metadata.permissions();
274 perms.set_readonly(true);
275 let _ = fs::set_permissions(prompt_path, perms);
276 }
277 }
278
279 return true;
280 }
281
282 false
283 }
284
285 /// Check if any restoration events were detected and reset the flag.
286 ///
287 /// Returns true if PROMPT.md was deleted and restored since the last
288 /// check. This is a one-time check - the flag is reset after reading.
289 ///
290 /// # Example
291 ///
292 /// ```no_run
293 /// # use ralph_workflow::files::protection::monitoring::PromptMonitor;
294 /// # let mut monitor = PromptMonitor::new().unwrap();
295 /// # monitor.start().unwrap();
296 /// // After running some agent code
297 /// if monitor.check_and_restore() {
298 /// println!("PROMPT.md was restored during this phase!");
299 /// }
300 /// ```
301 pub fn check_and_restore(&self) -> bool {
302 self.restoration_detected.load(Ordering::Acquire)
303 }
304
305 /// Stop monitoring and cleanup resources.
306 ///
307 /// Signals the monitor thread to stop and waits for it to complete.
308 pub fn stop(mut self) {
309 // Signal the thread to stop
310 self.stop_signal.store(true, Ordering::Release);
311
312 // Wait for the thread to finish and check for panics
313 if let Some(handle) = self.monitor_thread.take() {
314 if let Err(panic_payload) = handle.join() {
315 // Thread panicked - extract and log panic message for diagnostics
316 // Try common panic payload types
317 let panic_msg = panic_payload
318 .downcast_ref::<String>()
319 .cloned()
320 .or_else(|| {
321 panic_payload
322 .downcast_ref::<&str>()
323 .map(ToString::to_string)
324 })
325 .or_else(|| {
326 panic_payload
327 .downcast_ref::<&String>()
328 .map(|s| (*s).clone())
329 })
330 .unwrap_or_else(|| {
331 // Fallback: Try to get any available information
332 format!(
333 "<unknown panic type: {}>",
334 std::any::type_name_of_val(&panic_payload)
335 )
336 });
337 eprintln!("Warning: File monitoring thread panicked: {panic_msg}");
338 }
339 }
340 }
341}
342
343impl Drop for PromptMonitor {
344 fn drop(&mut self) {
345 // Signal the thread to stop when dropped
346 self.stop_signal.store(true, Ordering::Release);
347
348 // Take the handle and let it finish on its own
349 // (we can't wait in Drop because we might be panicking)
350 let _ = self.monitor_thread.take();
351 }
352}
353
#[cfg(test)]
mod tests {
    // Intentionally empty. The monitor operates on paths relative to the
    // current working directory, and tests that change directories are
    // problematic in test suites. The monitoring functionality is meant to
    // be covered by integration tests once the monitor is integrated into
    // the pipeline.
}