Skip to main content

ralph_core/
loop_lock.rs

1//! Loop lock mechanism for preventing concurrent Ralph loops in the same workspace.
2//!
3//! Uses `flock()` on `.ralph/loop.lock` to ensure only one primary loop runs at a time.
4//! When a second loop attempts to start, it can detect the existing lock and spawn
5//! into a git worktree instead.
6//!
7//! # Example
8//!
9//! ```no_run
10//! use ralph_core::loop_lock::{LoopLock, LockError};
11//!
12//! fn main() -> Result<(), Box<dyn std::error::Error>> {
13//!     match LoopLock::try_acquire(".", "implement auth") {
14//!         Ok(guard) => {
15//!             // We're the primary loop - run normally
16//!             println!("Acquired lock, running as primary loop");
17//!             // Lock is held until guard is dropped
18//!         }
19//!         Err(LockError::AlreadyLocked(existing)) => {
20//!             // Another loop is running - spawn into worktree
21//!             println!("Lock held by PID {}, spawning worktree", existing.pid);
22//!         }
23//!         Err(e) => return Err(e.into()),
24//!     }
25//!     Ok(())
26//! }
27//! ```
28
29use chrono::{DateTime, Utc};
30use serde::{Deserialize, Serialize};
31use std::fs::{self, File, OpenOptions};
32use std::io::{self, Read, Seek, SeekFrom, Write};
33use std::path::{Path, PathBuf};
34use std::process;
35
36/// Metadata stored in the lock file, readable by other processes.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct LockMetadata {
39    /// Process ID of the lock holder.
40    pub pid: u32,
41
42    /// When the lock was acquired.
43    pub started: DateTime<Utc>,
44
45    /// The prompt/task being executed.
46    pub prompt: String,
47}
48
49/// A guard that holds the loop lock. The lock is released when this is dropped.
50#[derive(Debug)]
51pub struct LockGuard {
52    /// The open file handle (keeps the flock).
53    #[cfg(unix)]
54    _flock: nix::fcntl::Flock<File>,
55
56    /// Placeholder for non-unix (compilation only, never actually used)
57    #[cfg(not(unix))]
58    _file: File,
59
60    /// Path to the lock file.
61    lock_path: PathBuf,
62}
63
64impl LockGuard {
65    /// Returns the path to the lock file.
66    pub fn lock_path(&self) -> &Path {
67        &self.lock_path
68    }
69}
70
71impl Drop for LockGuard {
72    fn drop(&mut self) {
73        // The Flock is automatically released when dropped.
74        tracing::debug!("Releasing loop lock at {}", self.lock_path.display());
75    }
76}
77
78/// Errors that can occur during lock operations.
79#[derive(Debug, thiserror::Error)]
80pub enum LockError {
81    /// The lock is already held by another process.
82    #[error("Lock already held by PID {}", .0.pid)]
83    AlreadyLocked(LockMetadata),
84
85    /// IO error during lock operations.
86    #[error("IO error: {0}")]
87    Io(#[from] io::Error),
88
89    /// Failed to parse lock metadata.
90    #[error("Failed to parse lock metadata: {0}")]
91    ParseError(String),
92
93    /// Platform not supported (non-Unix).
94    #[error("File locking not supported on this platform")]
95    UnsupportedPlatform,
96}
97
/// Entry point for the workspace loop lock.
///
/// All operations are associated functions; the type carries no state.
/// Locking is advisory and implemented with `flock()` on `.ralph/loop.lock`,
/// so the lock goes away automatically when the holding process exits
/// (even on crash).
pub struct LoopLock;
103
104impl LoopLock {
105    /// The relative path to the lock file within the workspace.
106    pub const LOCK_FILE: &'static str = ".ralph/loop.lock";
107
108    /// Try to acquire the loop lock (non-blocking).
109    ///
110    /// # Arguments
111    ///
112    /// * `workspace_root` - Root directory of the workspace
113    /// * `prompt` - The prompt/task being executed (stored in lock metadata)
114    ///
115    /// # Returns
116    ///
117    /// * `Ok(LockGuard)` - Lock acquired successfully
118    /// * `Err(LockError::AlreadyLocked(metadata))` - Another process holds the lock
119    /// * `Err(LockError::Io(_))` - IO error
120    pub fn try_acquire(
121        workspace_root: impl AsRef<Path>,
122        prompt: &str,
123    ) -> Result<LockGuard, LockError> {
124        let lock_path = workspace_root.as_ref().join(Self::LOCK_FILE);
125
126        // Ensure .ralph directory exists
127        if let Some(parent) = lock_path.parent() {
128            fs::create_dir_all(parent)?;
129        }
130
131        // Open or create the lock file
132        let file = OpenOptions::new()
133            .read(true)
134            .write(true)
135            .create(true)
136            .truncate(false)
137            .open(&lock_path)?;
138
139        // Try to acquire exclusive lock (non-blocking)
140        #[cfg(unix)]
141        {
142            use nix::fcntl::{Flock, FlockArg};
143
144            match Flock::lock(file, FlockArg::LockExclusiveNonblock) {
145                Ok(flock) => {
146                    // We got the lock - write our metadata
147                    Self::write_metadata(&flock, prompt)?;
148
149                    tracing::debug!("Acquired loop lock at {}", lock_path.display());
150
151                    Ok(LockGuard {
152                        _flock: flock,
153                        lock_path,
154                    })
155                }
156                Err((file, errno)) => {
157                    use nix::errno::Errno;
158                    // EWOULDBLOCK and EAGAIN are the same on some platforms (macOS)
159                    if errno == Errno::EWOULDBLOCK || errno == Errno::EAGAIN {
160                        // Lock is held by another process - read their metadata
161                        let metadata = Self::read_metadata(&file)?;
162                        Err(LockError::AlreadyLocked(metadata))
163                    } else {
164                        Err(LockError::Io(io::Error::new(
165                            io::ErrorKind::Other,
166                            format!("flock failed: {}", errno),
167                        )))
168                    }
169                }
170            }
171        }
172
173        #[cfg(not(unix))]
174        {
175            let _ = file;
176            let _ = prompt;
177            Err(LockError::UnsupportedPlatform)
178        }
179    }
180
181    /// Acquire the loop lock, blocking until available.
182    ///
183    /// This should be used with the `--exclusive` flag to wait for the
184    /// primary loop slot instead of spawning into a worktree.
185    ///
186    /// # Arguments
187    ///
188    /// * `workspace_root` - Root directory of the workspace
189    /// * `prompt` - The prompt/task being executed
190    ///
191    /// # Returns
192    ///
193    /// * `Ok(LockGuard)` - Lock acquired successfully
194    /// * `Err(LockError::Io(_))` - IO error
195    pub fn acquire_blocking(
196        workspace_root: impl AsRef<Path>,
197        prompt: &str,
198    ) -> Result<LockGuard, LockError> {
199        let lock_path = workspace_root.as_ref().join(Self::LOCK_FILE);
200
201        // Ensure .ralph directory exists
202        if let Some(parent) = lock_path.parent() {
203            fs::create_dir_all(parent)?;
204        }
205
206        let file = OpenOptions::new()
207            .read(true)
208            .write(true)
209            .create(true)
210            .truncate(false)
211            .open(&lock_path)?;
212
213        #[cfg(unix)]
214        {
215            use nix::fcntl::{Flock, FlockArg};
216
217            match Flock::lock(file, FlockArg::LockExclusive) {
218                Ok(flock) => {
219                    // We got the lock - write our metadata
220                    Self::write_metadata(&flock, prompt)?;
221
222                    tracing::debug!("Acquired loop lock (blocking) at {}", lock_path.display());
223
224                    Ok(LockGuard {
225                        _flock: flock,
226                        lock_path,
227                    })
228                }
229                Err((_, errno)) => Err(LockError::Io(io::Error::new(
230                    io::ErrorKind::Other,
231                    format!("flock failed: {}", errno),
232                ))),
233            }
234        }
235
236        #[cfg(not(unix))]
237        {
238            let _ = file;
239            let _ = prompt;
240            Err(LockError::UnsupportedPlatform)
241        }
242    }
243
244    /// Read the metadata from an existing lock file.
245    ///
246    /// This can be used to check who holds the lock without acquiring it.
247    pub fn read_existing(
248        workspace_root: impl AsRef<Path>,
249    ) -> Result<Option<LockMetadata>, LockError> {
250        let lock_path = workspace_root.as_ref().join(Self::LOCK_FILE);
251
252        if !lock_path.exists() {
253            return Ok(None);
254        }
255
256        let file = File::open(&lock_path)?;
257        match Self::read_metadata(&file) {
258            Ok(metadata) => Ok(Some(metadata)),
259            Err(LockError::ParseError(_)) => Ok(None),
260            Err(e) => Err(e),
261        }
262    }
263
264    /// Check if the lock is currently held (without acquiring it).
265    ///
266    /// Returns `true` if another process holds the lock.
267    pub fn is_locked(workspace_root: impl AsRef<Path>) -> Result<bool, LockError> {
268        let lock_path = workspace_root.as_ref().join(Self::LOCK_FILE);
269
270        if !lock_path.exists() {
271            return Ok(false);
272        }
273
274        let file = OpenOptions::new()
275            .read(true)
276            .write(true) // Need write for exclusive lock
277            .open(&lock_path)?;
278
279        #[cfg(unix)]
280        {
281            use nix::errno::Errno;
282            use nix::fcntl::{Flock, FlockArg};
283
284            match Flock::lock(file, FlockArg::LockExclusiveNonblock) {
285                Ok(_flock) => {
286                    // We got the lock - it will be released when _flock is dropped
287                    Ok(false)
288                }
289                Err((_, errno)) => {
290                    if errno == Errno::EWOULDBLOCK || errno == Errno::EAGAIN {
291                        Ok(true)
292                    } else {
293                        Err(LockError::Io(io::Error::new(
294                            io::ErrorKind::Other,
295                            format!("flock failed: {}", errno),
296                        )))
297                    }
298                }
299            }
300        }
301
302        #[cfg(not(unix))]
303        {
304            let _ = file;
305            Err(LockError::UnsupportedPlatform)
306        }
307    }
308
309    /// Write lock metadata to the file.
310    fn write_metadata(file: &File, prompt: &str) -> Result<(), LockError> {
311        let metadata = LockMetadata {
312            pid: process::id(),
313            started: Utc::now(),
314            prompt: prompt.to_string(),
315        };
316
317        // Use a mutable reference via clone for writing
318        let mut file_clone = file.try_clone()?;
319        file_clone.set_len(0)?;
320        file_clone.seek(SeekFrom::Start(0))?;
321
322        let json = serde_json::to_string_pretty(&metadata)
323            .map_err(|e| LockError::ParseError(e.to_string()))?;
324
325        file_clone.write_all(json.as_bytes())?;
326        file_clone.sync_all()?;
327
328        Ok(())
329    }
330
331    /// Read lock metadata from the file.
332    fn read_metadata(file: &File) -> Result<LockMetadata, LockError> {
333        let mut file_clone = file.try_clone()?;
334        file_clone.seek(SeekFrom::Start(0))?;
335        let mut contents = String::new();
336        file_clone.read_to_string(&mut contents)?;
337
338        if contents.trim().is_empty() {
339            return Err(LockError::ParseError("Empty lock file".to_string()));
340        }
341
342        serde_json::from_str(&contents).map_err(|e| LockError::ParseError(e.to_string()))
343    }
344}
345
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Acquiring in a fresh workspace succeeds and stores our metadata.
    #[test]
    fn test_acquire_lock_success() {
        let temp_dir = TempDir::new().unwrap();

        let guard = LoopLock::try_acquire(temp_dir.path(), "test prompt");
        assert!(guard.is_ok());

        // Lock file should exist
        let lock_path = temp_dir.path().join(".ralph/loop.lock");
        assert!(lock_path.exists());

        // Metadata should be readable
        let contents = fs::read_to_string(&lock_path).unwrap();
        let metadata: LockMetadata = serde_json::from_str(&contents).unwrap();
        assert_eq!(metadata.pid, process::id());
        assert_eq!(metadata.prompt, "test prompt");
    }

    /// Dropping the guard frees the lock for the next caller.
    #[test]
    fn test_lock_released_on_drop() {
        let temp_dir = TempDir::new().unwrap();

        {
            let _guard = LoopLock::try_acquire(temp_dir.path(), "first").unwrap();
            // Lock is held
        }
        // Guard dropped, lock released

        // Should be able to acquire again
        let guard = LoopLock::try_acquire(temp_dir.path(), "second");
        assert!(guard.is_ok());
    }

    /// `is_locked` follows the lifetime of the guard.
    #[test]
    fn test_is_locked() {
        let temp_dir = TempDir::new().unwrap();

        // Initially not locked
        assert!(!LoopLock::is_locked(temp_dir.path()).unwrap());

        let guard = LoopLock::try_acquire(temp_dir.path(), "test").unwrap();

        // flock() locks belong to the open file description, not the process.
        // `is_locked` opens the file again, so its lock attempt conflicts with
        // the guard even though both live in this process.
        assert!(LoopLock::is_locked(temp_dir.path()).unwrap());

        drop(guard);

        // Released again once the guard is gone
        assert!(!LoopLock::is_locked(temp_dir.path()).unwrap());
    }

    /// A second acquisition attempt reports the current holder's metadata.
    #[test]
    fn test_try_acquire_while_held_reports_holder() {
        let temp_dir = TempDir::new().unwrap();

        let _guard = LoopLock::try_acquire(temp_dir.path(), "holder").unwrap();

        match LoopLock::try_acquire(temp_dir.path(), "contender") {
            Err(LockError::AlreadyLocked(existing)) => {
                assert_eq!(existing.pid, process::id());
                assert_eq!(existing.prompt, "holder");
            }
            other => panic!("expected AlreadyLocked, got {:?}", other),
        }
    }

    /// Without a lock file there is no metadata to report.
    #[test]
    fn test_read_existing_no_file() {
        let temp_dir = TempDir::new().unwrap();

        let result = LoopLock::read_existing(temp_dir.path()).unwrap();
        assert!(result.is_none());
    }

    /// Metadata written by the holder is visible through `read_existing`.
    #[test]
    fn test_read_existing_with_lock() {
        let temp_dir = TempDir::new().unwrap();

        let _guard = LoopLock::try_acquire(temp_dir.path(), "my prompt").unwrap();

        let metadata = LoopLock::read_existing(temp_dir.path()).unwrap().unwrap();
        assert_eq!(metadata.pid, process::id());
        assert_eq!(metadata.prompt, "my prompt");
    }

    /// Acquiring the lock creates the `.ralph` directory on demand.
    #[test]
    fn test_creates_ralph_directory() {
        let temp_dir = TempDir::new().unwrap();
        let ralph_dir = temp_dir.path().join(".ralph");

        assert!(!ralph_dir.exists());

        let _guard = LoopLock::try_acquire(temp_dir.path(), "test").unwrap();

        assert!(ralph_dir.exists());
    }

    /// `LockMetadata` survives a JSON round trip.
    #[test]
    fn test_lock_metadata_serialization() {
        let metadata = LockMetadata {
            pid: 12345,
            started: Utc::now(),
            prompt: "implement feature".to_string(),
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: LockMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.pid, 12345);
        assert_eq!(deserialized.prompt, "implement feature");
    }
}