Skip to main content

ralph_core/
loop_registry.rs

1//! Loop registry for tracking active Ralph loops across workspaces.
2//!
3//! The registry maintains a list of running loops with their metadata,
4//! providing discovery and coordination capabilities for multi-loop scenarios.
5//!
6//! # Design
7//!
8//! - **JSON persistence**: Single JSON file at `.ralph/loops.json`
9//! - **File locking**: Uses `flock()` for concurrent access safety
10//! - **PID-based stale detection**: Automatically cleans up entries for dead processes
11//!
12//! # Example
13//!
14//! ```no_run
15//! use ralph_core::loop_registry::{LoopRegistry, LoopEntry};
16//!
17//! fn main() -> Result<(), Box<dyn std::error::Error>> {
18//!     let registry = LoopRegistry::new(".");
19//!
20//!     // Register this loop
21//!     let entry = LoopEntry::new("implement auth", Some("/path/to/worktree"));
22//!     let id = registry.register(entry)?;
23//!
24//!     // List all active loops
25//!     for loop_entry in registry.list()? {
26//!         println!("Loop {}: {}", loop_entry.id, loop_entry.prompt);
27//!     }
28//!
29//!     // Deregister when done
30//!     registry.deregister(&id)?;
31//!     Ok(())
32//! }
33//! ```
34
35use chrono::{DateTime, Utc};
36use serde::{Deserialize, Serialize};
37use std::fs::{self, File, OpenOptions};
38use std::io::{self, Read, Seek, SeekFrom, Write};
39use std::path::{Path, PathBuf};
40use std::process;
41
42/// Metadata for a registered loop.
43#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
44pub struct LoopEntry {
45    /// Unique loop ID: loop-{unix_timestamp}-{4_hex_chars}
46    pub id: String,
47
48    /// Process ID of the loop.
49    pub pid: u32,
50
51    /// When the loop was started.
52    pub started: DateTime<Utc>,
53
54    /// The prompt/task being executed.
55    pub prompt: String,
56
57    /// Path to the worktree (None if running in main workspace).
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub worktree_path: Option<String>,
60
61    /// The workspace root where the loop is running.
62    pub workspace: String,
63}
64
65impl LoopEntry {
66    /// Creates a new loop entry for the current process.
67    pub fn new(prompt: impl Into<String>, worktree_path: Option<impl Into<String>>) -> Self {
68        Self {
69            id: Self::generate_id(),
70            pid: process::id(),
71            started: Utc::now(),
72            prompt: prompt.into(),
73            worktree_path: worktree_path.map(Into::into),
74            workspace: std::env::current_dir()
75                .map(|p| p.display().to_string())
76                .unwrap_or_default(),
77        }
78    }
79
80    /// Creates a new loop entry with a specific workspace.
81    pub fn with_workspace(
82        prompt: impl Into<String>,
83        worktree_path: Option<impl Into<String>>,
84        workspace: impl Into<String>,
85    ) -> Self {
86        Self {
87            id: Self::generate_id(),
88            pid: process::id(),
89            started: Utc::now(),
90            prompt: prompt.into(),
91            worktree_path: worktree_path.map(Into::into),
92            workspace: workspace.into(),
93        }
94    }
95
96    /// Creates a new loop entry with a specific ID.
97    ///
98    /// Use this when you need the loop ID to match other identifiers
99    /// (e.g., worktree name, branch name).
100    pub fn with_id(
101        id: impl Into<String>,
102        prompt: impl Into<String>,
103        worktree_path: Option<impl Into<String>>,
104        workspace: impl Into<String>,
105    ) -> Self {
106        Self {
107            id: id.into(),
108            pid: process::id(),
109            started: Utc::now(),
110            prompt: prompt.into(),
111            worktree_path: worktree_path.map(Into::into),
112            workspace: workspace.into(),
113        }
114    }
115
116    /// Generates a unique loop ID: loop-{timestamp}-{hex_suffix}
117    fn generate_id() -> String {
118        use std::time::{SystemTime, UNIX_EPOCH};
119        let duration = SystemTime::now()
120            .duration_since(UNIX_EPOCH)
121            .expect("Time went backwards");
122        let timestamp = duration.as_secs();
123        let hex_suffix = format!("{:04x}", duration.subsec_micros() % 0x10000);
124        format!("loop-{}-{}", timestamp, hex_suffix)
125    }
126
127    /// Checks if the process for this loop is still running.
128    #[cfg(unix)]
129    pub fn is_alive(&self) -> bool {
130        use nix::sys::signal::kill;
131        use nix::unistd::Pid;
132
133        // Signal 0 (None) checks if process exists without sending a signal
134        kill(Pid::from_raw(self.pid as i32), None)
135            .map(|_| true)
136            .unwrap_or(false)
137    }
138
139    #[cfg(not(unix))]
140    pub fn is_alive(&self) -> bool {
141        // On non-Unix platforms, assume alive (conservative)
142        true
143    }
144}
145
146/// The persisted registry data.
147#[derive(Debug, Default, Clone, Serialize, Deserialize)]
148struct RegistryData {
149    loops: Vec<LoopEntry>,
150}
151
152/// Errors that can occur during registry operations.
153#[derive(Debug, thiserror::Error)]
154pub enum RegistryError {
155    /// IO error during registry operations.
156    #[error("IO error: {0}")]
157    Io(#[from] io::Error),
158
159    /// Failed to parse registry data.
160    #[error("Failed to parse registry: {0}")]
161    ParseError(String),
162
163    /// Loop entry not found.
164    #[error("Loop not found: {0}")]
165    NotFound(String),
166
167    /// Platform not supported.
168    #[error("File locking not supported on this platform")]
169    UnsupportedPlatform,
170}
171
/// Handle to the on-disk registry of active Ralph loops.
///
/// The handle itself only remembers where the registry file lives; each
/// operation opens that file and works on it under an exclusive file lock.
pub struct LoopRegistry {
    /// Full path of the registry file (workspace root joined with
    /// `REGISTRY_FILE`).
    registry_path: PathBuf,
}
179
180impl LoopRegistry {
181    /// The relative path to the registry file within the workspace.
182    pub const REGISTRY_FILE: &'static str = ".ralph/loops.json";
183
184    /// Creates a new registry instance for the given workspace.
185    pub fn new(workspace_root: impl AsRef<Path>) -> Self {
186        Self {
187            registry_path: workspace_root.as_ref().join(Self::REGISTRY_FILE),
188        }
189    }
190
191    /// Registers a new loop entry.
192    ///
193    /// Returns the entry's ID for later deregistration.
194    pub fn register(&self, entry: LoopEntry) -> Result<String, RegistryError> {
195        let id = entry.id.clone();
196        self.with_lock(|data| {
197            // Remove any existing entry with the same PID (stale from crash)
198            data.loops.retain(|e| e.pid != entry.pid);
199            data.loops.push(entry);
200        })?;
201        Ok(id)
202    }
203
204    /// Deregisters a loop by ID.
205    pub fn deregister(&self, id: &str) -> Result<(), RegistryError> {
206        let mut found = false;
207        self.with_lock(|data| {
208            let original_len = data.loops.len();
209            data.loops.retain(|e| e.id != id);
210            found = data.loops.len() != original_len;
211        })?;
212        if !found {
213            return Err(RegistryError::NotFound(id.to_string()));
214        }
215        Ok(())
216    }
217
218    /// Gets a loop entry by ID.
219    pub fn get(&self, id: &str) -> Result<Option<LoopEntry>, RegistryError> {
220        let mut result = None;
221        self.with_lock(|data| {
222            result = data.loops.iter().find(|e| e.id == id).cloned();
223        })?;
224        Ok(result)
225    }
226
227    /// Lists all active loops (after cleaning stale entries).
228    pub fn list(&self) -> Result<Vec<LoopEntry>, RegistryError> {
229        let mut result = Vec::new();
230        self.with_lock(|data| {
231            result = data.loops.clone();
232        })?;
233        Ok(result)
234    }
235
236    /// Cleans stale entries (dead PIDs) and returns the number removed.
237    pub fn clean_stale(&self) -> Result<usize, RegistryError> {
238        let mut removed = 0;
239        self.with_lock(|data| {
240            let original_len = data.loops.len();
241            data.loops.retain(|e| e.is_alive());
242            removed = original_len - data.loops.len();
243        })?;
244        Ok(removed)
245    }
246
247    /// Deregisters all entries for the current process.
248    ///
249    /// This is useful for cleanup on termination, since each process
250    /// can only have one active loop entry.
251    pub fn deregister_current_process(&self) -> Result<bool, RegistryError> {
252        let pid = std::process::id();
253        let mut found = false;
254        self.with_lock(|data| {
255            let original_len = data.loops.len();
256            data.loops.retain(|e| e.pid != pid);
257            found = data.loops.len() != original_len;
258        })?;
259        Ok(found)
260    }
261
262    /// Executes an operation with the registry file locked.
263    #[cfg(unix)]
264    fn with_lock<F>(&self, f: F) -> Result<(), RegistryError>
265    where
266        F: FnOnce(&mut RegistryData),
267    {
268        use nix::fcntl::{Flock, FlockArg};
269
270        // Ensure .ralph directory exists
271        if let Some(parent) = self.registry_path.parent() {
272            fs::create_dir_all(parent)?;
273        }
274
275        // Open or create the file
276        let file = OpenOptions::new()
277            .read(true)
278            .write(true)
279            .create(true)
280            .truncate(false)
281            .open(&self.registry_path)?;
282
283        // Acquire exclusive lock (blocking)
284        let flock = Flock::lock(file, FlockArg::LockExclusive).map_err(|(_, errno)| {
285            RegistryError::Io(io::Error::new(
286                io::ErrorKind::Other,
287                format!("flock failed: {}", errno),
288            ))
289        })?;
290
291        // Read existing data using the locked file
292        let mut data = self.read_data_from_file(&flock)?;
293
294        // Clean stale entries before any operation
295        data.loops.retain(|e| e.is_alive());
296
297        // Execute the user function
298        f(&mut data);
299
300        // Write back the data
301        self.write_data_to_file(&flock, &data)?;
302
303        Ok(())
304    }
305
306    #[cfg(not(unix))]
307    fn with_lock<F>(&self, _f: F) -> Result<(), RegistryError>
308    where
309        F: FnOnce(&mut RegistryData),
310    {
311        Err(RegistryError::UnsupportedPlatform)
312    }
313
314    /// Reads registry data from a locked file.
315    #[cfg(unix)]
316    fn read_data_from_file(
317        &self,
318        flock: &nix::fcntl::Flock<File>,
319    ) -> Result<RegistryData, RegistryError> {
320        use std::os::fd::AsFd;
321
322        // Get a clone of the underlying file via BorrowedFd
323        let borrowed_fd = flock.as_fd();
324        let owned_fd = borrowed_fd.try_clone_to_owned()?;
325        let mut file: File = owned_fd.into();
326
327        file.seek(SeekFrom::Start(0))?;
328
329        let mut contents = String::new();
330        file.read_to_string(&mut contents)?;
331
332        if contents.trim().is_empty() {
333            return Ok(RegistryData::default());
334        }
335
336        serde_json::from_str(&contents).map_err(|e| RegistryError::ParseError(e.to_string()))
337    }
338
339    /// Writes registry data to a locked file.
340    #[cfg(unix)]
341    fn write_data_to_file(
342        &self,
343        flock: &nix::fcntl::Flock<File>,
344        data: &RegistryData,
345    ) -> Result<(), RegistryError> {
346        use std::os::fd::AsFd;
347
348        // Get a clone of the underlying file via BorrowedFd
349        let borrowed_fd = flock.as_fd();
350        let owned_fd = borrowed_fd.try_clone_to_owned()?;
351        let mut file: File = owned_fd.into();
352
353        file.set_len(0)?;
354        file.seek(SeekFrom::Start(0))?;
355
356        let json = serde_json::to_string_pretty(data)
357            .map_err(|e| RegistryError::ParseError(e.to_string()))?;
358
359        file.write_all(json.as_bytes())?;
360        file.sync_all()?;
361
362        Ok(())
363    }
364}
365
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A PID no process can have: it is far above the largest PID value
    /// Linux allows `pid_max` to be set to (2^22), so the liveness probe
    /// reliably reports it as dead. A small "unlikely" PID such as 99999
    /// can belong to a real process on systems with a raised `pid_max`.
    const DEAD_PID: u32 = i32::MAX as u32;

    /// Creates a registry rooted in a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside so the directory outlives the test body.
    fn temp_registry() -> (TempDir, LoopRegistry) {
        let temp_dir = TempDir::new().unwrap();
        let registry = LoopRegistry::new(temp_dir.path());
        (temp_dir, registry)
    }

    #[test]
    fn test_loop_entry_creation() {
        let entry = LoopEntry::new("test prompt", None::<String>);
        assert!(entry.id.starts_with("loop-"));
        assert_eq!(entry.pid, process::id());
        assert_eq!(entry.prompt, "test prompt");
        assert!(entry.worktree_path.is_none());
    }

    #[test]
    fn test_loop_entry_with_worktree() {
        let entry = LoopEntry::new("test prompt", Some("/path/to/worktree"));
        assert_eq!(entry.worktree_path, Some("/path/to/worktree".to_string()));
    }

    #[test]
    fn test_loop_entry_with_workspace() {
        let entry = LoopEntry::with_workspace("test prompt", None::<String>, "/workspace");
        assert!(entry.id.starts_with("loop-"));
        assert_eq!(entry.pid, process::id());
        assert_eq!(entry.workspace, "/workspace");
        assert!(entry.worktree_path.is_none());
    }

    #[test]
    fn test_loop_entry_id_format() {
        let entry = LoopEntry::new("test", None::<String>);
        let parts: Vec<&str> = entry.id.split('-').collect();
        assert_eq!(parts.len(), 3);
        assert_eq!(parts[0], "loop");
    }

    #[test]
    fn test_loop_entry_is_alive() {
        let entry = LoopEntry::new("test", None::<String>);
        // Current process should be alive
        assert!(entry.is_alive());
    }

    #[cfg(unix)]
    #[test]
    fn test_loop_entry_dead_pid_is_not_alive() {
        let mut entry = LoopEntry::new("test", None::<String>);
        entry.pid = DEAD_PID;
        assert!(!entry.is_alive());
    }

    #[test]
    fn test_loop_entry_with_id() {
        let entry = LoopEntry::with_id(
            "bright-maple",
            "test prompt",
            Some("/path/to/worktree"),
            "/workspace",
        );
        assert_eq!(entry.id, "bright-maple");
        assert_eq!(entry.pid, process::id());
        assert_eq!(entry.prompt, "test prompt");
        assert_eq!(entry.worktree_path, Some("/path/to/worktree".to_string()));
        assert_eq!(entry.workspace, "/workspace");
    }

    #[test]
    fn test_registry_creates_file() {
        let temp_dir = TempDir::new().unwrap();
        let registry_path = temp_dir.path().join(LoopRegistry::REGISTRY_FILE);

        assert!(!registry_path.exists());

        let registry = LoopRegistry::new(temp_dir.path());
        let entry = LoopEntry::new("test prompt", None::<String>);
        registry.register(entry).unwrap();

        assert!(registry_path.exists());
    }

    #[test]
    fn test_registry_register_and_list() {
        let (_temp_dir, registry) = temp_registry();

        let entry = LoopEntry::new("test prompt", None::<String>);
        let id = entry.id.clone();

        registry.register(entry).unwrap();

        let loops = registry.list().unwrap();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].id, id);
        assert_eq!(loops[0].prompt, "test prompt");
    }

    #[test]
    fn test_registry_get() {
        let (_temp_dir, registry) = temp_registry();

        let entry = LoopEntry::new("test prompt", None::<String>);
        let id = entry.id.clone();

        registry.register(entry).unwrap();

        let retrieved = registry.get(&id).unwrap();
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().prompt, "test prompt");
    }

    #[test]
    fn test_registry_get_nonexistent() {
        let (_temp_dir, registry) = temp_registry();

        // Populate and empty the registry first so the lookup runs against
        // an existing, empty file.
        let entry = LoopEntry::new("dummy", None::<String>);
        let id = entry.id.clone();
        registry.register(entry).unwrap();
        registry.deregister(&id).unwrap();

        let retrieved = registry.get("nonexistent").unwrap();
        assert!(retrieved.is_none());
    }

    #[test]
    fn test_registry_deregister() {
        let (_temp_dir, registry) = temp_registry();

        let entry = LoopEntry::new("test prompt", None::<String>);
        let id = entry.id.clone();

        registry.register(entry).unwrap();
        assert_eq!(registry.list().unwrap().len(), 1);

        registry.deregister(&id).unwrap();
        assert_eq!(registry.list().unwrap().len(), 0);
    }

    #[test]
    fn test_registry_deregister_nonexistent() {
        let (_temp_dir, registry) = temp_registry();

        // Register and deregister to create the file
        let entry = LoopEntry::new("dummy", None::<String>);
        let id = entry.id.clone();
        registry.register(entry).unwrap();
        registry.deregister(&id).unwrap();

        let result = registry.deregister("nonexistent");
        assert!(matches!(result, Err(RegistryError::NotFound(_))));
    }

    #[test]
    fn test_registry_same_pid_replaces() {
        // Same PID entries replace each other (prevents stale entries from crashes)
        let (_temp_dir, registry) = temp_registry();

        let entry1 = LoopEntry::new("prompt 1", None::<String>);
        let entry2 = LoopEntry::new("prompt 2", Some("/worktree"));

        // Both entries have the same PID (current process)
        assert_eq!(entry1.pid, entry2.pid);

        registry.register(entry1).unwrap();
        registry.register(entry2).unwrap();

        // Second entry should replace first (same PID)
        let loops = registry.list().unwrap();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].prompt, "prompt 2");
        assert_eq!(loops[0].worktree_path, Some("/worktree".to_string()));
    }

    #[test]
    fn test_registry_stale_pid_is_cleaned() {
        // An entry whose process no longer exists is dropped on the next
        // registry operation, while the live entry is kept.
        let (temp_dir, registry) = temp_registry();

        // Entry for the current (live) process
        let live = LoopEntry::new("prompt 1", None::<String>);
        let live_id = live.id.clone();
        registry.register(live).unwrap();

        // Entry for a process that cannot exist
        let mut stale = LoopEntry::new("prompt 2", Some("/worktree"));
        stale.pid = DEAD_PID;

        // Write the stale entry straight into the file; going through
        // `register` would prune it immediately.
        let registry_path = temp_dir.path().join(LoopRegistry::REGISTRY_FILE);
        let content = fs::read_to_string(&registry_path).unwrap();
        let mut data: serde_json::Value = serde_json::from_str(&content).unwrap();
        let loops = data["loops"].as_array_mut().unwrap();
        loops.push(serde_json::to_value(&stale).unwrap());
        assert_eq!(loops.len(), 2);
        fs::write(&registry_path, serde_json::to_string_pretty(&data).unwrap()).unwrap();

        // Listing cleans the stale entry; the live one must remain
        let loops = registry.list().unwrap();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].id, live_id);
    }

    #[test]
    fn test_registry_replaces_same_pid() {
        let (_temp_dir, registry) = temp_registry();

        // Register first entry
        let entry1 = LoopEntry::new("prompt 1", None::<String>);
        registry.register(entry1).unwrap();

        // Register second entry with same PID (simulates restart)
        let entry2 = LoopEntry::new("prompt 2", None::<String>);
        let id2 = registry.register(entry2).unwrap();

        // Should only have one entry (the new one replaced the old)
        let loops = registry.list().unwrap();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].prompt, "prompt 2");

        // The ID returned by `register` resolves to the surviving entry
        let retrieved = registry.get(&id2).unwrap();
        assert_eq!(retrieved.map(|e| e.prompt), Some("prompt 2".to_string()));
    }

    #[test]
    fn test_registry_persistence() {
        let temp_dir = TempDir::new().unwrap();

        let id = {
            let registry = LoopRegistry::new(temp_dir.path());
            let entry = LoopEntry::new("persistent prompt", None::<String>);
            let id = entry.id.clone();
            registry.register(entry).unwrap();
            id
        };

        // Load again and verify data persisted
        let registry = LoopRegistry::new(temp_dir.path());
        let loops = registry.list().unwrap();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].id, id);
        assert_eq!(loops[0].prompt, "persistent prompt");
    }

    #[test]
    fn test_entry_serialization() {
        let entry = LoopEntry::new("test prompt", Some("/worktree/path"));
        let json = serde_json::to_string(&entry).unwrap();
        let deserialized: LoopEntry = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.id, entry.id);
        assert_eq!(deserialized.pid, entry.pid);
        assert_eq!(deserialized.prompt, "test prompt");
        assert_eq!(
            deserialized.worktree_path,
            Some("/worktree/path".to_string())
        );
    }

    #[test]
    fn test_entry_serialization_no_worktree() {
        let entry = LoopEntry::new("test prompt", None::<String>);
        let json = serde_json::to_string(&entry).unwrap();

        // Verify worktree_path is not in JSON when None
        assert!(!json.contains("worktree_path"));

        let deserialized: LoopEntry = serde_json::from_str(&json).unwrap();
        assert!(deserialized.worktree_path.is_none());
    }

    #[test]
    fn test_deregister_current_process() {
        let (_temp_dir, registry) = temp_registry();

        // Register an entry (uses current PID)
        let entry = LoopEntry::new("test prompt", None::<String>);
        registry.register(entry).unwrap();
        assert_eq!(registry.list().unwrap().len(), 1);

        // Deregister by current process
        let found = registry.deregister_current_process().unwrap();
        assert!(found);
        assert_eq!(registry.list().unwrap().len(), 0);

        // Second deregister should return false (nothing to remove)
        let found = registry.deregister_current_process().unwrap();
        assert!(!found);
    }
}