Skip to main content

rec/recording/
state.rs

1use std::fs::{self, File, OpenOptions};
2use std::io::{Read, Write};
3use std::path::{Path, PathBuf};
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use chrono::Local;
7use fd_lock::RwLock;
8use serde::{Deserialize, Serialize};
9use uuid::Uuid;
10
11use crate::error::{RecError, Result};
12use crate::storage::SessionStore;
13
/// Outcome of cleaning up a stale recording left behind by a dead process.
///
/// Produced by [`RecordingState::cleanup_stale_lock`] when the PID stored in
/// the lock file no longer refers to a running process.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecoveryInfo {
    /// PID read from the stale lock file; that process was found to be dead.
    pub dead_pid: u32,
    /// Name the session was re-saved under, or `None` if nothing was recovered.
    pub recovered_name: Option<String>,
    /// Number of commands found in the stale session (0 if it was not loaded).
    pub command_count: usize,
}
24
25/// Metadata for an active recording session.
26///
27/// Stored as JSON in the state directory while a recording is in progress.
28/// Contains enough information to resume or clean up after a crash.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct ActiveSession {
31    /// Unique session identifier
32    pub id: Uuid,
33
34    /// Human-readable session name
35    pub name: String,
36
37    /// Path to the session NDJSON file
38    pub session_path: PathBuf,
39
40    /// Unix timestamp when recording started
41    pub started_at: f64,
42
43    /// PID of the recording process
44    pub pid: u32,
45}
46
/// Tracks whether a recording is in progress using two files on disk.
///
/// An `fd-lock` write lock on the lock file serialises concurrent calls to
/// `start`; the PID stored inside that file is what later allows a crashed
/// recorder to be detected and cleaned up. Because everything lives on disk,
/// the state outlives the process that created it.
///
/// # File Layout
///
/// - `{state_dir}/recording.lock` - lock file; holds the recorder's PID
/// - `{state_dir}/recording.json` - JSON metadata of the active session
pub struct RecordingState {
    /// Location of `recording.lock`.
    lock_path: PathBuf,

    /// Location of `recording.json`.
    state_path: PathBuf,
}
64
65impl RecordingState {
66    /// Create a new `RecordingState` using the given state directory.
67    ///
68    /// Does not create the directory; caller should ensure it exists
69    /// (e.g., via `Paths::ensure_dirs()`).
70    #[must_use]
71    pub fn new(state_dir: &Path) -> Self {
72        Self {
73            lock_path: state_dir.join("recording.lock"),
74            state_path: state_dir.join("recording.json"),
75        }
76    }
77
78    /// Start a new recording session.
79    ///
80    /// Acquires an exclusive file lock, writes session metadata, and
81    /// returns the `ActiveSession`. If another recording is in progress
82    /// (lock held by live process), returns `RecError::RecordingInProgress`.
83    /// If a stale lock is detected (process no longer alive), cleans it up
84    /// first and then starts.
85    ///
86    /// # Arguments
87    ///
88    /// * `name` - Human-readable session name
89    /// * `session_path` - Path where the session NDJSON file will be written
90    /// # Errors
91    ///
92    /// Returns an error if the lock cannot be acquired or file I/O fails.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the system clock is before the Unix epoch.
97    pub fn start(&self, name: &str, session_path: PathBuf) -> Result<ActiveSession> {
98        // Clean up stale lock if needed (no recovery in start — caller handles that)
99        self.cleanup_stale_lock(None)?;
100
101        // Try to acquire lock
102        let lock_file = OpenOptions::new()
103            .create(true)
104            .truncate(false)
105            .read(true)
106            .write(true)
107            .open(&self.lock_path)?;
108
109        let mut lock = RwLock::new(lock_file);
110        let Ok(mut guard) = lock.try_write() else {
111            return Err(RecError::RecordingInProgress);
112        };
113
114        let pid = std::process::id();
115        let started_at = SystemTime::now()
116            .duration_since(UNIX_EPOCH)
117            .expect("Time went backwards")
118            .as_secs_f64();
119
120        let session = ActiveSession {
121            id: Uuid::new_v4(),
122            name: name.to_string(),
123            session_path,
124            started_at,
125            pid,
126        };
127
128        // Write PID to lock file
129        guard.set_len(0)?;
130        write!(guard, "{pid}")?;
131        guard.flush()?;
132
133        // Write session state
134        let state_json = serde_json::to_string_pretty(&session)?;
135        fs::write(&self.state_path, state_json)?;
136
137        // Drop the guard explicitly - we keep the lock file with PID written,
138        // but release the fd-lock. The PID-based check handles crash detection.
139        drop(guard);
140        drop(lock);
141
142        Ok(session)
143    }
144
145    /// Stop the current recording session.
146    ///
147    /// Reads the active session metadata, removes the state and lock files,
148    /// and returns the session info. Returns `RecError::NoActiveRecording`
149    /// if no recording is in progress.
150    ///
151    /// # Errors
152    ///
153    /// Returns an error if no recording is active or file removal fails.
154    pub fn stop(&self) -> Result<ActiveSession> {
155        let session = self.current()?;
156
157        // Remove state file first, then lock file
158        if self.state_path.exists() {
159            fs::remove_file(&self.state_path)?;
160        }
161        if self.lock_path.exists() {
162            fs::remove_file(&self.lock_path)?;
163        }
164
165        Ok(session)
166    }
167
168    /// Check if a recording is currently in progress.
169    ///
170    /// Returns `true` if the state file exists and contains valid session data.
171    #[must_use]
172    pub fn is_recording(&self) -> bool {
173        self.current().is_ok()
174    }
175
176    /// Get the current active session metadata.
177    ///
178    /// Returns `RecError::NoActiveRecording` if no recording is active.
179    ///
180    /// # Errors
181    ///
182    /// Returns an error if no recording is active or the state file is corrupted.
183    pub fn current(&self) -> Result<ActiveSession> {
184        if !self.state_path.exists() {
185            return Err(RecError::NoActiveRecording);
186        }
187
188        let contents = fs::read_to_string(&self.state_path)?;
189        let session: ActiveSession = serde_json::from_str(&contents).map_err(|e| {
190            RecError::InvalidSession(format!("Failed to parse recording state: {e}"))
191        })?;
192
193        Ok(session)
194    }
195
196    /// Clean up stale lock from a crashed process, optionally recovering the session.
197    ///
198    /// Reads the PID from the lock file and checks if that process is still
199    /// alive using `libc::kill(pid, 0)` on Unix. If the process is dead,
200    /// attempts recovery (if `store` is provided) and then removes lock/state files.
201    ///
202    /// # Recovery behavior
203    ///
204    /// When `store` is `Some`:
205    /// - Sessions with ≥1 command are renamed to `recovered-YYYY-MM-DD-HHMMSS`
206    ///   and flagged with `recovered: Some(true)`.
207    /// - Sessions with 0 commands are silently deleted.
208    /// - Recovery is best-effort: errors loading/saving are logged and cleanup proceeds.
209    ///
210    /// When `store` is `None`:
211    /// - Existing behavior: just remove lock and state files.
212    ///
213    /// # Safety
214    ///
215    /// - We only clean up if the process is definitively dead
216    /// - PIDs can be reused, but the window is very small for a recording tool
217    /// - The lock file contains only a PID, so worst case we clean up an
218    ///   unrelated process's "lock" (but they wouldn't be using our lock file)
219    ///
220    /// # Errors
221    ///
222    /// Returns an error if file I/O operations fail unexpectedly.
223    pub fn cleanup_stale_lock(&self, store: Option<&SessionStore>) -> Result<Option<RecoveryInfo>> {
224        if !self.lock_path.exists() {
225            return Ok(None);
226        }
227
228        // Read PID from lock file
229        let Ok(mut lock_file) = File::open(&self.lock_path) else {
230            return Ok(None); // Lock file disappeared, that's fine
231        };
232
233        let mut pid_str = String::new();
234        if lock_file.read_to_string(&mut pid_str).is_err() {
235            // Can't read lock file, remove it
236            let _ = fs::remove_file(&self.lock_path);
237            let _ = fs::remove_file(&self.state_path);
238            return Ok(None);
239        }
240
241        let pid_str = pid_str.trim();
242        if pid_str.is_empty() {
243            // Empty lock file, remove it
244            let _ = fs::remove_file(&self.lock_path);
245            let _ = fs::remove_file(&self.state_path);
246            return Ok(None);
247        }
248
249        let pid: u32 = if let Ok(p) = pid_str.parse() {
250            p
251        } else {
252            // Invalid PID in lock file, remove it
253            let _ = fs::remove_file(&self.lock_path);
254            let _ = fs::remove_file(&self.state_path);
255            return Ok(None);
256        };
257
258        // Check if process is still alive
259        if is_process_alive(pid) {
260            return Ok(None);
261        }
262
263        // Process is dead — stale lock detected
264        let recovery_info = if let Some(store) = store {
265            Some(self.attempt_recovery(pid, store))
266        } else {
267            Some(RecoveryInfo {
268                dead_pid: pid,
269                recovered_name: None,
270                command_count: 0,
271            })
272        };
273
274        // Always clean up lock and state files
275        let _ = fs::remove_file(&self.lock_path);
276        let _ = fs::remove_file(&self.state_path);
277
278        Ok(recovery_info)
279    }
280
281    /// Attempt to recover a session from a dead recording process.
282    ///
283    /// Reads the active session metadata, loads the NDJSON file, and either
284    /// saves it as a recovered session or deletes it if empty.
285    /// Returns `None` only if recovery completely fails (errors are best-effort).
286    fn attempt_recovery(&self, pid: u32, store: &SessionStore) -> RecoveryInfo {
287        // Read active session metadata before cleaning up
288        let Ok(active) = self.current() else {
289            // Can't read state — no recovery possible
290            return RecoveryInfo {
291                dead_pid: pid,
292                recovered_name: None,
293                command_count: 0,
294            };
295        };
296
297        // Try to load the session from the NDJSON file
298        let Ok(mut session) = store.load(&active.id.to_string()) else {
299            // Can't load session — no recovery possible
300            return RecoveryInfo {
301                dead_pid: pid,
302                recovered_name: None,
303                command_count: 0,
304            };
305        };
306
307        let command_count = session.commands.len();
308
309        if command_count >= 1 {
310            // Recover: rename and flag
311            let recovered_name = Local::now().format("recovered-%Y-%m-%d-%H%M%S").to_string();
312            session.header.name.clone_from(&recovered_name);
313            session.header.recovered = Some(true);
314
315            match store.save(&session) {
316                Ok(()) => RecoveryInfo {
317                    dead_pid: pid,
318                    recovered_name: Some(recovered_name),
319                    command_count,
320                },
321                Err(_) => {
322                    // Save failed — best-effort, report what we can
323                    RecoveryInfo {
324                        dead_pid: pid,
325                        recovered_name: None,
326                        command_count,
327                    }
328                }
329            }
330        } else {
331            // Empty session — discard
332            let _ = store.delete(&active.id.to_string());
333            RecoveryInfo {
334                dead_pid: pid,
335                recovered_name: None,
336                command_count: 0,
337            }
338        }
339    }
340}
341
342/// Check if a process with the given PID is still alive.
343///
344/// On Unix, uses `libc::kill(pid, 0)` which checks process existence
345/// without sending a signal. Returns `true` if the process exists.
346#[cfg(unix)]
347#[allow(unsafe_code)]
348fn is_process_alive(pid: u32) -> bool {
349    // SAFETY: kill(pid, 0) is a standard POSIX operation that checks
350    // if a process exists without sending any signal. No memory is
351    // accessed; the syscall only queries the kernel process table.
352    unsafe { libc::kill(pid as i32, 0) == 0 }
353}
354
/// Liveness probe used on platforms other than Unix.
///
/// No process check is performed here: every PID is reported as alive, so an
/// active lock is never cleaned up by mistake.
#[cfg(not(unix))]
fn is_process_alive(_pid: u32) -> bool {
    // Conservative answer: without a real probe, assume the process is alive.
    const ASSUME_ALIVE: bool = true;
    ASSUME_ALIVE
}
363
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Command, Session};
    use crate::storage::Paths;
    use tempfile::TempDir;

    /// PID used to simulate a crashed recorder. It is far above the PIDs a
    /// real system hands out, so it is almost certainly not a running process.
    const DEAD_PID: u32 = 4_000_000_000;

    /// Temp directory plus a `RecordingState` rooted directly in it.
    fn setup() -> (TempDir, RecordingState) {
        let tmp = TempDir::new().unwrap();
        let state = RecordingState::new(tmp.path());
        (tmp, state)
    }

    /// `Paths` whose directories all live inside `temp_dir`.
    fn create_test_paths(temp_dir: &TempDir) -> Paths {
        Paths {
            data_dir: temp_dir.path().join("sessions"),
            config_dir: temp_dir.path().join("config"),
            config_file: temp_dir.path().join("config").join("config.toml"),
            state_dir: temp_dir.path().join("state"),
        }
    }

    /// Fixture for the recovery tests: a session store and a `RecordingState`
    /// whose state directory has been created inside the temp directory.
    fn setup_with_store() -> (TempDir, Paths, SessionStore, RecordingState) {
        let tmp = TempDir::new().unwrap();
        let paths = create_test_paths(&tmp);
        let store = SessionStore::new(paths.clone());

        let state_dir = tmp.path().join("state");
        fs::create_dir_all(&state_dir).unwrap();
        let rec_state = RecordingState::new(&state_dir);

        (tmp, paths, store, rec_state)
    }

    /// Write a lock file containing `pid` and a matching state file, as if a
    /// recorder with that PID had started session `id` and never stopped it.
    fn write_lock_and_state(
        state: &RecordingState,
        pid: u32,
        id: Uuid,
        name: &str,
        session_path: PathBuf,
    ) {
        fs::write(&state.lock_path, pid.to_string()).unwrap();

        let active = ActiveSession {
            id,
            name: name.to_string(),
            session_path,
            started_at: 0.0,
            pid,
        };
        fs::write(&state.state_path, serde_json::to_string(&active).unwrap()).unwrap();
    }

    #[test]
    fn test_start_creates_state_and_lock_files() {
        let (tmp, state) = setup();
        let session_path = tmp.path().join("test.ndjson");

        let session = state.start("test-session", session_path.clone()).unwrap();

        assert_eq!(session.name, "test-session");
        assert_eq!(session.session_path, session_path);
        assert!(session.started_at > 0.0);
        assert_eq!(session.pid, std::process::id());

        // Both the state file and the lock file should exist
        assert!(state.state_path.exists());
        assert!(state.lock_path.exists());
    }

    #[test]
    fn test_stop_removes_files() {
        let (tmp, state) = setup();
        let session_path = tmp.path().join("test.ndjson");

        state.start("test-session", session_path).unwrap();
        let session = state.stop().unwrap();

        assert_eq!(session.name, "test-session");
        assert!(!state.state_path.exists());
        assert!(!state.lock_path.exists());
    }

    #[test]
    fn test_stop_without_recording_returns_error() {
        let (_tmp, state) = setup();

        let err = state.stop().unwrap_err();
        assert!(
            matches!(err, RecError::NoActiveRecording),
            "Expected NoActiveRecording, got {err:?}"
        );
    }

    #[test]
    fn test_is_recording() {
        let (tmp, state) = setup();

        assert!(!state.is_recording());

        let session_path = tmp.path().join("test.ndjson");
        state.start("test-session", session_path).unwrap();

        assert!(state.is_recording());

        state.stop().unwrap();

        assert!(!state.is_recording());
    }

    #[test]
    fn test_current_returns_session_info() {
        let (tmp, state) = setup();
        let session_path = tmp.path().join("test.ndjson");

        let started = state.start("test-session", session_path.clone()).unwrap();
        let current = state.current().unwrap();

        assert_eq!(current.id, started.id);
        assert_eq!(current.name, "test-session");
        assert_eq!(current.session_path, session_path);
    }

    #[test]
    fn test_current_without_recording_returns_error() {
        let (_tmp, state) = setup();

        let result = state.current();
        assert!(result.is_err());
    }

    #[test]
    fn test_cleanup_stale_lock_removes_dead_process_lock() {
        let (tmp, state) = setup();
        write_lock_and_state(
            &state,
            DEAD_PID,
            Uuid::new_v4(),
            "stale-session",
            tmp.path().join("stale.ndjson"),
        );

        // Cleanup should remove both files
        let result = state.cleanup_stale_lock(None).unwrap();

        assert!(!state.lock_path.exists());
        assert!(!state.state_path.exists());
        // Should return RecoveryInfo with dead_pid
        let info = result.expect("Should return RecoveryInfo for dead process");
        assert_eq!(info.dead_pid, DEAD_PID);
    }

    #[test]
    fn test_cleanup_stale_lock_preserves_live_process() {
        let (tmp, state) = setup();

        // The current process is definitely alive
        let our_pid = std::process::id();
        write_lock_and_state(
            &state,
            our_pid,
            Uuid::new_v4(),
            "live-session",
            tmp.path().join("live.ndjson"),
        );

        // Cleanup should preserve both files (our process is alive)
        let result = state.cleanup_stale_lock(None).unwrap();

        assert!(state.lock_path.exists());
        assert!(state.state_path.exists());
        assert!(result.is_none(), "Should return None for live process");
    }

    #[test]
    fn test_cleanup_empty_lock_file() {
        let (_tmp, state) = setup();

        // Write an empty lock file
        fs::write(&state.lock_path, "").unwrap();

        let result = state.cleanup_stale_lock(None).unwrap();

        assert!(!state.lock_path.exists());
        assert!(result.is_none());
    }

    #[test]
    fn test_cleanup_invalid_pid_in_lock() {
        let (_tmp, state) = setup();

        // Write an invalid PID
        fs::write(&state.lock_path, "not-a-pid").unwrap();

        let result = state.cleanup_stale_lock(None).unwrap();

        assert!(!state.lock_path.exists());
        assert!(result.is_none());
    }

    #[test]
    fn test_start_after_stale_cleanup() {
        let (tmp, state) = setup();
        write_lock_and_state(
            &state,
            DEAD_PID,
            Uuid::new_v4(),
            "stale",
            tmp.path().join("stale.ndjson"),
        );

        // start() should auto-clean stale lock and succeed
        let session_path = tmp.path().join("new.ndjson");
        let session = state.start("new-session", session_path).unwrap();

        assert_eq!(session.name, "new-session");
    }

    #[cfg(unix)]
    #[test]
    fn test_is_process_alive_current_process() {
        assert!(is_process_alive(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn test_is_process_alive_dead_process() {
        assert!(!is_process_alive(DEAD_PID));
    }

    #[test]
    fn test_cleanup_stale_lock_recovers_session_with_commands() {
        let (_tmp, paths, store, rec_state) = setup_with_store();

        // Create a session with 2 commands and save it via the store
        let mut session = Session::new("original-name");
        session.commands.push(Command::new(
            0,
            "echo hello".to_string(),
            PathBuf::from("/tmp"),
        ));
        session.commands.push(Command::new(
            1,
            "ls -la".to_string(),
            PathBuf::from("/tmp"),
        ));
        let session_id = session.header.id;
        store.save(&session).unwrap();

        // Create stale lock and state files pointing to this session
        write_lock_and_state(
            &rec_state,
            DEAD_PID,
            session_id,
            "original-name",
            paths.session_file(&session_id.to_string()),
        );

        // Call cleanup with store — should recover
        let result = rec_state.cleanup_stale_lock(Some(&store)).unwrap();

        // Verify lock and state files removed
        assert!(!rec_state.lock_path.exists());
        assert!(!rec_state.state_path.exists());

        // Verify recovery info
        let info = result.expect("Should return RecoveryInfo");
        assert_eq!(info.dead_pid, DEAD_PID);
        assert_eq!(info.command_count, 2);
        assert!(
            info.recovered_name.is_some(),
            "Should have recovered_name for session with commands"
        );
        let recovered_name = info.recovered_name.unwrap();
        assert!(
            recovered_name.starts_with("recovered-"),
            "Name should start with 'recovered-': {recovered_name}"
        );

        // Verify the session file was updated
        let loaded = store.load(&session_id.to_string()).unwrap();
        assert!(
            loaded.header.name.starts_with("recovered-"),
            "Session name should be updated"
        );
        assert_eq!(loaded.header.recovered, Some(true));
        assert_eq!(loaded.commands.len(), 2);
    }

    #[test]
    fn test_cleanup_stale_lock_discards_empty_session() {
        let (_tmp, paths, store, rec_state) = setup_with_store();

        // Create a session with 0 commands
        let session = Session::new("empty-session");
        let session_id = session.header.id;
        store.save(&session).unwrap();

        // Verify session file exists
        assert!(store.exists(&session_id.to_string()));

        // Create stale lock and state files
        write_lock_and_state(
            &rec_state,
            DEAD_PID,
            session_id,
            "empty-session",
            paths.session_file(&session_id.to_string()),
        );

        // Call cleanup with store — should discard
        let result = rec_state.cleanup_stale_lock(Some(&store)).unwrap();

        // Verify lock and state files removed
        assert!(!rec_state.lock_path.exists());
        assert!(!rec_state.state_path.exists());

        // Verify recovery info
        let info = result.expect("Should return RecoveryInfo");
        assert_eq!(info.dead_pid, DEAD_PID);
        assert_eq!(info.command_count, 0);
        assert!(
            info.recovered_name.is_none(),
            "Should not have recovered_name for empty session"
        );

        // Verify the session file was deleted
        assert!(
            !store.exists(&session_id.to_string()),
            "Empty session file should be deleted"
        );
    }

    #[test]
    fn test_cleanup_stale_lock_without_store_backward_compat() {
        let (tmp, state) = setup();
        write_lock_and_state(
            &state,
            DEAD_PID,
            Uuid::new_v4(),
            "stale-compat",
            tmp.path().join("stale.ndjson"),
        );

        // Call cleanup without store (backward compat)
        let result = state.cleanup_stale_lock(None).unwrap();

        // Verify lock and state files removed
        assert!(!state.lock_path.exists());
        assert!(!state.state_path.exists());

        // Verify recovery info — dead_pid only, no recovery
        let info = result.expect("Should return RecoveryInfo");
        assert_eq!(info.dead_pid, DEAD_PID);
        assert!(info.recovered_name.is_none());
        assert_eq!(info.command_count, 0);
    }
}