xchecker_lock/
lib.rs

1//! File locking system for xchecker with advisory semantics and crash recovery
2//!
3//! This module provides exclusive file locking per spec ID directory to prevent
4//! concurrent execution. The locking is advisory and coordinates xchecker processes
5//! but is not a security boundary.
6
7use anyhow::Result;
8use camino::Utf8PathBuf;
9use chrono::{DateTime, Utc};
10use fd_lock::RwLock;
11use serde::{Deserialize, Serialize};
12use std::cell::RefCell;
13use std::fs;
14use std::io::{self, Write};
15use std::path::{Path, PathBuf};
16use std::process;
17use std::time::{SystemTime, UNIX_EPOCH};
18
19// Thread-local override used only in tests to avoid process-global env races.
20thread_local! {
21    static THREAD_HOME: RefCell<Option<Utf8PathBuf>> = const { RefCell::new(None) };
22}
23
/// Age in seconds beyond which a lock counts as stale when the caller does
/// not supply an explicit TTL (one hour).
const DEFAULT_STALE_THRESHOLD_SECS: u64 = 60 * 60;
26
27/// Lock information stored in the lock file
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct LockInfo {
30    /// Process ID that created the lock
31    pub pid: u32,
32    /// Process start time (seconds since UNIX epoch)
33    pub start_time: u64,
34    /// Timestamp when the lock was created (seconds since UNIX epoch)
35    pub created_at: u64,
36    /// Spec ID being locked
37    pub spec_id: String,
38    /// xchecker version that created the lock
39    pub xchecker_version: String,
40}
41
42/// `XChecker` lockfile for reproducibility tracking (schema v1)
43/// Pins model, CLI version, and schema version to detect drift
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct XCheckerLock {
46    /// Schema version for this lockfile format
47    pub schema_version: String,
48    /// RFC3339 UTC timestamp when the lockfile was created
49    pub created_at: DateTime<Utc>,
50    /// Full model name that was used (e.g., "haiku")
51    pub model_full_name: String,
52    /// Claude CLI version that was used
53    pub claude_cli_version: String,
54}
55
/// Values describing the current run, compared against an [`XCheckerLock`]
/// to detect drift.
#[derive(Debug, Clone)]
pub struct RunContext {
    /// Full model name in use for this run.
    pub model_full_name: String,
    /// Claude CLI version in use for this run.
    pub claude_cli_version: String,
    /// Lockfile schema version expected by this run.
    pub schema_version: String,
}
63
64/// Drift pair showing locked vs current value
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct DriftPair {
67    /// Value from lockfile
68    pub locked: String,
69    /// Current value
70    pub current: String,
71}
72
73/// Lock drift information
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct LockDrift {
76    /// Model full name drift
77    pub model_full_name: Option<DriftPair>,
78    /// Claude CLI version drift
79    pub claude_cli_version: Option<DriftPair>,
80    /// Schema version drift
81    pub schema_version: Option<DriftPair>,
82}
83
84/// Lock errors for file locking operations
85#[derive(Debug, thiserror::Error)]
86pub enum LockError {
87    #[error(
88        "Concurrent execution detected for spec '{spec_id}' (PID {pid}, created {created_ago} ago)"
89    )]
90    ConcurrentExecution {
91        spec_id: String,
92        pid: u32,
93        created_ago: String,
94    },
95
96    #[error(
97        "Stale lock detected for spec '{spec_id}' (PID {pid}, age {age_secs}s). Use --force to override"
98    )]
99    StaleLock {
100        spec_id: String,
101        pid: u32,
102        age_secs: u64,
103    },
104
105    #[error("Lock file is corrupted or invalid: {reason}")]
106    CorruptedLock { reason: String },
107
108    #[error("Failed to acquire lock: {reason}")]
109    AcquisitionFailed { reason: String },
110
111    #[error("Failed to release lock: {reason}")]
112    ReleaseFailed { reason: String },
113
114    #[error("IO error during lock operation: {0}")]
115    Io(#[from] io::Error),
116}
117
118/// Write file atomically using a temporary file and atomic rename
119///
120/// This is a simplified version of atomic_write that doesn't depend on xchecker-utils
121fn write_file_atomic(path: &Utf8PathBuf, content: &str) -> Result<(), io::Error> {
122    let parent = path
123        .parent()
124        .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "No parent directory"))?;
125
126    // Ensure parent directory exists
127    fs::create_dir_all(parent)?;
128
129    // Create temporary file in the same directory
130    let temp_path = parent.join(format!(".{}.tmp", path.file_name().unwrap_or("file")));
131
132    // Write content to temporary file
133    fs::write(&temp_path, content)?;
134
135    // Atomically rename temporary file to target path
136    fs::rename(&temp_path, path)?;
137
138    Ok(())
139}
140
141/// Get the spec root directory for a given spec ID
142///
143/// This is a simplified version of paths::spec_root that doesn't depend on xchecker-utils
144fn xchecker_home() -> Utf8PathBuf {
145    if let Some(tl) = THREAD_HOME.with(|tl| tl.borrow().clone()) {
146        return tl;
147    }
148    if let Ok(p) = std::env::var("XCHECKER_HOME") {
149        return Utf8PathBuf::from(p);
150    }
151    Utf8PathBuf::from(".xchecker")
152}
153
154/// Get the spec root directory for a given spec ID
155///
156/// This mirrors xchecker-utils path resolution to keep lock paths consistent.
157fn spec_root(spec_id: &str) -> Utf8PathBuf {
158    xchecker_home().join("specs").join(spec_id)
159}
160
161/// Ensure a directory exists, creating it if necessary
162///
163/// This is a simplified version of paths::ensure_dir_all that doesn't depend on xchecker-utils
164fn ensure_dir_all(path: &Utf8PathBuf) -> Result<(), io::Error> {
165    if !path.as_std_path().exists() {
166        fs::create_dir_all(path.as_std_path())?;
167    }
168    Ok(())
169}
170
/// Install `path` as the calling thread's xchecker home override.
///
/// Only compiled for tests or with the `test-utils` feature.
#[cfg(any(test, feature = "test-utils"))]
pub fn set_thread_home_for_tests(path: Utf8PathBuf) {
    THREAD_HOME.with(|cell| {
        cell.replace(Some(path));
    });
}
176
/// Create a temporary directory and make it this thread's xchecker home.
///
/// Uses the thread-local override rather than the environment, so tests on other
/// threads are unaffected. The returned `TempDir` must be kept by the caller for
/// as long as the directory is needed.
#[cfg(test)]
pub fn with_isolated_home() -> tempfile::TempDir {
    let temp_dir = tempfile::TempDir::new().expect("Failed to create temp dir");
    let home = Utf8PathBuf::from_path_buf(temp_dir.path().to_owned()).unwrap();
    set_thread_home_for_tests(home);
    temp_dir
}
187
188impl XCheckerLock {
189    /// Create a new lockfile with current context
190    #[must_use]
191    pub fn new(model_full_name: String, claude_cli_version: String) -> Self {
192        Self {
193            schema_version: "1".to_string(),
194            created_at: Utc::now(),
195            model_full_name,
196            claude_cli_version,
197        }
198    }
199
200    /// Detect drift between locked values and current run context
201    /// Returns None if no drift detected, Some(LockDrift) if drift exists
202    #[must_use]
203    pub fn detect_drift(&self, current: &RunContext) -> Option<LockDrift> {
204        let mut drift = LockDrift {
205            model_full_name: None,
206            claude_cli_version: None,
207            schema_version: None,
208        };
209
210        // Check model drift
211        if self.model_full_name != current.model_full_name {
212            drift.model_full_name = Some(DriftPair {
213                locked: self.model_full_name.clone(),
214                current: current.model_full_name.clone(),
215            });
216        }
217
218        // Check Claude CLI version drift
219        if self.claude_cli_version != current.claude_cli_version {
220            drift.claude_cli_version = Some(DriftPair {
221                locked: self.claude_cli_version.clone(),
222                current: current.claude_cli_version.clone(),
223            });
224        }
225
226        // Check schema version drift
227        if self.schema_version != current.schema_version {
228            drift.schema_version = Some(DriftPair {
229                locked: self.schema_version.clone(),
230                current: current.schema_version.clone(),
231            });
232        }
233
234        // Return None if no drift detected
235        if drift.model_full_name.is_none()
236            && drift.claude_cli_version.is_none()
237            && drift.schema_version.is_none()
238        {
239            None
240        } else {
241            Some(drift)
242        }
243    }
244
245    /// Load lockfile from spec directory
246    pub fn load(spec_id: &str) -> Result<Option<Self>, io::Error> {
247        let lock_path = Self::get_lock_path(spec_id);
248
249        if !lock_path.exists() {
250            return Ok(None);
251        }
252
253        let content = fs::read_to_string(&lock_path)?;
254        let lock: Self = serde_json::from_str(&content)
255            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
256
257        Ok(Some(lock))
258    }
259
260    /// Save lockfile to spec directory
261    pub fn save(&self, spec_id: &str) -> Result<(), io::Error> {
262        let lock_path = Self::get_lock_path_utf8(spec_id);
263
264        let json = serde_json::to_string_pretty(self)
265            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
266
267        write_file_atomic(&lock_path, &json).map_err(io::Error::other)?;
268
269        Ok(())
270    }
271
272    /// Get the path to the lockfile for a spec ID
273    fn get_lock_path(spec_id: &str) -> PathBuf {
274        Self::get_lock_path_utf8(spec_id).into_std_path_buf()
275    }
276
277    /// Get the UTF-8 path to the lockfile for a spec ID
278    fn get_lock_path_utf8(spec_id: &str) -> Utf8PathBuf {
279        spec_root(spec_id).join("lock.json")
280    }
281}
282
283/// File lock manager for spec directories
284pub struct FileLock {
285    /// Path to the lock file
286    lock_path: PathBuf,
287    /// File descriptor lock (held while active)
288    _fd_lock: Option<Box<RwLock<fs::File>>>,
289    /// Lock information
290    lock_info: LockInfo,
291}
292
293impl FileLock {
294    /// Attempt to acquire an exclusive lock for the given spec ID
295    ///
296    /// Uses atomic O_EXCL/create_new semantics to prevent TOCTOU race conditions.
297    /// If the lock file already exists, validates the existing lock before deciding
298    /// whether to override it.
299    ///
300    /// # Arguments
301    /// * `spec_id` - The spec ID to lock
302    /// * `force` - Whether to override stale locks
303    /// * `ttl_seconds` - Time-to-live for lock staleness detection (None uses default)
304    ///
305    /// # Returns
306    /// * `Ok(FileLock)` - Successfully acquired lock
307    /// * `Err(LockError)` - Failed to acquire lock (concurrent execution, stale lock, etc.)
308    pub fn acquire(
309        spec_id: &str,
310        force: bool,
311        ttl_seconds: Option<u64>,
312    ) -> Result<Self, LockError> {
313        let spec_root = spec_root(spec_id);
314
315        // Ensure the spec directory exists (ignore benign races)
316        ensure_dir_all(&spec_root).map_err(|e| LockError::AcquisitionFailed {
317            reason: format!("Failed to create spec directory: {e}"),
318        })?;
319
320        let lock_path = Self::get_lock_path(spec_id);
321        let ttl = ttl_seconds.unwrap_or(DEFAULT_STALE_THRESHOLD_SECS);
322
323        // Attempt atomic lock acquisition with retries for stale lock handling
324        Self::acquire_with_retry(spec_id, &lock_path, force, ttl, 3)
325    }
326
327    /// Internal helper for atomic lock acquisition with retry logic
328    fn acquire_with_retry(
329        spec_id: &str,
330        lock_path: &Path,
331        force: bool,
332        ttl_seconds: u64,
333        max_retries: u32,
334    ) -> Result<Self, LockError> {
335        for attempt in 0..max_retries {
336            // Create lock info for this attempt
337            let lock_info = LockInfo {
338                pid: process::id(),
339                start_time: Self::get_process_start_time()?,
340                created_at: SystemTime::now()
341                    .duration_since(UNIX_EPOCH)
342                    .unwrap()
343                    .as_secs(),
344                spec_id: spec_id.to_string(),
345                xchecker_version: env!("CARGO_PKG_VERSION").to_string(),
346            };
347
348            // Attempt atomic file creation with O_EXCL semantics (create_new)
349            match fs::OpenOptions::new()
350                .create_new(true)
351                .write(true)
352                .open(lock_path)
353            {
354                Ok(lock_file) => {
355                    // Successfully created the file atomically - no race possible
356                    return Self::finalize_lock(lock_path.to_path_buf(), lock_file, lock_info);
357                }
358                Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
359                    // Lock file exists - validate it
360                    match Self::check_existing_lock(lock_path, spec_id, force, ttl_seconds) {
361                        Ok(()) => {
362                            // Lock is stale/overridable - attempt atomic removal and retry
363                            match Self::try_remove_stale_lock(lock_path, spec_id) {
364                                Ok(()) => {
365                                    // Immediately attempt acquisition after removing stale lock
366                                    match fs::OpenOptions::new()
367                                        .create_new(true)
368                                        .write(true)
369                                        .open(lock_path)
370                                    {
371                                        Ok(lock_file) => {
372                                            return Self::finalize_lock(
373                                                lock_path.to_path_buf(),
374                                                lock_file,
375                                                lock_info,
376                                            );
377                                        }
378                                        Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
379                                            // Another process grabbed it - apply backoff if retries remain
380                                            if attempt + 1 < max_retries {
381                                                let base_delay_ms = 10u64
382                                                    .saturating_mul(2u64.saturating_pow(attempt));
383                                                // Deterministic jitter based on PID to avoid lockstep retries
384                                                // without requiring RNG (0-6ms based on attempt and PID)
385                                                let jitter_ms = ((attempt as u64)
386                                                    .wrapping_mul(3)
387                                                    .wrapping_add((process::id() as u64) % 7))
388                                                    % 7;
389                                                let delay_ms =
390                                                    base_delay_ms.saturating_add(jitter_ms);
391                                                std::thread::sleep(
392                                                    std::time::Duration::from_millis(
393                                                        delay_ms.min(100),
394                                                    ),
395                                                );
396                                                continue;
397                                            }
398                                            // Max retries reached after another process grabbed lock
399                                            return Err(LockError::AcquisitionFailed {
400                                                reason: format!(
401                                                    "Max retries exceeded for spec '{}': another process acquired lock immediately after stale removal",
402                                                    spec_id
403                                                ),
404                                            });
405                                        }
406                                        Err(e) => {
407                                            return Err(LockError::AcquisitionFailed {
408                                                reason: format!(
409                                                    "Failed to create lock for spec '{}' after removing stale lock: {e}",
410                                                    spec_id
411                                                ),
412                                            });
413                                        }
414                                    }
415                                }
416                                Err(e) => {
417                                    // Propagate the specific stale-removal error
418                                    return Err(e);
419                                }
420                            }
421                        }
422                        Err(e) => return Err(e),
423                    }
424                }
425                Err(e) => {
426                    return Err(LockError::AcquisitionFailed {
427                        reason: format!(
428                            "Failed to create lock file for spec '{}' at '{}': {e}",
429                            spec_id,
430                            lock_path.display()
431                        ),
432                    });
433                }
434            }
435        }
436
437        // Reachable only when max_retries == 0 (edge case). All other paths return/continue
438        // within the loop. This provides a safety net for the zero-retry edge case.
439        Err(LockError::AcquisitionFailed {
440            reason: format!(
441                "Max retries ({}) exceeded for lock acquisition on spec '{}'",
442                max_retries, spec_id
443            ),
444        })
445    }
446
447    /// Finalize lock acquisition by writing lock info and acquiring fd_lock
448    fn finalize_lock(
449        lock_path: PathBuf,
450        lock_file: fs::File,
451        lock_info: LockInfo,
452    ) -> Result<Self, LockError> {
453        let lock_json =
454            serde_json::to_string_pretty(&lock_info).map_err(|e| LockError::AcquisitionFailed {
455                reason: format!(
456                    "Failed to serialize lock info for spec '{}': {e}",
457                    lock_info.spec_id
458                ),
459            })?;
460
461        // Acquire exclusive file descriptor lock and write in one step
462        let mut rw_lock = Box::new(RwLock::new(lock_file));
463        {
464            let fd_lock = rw_lock
465                .try_write()
466                .map_err(|_e| LockError::ConcurrentExecution {
467                    spec_id: lock_info.spec_id.clone(),
468                    pid: 0, // Unknown PID since we couldn't read the lock
469                    created_ago: "unknown".to_string(),
470                })?;
471
472            // Write to the locked file
473            let mut file_ref = &*fd_lock;
474            file_ref
475                .write_all(lock_json.as_bytes())
476                .map_err(|e| LockError::AcquisitionFailed {
477                    reason: format!(
478                        "Failed to write lock info for spec '{}': {e}",
479                        lock_info.spec_id
480                    ),
481                })?;
482            file_ref.flush().map_err(|e| LockError::AcquisitionFailed {
483                reason: format!(
484                    "Failed to flush lock file for spec '{}': {e}",
485                    lock_info.spec_id
486                ),
487            })?;
488
489            // Sync to disk for crash-resilience (small file, acceptable cost)
490            file_ref
491                .sync_all()
492                .map_err(|e| LockError::AcquisitionFailed {
493                    reason: format!(
494                        "Failed to sync lock file for spec '{}': {e}",
495                        lock_info.spec_id
496                    ),
497                })?;
498        }
499
500        Ok(Self {
501            lock_path,
502            _fd_lock: Some(rw_lock),
503            lock_info,
504        })
505    }
506
507    /// Attempt to remove a stale lock file atomically
508    ///
509    /// Uses rename-to-stale then delete pattern to minimize race window.
510    /// Treats `NotFound` as success since another process may have already removed it.
511    /// Includes PID in stale filename to prevent collision under high parallelism.
512    fn try_remove_stale_lock(lock_path: &Path, spec_id: &str) -> Result<(), LockError> {
513        let timestamp = SystemTime::now()
514            .duration_since(UNIX_EPOCH)
515            .unwrap()
516            .as_millis();
517        let pid = process::id();
518        let stale_path = lock_path.with_extension(format!("stale.{timestamp}.{pid}"));
519
520        // Atomic rename to mark as stale
521        match fs::rename(lock_path, &stale_path) {
522            Ok(()) => {
523                // Best-effort cleanup of stale file (ignore errors)
524                let _ = fs::remove_file(&stale_path);
525                Ok(())
526            }
527            Err(e) if e.kind() == io::ErrorKind::NotFound => {
528                // Another process already removed/renamed it - that's fine
529                Ok(())
530            }
531            Err(e) => Err(LockError::AcquisitionFailed {
532                reason: format!("Failed to rename stale lock for spec '{spec_id}': {e}"),
533            }),
534        }
535    }
536
537    /// Check if a lock exists for the given spec ID
538    #[must_use]
539    #[allow(dead_code)] // Lock introspection utility
540    pub fn exists(spec_id: &str) -> bool {
541        let lock_path = Self::get_lock_path(spec_id);
542        lock_path.exists()
543    }
544
545    /// Get information about an existing lock (if any)
546    pub fn get_lock_info(spec_id: &str) -> Result<Option<LockInfo>, LockError> {
547        let lock_path = Self::get_lock_path(spec_id);
548
549        if !lock_path.exists() {
550            return Ok(None);
551        }
552
553        let lock_content =
554            fs::read_to_string(&lock_path).map_err(|e| LockError::CorruptedLock {
555                reason: format!("Failed to read lock file: {e}"),
556            })?;
557
558        let lock_info: LockInfo =
559            serde_json::from_str(&lock_content).map_err(|e| LockError::CorruptedLock {
560                reason: format!("Failed to parse lock file: {e}"),
561            })?;
562
563        Ok(Some(lock_info))
564    }
565
566    /// Release the lock (called automatically on drop)
567    #[allow(dead_code)] // Lock management utility
568    pub fn release(mut self) -> Result<(), LockError> {
569        // Drop the file descriptor lock first
570        self._fd_lock.take();
571
572        // Remove the lock file
573        if self.lock_path.exists() {
574            fs::remove_file(&self.lock_path).map_err(|e| LockError::ReleaseFailed {
575                reason: format!("Failed to remove lock file: {e}"),
576            })?;
577        }
578
579        Ok(())
580    }
581
582    /// Get the spec ID for this lock
583    #[must_use]
584    #[allow(dead_code)] // Lock introspection utility
585    pub fn spec_id(&self) -> &str {
586        &self.lock_info.spec_id
587    }
588
589    /// Get the lock information
590    #[must_use]
591    #[allow(dead_code)] // Lock introspection utility
592    pub const fn lock_info(&self) -> &LockInfo {
593        &self.lock_info
594    }
595
596    /// Get the path to the lock file for a spec ID
597    fn get_lock_path(spec_id: &str) -> PathBuf {
598        spec_root(spec_id).as_std_path().join(".lock")
599    }
600
601    /// Check an existing lock and determine if it should be overridden
602    ///
603    /// Includes retry logic for empty/partial lockfile reads to handle the case where
604    /// another process has just created the file but hasn't written content yet.
605    fn check_existing_lock(
606        lock_path: &Path,
607        spec_id: &str,
608        force: bool,
609        ttl_seconds: u64,
610    ) -> Result<(), LockError> {
611        // Retry parameters for handling concurrent initialization
612        const MAX_READ_RETRIES: u32 = 3;
613        const READ_RETRY_DELAY_MS: u64 = 10;
614
615        for attempt in 0..MAX_READ_RETRIES {
616            let lock_content = match fs::read_to_string(lock_path) {
617                Ok(content) => content,
618                Err(e) if e.kind() == io::ErrorKind::NotFound => {
619                    // Lock was removed between create_new(AlreadyExists) and read.
620                    // Treat as "no lock"; caller will retry acquisition.
621                    return Ok(());
622                }
623                Err(e) => {
624                    // IO errors during read might be transient (file being written)
625                    if attempt + 1 < MAX_READ_RETRIES {
626                        std::thread::sleep(std::time::Duration::from_millis(READ_RETRY_DELAY_MS));
627                        continue;
628                    }
629                    return Err(LockError::CorruptedLock {
630                        reason: format!("Failed to read existing lock for spec '{}': {e}", spec_id),
631                    });
632                }
633            };
634
635            // Check for empty content (file exists but not yet written)
636            if lock_content.is_empty() {
637                if attempt + 1 < MAX_READ_RETRIES {
638                    std::thread::sleep(std::time::Duration::from_millis(READ_RETRY_DELAY_MS));
639                    continue;
640                }
641                return Err(LockError::CorruptedLock {
642                    reason: format!(
643                        "Lock file for spec '{}' is empty (may be initializing)",
644                        spec_id
645                    ),
646                });
647            }
648
649            // Try to parse the JSON content
650            match serde_json::from_str::<LockInfo>(&lock_content) {
651                Ok(existing_lock) => {
652                    // Successfully parsed - proceed with lock validation
653                    return Self::validate_existing_lock(
654                        &existing_lock,
655                        spec_id,
656                        force,
657                        ttl_seconds,
658                    );
659                }
660                Err(e) => {
661                    // Check if this looks like a partial/incomplete JSON (EOF error)
662                    let is_likely_incomplete = e.is_eof()
663                        || lock_content.trim().is_empty()
664                        || (lock_content.starts_with('{') && !lock_content.contains('}'));
665
666                    // Only retry if it looks like the file might be mid-write
667                    if is_likely_incomplete && attempt + 1 < MAX_READ_RETRIES {
668                        std::thread::sleep(std::time::Duration::from_millis(READ_RETRY_DELAY_MS));
669                        continue;
670                    }
671
672                    return Err(LockError::CorruptedLock {
673                        reason: format!(
674                            "Failed to parse existing lock for spec '{}': {e}",
675                            spec_id
676                        ),
677                    });
678                }
679            }
680        }
681        // Note: This is unreachable since MAX_READ_RETRIES > 0 and all paths return/continue.
682        // Kept for safety if MAX_READ_RETRIES is ever changed to 0.
683        unreachable!("check_existing_lock loop exhausted without returning")
684    }
685
686    /// Validate an existing lock and determine if it should be overridden
687    fn validate_existing_lock(
688        existing_lock: &LockInfo,
689        spec_id: &str,
690        force: bool,
691        ttl_seconds: u64,
692    ) -> Result<(), LockError> {
693        // Calculate lock age (handle future timestamps gracefully - clock skew)
694        let now_secs = SystemTime::now()
695            .duration_since(UNIX_EPOCH)
696            .unwrap()
697            .as_secs();
698
699        let lock_age = now_secs.saturating_sub(existing_lock.created_at);
700
701        let is_stale = lock_age > ttl_seconds;
702
703        // Check if the process is still running
704        if Self::is_process_running(existing_lock.pid) {
705            // Process is running - this is a fresh lock
706            if !force {
707                let created_ago = Self::format_duration_since(existing_lock.created_at);
708                return Err(LockError::ConcurrentExecution {
709                    spec_id: spec_id.to_string(),
710                    pid: existing_lock.pid,
711                    created_ago,
712                });
713            }
714            // Force allows overriding even fresh locks
715            return Ok(());
716        }
717
718        // Process is not running - check staleness
719        if is_stale {
720            if force {
721                // Force flag allows overriding stale locks
722                Ok(())
723            } else {
724                Err(LockError::StaleLock {
725                    spec_id: spec_id.to_string(),
726                    pid: existing_lock.pid,
727                    age_secs: lock_age,
728                })
729            }
730        } else {
731            // Lock is recent but process is dead - fail without force
732            if force {
733                Ok(())
734            } else {
735                let created_ago = Self::format_duration_since(existing_lock.created_at);
736                Err(LockError::ConcurrentExecution {
737                    spec_id: spec_id.to_string(),
738                    pid: existing_lock.pid,
739                    created_ago,
740                })
741            }
742        }
743    }
744
745    /// Check if a process with the given PID is still running
746    fn is_process_running(pid: u32) -> bool {
747        #[cfg(unix)]
748        {
749            // On Unix systems, use kill(pid, 0) to check if process exists
750            // Returns 0 if process exists and we can signal it
751            // Returns -1 with ESRCH if process doesn't exist
752            // Returns -1 with EPERM if process exists but we lack permission
753            let rc = unsafe { libc::kill(pid as i32, 0) };
754            if rc == 0 {
755                true
756            } else {
757                // If EPERM, the process exists but we can't signal it
758                matches!(
759                    io::Error::last_os_error().raw_os_error(),
760                    Some(code) if code == libc::EPERM
761                )
762            }
763        }
764
765        #[cfg(windows)]
766        {
767            // On Windows, try to open the process handle and check if it's still running
768            use winapi::um::handleapi::CloseHandle;
769            use winapi::um::minwinbase::STILL_ACTIVE;
770            use winapi::um::processthreadsapi::{GetExitCodeProcess, OpenProcess};
771            use winapi::um::winnt::PROCESS_QUERY_LIMITED_INFORMATION;
772
773            unsafe {
774                // Use PROCESS_QUERY_LIMITED_INFORMATION which is sufficient for GetExitCodeProcess
775                // and works with more processes than PROCESS_QUERY_INFORMATION
776                let handle = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
777                if handle.is_null() {
778                    return false;
779                }
780
781                // Check if the process is still running by getting its exit code
782                let mut exit_code: u32 = 0;
783                let result = GetExitCodeProcess(handle, &mut exit_code);
784
785                // If GetExitCodeProcess fails, assume process is not running
786                if result == 0 {
787                    CloseHandle(handle);
788                    return false;
789                }
790
791                // STILL_ACTIVE (259) means the process is still running
792                CloseHandle(handle);
793                exit_code == STILL_ACTIVE
794            }
795        }
796
797        #[cfg(not(any(unix, windows)))]
798        {
799            // Fallback: assume process is running (conservative approach)
800            true
801        }
802    }
803
804    /// Get the start time of the current process (best effort)
805    fn get_process_start_time() -> Result<u64, LockError> {
806        // This is a best-effort implementation
807        // In practice, we use the current time as an approximation
808        Ok(SystemTime::now()
809            .duration_since(UNIX_EPOCH)
810            .unwrap()
811            .as_secs())
812    }
813
814    /// Format a duration since a timestamp in a human-readable way
815    fn format_duration_since(timestamp: u64) -> String {
816        let now = SystemTime::now()
817            .duration_since(UNIX_EPOCH)
818            .unwrap()
819            .as_secs();
820
821        let duration = now.saturating_sub(timestamp);
822
823        if duration < 60 {
824            format!("{duration}s")
825        } else if duration < 3600 {
826            format!("{}m", duration / 60)
827        } else if duration < 86400 {
828            format!("{}h", duration / 3600)
829        } else {
830            format!("{}d", duration / 86400)
831        }
832    }
833}
834
835impl std::fmt::Debug for FileLock {
836    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
837        f.debug_struct("FileLock")
838            .field("lock_path", &self.lock_path)
839            .field("lock_info", &self.lock_info)
840            .field("_fd_lock", &"<RwLock>")
841            .finish()
842    }
843}
844
845impl Drop for FileLock {
846    /// Automatically release the lock when the `FileLock` is dropped
847    fn drop(&mut self) {
848        // Drop the file descriptor lock first
849        self._fd_lock.take();
850
851        // Remove the lock file (ignore errors in drop)
852        if self.lock_path.exists() {
853            let _ = fs::remove_file(&self.lock_path);
854        }
855    }
856}
857
858/// Utility functions for lock management
859pub mod utils {
860    use super::{
861        DEFAULT_STALE_THRESHOLD_SECS, FileLock, LockError, Result, SystemTime, UNIX_EPOCH, fs,
862    };
863
864    /// Check if clean operation should be allowed (no active locks unless forced)
865    pub fn can_clean(
866        spec_id: &str,
867        force: bool,
868        ttl_seconds: Option<u64>,
869    ) -> Result<(), LockError> {
870        let ttl = ttl_seconds.unwrap_or(DEFAULT_STALE_THRESHOLD_SECS);
871        if let Some(lock_info) = FileLock::get_lock_info(spec_id)? {
872            if FileLock::is_process_running(lock_info.pid) {
873                if force {
874                    // Force flag allows cleaning even with active locks (--hard --force overrides active locks)
875                    return Ok(());
876                }
877                return Err(LockError::ConcurrentExecution {
878                    spec_id: spec_id.to_string(),
879                    pid: lock_info.pid,
880                    created_ago: FileLock::format_duration_since(lock_info.created_at),
881                });
882            }
883
884            // Process is dead, check if we should allow cleaning
885            if !force {
886                let lock_age = SystemTime::now()
887                    .duration_since(UNIX_EPOCH)
888                    .unwrap()
889                    .as_secs()
890                    - lock_info.created_at;
891
892                if lock_age <= ttl {
893                    return Err(LockError::StaleLock {
894                        spec_id: spec_id.to_string(),
895                        pid: lock_info.pid,
896                        age_secs: lock_age,
897                    });
898                }
899            }
900        }
901
902        Ok(())
903    }
904
905    /// Force remove a lock file (for emergency cleanup)
906    #[allow(dead_code)] // Lock cleanup utility for CLI commands
907    pub fn force_remove_lock(spec_id: &str) -> Result<(), LockError> {
908        let lock_path = FileLock::get_lock_path(spec_id);
909
910        if lock_path.exists() {
911            fs::remove_file(&lock_path).map_err(|e| LockError::ReleaseFailed {
912                reason: format!("Failed to force remove lock: {e}"),
913            })?;
914        }
915
916        Ok(())
917    }
918}
919
920#[cfg(test)]
921mod tests {
922    use super::*;
923
924    use std::fs;
925    use tempfile::TempDir;
926
    /// Shared test fixture: delegates to `with_isolated_home()` and hands back
    /// the `TempDir` it returns. Callers bind the result for the duration of
    /// the test.
    // NOTE(review): presumably this redirects lock storage into the temp
    // directory for the current thread — confirm against `with_isolated_home`.
    fn setup_test_env() -> TempDir {
        with_isolated_home()
    }
930
931    #[test]
932    fn test_lock_acquisition_and_release() {
933        let _temp_dir = setup_test_env();
934
935        let spec_id = "test-spec-acquisition-123";
936
937        // Should be able to acquire lock
938        let lock = FileLock::acquire(spec_id, false, None).unwrap();
939        assert_eq!(lock.spec_id(), spec_id);
940
941        // The lock file should exist while the lock is held
942        let lock_path = FileLock::get_lock_path(spec_id);
943        assert!(
944            lock_path.exists(),
945            "Lock file should exist at: {lock_path:?}"
946        );
947        assert!(FileLock::exists(spec_id));
948
949        // Should not be able to acquire another lock for same spec
950        let result = FileLock::acquire(spec_id, false, None);
951        assert!(result.is_err());
952
953        // Release the lock
954        lock.release().unwrap();
955        assert!(!FileLock::exists(spec_id));
956
957        // Should be able to acquire again after release
958        let _lock2 = FileLock::acquire(spec_id, false, None).unwrap();
959    }
960
961    #[test]
962    fn test_lock_info_serialization() {
963        let _temp_dir = setup_test_env();
964
965        let spec_id = "test-spec-serialization-456";
966        let _lock = FileLock::acquire(spec_id, false, None).unwrap();
967
968        // Should be able to read lock info
969        let lock_info = FileLock::get_lock_info(spec_id).unwrap().unwrap();
970        assert_eq!(lock_info.spec_id, spec_id);
971        assert_eq!(lock_info.pid, process::id());
972        assert!(!lock_info.xchecker_version.is_empty());
973    }
974
975    #[test]
976    fn test_automatic_cleanup_on_drop() {
977        let _temp_dir = setup_test_env();
978
979        let spec_id = "test-spec-cleanup-789";
980
981        {
982            let _lock = FileLock::acquire(spec_id, false, None).unwrap();
983            assert!(FileLock::exists(spec_id));
984        } // lock goes out of scope here
985
986        // Lock should be automatically cleaned up
987        assert!(!FileLock::exists(spec_id));
988    }
989
990    #[test]
991    fn test_force_override_stale_lock() {
992        let _temp_dir = setup_test_env();
993
994        let spec_id = "test-spec-stale-override";
995
996        // Create a lock file manually with old timestamp
997        let lock_path = FileLock::get_lock_path(spec_id);
998        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
999
1000        let old_lock_info = LockInfo {
1001            pid: 99999, // Non-existent PID
1002            start_time: 0,
1003            created_at: 0, // Very old timestamp
1004            spec_id: spec_id.to_string(),
1005            xchecker_version: "0.1.0".to_string(),
1006        };
1007
1008        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1009        fs::write(&lock_path, lock_json).unwrap();
1010
1011        // Should fail without force
1012        let result = FileLock::acquire(spec_id, false, None);
1013        assert!(result.is_err());
1014        assert!(matches!(result.unwrap_err(), LockError::StaleLock { .. }));
1015
1016        // Should succeed with force
1017        let lock = FileLock::acquire(spec_id, true, None).unwrap();
1018        assert_eq!(lock.spec_id(), spec_id);
1019    }
1020
1021    #[test]
1022    fn test_clean_operation_checks() {
1023        let _temp_dir = setup_test_env();
1024
1025        let spec_id = "test-spec-clean-checks";
1026
1027        // Should be able to clean when no lock exists
1028        assert!(utils::can_clean(spec_id, false, None).is_ok());
1029
1030        // Acquire a lock
1031        let _lock = FileLock::acquire(spec_id, false, None).unwrap();
1032
1033        // Should not be able to clean with active lock
1034        let result = utils::can_clean(spec_id, false, None);
1035        assert!(result.is_err());
1036        assert!(matches!(
1037            result.unwrap_err(),
1038            LockError::ConcurrentExecution { .. }
1039        ));
1040
1041        // Should be able to clean with force (--hard --force overrides active locks)
1042        assert!(utils::can_clean(spec_id, true, None).is_ok());
1043    }
1044
1045    #[test]
1046    fn test_lock_path_generation() {
1047        let _temp_dir = setup_test_env();
1048
1049        let spec_id = "my-test-spec";
1050        let expected_path = spec_root(spec_id).as_std_path().join(".lock");
1051        assert_eq!(FileLock::get_lock_path(spec_id), expected_path);
1052    }
1053
1054    #[test]
1055    fn test_duration_formatting() {
1056        assert_eq!(
1057            FileLock::format_duration_since(
1058                SystemTime::now()
1059                    .duration_since(UNIX_EPOCH)
1060                    .unwrap()
1061                    .as_secs()
1062                    - 30
1063            ),
1064            "30s"
1065        );
1066        assert_eq!(
1067            FileLock::format_duration_since(
1068                SystemTime::now()
1069                    .duration_since(UNIX_EPOCH)
1070                    .unwrap()
1071                    .as_secs()
1072                    - 120
1073            ),
1074            "2m"
1075        );
1076        assert_eq!(
1077            FileLock::format_duration_since(
1078                SystemTime::now()
1079                    .duration_since(UNIX_EPOCH)
1080                    .unwrap()
1081                    .as_secs()
1082                    - 7200
1083            ),
1084            "2h"
1085        );
1086    }
1087
1088    #[test]
1089    fn test_xchecker_lock_creation() {
1090        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1091
1092        assert_eq!(lock.schema_version, "1");
1093        assert_eq!(lock.model_full_name, "haiku");
1094        assert_eq!(lock.claude_cli_version, "0.8.1");
1095    }
1096
1097    #[test]
1098    fn test_xchecker_lock_no_drift() {
1099        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1100
1101        let context = RunContext {
1102            model_full_name: "haiku".to_string(),
1103            claude_cli_version: "0.8.1".to_string(),
1104            schema_version: "1".to_string(),
1105        };
1106
1107        let drift = lock.detect_drift(&context);
1108        assert!(drift.is_none(), "Expected no drift when values match");
1109    }
1110
1111    #[test]
1112    fn test_xchecker_lock_model_drift() {
1113        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1114
1115        let context = RunContext {
1116            model_full_name: "sonnet".to_string(),
1117            claude_cli_version: "0.8.1".to_string(),
1118            schema_version: "1".to_string(),
1119        };
1120
1121        let drift = lock.detect_drift(&context).expect("Expected drift");
1122        assert!(drift.model_full_name.is_some());
1123        assert!(drift.claude_cli_version.is_none());
1124        assert!(drift.schema_version.is_none());
1125
1126        let model_drift = drift.model_full_name.unwrap();
1127        assert_eq!(model_drift.locked, "haiku");
1128        assert_eq!(model_drift.current, "sonnet");
1129    }
1130
1131    #[test]
1132    fn test_xchecker_lock_cli_version_drift() {
1133        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1134
1135        let context = RunContext {
1136            model_full_name: "haiku".to_string(),
1137            claude_cli_version: "0.9.0".to_string(),
1138            schema_version: "1".to_string(),
1139        };
1140
1141        let drift = lock.detect_drift(&context).expect("Expected drift");
1142        assert!(drift.model_full_name.is_none());
1143        assert!(drift.claude_cli_version.is_some());
1144        assert!(drift.schema_version.is_none());
1145
1146        let cli_drift = drift.claude_cli_version.unwrap();
1147        assert_eq!(cli_drift.locked, "0.8.1");
1148        assert_eq!(cli_drift.current, "0.9.0");
1149    }
1150
1151    #[test]
1152    fn test_xchecker_lock_schema_version_drift() {
1153        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1154
1155        let context = RunContext {
1156            model_full_name: "haiku".to_string(),
1157            claude_cli_version: "0.8.1".to_string(),
1158            schema_version: "2".to_string(),
1159        };
1160
1161        let drift = lock.detect_drift(&context).expect("Expected drift");
1162        assert!(drift.model_full_name.is_none());
1163        assert!(drift.claude_cli_version.is_none());
1164        assert!(drift.schema_version.is_some());
1165
1166        let schema_drift = drift.schema_version.unwrap();
1167        assert_eq!(schema_drift.locked, "1");
1168        assert_eq!(schema_drift.current, "2");
1169    }
1170
1171    #[test]
1172    fn test_xchecker_lock_multiple_drift() {
1173        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1174
1175        let context = RunContext {
1176            model_full_name: "sonnet".to_string(),
1177            claude_cli_version: "0.9.0".to_string(),
1178            schema_version: "2".to_string(),
1179        };
1180
1181        let drift = lock.detect_drift(&context).expect("Expected drift");
1182        assert!(drift.model_full_name.is_some());
1183        assert!(drift.claude_cli_version.is_some());
1184        assert!(drift.schema_version.is_some());
1185    }
1186
1187    #[test]
1188    fn test_xchecker_lock_save_and_load() {
1189        let _temp_dir = setup_test_env();
1190
1191        let spec_id = "test-spec-lockfile";
1192        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1193
1194        // Save lockfile
1195        lock.save(spec_id).expect("Failed to save lockfile");
1196
1197        // Load lockfile
1198        let loaded = XCheckerLock::load(spec_id)
1199            .expect("Failed to load lockfile")
1200            .expect("Lockfile should exist");
1201
1202        assert_eq!(loaded.schema_version, lock.schema_version);
1203        assert_eq!(loaded.model_full_name, lock.model_full_name);
1204        assert_eq!(loaded.claude_cli_version, lock.claude_cli_version);
1205    }
1206
1207    #[test]
1208    fn test_xchecker_lock_load_nonexistent() {
1209        let _temp_dir = setup_test_env();
1210
1211        let spec_id = "nonexistent-spec";
1212        let loaded = XCheckerLock::load(spec_id).expect("Load should succeed");
1213
1214        assert!(
1215            loaded.is_none(),
1216            "Should return None for nonexistent lockfile"
1217        );
1218    }
1219
1220    #[test]
1221    fn test_xchecker_lock_corrupted_file() {
1222        let _temp_dir = setup_test_env();
1223
1224        let spec_id = "test-spec-corrupted";
1225        let lock_path = XCheckerLock::get_lock_path(spec_id);
1226
1227        // Create spec directory
1228        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1229
1230        // Write corrupted JSON
1231        fs::write(&lock_path, "{ invalid json }").unwrap();
1232
1233        // Should return error for corrupted file
1234        let result = XCheckerLock::load(spec_id);
1235        assert!(result.is_err(), "Should fail to load corrupted lockfile");
1236    }
1237
1238    #[test]
1239    fn test_xchecker_lock_empty_file() {
1240        let _temp_dir = setup_test_env();
1241
1242        let spec_id = "test-spec-empty";
1243        let lock_path = XCheckerLock::get_lock_path(spec_id);
1244
1245        // Create spec directory
1246        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1247
1248        // Write empty file
1249        fs::write(&lock_path, "").unwrap();
1250
1251        // Should return error for empty file
1252        let result = XCheckerLock::load(spec_id);
1253        assert!(result.is_err(), "Should fail to load empty lockfile");
1254    }
1255
1256    #[test]
1257    fn test_xchecker_lock_overwrite_existing() {
1258        let _temp_dir = setup_test_env();
1259
1260        let spec_id = "test-spec-overwrite";
1261
1262        // Create first lockfile
1263        let lock1 = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1264        lock1.save(spec_id).unwrap();
1265
1266        // Create second lockfile with different values
1267        let lock2 = XCheckerLock::new("sonnet".to_string(), "0.9.0".to_string());
1268        lock2.save(spec_id).unwrap();
1269
1270        // Load and verify it has the second lockfile's values
1271        let loaded = XCheckerLock::load(spec_id)
1272            .expect("Failed to load lockfile")
1273            .expect("Lockfile should exist");
1274
1275        assert_eq!(loaded.model_full_name, "sonnet");
1276        assert_eq!(loaded.claude_cli_version, "0.9.0");
1277    }
1278
1279    #[test]
1280    fn test_xchecker_lock_drift_all_fields_match() {
1281        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1282
1283        let context = RunContext {
1284            model_full_name: "haiku".to_string(),
1285            claude_cli_version: "0.8.1".to_string(),
1286            schema_version: "1".to_string(),
1287        };
1288
1289        let drift = lock.detect_drift(&context);
1290        assert!(
1291            drift.is_none(),
1292            "Should return None when all fields match exactly"
1293        );
1294    }
1295
1296    #[test]
1297    fn test_xchecker_lock_drift_case_sensitive() {
1298        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1299
1300        // Test with different case
1301        let context = RunContext {
1302            model_full_name: "Claude-3-5-Sonnet-20241022".to_string(),
1303            claude_cli_version: "0.8.1".to_string(),
1304            schema_version: "1".to_string(),
1305        };
1306
1307        let drift = lock.detect_drift(&context);
1308        assert!(drift.is_some(), "Drift detection should be case-sensitive");
1309        assert!(drift.unwrap().model_full_name.is_some());
1310    }
1311
1312    #[test]
1313    fn test_xchecker_lock_drift_whitespace_sensitive() {
1314        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1315
1316        // Test with extra whitespace
1317        let context = RunContext {
1318            model_full_name: "haiku ".to_string(),
1319            claude_cli_version: "0.8.1".to_string(),
1320            schema_version: "1".to_string(),
1321        };
1322
1323        let drift = lock.detect_drift(&context);
1324        assert!(
1325            drift.is_some(),
1326            "Drift detection should be whitespace-sensitive"
1327        );
1328        assert!(drift.unwrap().model_full_name.is_some());
1329    }
1330
1331    #[test]
1332    fn test_xchecker_lock_save_creates_directory() {
1333        let _temp_dir = setup_test_env();
1334
1335        let spec_id = "test-spec-new-dir";
1336        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1337
1338        // Directory should not exist yet
1339        let lock_path = XCheckerLock::get_lock_path(spec_id);
1340        assert!(!lock_path.exists());
1341
1342        // Save should create directory
1343        lock.save(spec_id).unwrap();
1344
1345        // Directory and file should now exist
1346        assert!(lock_path.exists());
1347        assert!(lock_path.parent().unwrap().exists());
1348    }
1349
1350    #[test]
1351    fn test_xchecker_lock_json_format() {
1352        let _temp_dir = setup_test_env();
1353
1354        let spec_id = "test-spec-json-format";
1355        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1356
1357        lock.save(spec_id).unwrap();
1358
1359        // Read raw JSON and verify format
1360        let lock_path = XCheckerLock::get_lock_path(spec_id);
1361        let json_content = fs::read_to_string(&lock_path).unwrap();
1362
1363        // Should be valid JSON
1364        let parsed: serde_json::Value =
1365            serde_json::from_str(&json_content).expect("Should be valid JSON");
1366
1367        // Verify required fields exist
1368        assert!(parsed.get("schema_version").is_some());
1369        assert!(parsed.get("created_at").is_some());
1370        assert!(parsed.get("model_full_name").is_some());
1371        assert!(parsed.get("claude_cli_version").is_some());
1372
1373        // Verify values
1374        assert_eq!(parsed["schema_version"], "1");
1375        assert_eq!(parsed["model_full_name"], "haiku");
1376        assert_eq!(parsed["claude_cli_version"], "0.8.1");
1377    }
1378
1379    #[test]
1380    fn test_xchecker_lock_timestamp_format() {
1381        let lock = XCheckerLock::new("haiku".to_string(), "0.8.1".to_string());
1382
1383        // Verify created_at is a valid RFC3339 timestamp
1384        let timestamp_str = lock.created_at.to_rfc3339();
1385        assert!(!timestamp_str.is_empty());
1386
1387        // Should be parseable back to DateTime
1388        let parsed = DateTime::parse_from_rfc3339(&timestamp_str);
1389        assert!(parsed.is_ok(), "Should be parseable RFC3339 timestamp");
1390    }
1391
1392    #[test]
1393    fn test_configurable_ttl_parameter() {
1394        let _temp_dir = setup_test_env();
1395
1396        let spec_id = "test-spec-configurable-ttl";
1397
1398        // Create a lock file with timestamp 2 minutes ago
1399        let lock_path = FileLock::get_lock_path(spec_id);
1400        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1401
1402        let two_minutes_ago = SystemTime::now()
1403            .duration_since(UNIX_EPOCH)
1404            .unwrap()
1405            .as_secs()
1406            - 120;
1407
1408        let old_lock_info = LockInfo {
1409            pid: 99999, // Non-existent PID
1410            start_time: 0,
1411            created_at: two_minutes_ago,
1412            spec_id: spec_id.to_string(),
1413            xchecker_version: "0.1.0".to_string(),
1414        };
1415
1416        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1417        fs::write(&lock_path, lock_json).unwrap();
1418
1419        // With TTL of 60 seconds (1 minute), lock should be stale
1420        let result = FileLock::acquire(spec_id, false, Some(60));
1421        assert!(result.is_err());
1422        assert!(matches!(result.unwrap_err(), LockError::StaleLock { .. }));
1423
1424        // With TTL of 180 seconds (3 minutes), lock should not be stale yet
1425        // but process is dead, so it should still fail without force
1426        let result = FileLock::acquire(spec_id, false, Some(180));
1427        assert!(result.is_err());
1428
1429        // With force, should succeed regardless of TTL
1430        let lock = FileLock::acquire(spec_id, true, Some(60)).unwrap();
1431        assert_eq!(lock.spec_id(), spec_id);
1432    }
1433
1434    #[test]
1435    fn test_stale_lock_detection_by_age() {
1436        let _temp_dir = setup_test_env();
1437
1438        let spec_id = "test-spec-stale-by-age";
1439
1440        // Create a lock file with very old timestamp (2 hours ago)
1441        let lock_path = FileLock::get_lock_path(spec_id);
1442        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1443
1444        let two_hours_ago = SystemTime::now()
1445            .duration_since(UNIX_EPOCH)
1446            .unwrap()
1447            .as_secs()
1448            - 7200;
1449
1450        let old_lock_info = LockInfo {
1451            pid: 99999, // Non-existent PID
1452            start_time: 0,
1453            created_at: two_hours_ago,
1454            spec_id: spec_id.to_string(),
1455            xchecker_version: "0.1.0".to_string(),
1456        };
1457
1458        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1459        fs::write(&lock_path, lock_json).unwrap();
1460
1461        // Should detect as stale with default TTL (1 hour)
1462        let result = FileLock::acquire(spec_id, false, None);
1463        assert!(result.is_err());
1464        assert!(matches!(result.unwrap_err(), LockError::StaleLock { .. }));
1465
1466        // Should succeed with force
1467        let lock = FileLock::acquire(spec_id, true, None).unwrap();
1468        assert_eq!(lock.spec_id(), spec_id);
1469    }
1470
1471    #[test]
1472    fn test_stale_lock_detection_by_dead_process() {
1473        let _temp_dir = setup_test_env();
1474
1475        let spec_id = "test-spec-stale-by-pid";
1476
1477        // Create a lock file with recent timestamp but non-existent PID
1478        let lock_path = FileLock::get_lock_path(spec_id);
1479        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1480
1481        let recent_time = SystemTime::now()
1482            .duration_since(UNIX_EPOCH)
1483            .unwrap()
1484            .as_secs()
1485            - 60; // 1 minute ago
1486
1487        let old_lock_info = LockInfo {
1488            pid: 99999, // Non-existent PID
1489            start_time: 0,
1490            created_at: recent_time,
1491            spec_id: spec_id.to_string(),
1492            xchecker_version: "0.1.0".to_string(),
1493        };
1494
1495        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1496        fs::write(&lock_path, lock_json).unwrap();
1497
1498        // Should fail even though lock is recent, because process is dead
1499        let result = FileLock::acquire(spec_id, false, None);
1500        assert!(result.is_err());
1501
1502        // Should succeed with force
1503        let lock = FileLock::acquire(spec_id, true, None).unwrap();
1504        assert_eq!(lock.spec_id(), spec_id);
1505    }
1506
1507    #[test]
1508    fn test_concurrent_execution_detection() {
1509        let _temp_dir = setup_test_env();
1510
1511        let spec_id = "test-spec-concurrent";
1512
1513        // Acquire first lock
1514        let _lock1 = FileLock::acquire(spec_id, false, None).unwrap();
1515
1516        // Try to acquire second lock - should fail with ConcurrentExecution
1517        let result = FileLock::acquire(spec_id, false, None);
1518        assert!(result.is_err());
1519        assert!(matches!(
1520            result.unwrap_err(),
1521            LockError::ConcurrentExecution { .. }
1522        ));
1523
1524        // Even with force, should succeed if process is still running
1525        let result = FileLock::acquire(spec_id, true, None);
1526        assert!(result.is_ok());
1527    }
1528
1529    #[test]
1530    fn test_lock_release_on_normal_exit() {
1531        let _temp_dir = setup_test_env();
1532
1533        let spec_id = "test-spec-normal-exit";
1534
1535        // Acquire lock
1536        let lock = FileLock::acquire(spec_id, false, None).unwrap();
1537        assert!(FileLock::exists(spec_id));
1538
1539        // Explicitly release
1540        lock.release().unwrap();
1541
1542        // Lock should be gone
1543        assert!(!FileLock::exists(spec_id));
1544
1545        // Should be able to acquire again after release
1546        let _lock2 = FileLock::acquire(spec_id, false, None).unwrap();
1547    }
1548
1549    #[test]
1550    fn test_lock_cleanup_on_panic() {
1551        let _temp_dir = setup_test_env();
1552
1553        let spec_id = "test-spec-panic-cleanup";
1554
1555        {
1556            let _lock = FileLock::acquire(spec_id, false, None).unwrap();
1557            assert!(FileLock::exists(spec_id));
1558        } // lock goes out of scope here, Drop should clean up
1559
1560        // Lock should be automatically cleaned up by Drop
1561        assert!(!FileLock::exists(spec_id));
1562    }
1563
1564    #[test]
1565    fn test_force_flag_breaks_stale_lock() {
1566        let _temp_dir = setup_test_env();
1567
1568        let spec_id = "test-spec-force-break";
1569
1570        // Create a stale lock
1571        let lock_path = FileLock::get_lock_path(spec_id);
1572        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1573
1574        let old_lock_info = LockInfo {
1575            pid: 99999,
1576            start_time: 0,
1577            created_at: 0,
1578            spec_id: spec_id.to_string(),
1579            xchecker_version: "0.1.0".to_string(),
1580        };
1581
1582        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1583        fs::write(&lock_path, lock_json).unwrap();
1584
1585        // Should fail without force
1586        let result = FileLock::acquire(spec_id, false, None);
1587        assert!(result.is_err());
1588
1589        // Should succeed with force
1590        let lock = FileLock::acquire(spec_id, true, None).unwrap();
1591        assert_eq!(lock.spec_id(), spec_id);
1592
1593        // Lock info should be updated with current process
1594        let new_lock_info = FileLock::get_lock_info(spec_id).unwrap().unwrap();
1595        assert_eq!(new_lock_info.pid, process::id());
1596    }
1597
1598    #[test]
1599    fn test_lock_info_with_invalid_pid() {
1600        let _temp_dir = setup_test_env();
1601
1602        let spec_id = "test-spec-invalid-pid";
1603        let lock_path = FileLock::get_lock_path(spec_id);
1604        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1605
1606        // Create a lock with an invalid PID (0 is never a valid PID)
1607        let invalid_lock_info = LockInfo {
1608            pid: 0,
1609            start_time: SystemTime::now()
1610                .duration_since(UNIX_EPOCH)
1611                .unwrap()
1612                .as_secs(),
1613            created_at: SystemTime::now()
1614                .duration_since(UNIX_EPOCH)
1615                .unwrap()
1616                .as_secs(),
1617            spec_id: spec_id.to_string(),
1618            xchecker_version: "0.1.0".to_string(),
1619        };
1620
1621        let lock_json = serde_json::to_string_pretty(&invalid_lock_info).unwrap();
1622        fs::write(&lock_path, lock_json).unwrap();
1623
1624        // Should be able to acquire with force (PID 0 is never running)
1625        let result = FileLock::acquire(spec_id, true, None);
1626        assert!(result.is_ok());
1627    }
1628
1629    #[test]
1630    fn test_lock_info_with_invalid_host() {
1631        let _temp_dir = setup_test_env();
1632
1633        let spec_id = "test-spec-invalid-host";
1634
1635        // Create a lock with current PID but we'll test that it still works
1636        let lock = FileLock::acquire(spec_id, false, None).unwrap();
1637        let lock_info = lock.lock_info();
1638
1639        // Verify lock info is valid
1640        assert_eq!(lock_info.spec_id, spec_id);
1641        assert_eq!(lock_info.pid, process::id());
1642        assert!(!lock_info.xchecker_version.is_empty());
1643    }
1644
1645    #[test]
1646    fn test_lock_with_corrupted_lock_file() {
1647        let _temp_dir = setup_test_env();
1648
1649        let spec_id = "test-spec-corrupted-lock";
1650        let lock_path = FileLock::get_lock_path(spec_id);
1651        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1652
1653        // Write corrupted JSON to lock file
1654        fs::write(&lock_path, "{ invalid json content }").unwrap();
1655
1656        // Should fail with CorruptedLock error
1657        let result = FileLock::acquire(spec_id, false, None);
1658        assert!(result.is_err());
1659        assert!(matches!(
1660            result.unwrap_err(),
1661            LockError::CorruptedLock { .. }
1662        ));
1663
1664        // Force flag doesn't bypass corrupted lock detection - it only bypasses stale lock detection
1665        // Corrupted locks are always an error that requires manual intervention
1666        let result_force = FileLock::acquire(spec_id, true, None);
1667        assert!(result_force.is_err());
1668        assert!(matches!(
1669            result_force.unwrap_err(),
1670            LockError::CorruptedLock { .. }
1671        ));
1672    }
1673
1674    #[test]
1675    fn test_lock_with_partial_json() {
1676        let _temp_dir = setup_test_env();
1677
1678        let spec_id = "test-spec-partial-json";
1679        let lock_path = FileLock::get_lock_path(spec_id);
1680        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1681
1682        // Write partial JSON (missing closing brace)
1683        fs::write(&lock_path, r#"{"pid": 12345, "start_time":"#).unwrap();
1684
1685        // Should fail with CorruptedLock error
1686        let result = FileLock::acquire(spec_id, false, None);
1687        assert!(result.is_err());
1688        assert!(matches!(
1689            result.unwrap_err(),
1690            LockError::CorruptedLock { .. }
1691        ));
1692    }
1693
1694    #[test]
1695    fn test_lock_with_wrong_json_structure() {
1696        let _temp_dir = setup_test_env();
1697
1698        let spec_id = "test-spec-wrong-structure";
1699        let lock_path = FileLock::get_lock_path(spec_id);
1700        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1701
1702        // Write valid JSON but wrong structure (array instead of object)
1703        fs::write(&lock_path, r#"["not", "a", "lock", "object"]"#).unwrap();
1704
1705        // Should fail with CorruptedLock error
1706        let result = FileLock::acquire(spec_id, false, None);
1707        assert!(result.is_err());
1708        assert!(matches!(
1709            result.unwrap_err(),
1710            LockError::CorruptedLock { .. }
1711        ));
1712    }
1713
1714    #[test]
1715    fn test_lock_with_missing_required_fields() {
1716        let _temp_dir = setup_test_env();
1717
1718        let spec_id = "test-spec-missing-fields";
1719        let lock_path = FileLock::get_lock_path(spec_id);
1720        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1721
1722        // Write JSON with missing required fields
1723        fs::write(&lock_path, r#"{"pid": 12345}"#).unwrap();
1724
1725        // Should fail with CorruptedLock error
1726        let result = FileLock::acquire(spec_id, false, None);
1727        assert!(result.is_err());
1728        assert!(matches!(
1729            result.unwrap_err(),
1730            LockError::CorruptedLock { .. }
1731        ));
1732    }
1733
1734    #[test]
1735    fn test_lock_with_extra_fields() {
1736        let _temp_dir = setup_test_env();
1737
1738        let spec_id = "test-spec-extra-fields";
1739        let lock_path = FileLock::get_lock_path(spec_id);
1740        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1741
1742        // Create lock info with all required fields plus extra
1743        let lock_info_json = r#"{
1744            "pid": 12345,
1745            "start_time": 0,
1746            "created_at": 0,
1747            "spec_id": "test-spec-extra-fields",
1748            "xchecker_version": "0.1.0",
1749            "extra_field": "should be ignored"
1750        }"#;
1751
1752        fs::write(&lock_path, lock_info_json).unwrap();
1753
1754        // Should succeed with force (extra fields should be ignored)
1755        let result = FileLock::acquire(spec_id, true, None);
1756        assert!(result.is_ok());
1757    }
1758
1759    #[test]
1760    fn test_lock_with_very_old_timestamp() {
1761        let _temp_dir = setup_test_env();
1762
1763        let spec_id = "test-spec-very-old";
1764
1765        // Create a lock with timestamp from year 1970
1766        let lock_path = FileLock::get_lock_path(spec_id);
1767        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1768
1769        let old_lock_info = LockInfo {
1770            pid: 99999,
1771            start_time: 0,
1772            created_at: 0, // Unix epoch
1773            spec_id: spec_id.to_string(),
1774            xchecker_version: "0.1.0".to_string(),
1775        };
1776
1777        let lock_json = serde_json::to_string_pretty(&old_lock_info).unwrap();
1778        fs::write(&lock_path, lock_json).unwrap();
1779
1780        // Should be detected as stale
1781        let result = FileLock::acquire(spec_id, false, None);
1782        assert!(result.is_err());
1783        assert!(matches!(result.unwrap_err(), LockError::StaleLock { .. }));
1784
1785        // Should succeed with force
1786        let lock = FileLock::acquire(spec_id, true, None).unwrap();
1787        assert_eq!(lock.spec_id(), spec_id);
1788    }
1789
1790    #[test]
1791    fn test_lock_with_future_timestamp() {
1792        let _temp_dir = setup_test_env();
1793
1794        let spec_id = "test-spec-future";
1795
1796        // Create a lock with timestamp 1 hour in the future (clock skew scenario)
1797        let lock_path = FileLock::get_lock_path(spec_id);
1798        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1799
1800        let future_timestamp = SystemTime::now()
1801            .duration_since(UNIX_EPOCH)
1802            .unwrap()
1803            .as_secs()
1804            + 3600; // 1 hour in the future
1805
1806        let future_lock_info = LockInfo {
1807            pid: 99999, // Non-existent PID
1808            start_time: future_timestamp,
1809            created_at: future_timestamp,
1810            spec_id: spec_id.to_string(),
1811            xchecker_version: "0.1.0".to_string(),
1812        };
1813
1814        let lock_json = serde_json::to_string_pretty(&future_lock_info).unwrap();
1815        fs::write(&lock_path, lock_json).unwrap();
1816
1817        // Future timestamps should be handled gracefully (no panic)
1818        // Treated as age=0 (not stale), but PID check should still apply
1819        let result = FileLock::acquire(spec_id, false, None);
1820        // Result depends on whether PID 99999 exists (unlikely), but no overflow/panic should occur
1821        assert!(
1822            result.is_ok() || result.is_err(),
1823            "Should handle future timestamp without panic"
1824        );
1825    }
1826
1827    #[test]
1828    fn test_lock_info_with_empty_spec_id() {
1829        let _temp_dir = setup_test_env();
1830
1831        let spec_id = "";
1832
1833        // Should handle empty spec_id gracefully
1834        let result = FileLock::acquire(spec_id, false, None);
1835        // May succeed or fail depending on path handling, but shouldn't panic
1836        assert!(
1837            result.is_ok() || result.is_err(),
1838            "Should handle empty spec_id without panic"
1839        );
1840    }
1841
1842    #[test]
1843    fn test_lock_info_with_special_characters_in_spec_id() {
1844        let _temp_dir = setup_test_env();
1845
1846        let spec_id = "test-spec-with-special-@#$%";
1847
1848        // Should handle special characters in spec_id
1849        let result = FileLock::acquire(spec_id, false, None);
1850        // May succeed or fail depending on filesystem, but shouldn't panic
1851        if let Ok(lock) = result {
1852            assert_eq!(lock.spec_id(), spec_id);
1853        }
1854    }
1855
1856    #[test]
1857    fn test_get_lock_info_with_nonexistent_lock() {
1858        let _temp_dir = setup_test_env();
1859
1860        let spec_id = "nonexistent-lock-spec";
1861
1862        let result = FileLock::get_lock_info(spec_id);
1863        assert!(result.is_ok());
1864        assert!(result.unwrap().is_none());
1865    }
1866
1867    #[test]
1868    fn test_get_lock_info_with_corrupted_lock() {
1869        let _temp_dir = setup_test_env();
1870
1871        let spec_id = "corrupted-lock-info-spec";
1872        let lock_path = FileLock::get_lock_path(spec_id);
1873        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1874
1875        // Write corrupted content
1876        fs::write(&lock_path, "not json at all").unwrap();
1877
1878        let result = FileLock::get_lock_info(spec_id);
1879        assert!(result.is_err());
1880        assert!(matches!(
1881            result.unwrap_err(),
1882            LockError::CorruptedLock { .. }
1883        ));
1884    }
1885
1886    #[test]
1887    fn test_xchecker_lock_with_empty_values() {
1888        let lock = XCheckerLock::new(String::new(), String::new());
1889
1890        assert_eq!(lock.schema_version, "1");
1891        assert_eq!(lock.model_full_name, "");
1892        assert_eq!(lock.claude_cli_version, "");
1893    }
1894
1895    #[test]
1896    fn test_xchecker_lock_with_very_long_values() {
1897        let long_model = "a".repeat(1000);
1898        let long_version = "b".repeat(1000);
1899
1900        let lock = XCheckerLock::new(long_model.clone(), long_version.clone());
1901
1902        assert_eq!(lock.model_full_name, long_model);
1903        assert_eq!(lock.claude_cli_version, long_version);
1904    }
1905
1906    #[test]
1907    fn test_xchecker_lock_with_unicode_values() {
1908        let unicode_model = "claude-测试-🚀";
1909        let unicode_version = "版本-1.0-✨";
1910
1911        let lock = XCheckerLock::new(unicode_model.to_string(), unicode_version.to_string());
1912
1913        assert_eq!(lock.model_full_name, unicode_model);
1914        assert_eq!(lock.claude_cli_version, unicode_version);
1915    }
1916
1917    #[test]
1918    fn test_empty_lockfile_error_includes_spec_id() {
1919        let _temp_dir = setup_test_env();
1920
1921        let spec_id = "test-spec-empty-lockfile-msg";
1922        let lock_path = FileLock::get_lock_path(spec_id);
1923        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1924
1925        // Write empty file (simulates race condition during initialization)
1926        fs::write(&lock_path, "").unwrap();
1927
1928        // Should fail with CorruptedLock error that includes spec_id
1929        let result = FileLock::acquire(spec_id, false, None);
1930        assert!(result.is_err());
1931
1932        match result.unwrap_err() {
1933            LockError::CorruptedLock { reason } => {
1934                assert!(
1935                    reason.contains(spec_id),
1936                    "Error message should contain spec_id: {reason}"
1937                );
1938                assert!(
1939                    reason.contains("empty") || reason.contains("initializing"),
1940                    "Error message should mention empty/initializing: {reason}"
1941                );
1942            }
1943            other => panic!("Expected CorruptedLock error, got: {other:?}"),
1944        }
1945    }
1946
1947    #[test]
1948    fn test_partial_json_lockfile_error_includes_spec_id() {
1949        let _temp_dir = setup_test_env();
1950
1951        let spec_id = "test-spec-partial-json-msg";
1952        let lock_path = FileLock::get_lock_path(spec_id);
1953        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1954
1955        // Write partial JSON (simulates mid-write race condition)
1956        fs::write(&lock_path, r#"{"pid": 12345, "start_time":"#).unwrap();
1957
1958        // Should fail with CorruptedLock error that includes spec_id
1959        let result = FileLock::acquire(spec_id, false, None);
1960        assert!(result.is_err());
1961
1962        match result.unwrap_err() {
1963            LockError::CorruptedLock { reason } => {
1964                assert!(
1965                    reason.contains(spec_id),
1966                    "Error message should contain spec_id: {reason}"
1967                );
1968            }
1969            other => panic!("Expected CorruptedLock error, got: {other:?}"),
1970        }
1971    }
1972
1973    #[test]
1974    fn test_corrupted_json_error_includes_spec_id() {
1975        let _temp_dir = setup_test_env();
1976
1977        let spec_id = "test-spec-error-format";
1978
1979        let lock_path = FileLock::get_lock_path(spec_id);
1980        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
1981
1982        // Write corrupted JSON that won't be retried (definite corruption, not EOF)
1983        fs::write(&lock_path, r#"{"invalid": "structure", "no_pid": true}"#).unwrap();
1984
1985        // Should fail with CorruptedLock error that includes spec_id
1986        let result = FileLock::acquire(spec_id, false, None);
1987        assert!(result.is_err());
1988
1989        match result.unwrap_err() {
1990            LockError::CorruptedLock { reason } => {
1991                assert!(
1992                    reason.contains(spec_id),
1993                    "Error message should contain spec_id: {reason}"
1994                );
1995            }
1996            other => panic!("Expected CorruptedLock error, got: {other:?}"),
1997        }
1998    }
1999
2000    #[test]
2001    fn test_concurrent_lock_error_includes_spec_id() {
2002        let _temp_dir = setup_test_env();
2003
2004        let spec_id = "test-spec-concurrent-msg";
2005
2006        // First acquire a lock
2007        let _lock1 = FileLock::acquire(spec_id, false, None).unwrap();
2008
2009        // Try to acquire again - this will fail with ConcurrentExecution
2010        let result = FileLock::acquire(spec_id, false, None);
2011        assert!(result.is_err());
2012
2013        // ConcurrentExecution error should include spec_id
2014        match result.unwrap_err() {
2015            LockError::ConcurrentExecution {
2016                spec_id: err_spec, ..
2017            } => {
2018                assert_eq!(err_spec, spec_id);
2019            }
2020            other => panic!("Expected ConcurrentExecution error, got: {other:?}"),
2021        }
2022    }
2023
2024    #[test]
2025    fn test_validate_existing_lock_handles_clock_skew() {
2026        let _temp_dir = setup_test_env();
2027
2028        let spec_id = "test-spec-clock-skew-validation";
2029        let lock_path = FileLock::get_lock_path(spec_id);
2030        fs::create_dir_all(lock_path.parent().unwrap()).unwrap();
2031
2032        // Create a lock with timestamp 1 hour in the future (clock skew)
2033        let future_timestamp = SystemTime::now()
2034            .duration_since(UNIX_EPOCH)
2035            .unwrap()
2036            .as_secs()
2037            + 3600;
2038
2039        let lock_info = LockInfo {
2040            pid: 99999, // Non-existent PID
2041            start_time: future_timestamp,
2042            created_at: future_timestamp,
2043            spec_id: spec_id.to_string(),
2044            xchecker_version: "0.1.0".to_string(),
2045        };
2046
2047        let lock_json = serde_json::to_string_pretty(&lock_info).unwrap();
2048        fs::write(&lock_path, lock_json).unwrap();
2049
2050        // Should not panic due to clock skew - saturating_sub handles this
2051        // With force=true, should succeed
2052        let result = FileLock::acquire(spec_id, true, None);
2053        assert!(result.is_ok(), "Should handle clock skew gracefully");
2054    }
2055}
xchecker_lock/lib.rs

xchecker_lock/
lib.rs