Skip to main content

socket_patch_cli/commands/
lock_cli.rs

1//! Envelope-aware wrapper around the
2//! `socket_patch_core::patch::apply_lock` advisory lock.
3//!
//! Mutating subcommands (`apply`, `rollback`, `repair`, `remove`) all
//! need the same shape: acquire the lock at the top of `run`, and on
//! contention emit a JSON error envelope whose `error.code` is
//! `"lock_held"` (or a message on stderr in human mode), then exit 1.
//! This module centralises that emission so the four call sites stay
//! one line each.
9//!
10//! The lock itself is in `socket-patch-core` (cross-crate, also used
11//! by tests). This module is the CLI-side glue that knows how to
12//! render the failure through the shared [`crate::json_envelope`].
13
14use std::path::Path;
15use std::time::Duration;
16
17use socket_patch_core::patch::apply_lock::{acquire, LockError, LockGuard};
18
19use crate::json_envelope::{
20    Command, Envelope, EnvelopeError, PatchAction, PatchEvent,
21};
22
/// Stable `errorCode` tag emitted as a `Skipped` warning event when
/// `--break-lock` actually deletes a pre-existing lock file. Exposed
/// for downstream consumers and integration tests that pattern-match
/// on it.
///
/// This is the reason code [`lock_broken_event`] passes to
/// `with_reason`. It is an event-level tag and is distinct from the
/// `lock_held` / `lock_io` / `lock_break_failed` codes that
/// [`acquire_or_emit`] reports when acquisition fails outright.
pub const LOCK_BROKEN_CODE: &str = "lock_broken";
28
/// Outcome of a successful lock acquisition. Callers attach a
/// `lock_broken` event to their own envelope when [`broke_lock`] is
/// true, so the audit trail follows the same conventions as the
/// rest of the command's output.
///
/// Produced only by [`acquire_or_emit`]; the failure path returns an
/// exit code instead of this struct.
///
/// [`broke_lock`]: LockAcquired::broke_lock
#[derive(Debug)]
pub struct LockAcquired {
    /// The guard handed back by `apply_lock::acquire`. Callers keep it
    /// for the duration of the mutating work.
    // NOTE(review): presumably the lock is released when this guard is
    // dropped — confirm against `socket_patch_core::patch::apply_lock`.
    pub guard: LockGuard,
    /// True iff `--break-lock` was set AND the helper actually
    /// removed a pre-existing `apply.lock` file before acquiring.
    /// False when the file didn't exist (nothing to break) — the
    /// flag was a no-op in that case so no warning is warranted.
    pub broke_lock: bool,
}
44
45/// Try to acquire `<socket_dir>/apply.lock` and return the guard, or
46/// emit a failure envelope and a non-zero exit code.
47///
48/// `command` selects the envelope's `command` field so downstream
49/// consumers see `apply` / `rollback` / `repair` / `remove` rather
50/// than a generic "lock failed". `dry_run` is plumbed through to the
51/// envelope's `dry_run` field for the (rare) case where lock
52/// contention happens during a dry-run apply.
53///
54/// `timeout = Duration::ZERO` keeps the historical non-blocking
55/// try-once shape. Positive values wait with a 100 ms backoff —
56/// see `socket_patch_core::patch::apply_lock::acquire`.
57///
58/// `break_lock = true` deletes `<socket_dir>/apply.lock` before the
59/// acquire attempt. The motivating case is a crashed prior run that
60/// left the file but no OS lock. When the file exists and is
61/// successfully removed the return value's `broke_lock` is true and
62/// the caller should attach a `lock_broken` warning event to their
63/// envelope.
64pub fn acquire_or_emit(
65    socket_dir: &Path,
66    command: Command,
67    json: bool,
68    silent: bool,
69    dry_run: bool,
70    timeout: Duration,
71    break_lock: bool,
72) -> Result<LockAcquired, i32> {
73    let mut broke_lock = false;
74    if break_lock {
75        let path = socket_dir.join("apply.lock");
76        match std::fs::remove_file(&path) {
77            Ok(()) => {
78                broke_lock = true;
79                if !silent && !json {
80                    eprintln!(
81                        "Warning: --break-lock removed {} before acquisition.",
82                        path.display()
83                    );
84                }
85            }
86            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
87                // No file to break — silently proceed to the normal
88                // acquire path. Documented as a no-op so scripts can
89                // pass --break-lock unconditionally on retry.
90            }
91            Err(source) => {
92                let msg = format!(
93                    "failed to remove lock file at {}: {}",
94                    path.display(),
95                    source
96                );
97                emit(command, json, silent, dry_run, "lock_break_failed", &msg, None);
98                return Err(1);
99            }
100        }
101    }
102
103    match acquire(socket_dir, timeout) {
104        Ok(guard) => Ok(LockAcquired { guard, broke_lock }),
105        Err(LockError::Held) => {
106            let msg = held_message(timeout);
107            emit(
108                command,
109                json,
110                silent,
111                dry_run,
112                "lock_held",
113                &msg,
114                Some(socket_dir),
115            );
116            Err(1)
117        }
118        Err(LockError::Io { path, source }) => {
119            let msg = format!("failed to open lock file at {}: {}", path.display(), source);
120            emit(command, json, silent, dry_run, "lock_io", &msg, None);
121            Err(1)
122        }
123    }
124}
125
126/// Build the warning event that callers attach to their envelope
127/// when [`LockAcquired::broke_lock`] is true. Artifact-level (no
128/// PURL) since the action targets the `.socket/` directory itself,
129/// not a specific package.
130pub fn lock_broken_event(socket_dir: &Path) -> PatchEvent {
131    PatchEvent::artifact(PatchAction::Skipped).with_reason(
132        LOCK_BROKEN_CODE,
133        format!(
134            "--break-lock removed {}/apply.lock before acquisition",
135            socket_dir.display()
136        ),
137    )
138}
139
140/// Convenience: record the `lock_broken` warning event on an
141/// envelope. Mirrors the inline pattern at each call site so we
142/// don't drift on the action / errorCode pair.
143pub fn record_lock_broken(env: &mut Envelope, socket_dir: &Path) {
144    env.record(lock_broken_event(socket_dir));
145}
146
147/// Human-readable description of a `lock_held` contention for the given
148/// wait budget. A zero budget means the historical non-blocking
149/// try-once, so we omit the "(waited …)" clause entirely.
150fn held_message(timeout: Duration) -> String {
151    if timeout > Duration::ZERO {
152        format!(
153            "another socket-patch process is operating in this directory (waited {})",
154            fmt_duration(timeout)
155        )
156    } else {
157        "another socket-patch process is operating in this directory".to_string()
158    }
159}
160
/// Format a wait budget for humans, never collapsing a non-zero
/// budget to a misleading zero.
///
/// * Whole seconds read naturally: `5s` (and `0s` for a zero budget).
/// * Any other budget of at least one millisecond renders as whole
///   milliseconds: `250ms`, `1500ms`.
/// * Sub-millisecond budgets — reachable through the library API even
///   though the CLI only ever passes whole seconds — fall through to
///   microseconds (`500us`) and then nanoseconds (`42ns`) rather than
///   truncating to `0ms`.
fn fmt_duration(d: Duration) -> String {
    if d.subsec_nanos() == 0 {
        return format!("{}s", d.as_secs());
    }

    // From here on the duration is non-zero, so at least one of the
    // units below yields a non-zero count; pick the coarsest that does.
    let millis = d.as_millis();
    if millis > 0 {
        return format!("{millis}ms");
    }

    let micros = d.as_micros();
    if micros > 0 {
        format!("{micros}us")
    } else {
        format!("{}ns", d.as_nanos())
    }
}
172
173/// Build the top-level error envelope emitted in `--json` mode when
174/// lock acquisition fails. Split out from [`emit`] so the serialized
175/// shape (status / error.code / command / dryRun) is unit-testable
176/// without capturing stdout.
177fn error_envelope(command: Command, dry_run: bool, code: &str, message: &str) -> Envelope {
178    let mut env = Envelope::new(command);
179    env.dry_run = dry_run;
180    env.mark_error(EnvelopeError::new(code, message));
181    env
182}
183
184fn emit(
185    command: Command,
186    json: bool,
187    silent: bool,
188    dry_run: bool,
189    code: &str,
190    message: &str,
191    hint_dir: Option<&Path>,
192) {
193    if json {
194        println!("{}", error_envelope(command, dry_run, code, message).to_pretty_json());
195    } else if !silent {
196        eprintln!("Error: {message}.");
197        if hint_dir.is_some() {
198            eprintln!(
199                "  Run `socket-patch unlock` to inspect, or rerun with --break-lock if you're sure no holder exists."
200            );
201        }
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// The call shape shared by every acquisition test: `Command::Apply`,
    /// human mode (`json = false`), `silent = true` so nothing is
    /// printed, and `dry_run = false`. Only the directory, the wait
    /// budget and the break-lock flag vary between tests.
    fn try_acquire(
        socket_dir: &Path,
        timeout: Duration,
        break_lock: bool,
    ) -> Result<LockAcquired, i32> {
        acquire_or_emit(
            socket_dir,
            Command::Apply,
            false,
            true,
            false,
            timeout,
            break_lock,
        )
    }

    /// Parse serialized JSON back into a generic value for assertions.
    fn parse_json(text: &str) -> serde_json::Value {
        serde_json::from_str(text).unwrap()
    }

    /// A fresh directory has no holder, so acquisition succeeds and
    /// nothing is reported as broken.
    #[test]
    fn acquire_or_emit_succeeds_on_fresh_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let LockAcquired { guard, broke_lock } =
            try_acquire(tmp.path(), Duration::ZERO, false).unwrap();
        assert!(!broke_lock);
        drop(guard);
    }

    /// A second try-once acquisition while the first guard is alive
    /// fails with exit code 1.
    #[test]
    fn acquire_or_emit_returns_one_on_contention() {
        let tmp = tempfile::tempdir().unwrap();
        let _holder = try_acquire(tmp.path(), Duration::ZERO, false).unwrap();

        let exit_code = try_acquire(tmp.path(), Duration::ZERO, false).unwrap_err();
        assert_eq!(exit_code, 1);
    }

    /// Pointing at a directory that does not exist fails with exit
    /// code 1 rather than panicking.
    #[test]
    fn acquire_or_emit_returns_one_when_socket_dir_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("nope");

        let exit_code = try_acquire(&missing, Duration::ZERO, false).unwrap_err();
        assert_eq!(exit_code, 1);
    }

    /// Positive timeout waits then errors `lock_held` — confirms the
    /// budget is plumbed through to `acquire`. Mirrors the
    /// `apply_lock::tests::timeout_held` shape so a regression in
    /// either layer surfaces here.
    #[test]
    fn acquire_or_emit_honors_lock_timeout() {
        let tmp = tempfile::tempdir().unwrap();
        let _holder = try_acquire(tmp.path(), Duration::ZERO, false).unwrap();

        let started = std::time::Instant::now();
        let exit_code = try_acquire(tmp.path(), Duration::from_millis(250), false).unwrap_err();
        let waited = started.elapsed();

        assert_eq!(exit_code, 1);
        assert!(
            waited >= Duration::from_millis(200),
            "expected at least 200ms wait, got {:?}",
            waited
        );
    }

    /// `break_lock=true` against a pre-existing lock file with no
    /// holder removes the file and acquires fresh. `broke_lock` flag
    /// surfaces so callers can attach the warning event.
    #[test]
    fn acquire_or_emit_break_lock_removes_and_acquires() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_file = tmp.path().join("apply.lock");
        // Pre-stage a lock file with no holder — simulates the
        // post-crash leftover scenario.
        std::fs::write(&lock_file, b"").unwrap();

        let acquired = try_acquire(tmp.path(), Duration::ZERO, true).unwrap();
        assert!(
            acquired.broke_lock,
            "broke_lock should be true when a lock file existed and was removed"
        );
        // Lock file has been re-created by `acquire` and we hold it.
        assert!(lock_file.is_file());
    }

    /// `break_lock=true` on a clean directory (no lock file) is a
    /// no-op for the warning surface — `broke_lock` stays false so
    /// callers don't emit a spurious event.
    #[test]
    fn acquire_or_emit_break_lock_is_noop_when_no_file() {
        let tmp = tempfile::tempdir().unwrap();
        let acquired = try_acquire(tmp.path(), Duration::ZERO, true).unwrap();
        assert!(
            !acquired.broke_lock,
            "broke_lock should be false when there was nothing to remove"
        );
    }

    /// Whole-second budgets read naturally in the contention message.
    #[test]
    fn held_message_reports_whole_seconds() {
        let rendered = held_message(Duration::from_secs(5));
        assert_eq!(
            rendered,
            "another socket-patch process is operating in this directory (waited 5s)"
        );
    }

    /// Regression: `timeout.as_secs()` truncated a 250ms budget to
    /// `(waited 0s)`, which read as "we didn't wait at all". Sub-second
    /// budgets now surface as milliseconds. The 250ms budget mirrors
    /// `acquire_or_emit_honors_lock_timeout`, so the message stays
    /// honest for the exact value that test exercises.
    #[test]
    fn held_message_does_not_truncate_sub_second_to_zero() {
        let rendered = held_message(Duration::from_millis(250));
        assert!(
            rendered.contains("250ms"),
            "expected ms rendering, got: {rendered}"
        );
        assert!(
            !rendered.contains("0s"),
            "sub-second budget must not collapse to 0s: {rendered}"
        );
    }

    /// A zero budget is the non-blocking try-once shape — no "(waited …)"
    /// clause, since we never actually waited.
    #[test]
    fn held_message_zero_timeout_omits_waited_clause() {
        let rendered = held_message(Duration::ZERO);
        assert!(
            !rendered.contains("waited"),
            "zero budget should not claim a wait: {rendered}"
        );
    }

    /// The `--json` failure envelope (previously emitted only via
    /// `println!`, so untested) has the stable error shape downstream
    /// consumers pattern-match on: top-level `status: "error"` and
    /// `error.code` carrying the lock reason tag.
    #[test]
    fn error_envelope_has_stable_lock_held_shape() {
        let envelope = error_envelope(Command::Apply, false, "lock_held", "held by another run");
        let json = parse_json(&envelope.to_pretty_json());

        assert_eq!(json["command"], "apply");
        assert_eq!(json["status"], "error");
        assert_eq!(json["dryRun"], false);
        assert_eq!(json["error"]["code"], "lock_held");
        assert_eq!(json["error"]["message"], "held by another run");
        // A pre-event failure carries no events.
        let events = json["events"].as_array().unwrap();
        assert_eq!(events.len(), 0);
    }

    /// `dry_run` and `command` are plumbed through to the envelope so a
    /// contention during a dry-run apply/rollback is still reported as
    /// a dry run. Exercised here with `Command::Rollback` and the
    /// `lock_io` reason tag.
    #[test]
    fn error_envelope_propagates_dry_run_and_command() {
        let envelope = error_envelope(Command::Rollback, true, "lock_io", "open failed");
        let json = parse_json(&envelope.to_pretty_json());

        assert_eq!(json["command"], "rollback");
        assert_eq!(json["dryRun"], true);
        assert_eq!(json["error"]["code"], "lock_io");
    }

    /// The warning event serializes as a `skipped` action tagged with
    /// `LOCK_BROKEN_CODE`, and carries no `purl` key.
    #[test]
    fn lock_broken_event_uses_documented_code() {
        let tmp = tempfile::tempdir().unwrap();
        let event = lock_broken_event(tmp.path());
        let json = parse_json(&serde_json::to_string(&event).unwrap());

        assert_eq!(json["action"], "skipped");
        assert_eq!(json["errorCode"], LOCK_BROKEN_CODE);
        let fields = json.as_object().unwrap();
        assert!(
            fields.get("purl").is_none(),
            "lock_broken is an artifact-level event — no purl"
        );
    }
}