sqry-daemon 10.0.0

sqry daemon (sqryd) — persistent code-graph service
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
//! Daemon-wide error type.
//!
//! Thin `thiserror` enum covering every fallible surface of the daemon:
//! config loading, workspace lifecycle, admission control, IPC transport,
//! rebuild dispatch, and lifecycle management (pidfile, signals, auto-start).
//! Tasks 6–10 extend this enum as each surface lands.
//! Every variant maps cleanly to a JSON-RPC error code when the error
//! crosses the IPC boundary (see [`DaemonError::jsonrpc_code`]).
//!
//! # Exit-code mapping (Task 9 U1)
//!
//! Variants that can be returned before the IPC server binds (lifecycle errors)
//! map to POSIX `sysexits.h` exit codes via [`DaemonError::exit_code`]:
//!
//! | Variant             | Exit code | `sysexits.h` constant  |
//! |---------------------|-----------|------------------------|
//! | `AlreadyRunning`    | 75        | `EX_TEMPFAIL`          |
//! | `AutoStartTimeout`  | 69        | `EX_UNAVAILABLE`       |
//! | `SignalSetup`       | 70        | `EX_SOFTWARE`          |
//! | `Config`            | 78        | `EX_CONFIG`            |
//! | `Io`                | 73        | `EX_CANTCREAT`         |
//! | Other variants      | 70        | `EX_SOFTWARE` (default)|

use std::{path::PathBuf, time::SystemTime};

use thiserror::Error;

use crate::{
    JSONRPC_INTERNAL_ERROR, JSONRPC_INVALID_PARAMS, JSONRPC_MEMORY_BUDGET_EXCEEDED,
    JSONRPC_TOOL_TIMEOUT, JSONRPC_WORKSPACE_BUILD_FAILED, JSONRPC_WORKSPACE_EVICTED,
    JSONRPC_WORKSPACE_STALE_EXPIRED,
};

/// Result alias for daemon operations.
///
/// Shorthand for `Result<T, DaemonError>`, so fallible daemon APIs only need
/// to spell out their success type.
pub type DaemonResult<T> = Result<T, DaemonError>;

/// All daemon-surface error variants.
///
/// `Display` and `std::error::Error` are generated by the `thiserror` derive
/// from the `#[error(...)]`, `#[source]` and `#[from]` attributes on each
/// variant. The wire-level and process-level mappings live in the inherent
/// methods [`DaemonError::jsonrpc_code`], [`DaemonError::exit_code`] and
/// [`DaemonError::error_data`].
#[derive(Debug, Error)]
pub enum DaemonError {
    /// Config file could not be read or parsed.
    #[error("config error at {path}: {source}")]
    Config {
        /// Path reported in the error message (the config file in question).
        path: PathBuf,
        /// Underlying failure; also exposed through `Error::source()`.
        #[source]
        source: anyhow::Error,
    },

    /// An `io::Error` occurred outside the config surface (socket bind,
    /// pidfile lock, filesystem probe, etc.).
    ///
    /// `#[from]` lets `?` convert a `std::io::Error` into this variant, and
    /// `transparent` forwards `Display`/`source()` to the wrapped error.
    #[error(transparent)]
    Io(#[from] std::io::Error),

    /// Workspace load / rebuild failed with no prior-good graph to serve from.
    ///
    /// `root` is the workspace path shown in the message; `reason` is the
    /// free-form failure text appended after it.
    ///
    /// Maps to JSON-RPC `-32001`.
    #[error("workspace {root} build failed: {reason}")]
    WorkspaceBuildFailed { root: PathBuf, reason: String },

    /// Workspace is in the Failed state and the most recent successful build
    /// is older than the configured `stale_serve_max_age_hours` cap.
    ///
    /// Maps to JSON-RPC `-32002`.
    #[error("workspace {root} stale-serve window expired ({age_hours}h >= {cap_hours}h cap)")]
    WorkspaceStaleExpired {
        /// Workspace path shown in the message.
        root: PathBuf,
        /// Observed age, in hours, rendered on the left of the `>=` in the
        /// message.
        age_hours: u64,
        /// Cap, in hours, that `age_hours` reached or exceeded.
        cap_hours: u32,
        /// Last successful build timestamp, if any. `None` when the workspace
        /// has never successfully built (edge case: should not reach
        /// `WorkspaceStaleExpired` in that case — `WorkspaceBuildFailed` is
        /// returned instead — but the type is permissive for future-proofing).
        last_good_at: Option<SystemTime>,
        /// Textual diagnostic from the most recent failed build, if any.
        last_error: Option<String>,
    },

    /// Admission control could not satisfy a reservation after evicting every
    /// non-pinned workspace.
    ///
    /// Maps to JSON-RPC `-32003`.
    #[error(
        "memory budget exceeded: requested {requested_bytes} B, \
         {current_bytes} B loaded + {reserved_bytes} B reserved + \
         {retained_bytes} B retained / {limit_bytes} B limit"
    )]
    MemoryBudgetExceeded {
        /// Budget ceiling, in bytes ("limit" in the message).
        limit_bytes: u64,
        /// Bytes reported as "loaded" in the message.
        current_bytes: u64,
        /// Bytes reported as "reserved" in the message.
        reserved_bytes: u64,
        /// Bytes reported as "retained" in the message.
        retained_bytes: u64,
        /// Size, in bytes, of the reservation that could not be satisfied.
        requested_bytes: u64,
    },

    /// Workspace was evicted or removed between a rebuild dispatch and its
    /// admission / publish commit. Signals the Task 7b2 watcher task and any
    /// direct `handle_changes` caller to terminate their per-workspace loop —
    /// subsequent dispatches on the same `WorkspaceKey` must route through a
    /// fresh `get_or_load` first.
    ///
    /// Surfaced by `RebuildDispatcher::handle_changes`' top-of-drain-loop
    /// eviction gate AND by `WorkspaceManager::reserve_rebuild`'s Phase-1
    /// `workspaces.read()` membership + cancellation check (both paths use
    /// this typed variant so 7b2 can match on it without string parsing).
    ///
    /// `root` is the workspace path shown in the message.
    ///
    /// Maps to JSON-RPC `-32004`.
    #[error("workspace {root} evicted mid-rebuild")]
    WorkspaceEvicted { root: PathBuf },

    /// Caller requested `daemon/rebuild` or `daemon/cancel_rebuild` for a
    /// path that is not currently registered in the `WorkspaceManager`.
    ///
    /// Shares the JSON-RPC `-32004` code with [`Self::WorkspaceEvicted`].
    /// The `error_data` `"hint"` field distinguishes the two situations on
    /// the wire.
    ///
    /// `root` is the requested workspace path shown in the message.
    ///
    /// Maps to JSON-RPC `-32004`.
    #[error("workspace {root} is not loaded")]
    WorkspaceNotLoaded { root: PathBuf },

    /// Tool invocation exceeded [`DaemonConfig::tool_timeout_secs`].
    /// Emitted by `tool_core::classify_and_execute` (Task 8 Phase 8c U6)
    /// when the `tokio::time::timeout(tool_timeout, spawn_blocking(run))`
    /// outer timer fires. The detached [`tokio::task::JoinHandle`] is
    /// dropped — the OS thread may continue executing the tool closure
    /// but its result is discarded.
    ///
    /// The `deadline_ms` field is the canonical wire value (populated by
    /// the constructor as `secs * 1000`) so `error_data` does not have
    /// to re-derive it on every call and serialised payloads remain
    /// byte-for-byte identical regardless of constructor shape.
    ///
    /// Maps to JSON-RPC `-32000`.
    ///
    /// [`DaemonConfig::tool_timeout_secs`]: crate::config::DaemonConfig
    #[error(
        "tool invocation exceeded deadline of {deadline_ms}ms for workspace {}",
        root.display()
    )]
    ToolTimeout {
        /// Workspace path shown in the message and in `error_data`.
        root: PathBuf,
        /// Timeout in seconds. Not rendered by `Display` or `error_data`
        /// directly; both use `deadline_ms`.
        secs: u64,
        /// Derived: `secs * 1000`. Stored explicitly to avoid
        /// re-calculating inside `error_data` / `Display` impls and to
        /// give the MCP-path wrapper (`daemon_err_to_mcp`, Phase 8c U8)
        /// a single field to read.
        deadline_ms: u64,
    },

    /// Argument validation failure surfaced by `tool_core` BEFORE any
    /// workspace classification runs. Used for `resolve_index_root`
    /// failures, missing `path` arguments in MCP tool args, and any
    /// other precondition violation that must be rejected with a
    /// JSON-RPC `-32602` "Invalid params" response.
    ///
    /// `reason` is the human-readable explanation appended to the message
    /// and echoed under `details.reason` in `error_data`.
    ///
    /// Maps to JSON-RPC `-32602`.
    #[error("invalid argument: {reason}")]
    InvalidArgument { reason: String },

    /// Catch-all for errors surfaced by
    /// [`sqry_mcp::daemon_adapter`][1] tool execution that do not map
    /// to a more specific `DaemonError` variant. The wrapped
    /// `anyhow::Error` is flattened into a string on the wire via the
    /// `Display`/`#[source]` chain.
    ///
    /// Maps to JSON-RPC `-32603`.
    ///
    /// [1]: https://docs.rs/sqry-mcp/latest/sqry_mcp/daemon_adapter/index.html
    #[error("internal error: {0}")]
    Internal(#[source] anyhow::Error),

    // ── Task 9 U1 — lifecycle error variants ─────────────────────────────
    /// A sqryd process already holds the exclusive flock on `lock` and has
    /// written its PID to `pidfile`.  The caller should surface this to the
    /// user with the owner PID (if legible) and exit `EX_TEMPFAIL` (75).
    ///
    /// This error fires before [`IpcServer::bind`] and therefore before any
    /// workspace is registered; it should never be stored in the workspace
    /// `last_error` field.  [`crate::workspace::manager::clone_err`] maps it
    /// to `WorkspaceBuildFailed` as a defensive fallback.
    ///
    /// The message renders an unknown owner PID as `pid=?`.
    ///
    /// [`IpcServer::bind`]: crate::ipc::IpcServer
    #[error(
        "sqryd is already running (pid={}) on socket {} (lock: {})",
        owner_pid.map_or_else(|| "?".to_owned(), |p| p.to_string()),
        socket.display(),
        lock.display()
    )]
    AlreadyRunning {
        /// The IPC socket path that the running daemon owns.
        socket: PathBuf,
        /// The flock file that proves ownership.
        lock: PathBuf,
        /// PID of the owner process, if the pidfile was legible.
        owner_pid: Option<u32>,
    },

    /// The daemon did not become ready within `timeout_secs` seconds.
    /// Used by both the `--detach` parent wait loop and the
    /// `lifecycle::start_detached` auto-spawn helper (Task 10).
    ///
    /// Callers should exit `EX_UNAVAILABLE` (69).
    #[error(
        "daemon did not become ready within {timeout_secs}s on socket {}",
        socket.display()
    )]
    AutoStartTimeout {
        /// How long we waited.
        timeout_secs: u64,
        /// The socket we polled.
        socket: PathBuf,
    },

    /// Installing OS signal handlers failed (e.g. `sigaction` returned
    /// `ENOSYS` in a highly-restricted container, or tokio's signal
    /// registration failed).
    ///
    /// Callers should exit `EX_SOFTWARE` (70).
    #[error("failed to install signal handlers: {source}")]
    SignalSetup {
        /// Underlying OS / runtime error; rendered in the message and
        /// exposed through `Error::source()`.
        #[source]
        source: std::io::Error,
    },
}

impl DaemonError {
    /// Map to the stable JSON-RPC error code used on the wire.
    ///
    /// Returns `None` for errors that have no public JSON-RPC code — these
    /// are serialised as `-32603 "Internal error"` per the JSON-RPC 2.0 spec
    /// at the IPC boundary (wired in Task 8).
    ///
    /// [`Self::WorkspaceEvicted`] and [`Self::WorkspaceNotLoaded`] share a
    /// single code; [`Self::error_data`] tells them apart via its `"hint"`
    /// field.
    ///
    /// The Task 9 lifecycle variants (`AlreadyRunning`, `AutoStartTimeout`,
    /// `SignalSetup`) fire before `IpcServer::bind` so they never cross the
    /// IPC boundary directly; `None` is returned for them here.  They are
    /// only surfaced to human users via `exit_code()` and process exit.
    #[must_use]
    pub const fn jsonrpc_code(&self) -> Option<i32> {
        match self {
            Self::WorkspaceBuildFailed { .. } => Some(JSONRPC_WORKSPACE_BUILD_FAILED),
            Self::WorkspaceStaleExpired { .. } => Some(JSONRPC_WORKSPACE_STALE_EXPIRED),
            Self::MemoryBudgetExceeded { .. } => Some(JSONRPC_MEMORY_BUDGET_EXCEEDED),
            Self::WorkspaceEvicted { .. } | Self::WorkspaceNotLoaded { .. } => {
                Some(JSONRPC_WORKSPACE_EVICTED)
            }
            Self::ToolTimeout { .. } => Some(JSONRPC_TOOL_TIMEOUT),
            Self::InvalidArgument { .. } => Some(JSONRPC_INVALID_PARAMS),
            Self::Internal(_) => Some(JSONRPC_INTERNAL_ERROR),
            // No public code: the lifecycle variants never cross the IPC
            // boundary, and `Config` / `Io` are reported as a generic
            // internal error when they do.
            Self::AlreadyRunning { .. }
            | Self::AutoStartTimeout { .. }
            | Self::SignalSetup { .. }
            | Self::Config { .. }
            | Self::Io(_) => None,
        }
    }

    /// Map to a POSIX process exit code following the BSD `sysexits.h`
    /// conventions used for daemon CLI errors (Task 9 U1).
    ///
    /// | Code | Symbol        | Semantics                                   |
    /// |------|---------------|---------------------------------------------|
    /// | 0    | `EX_OK`       | Success (not an error; included for completeness) |
    /// | 69   | `EX_UNAVAILABLE` | Service unavailable (timeout, not-ready)  |
    /// | 70   | `EX_SOFTWARE` | Internal software error                     |
    /// | 73   | `EX_CANTCREAT`| IO error / cannot create required file      |
    /// | 75   | `EX_TEMPFAIL` | Try again (e.g. another instance is running)|
    /// | 78   | `EX_CONFIG`   | Configuration error                         |
    ///
    /// Variants that only occur inside the IPC / workspace layer (not at
    /// process-startup time) have no dedicated `sysexits.h` mapping and all
    /// return `70` (`EX_SOFTWARE`). This method never returns `0`.
    #[must_use]
    pub const fn exit_code(&self) -> u8 {
        match self {
            // BSD sysexits.h (man 3 sysexits) exit codes for lifecycle errors.
            // 75 EX_TEMPFAIL: another process already owns the socket/lock.
            Self::AlreadyRunning { .. } => 75,
            // 69 EX_UNAVAILABLE: daemon didn't start in time.
            Self::AutoStartTimeout { .. } => 69,
            // 70 EX_SOFTWARE: internal OS-level failure (signal registration).
            Self::SignalSetup { .. } => 70,
            // 78 EX_CONFIG: malformed or unreadable config file.
            Self::Config { .. } => 78,
            // 73 EX_CANTCREAT: I/O failure (pidfile write, socket bind, etc.).
            Self::Io(_) => 73,
            // IPC-layer errors that escape to the CLI surface default to 70.
            // Listed explicitly (no `_` arm) so adding a variant forces a
            // decision here.
            Self::WorkspaceBuildFailed { .. }
            | Self::WorkspaceStaleExpired { .. }
            | Self::MemoryBudgetExceeded { .. }
            | Self::WorkspaceEvicted { .. }
            | Self::WorkspaceNotLoaded { .. }
            | Self::ToolTimeout { .. }
            | Self::InvalidArgument { .. }
            | Self::Internal(_) => 70,
        }
    }

    /// Build the `error.data` JSON payload surfaced alongside the JSON-RPC
    /// error code. Returns `None` when no structured payload should be
    /// attached (`Io` / `Config` errors routed through `-32603`, and the
    /// lifecycle variants that never reach the wire).
    ///
    /// Task 8 Phase 8a. The IPC method dispatch consumes this to populate
    /// `JsonRpcError.data` so clients can render actionable diagnostics
    /// without parsing the free-form `message` string.
    ///
    /// Workspace roots are rendered with `Path::display`, the same way the
    /// `Display` messages render them. For valid UTF-8 paths this is the
    /// exact path string; a non-UTF-8 path is rendered lossily instead of
    /// failing serialisation, so building an error payload can never panic
    /// on an unusual path.
    #[must_use]
    pub fn error_data(&self) -> Option<serde_json::Value> {
        use serde_json::json;
        match self {
            Self::MemoryBudgetExceeded {
                limit_bytes,
                current_bytes,
                reserved_bytes,
                retained_bytes,
                requested_bytes,
            } => Some(json!({
                "limit_bytes": limit_bytes,
                "current_bytes": current_bytes,
                "reserved_bytes": reserved_bytes,
                "retained_bytes": retained_bytes,
                "requested_bytes": requested_bytes,
            })),
            Self::WorkspaceStaleExpired {
                root,
                age_hours,
                cap_hours,
                last_good_at,
                last_error,
            } => {
                // UTC-Zulu RFC3339 (`YYYY-MM-DDTHH:MM:SSZ`). `chrono` is
                // already a workspace dependency used throughout the repo
                // for RFC3339 rendering; `to_rfc3339_opts(Secs, true)`
                // emits the UTC-Zulu form required by Task 7.
                let last_good_rfc3339 = last_good_at.map(|t| {
                    chrono::DateTime::<chrono::Utc>::from(t)
                        .to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
                });
                Some(json!({
                    // Lossy string rather than `Serialize for PathBuf`,
                    // which errors (and would make `json!` panic) on
                    // non-UTF-8 paths.
                    "root": root.display().to_string(),
                    "age_hours": age_hours,
                    "cap_hours": cap_hours,
                    "last_good_at": last_good_rfc3339,
                    "last_error": last_error,
                }))
            }
            Self::WorkspaceBuildFailed { root, reason } => Some(json!({
                "root": root.display().to_string(),
                "reason": reason,
            })),
            Self::WorkspaceEvicted { root } => Some(json!({
                "root": root.display().to_string(),
            })),
            Self::WorkspaceNotLoaded { root } => Some(json!({
                "root": root.display().to_string(),
                "hint": "use daemon/load to load the workspace before calling daemon/rebuild",
            })),
            // Phase 8c §O canonical 4-key envelope
            // `{kind, retryable, retry_after_ms, details}` matching
            // standalone `sqry-mcp::rpc_error_to_mcp` shape so clients
            // can handle daemon-path and direct-path errors with a
            // single parser.
            Self::ToolTimeout {
                root,
                secs: _,
                deadline_ms,
            } => Some(json!({
                "kind": "deadline_exceeded",
                "retryable": true,
                "retry_after_ms": 1000,
                "details": {
                    // `tool` is `null` here; the MCP-path wrapper
                    // `daemon_err_to_mcp` (Phase 8c U8) populates it
                    // with the method name pulled from the inbound
                    // JSON-RPC request.
                    "tool": serde_json::Value::Null,
                    "deadline_ms": deadline_ms,
                    "root": root.display().to_string(),
                },
            })),
            Self::InvalidArgument { reason } => Some(json!({
                "kind": "validation_error",
                "retryable": false,
                "retry_after_ms": serde_json::Value::Null,
                "details": {
                    "reason": reason,
                },
            })),
            // The wrapped `anyhow::Error` is deliberately not echoed into
            // `details`; only the fixed envelope is emitted.
            Self::Internal(_) => Some(json!({
                "kind": "internal",
                "retryable": false,
                "retry_after_ms": serde_json::Value::Null,
                "details": serde_json::Value::Null,
            })),
            Self::Io(_) | Self::Config { .. } => None,
            // Lifecycle errors don't cross the IPC boundary; no structured
            // payload is needed.
            Self::AlreadyRunning { .. }
            | Self::AutoStartTimeout { .. }
            | Self::SignalSetup { .. } => None,
        }
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests pinning the JSON-RPC code, exit code, `error_data` envelope
    //! and `clone_err` behaviour of every `DaemonError` variant.

    use super::*;

    /// Every variant that carries a dedicated workspace / admission JSON-RPC
    /// code must report it, including `WorkspaceNotLoaded`, which shares its
    /// code with `WorkspaceEvicted`.
    #[test]
    fn jsonrpc_code_covers_every_public_variant() {
        let mem = DaemonError::MemoryBudgetExceeded {
            limit_bytes: 2_048 * 1024 * 1024,
            current_bytes: 0,
            reserved_bytes: 0,
            retained_bytes: 0,
            requested_bytes: 4_096 * 1024 * 1024,
        };
        assert_eq!(mem.jsonrpc_code(), Some(JSONRPC_MEMORY_BUDGET_EXCEEDED));

        let stale = DaemonError::WorkspaceStaleExpired {
            root: PathBuf::from("/repo"),
            age_hours: 48,
            cap_hours: 24,
            last_good_at: None,
            last_error: None,
        };
        assert_eq!(stale.jsonrpc_code(), Some(JSONRPC_WORKSPACE_STALE_EXPIRED));

        let failed = DaemonError::WorkspaceBuildFailed {
            root: PathBuf::from("/repo"),
            reason: "plugin panic".into(),
        };
        assert_eq!(failed.jsonrpc_code(), Some(JSONRPC_WORKSPACE_BUILD_FAILED));

        let evicted = DaemonError::WorkspaceEvicted {
            root: PathBuf::from("/repo"),
        };
        assert_eq!(evicted.jsonrpc_code(), Some(JSONRPC_WORKSPACE_EVICTED));

        let not_loaded = DaemonError::WorkspaceNotLoaded {
            root: PathBuf::from("/repo"),
        };
        assert_eq!(not_loaded.jsonrpc_code(), Some(JSONRPC_WORKSPACE_EVICTED));
    }

    /// `WorkspaceNotLoaded` and `WorkspaceEvicted` share a JSON-RPC code, so
    /// the `"hint"` key in `error_data` is the only wire-level discriminator.
    #[test]
    fn workspace_not_loaded_is_distinguished_from_evicted_by_hint() {
        let root = PathBuf::from("/repo");
        let evicted = DaemonError::WorkspaceEvicted { root: root.clone() }
            .error_data()
            .expect("WorkspaceEvicted must emit data");
        let not_loaded = DaemonError::WorkspaceNotLoaded { root }
            .error_data()
            .expect("WorkspaceNotLoaded must emit data");

        assert_eq!(evicted["root"], "/repo");
        assert_eq!(not_loaded["root"], "/repo");
        assert!(
            evicted.get("hint").is_none(),
            "WorkspaceEvicted must not carry a hint, got: {evicted}"
        );
        assert!(
            not_loaded["hint"].is_string(),
            "WorkspaceNotLoaded must carry a string hint, got: {not_loaded}"
        );
    }

    /// `Io` and `Config` have no public JSON-RPC code.
    #[test]
    fn jsonrpc_code_is_none_for_internal_variants() {
        let io = DaemonError::Io(std::io::Error::other("boom"));
        assert!(io.jsonrpc_code().is_none());

        let cfg = DaemonError::Config {
            path: PathBuf::from("/etc/sqry.toml"),
            source: anyhow::anyhow!("malformed"),
        };
        assert!(cfg.jsonrpc_code().is_none());
    }

    // -----------------------------------------------------------------
    // Task 8 Phase 8c U5 — Tool-dispatch error variants
    // -----------------------------------------------------------------
    //
    // These tests pin the stable wire contract defined in the design
    // doc §O for `ToolTimeout` / `InvalidArgument` / `Internal`. Any
    // change to the JSON-RPC codes or the `{kind, retryable,
    // retry_after_ms, details}` envelope shape will fail at least one
    // of these tests and force a matching update to the MCP-path
    // wrapper (`daemon_err_to_mcp`) so daemon-path and direct-path
    // MCP responses stay byte-identical.

    #[test]
    fn tool_timeout_has_jsonrpc_code_32000_and_deadline_exceeded_kind() {
        let err = DaemonError::ToolTimeout {
            root: PathBuf::from("/tmp/workspace"),
            secs: 60,
            deadline_ms: 60_000,
        };
        assert_eq!(err.jsonrpc_code(), Some(JSONRPC_TOOL_TIMEOUT));
        assert_eq!(err.jsonrpc_code(), Some(-32000));
        let data = err.error_data().expect("ToolTimeout must emit data");
        assert_eq!(data["kind"], "deadline_exceeded");
        assert_eq!(data["retryable"], true);
        assert_eq!(data["retry_after_ms"], 1000);
        assert_eq!(data["details"]["deadline_ms"], 60_000);
        assert_eq!(data["details"]["root"], "/tmp/workspace");
        // Placeholder for the MCP-path wrapper (Phase 8c U8) to
        // overwrite with the inbound method name.
        assert!(data["details"]["tool"].is_null());
    }

    #[test]
    fn invalid_argument_has_jsonrpc_code_32602_and_validation_error_kind() {
        let err = DaemonError::InvalidArgument {
            reason: "missing path argument".into(),
        };
        assert_eq!(err.jsonrpc_code(), Some(JSONRPC_INVALID_PARAMS));
        assert_eq!(err.jsonrpc_code(), Some(-32602));
        let data = err.error_data().expect("InvalidArgument must emit data");
        assert_eq!(data["kind"], "validation_error");
        assert_eq!(data["retryable"], false);
        assert!(data["retry_after_ms"].is_null());
        assert_eq!(data["details"]["reason"], "missing path argument");
    }

    #[test]
    fn internal_has_jsonrpc_code_32603_and_internal_kind() {
        let err = DaemonError::Internal(anyhow::anyhow!("something blew up"));
        assert_eq!(err.jsonrpc_code(), Some(JSONRPC_INTERNAL_ERROR));
        assert_eq!(err.jsonrpc_code(), Some(-32603));
        let data = err.error_data().expect("Internal must emit data");
        assert_eq!(data["kind"], "internal");
        assert_eq!(data["retryable"], false);
        assert!(data["retry_after_ms"].is_null());
        assert!(data["details"].is_null());
    }

    #[test]
    fn error_data_envelope_shape_is_canonical_for_tool_dispatch_variants() {
        // All 3 new Phase 8c U5 variants must emit EXACTLY the 4
        // canonical top-level keys and no others — this is the
        // contract documented in the design doc §O.3 and is what
        // the MCP-path wrapper relies on to avoid renaming / reshaping
        // fields.
        let expected: std::collections::BTreeSet<String> =
            ["kind", "retryable", "retry_after_ms", "details"]
                .iter()
                .map(|s| (*s).to_string())
                .collect();

        let errs = [
            DaemonError::ToolTimeout {
                root: PathBuf::from("/tmp"),
                secs: 10,
                deadline_ms: 10_000,
            },
            DaemonError::InvalidArgument { reason: "x".into() },
            DaemonError::Internal(anyhow::anyhow!("y")),
        ];
        for err in errs {
            let data = err.error_data().expect("variant must emit data");
            let obj = data
                .as_object()
                .expect("error_data envelope must be a JSON object");
            let keys: std::collections::BTreeSet<String> = obj.keys().cloned().collect();
            assert_eq!(
                keys, expected,
                "error_data envelope for {err:?} must be exactly the 4 canonical keys"
            );
        }
    }

    // -----------------------------------------------------------------
    // Task 9 U1 — DaemonError lifecycle variant tests
    // -----------------------------------------------------------------

    /// `AlreadyRunning` must have no JSON-RPC code (it never reaches the wire)
    /// and must exit with code 75 (`EX_TEMPFAIL`).
    #[test]
    fn already_running_has_no_jsonrpc_code_and_exit_75() {
        let err = DaemonError::AlreadyRunning {
            owner_pid: Some(12345),
            socket: PathBuf::from("/run/user/1000/sqryd.sock"),
            lock: PathBuf::from("/run/user/1000/sqryd.lock"),
        };
        assert!(
            err.jsonrpc_code().is_none(),
            "AlreadyRunning must not carry a JSON-RPC code"
        );
        assert_eq!(
            err.exit_code(),
            75,
            "AlreadyRunning must exit with EX_TEMPFAIL (75)"
        );
        assert!(
            err.error_data().is_none(),
            "AlreadyRunning must not carry IPC error_data"
        );
    }

    /// `AlreadyRunning` with `owner_pid = None` must render `pid=?` in Display.
    #[test]
    fn already_running_owner_pid_none_display_contains_pid_question_mark() {
        let err = DaemonError::AlreadyRunning {
            owner_pid: None,
            socket: PathBuf::from("/tmp/sqryd.sock"),
            lock: PathBuf::from("/tmp/sqryd.lock"),
        };
        assert_eq!(err.exit_code(), 75);
        assert!(err.jsonrpc_code().is_none());
        let msg = err.to_string();
        assert!(
            msg.contains("pid=?"),
            "Display for owner_pid=None must contain 'pid=?', got: {msg}"
        );
    }

    /// `AutoStartTimeout` must have no JSON-RPC code and must exit with code
    /// 69 (`EX_UNAVAILABLE`). The design doc iter-0 m5 explicitly changed this
    /// from 73 (`EX_CANTCREAT`) to 69 (`EX_UNAVAILABLE`) — this test pins that
    /// decision and guards against accidental reversion.
    #[test]
    fn auto_start_timeout_has_no_jsonrpc_code_and_exit_69_not_73() {
        let err = DaemonError::AutoStartTimeout {
            timeout_secs: 10,
            socket: PathBuf::from("/run/user/1000/sqryd.sock"),
        };
        assert!(
            err.jsonrpc_code().is_none(),
            "AutoStartTimeout must not carry a JSON-RPC code"
        );
        assert_eq!(
            err.exit_code(),
            69,
            "AutoStartTimeout must exit with EX_UNAVAILABLE (69), NOT EX_CANTCREAT (73)"
        );
        assert!(
            err.error_data().is_none(),
            "AutoStartTimeout must not carry IPC error_data"
        );
    }

    /// `SignalSetup` must have no JSON-RPC code and must exit with code 70
    /// (`EX_SOFTWARE`).
    #[test]
    fn signal_setup_has_no_jsonrpc_code_and_exit_70() {
        let err = DaemonError::SignalSetup {
            source: std::io::Error::other("SIGTERM handler failed"),
        };
        assert!(
            err.jsonrpc_code().is_none(),
            "SignalSetup must not carry a JSON-RPC code"
        );
        assert_eq!(
            err.exit_code(),
            70,
            "SignalSetup must exit with EX_SOFTWARE (70)"
        );
        assert!(
            err.error_data().is_none(),
            "SignalSetup must not carry IPC error_data"
        );
    }

    /// `Config` must exit with code 78 (`EX_CONFIG`).
    #[test]
    fn config_exits_with_78() {
        let err = DaemonError::Config {
            path: PathBuf::from("/etc/sqry/daemon.toml"),
            source: anyhow::anyhow!("invalid TOML"),
        };
        assert_eq!(err.exit_code(), 78, "Config must exit with EX_CONFIG (78)");
        assert!(err.jsonrpc_code().is_none());
    }

    /// `Io` must exit with code 73 (`EX_CANTCREAT`).
    #[test]
    fn io_error_exits_with_73() {
        let err = DaemonError::Io(std::io::Error::other("socket bind failed"));
        assert_eq!(err.exit_code(), 73, "Io must exit with EX_CANTCREAT (73)");
        assert!(err.jsonrpc_code().is_none());
    }

    /// All IPC-path variants must have a defined exit code of 70 (the
    /// `EX_SOFTWARE` default). They should never reach process exit, but the
    /// method must be exhaustive.
    #[test]
    fn ipc_path_variants_exit_with_70_default() {
        let cases: &[DaemonError] = &[
            DaemonError::WorkspaceBuildFailed {
                root: PathBuf::from("/repo"),
                reason: "build failed".into(),
            },
            DaemonError::WorkspaceStaleExpired {
                root: PathBuf::from("/repo"),
                age_hours: 48,
                cap_hours: 24,
                last_good_at: None,
                last_error: None,
            },
            DaemonError::MemoryBudgetExceeded {
                limit_bytes: 1024 * 1024 * 1024,
                current_bytes: 512 * 1024 * 1024,
                reserved_bytes: 0,
                retained_bytes: 0,
                requested_bytes: 4 * 1024 * 1024 * 1024,
            },
            DaemonError::WorkspaceEvicted {
                root: PathBuf::from("/repo"),
            },
            DaemonError::WorkspaceNotLoaded {
                root: PathBuf::from("/repo"),
            },
            DaemonError::ToolTimeout {
                root: PathBuf::from("/tmp/ws"),
                secs: 60,
                deadline_ms: 60_000,
            },
            DaemonError::InvalidArgument {
                reason: "missing path".into(),
            },
            DaemonError::Internal(anyhow::anyhow!("internal error")),
        ];
        for err in cases {
            assert_eq!(
                err.exit_code(),
                70,
                "IPC-path variant {err:?} must default to EX_SOFTWARE (70)"
            );
        }
    }

    /// `clone_err` must handle all three Task 9 lifecycle variants without
    /// panicking. All three collapse to `WorkspaceBuildFailed` (matching the
    /// pattern for `Config`/`Io`) because they fire before `IpcServer::bind`
    /// and should never reach workspace state storage — but the collapse must
    /// preserve the human-readable message.
    #[test]
    fn clone_err_handles_lifecycle_variants_without_panic() {
        use crate::workspace::manager::clone_err;

        let ar = DaemonError::AlreadyRunning {
            owner_pid: Some(42),
            socket: PathBuf::from("/tmp/sqryd.sock"),
            lock: PathBuf::from("/tmp/sqryd.lock"),
        };
        let cloned = clone_err(&ar);
        assert!(
            cloned.to_string().contains("sqryd.sock"),
            "clone_err for AlreadyRunning must preserve socket path, got: {cloned}"
        );

        // Must not panic with owner_pid=None.
        let ar_none = DaemonError::AlreadyRunning {
            owner_pid: None,
            socket: PathBuf::from("/tmp/sqryd.sock"),
            lock: PathBuf::from("/tmp/sqryd.lock"),
        };
        let _ = clone_err(&ar_none);

        let at = DaemonError::AutoStartTimeout {
            timeout_secs: 15,
            socket: PathBuf::from("/run/user/1000/sqryd.sock"),
        };
        let cloned = clone_err(&at);
        assert!(
            cloned.to_string().contains("15"),
            "clone_err for AutoStartTimeout must preserve timeout_secs, got: {cloned}"
        );

        let ss = DaemonError::SignalSetup {
            source: std::io::Error::other("SIGTERM handler failed"),
        };
        let cloned = clone_err(&ss);
        assert!(
            cloned.to_string().contains("SIGTERM handler failed"),
            "clone_err for SignalSetup must preserve the source message via Display, got: {cloned}"
        );
    }

    #[test]
    fn clone_err_round_trips_tool_dispatch_variants() {
        // `clone_err` lives in `workspace::manager` so it can be used
        // by `classify_for_serve` to reproduce the stored
        // `last_error` on every read path. The helper is
        // `pub(crate)` so we exercise it directly from inside the
        // daemon crate — Phase 8c U5 must keep all new variants
        // round-trippable or `classify_for_serve` will collapse them
        // into the generic `WorkspaceBuildFailed` fallback.
        use crate::workspace::manager::clone_err;

        let tt = DaemonError::ToolTimeout {
            root: PathBuf::from("/tmp/workspace"),
            secs: 60,
            deadline_ms: 60_000,
        };
        let cloned = clone_err(&tt);
        match cloned {
            DaemonError::ToolTimeout {
                root,
                secs,
                deadline_ms,
            } => {
                assert_eq!(root, PathBuf::from("/tmp/workspace"));
                assert_eq!(secs, 60);
                assert_eq!(deadline_ms, 60_000);
            }
            other => panic!("expected ToolTimeout round-trip, got {other:?}"),
        }

        let ia = DaemonError::InvalidArgument {
            reason: "missing path argument".into(),
        };
        let cloned = clone_err(&ia);
        match cloned {
            DaemonError::InvalidArgument { reason } => {
                assert_eq!(reason, "missing path argument");
            }
            other => panic!("expected InvalidArgument round-trip, got {other:?}"),
        }

        let inner = DaemonError::Internal(anyhow::anyhow!("something blew up"));
        let cloned = clone_err(&inner);
        match cloned {
            DaemonError::Internal(err) => {
                // `anyhow::Error` is not `Clone`; `clone_err`
                // re-creates it from the `Display` representation so
                // the user-facing message survives round-trips.
                assert!(
                    err.to_string().contains("something blew up"),
                    "cloned Internal error must preserve the Display text, got: {err}"
                );
            }
            other => panic!("expected Internal round-trip, got {other:?}"),
        }
    }
}