Skip to main content

inferd_daemon/
status.rs

1//! Daemon-wide status events.
2//!
3//! Published by bring-up code (fetch, lifecycle, etc.) through a
4//! `tokio::sync::broadcast` channel. The admin socket subscribes
5//! and serialises each event into the `Status` frame shape
6//! documented in `docs/protocol-v1.md` §"Admin socket".
7//!
8//! Frame shape on the wire (NDJSON):
9//!
10//! ```json
11//! {"id":"admin","type":"status","status":"loading_model",
12//!  "phase":"download",
13//!  "detail":{"downloaded_bytes":33554432,"total_bytes":5126304928,
14//!            "source_url":"https://huggingface.co/..."}}
15//! ```
16//!
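//! These types are the in-process representation; serialisation is
//! done by `admin.rs` via `serde_json` into the wire shape above.
//!
//! NOTE(review): the `Serialize` derives in this module emit only
//! `status`, `phase` and the phase's own fields, all at the top level
//! of a single object (the unit tests at the bottom of this file index
//! `downloaded_bytes` directly on the serialised value). The `id` /
//! `type` keys and the nested `detail` object shown in the example are
//! not produced here, so they would have to be added by `admin.rs` —
//! TODO confirm against `docs/protocol-v1.md`.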
19
20use serde::Serialize;
21use std::path::PathBuf;
22
23/// One daemon-wide status event. Published through a broadcast channel
24/// to admin-socket subscribers.
25#[derive(Debug, Clone, Serialize)]
26#[serde(tag = "status", rename_all = "snake_case")]
27pub enum StatusEvent {
28    /// Daemon process is up; admin socket is bound. No backend work yet.
29    Starting,
30    /// Model is being prepared. Carries a `phase` describing the
31    /// sub-stage and (for download) progress numbers.
32    LoadingModel {
33        /// Sub-stage of model bring-up. Flattened on the wire so the
34        /// frame reads
35        /// `{"status":"loading_model","phase":"download","downloaded_bytes":...}`
36        /// rather than nesting the phase under another object.
37        #[serde(flatten)]
38        phase: LoadPhase,
39    },
40    /// Backend capability snapshot — emitted once after the backend is
41    /// constructed and before `Ready`. Lets admin subscribers (e.g.
42    /// `inferd doctor`, IDE plugins) discover hardware-acceleration
43    /// posture, multimodal support, and tools / thinking support
44    /// without trial-and-error. Backwards-additive on the admin wire
45    /// (older subscribers ignore unknown `status` values).
46    Capabilities {
47        /// Backend identifier (`"llamacpp"`, `"mock"`, …).
48        backend: String,
49        /// `true` if the backend implements `generate_v2`.
50        v2: bool,
51        /// `true` if the backend can ingest image attachments.
52        vision: bool,
53        /// `true` if the backend can ingest audio attachments.
54        audio: bool,
55        /// `true` if the backend natively supports tool-use.
56        tools: bool,
57        /// `true` if the backend separates `<|think|>` reasoning
58        /// trace from user-visible output.
59        thinking: bool,
60        /// `true` if the backend implements `embed` (per ADR 0017).
61        /// Subscribers use this to decide whether to expose embedding
62        /// surfaces in their UIs.
63        embed: bool,
64        /// Compile-time GGML accelerator: `"cpu"` / `"cuda"` / `"metal"`
65        /// / `"vulkan"` / `"rocm"`.
66        accelerator: String,
67        /// Layers offloaded to the accelerator at runtime. 0 means
68        /// CPU-only regardless of `accelerator`.
69        gpu_layers: u32,
70    },
71    /// Inference socket is bound and accepting connections.
72    Ready,
73    /// Previously-`ready` daemon is reloading. Inference socket has
74    /// closed; new connections refused. Carries the same `phase` enum
75    /// as `LoadingModel` so subscribers can show progress for the
76    /// reload.
77    Restarting {
78        /// Sub-stage of the restart.
79        #[serde(flatten)]
80        phase: LoadPhase,
81    },
82    /// Daemon received a shutdown signal. Existing requests finish;
83    /// new requests rejected. Daemon will exit shortly after this
84    /// frame.
85    Draining,
86}
87
88/// Sub-stage of a model load (or reload).
89#[derive(Debug, Clone, Serialize)]
90#[serde(tag = "phase", rename_all = "snake_case")]
91pub enum LoadPhase {
92    /// Resolving model path on disk and checking SHA-256.
93    CheckingLocal {
94        /// Absolute path being checked.
95        path: PathBuf,
96    },
97    /// Downloading the GGUF. Progress events emit periodically.
98    Download {
99        /// Bytes received so far.
100        downloaded_bytes: u64,
101        /// Total bytes if known (Content-Length or config), else `None`.
102        total_bytes: Option<u64>,
103        /// URL being fetched (for diagnostics only; never echoed to
104        /// inference clients — admin socket only).
105        source_url: String,
106    },
107    /// Streaming SHA-256 over downloaded bytes for final verification.
108    Verify {
109        /// Path being verified.
110        path: PathBuf,
111    },
112    /// Downloaded bytes failed SHA verification; file moved aside.
113    /// Daemon will retry or exit per `auto_pull` config.
114    Quarantine {
115        /// Original path of the bad file.
116        path: PathBuf,
117        /// What the config said the SHA should be.
118        expected_sha256: String,
119        /// What we actually computed.
120        actual_sha256: String,
121        /// Where the bad bytes were moved.
122        quarantine_path: PathBuf,
123    },
124    /// Loading the file into the engine via FFI.
125    Mmap {
126        /// Path being mmapped.
127        path: PathBuf,
128    },
129    /// Allocating the KV cache.
130    KvCache {
131        /// Configured context window in tokens.
132        n_ctx: u32,
133    },
134}
135
#[cfg(test)]
mod tests {
    //! Serialisation tests for [`StatusEvent`] and [`LoadPhase`].
    //!
    //! Every `StatusEvent` variant and every `LoadPhase` variant is
    //! serialised at least once, so an accidental rename or a dropped
    //! `#[serde(flatten)]` shows up as a test failure rather than as a
    //! silent change to what admin subscribers receive.

    use super::*;
    use serde_json::{json, Value};

    /// Serialises an event to a `serde_json::Value` for inspection.
    fn to_json(ev: StatusEvent) -> Value {
        serde_json::to_value(ev).expect("status events with UTF-8 paths serialise to JSON")
    }

    #[test]
    fn starting_serialises_as_simple_status() {
        // Whole-object equality also proves no stray keys are emitted.
        assert_eq!(to_json(StatusEvent::Starting), json!({ "status": "starting" }));
    }

    #[test]
    fn ready_serialises_as_simple_status() {
        assert_eq!(to_json(StatusEvent::Ready), json!({ "status": "ready" }));
    }

    #[test]
    fn draining_serialises_as_simple_status() {
        assert_eq!(to_json(StatusEvent::Draining), json!({ "status": "draining" }));
    }

    #[test]
    fn loading_model_download_carries_progress() {
        let s = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Download {
                downloaded_bytes: 33_554_432,
                total_bytes: Some(5_126_304_928),
                source_url: "https://example.com/x.gguf".into(),
            },
        });
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "download");
        assert_eq!(s["downloaded_bytes"], 33_554_432);
        assert_eq!(s["total_bytes"], 5_126_304_928u64);
        assert_eq!(s["source_url"], "https://example.com/x.gguf");
    }

    #[test]
    fn download_with_unknown_total_serialises_null() {
        let s = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Download {
                downloaded_bytes: 0,
                total_bytes: None,
                source_url: "https://example.com/x.gguf".into(),
            },
        });
        // `get` (not indexing) so a missing key is distinguishable from
        // an explicit null: the key must be present and null.
        assert_eq!(s.get("total_bytes"), Some(&Value::Null));
        assert_eq!(s["downloaded_bytes"], 0);
    }

    #[test]
    fn loading_model_checking_local_carries_path() {
        let s = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::CheckingLocal {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        });
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "checking_local");
        // `PathBuf::from(&str)` stores the string unchanged and serde
        // writes it back out as that same string, so the exact value
        // can be asserted on every platform.
        assert_eq!(s["path"], "/tmp/x.gguf");
    }

    #[test]
    fn verify_and_mmap_carry_path() {
        let verify = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Verify {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        });
        assert_eq!(
            verify,
            json!({ "status": "loading_model", "phase": "verify", "path": "/tmp/x.gguf" })
        );

        let mmap = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Mmap {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        });
        assert_eq!(
            mmap,
            json!({ "status": "loading_model", "phase": "mmap", "path": "/tmp/x.gguf" })
        );
    }

    #[test]
    fn quarantine_carries_both_hashes() {
        let s = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Quarantine {
                path: PathBuf::from("/tmp/x.gguf"),
                expected_sha256: "aaaa".into(),
                actual_sha256: "bbbb".into(),
                quarantine_path: PathBuf::from("/tmp/x.gguf.quarantine.20260518T000000Z"),
            },
        });
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "quarantine");
        assert_eq!(s["path"], "/tmp/x.gguf");
        assert_eq!(s["expected_sha256"], "aaaa");
        assert_eq!(s["actual_sha256"], "bbbb");
        assert_eq!(s["quarantine_path"], "/tmp/x.gguf.quarantine.20260518T000000Z");
    }

    #[test]
    fn restarting_flattens_phase_like_loading_model() {
        let s = to_json(StatusEvent::Restarting {
            phase: LoadPhase::KvCache { n_ctx: 8192 },
        });
        assert_eq!(
            s,
            json!({ "status": "restarting", "phase": "kv_cache", "n_ctx": 8192 })
        );
    }

    #[test]
    fn capabilities_serialises_every_field() {
        let s = to_json(StatusEvent::Capabilities {
            backend: "mock".into(),
            v2: true,
            vision: false,
            audio: false,
            tools: true,
            thinking: false,
            embed: true,
            accelerator: "cpu".into(),
            gpu_layers: 0,
        });
        assert_eq!(
            s,
            json!({
                "status": "capabilities",
                "backend": "mock",
                "v2": true,
                "vision": false,
                "audio": false,
                "tools": true,
                "thinking": false,
                "embed": true,
                "accelerator": "cpu",
                "gpu_layers": 0
            })
        );
    }
}
198}