inferd_daemon/status.rs
1//! Daemon-wide status events.
2//!
3//! Published by bring-up code (fetch, lifecycle, etc.) through a
4//! `tokio::sync::broadcast` channel. The admin socket subscribes
5//! and serialises each event into the `Status` frame shape
6//! documented in `docs/protocol-v1.md` §"Admin socket".
7//!
8//! Frame shape on the wire (NDJSON):
9//!
10//! ```json
11//! {"id":"admin","type":"status","status":"loading_model",
12//! "phase":"download",
13//! "detail":{"downloaded_bytes":33554432,"total_bytes":5126304928,
14//! "source_url":"https://huggingface.co/..."}}
15//! ```
16//!
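//! These types are the in-process representation; serialisation is
//! done by `admin.rs` via `serde_json`. Serialising a `StatusEvent`
//! on its own yields a flat object: `status`, `phase` and the phase's
//! fields are siblings, and there are no `id`, `type` or `detail`
//! keys. Any envelope or nesting shown in the example above therefore
//! has to be produced by `admin.rs` (not visible from this file —
//! verify against `docs/protocol-v1.md`).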
19
20use serde::Serialize;
21use std::path::PathBuf;
22
23/// One daemon-wide status event. Published through a broadcast channel
24/// to admin-socket subscribers.
25#[derive(Debug, Clone, Serialize)]
26#[serde(tag = "status", rename_all = "snake_case")]
27pub enum StatusEvent {
28 /// Daemon process is up; admin socket is bound. No backend work yet.
29 Starting,
30 /// Model is being prepared. Carries a `phase` describing the
31 /// sub-stage and (for download) progress numbers.
32 LoadingModel {
33 /// Sub-stage of model bring-up. Flattened on the wire so the
34 /// frame reads
35 /// `{"status":"loading_model","phase":"download","downloaded_bytes":...}`
36 /// rather than nesting the phase under another object.
37 #[serde(flatten)]
38 phase: LoadPhase,
39 },
40 /// Backend capability snapshot — emitted once after the backend is
41 /// constructed and before `Ready`. Lets admin subscribers (e.g.
42 /// `inferd doctor`, IDE plugins) discover hardware-acceleration
43 /// posture, multimodal support, and tools / thinking support
44 /// without trial-and-error. Backwards-additive on the admin wire
45 /// (older subscribers ignore unknown `status` values).
46 Capabilities {
47 /// Backend identifier (`"llamacpp"`, `"mock"`, …).
48 backend: String,
49 /// `true` if the backend implements `generate_v2`.
50 v2: bool,
51 /// `true` if the backend can ingest image attachments.
52 vision: bool,
53 /// `true` if the backend can ingest audio attachments.
54 audio: bool,
55 /// `true` if the backend natively supports tool-use.
56 tools: bool,
57 /// `true` if the backend separates `<|think|>` reasoning
58 /// trace from user-visible output.
59 thinking: bool,
60 /// `true` if the backend implements `embed` (per ADR 0017).
61 /// Subscribers use this to decide whether to expose embedding
62 /// surfaces in their UIs.
63 embed: bool,
64 /// Compile-time GGML accelerator: `"cpu"` / `"cuda"` / `"metal"`
65 /// / `"vulkan"` / `"rocm"`.
66 accelerator: String,
67 /// Layers offloaded to the accelerator at runtime. 0 means
68 /// CPU-only regardless of `accelerator`.
69 gpu_layers: u32,
70 },
71 /// Inference socket is bound and accepting connections.
72 Ready,
73 /// Previously-`ready` daemon is reloading. Inference socket has
74 /// closed; new connections refused. Carries the same `phase` enum
75 /// as `LoadingModel` so subscribers can show progress for the
76 /// reload.
77 Restarting {
78 /// Sub-stage of the restart.
79 #[serde(flatten)]
80 phase: LoadPhase,
81 },
82 /// Daemon received a shutdown signal. Existing requests finish;
83 /// new requests rejected. Daemon will exit shortly after this
84 /// frame.
85 Draining,
86}
87
88/// Sub-stage of a model load (or reload).
89#[derive(Debug, Clone, Serialize)]
90#[serde(tag = "phase", rename_all = "snake_case")]
91pub enum LoadPhase {
92 /// Resolving model path on disk and checking SHA-256.
93 CheckingLocal {
94 /// Absolute path being checked.
95 path: PathBuf,
96 },
97 /// Downloading the GGUF. Progress events emit periodically.
98 Download {
99 /// Bytes received so far.
100 downloaded_bytes: u64,
101 /// Total bytes if known (Content-Length or config), else `None`.
102 total_bytes: Option<u64>,
103 /// URL being fetched (for diagnostics only; never echoed to
104 /// inference clients — admin socket only).
105 source_url: String,
106 },
107 /// Streaming SHA-256 over downloaded bytes for final verification.
108 Verify {
109 /// Path being verified.
110 path: PathBuf,
111 },
112 /// Downloaded bytes failed SHA verification; file moved aside.
113 /// Daemon will retry or exit per `auto_pull` config.
114 Quarantine {
115 /// Original path of the bad file.
116 path: PathBuf,
117 /// What the config said the SHA should be.
118 expected_sha256: String,
119 /// What we actually computed.
120 actual_sha256: String,
121 /// Where the bad bytes were moved.
122 quarantine_path: PathBuf,
123 },
124 /// Loading the file into the engine via FFI.
125 Mmap {
126 /// Path being mmapped.
127 path: PathBuf,
128 },
129 /// Allocating the KV cache.
130 KvCache {
131 /// Configured context window in tokens.
132 n_ctx: u32,
133 },
134}
135
#[cfg(test)]
mod tests {
    //! Serialisation tests pinning the JSON shape of every
    //! `StatusEvent` variant and every `LoadPhase` sub-stage.

    use super::*;

    /// Serialises `event` into a JSON tree so individual keys can be
    /// asserted on.
    fn to_json(event: StatusEvent) -> serde_json::Value {
        serde_json::to_value(event).expect("StatusEvent serialises to JSON")
    }

    #[test]
    fn starting_serialises_as_simple_status() {
        let s = to_json(StatusEvent::Starting);
        assert_eq!(s["status"], "starting");
    }

    #[test]
    fn ready_serialises_as_simple_status() {
        let s = to_json(StatusEvent::Ready);
        assert_eq!(s["status"], "ready");
    }

    #[test]
    fn draining_serialises_as_simple_status() {
        let s = to_json(StatusEvent::Draining);
        assert_eq!(s["status"], "draining");
    }

    #[test]
    fn unit_variants_carry_only_the_status_tag() {
        for ev in [
            StatusEvent::Starting,
            StatusEvent::Ready,
            StatusEvent::Draining,
        ] {
            let s = to_json(ev);
            // Exactly one key: the `status` tag itself.
            assert_eq!(s.as_object().map(|fields| fields.len()), Some(1));
        }
    }

    #[test]
    fn loading_model_download_carries_progress() {
        let ev = StatusEvent::LoadingModel {
            phase: LoadPhase::Download {
                downloaded_bytes: 33_554_432,
                total_bytes: Some(5_126_304_928),
                source_url: "https://example.com/x.gguf".into(),
            },
        };
        let s = to_json(ev);
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "download");
        assert_eq!(s["downloaded_bytes"], 33_554_432);
        assert_eq!(s["total_bytes"], 5_126_304_928u64);
        assert_eq!(s["source_url"], "https://example.com/x.gguf");
        // The phase is flattened: progress fields are siblings of
        // `status`, not nested under another object.
        assert!(s.get("detail").is_none());
    }

    #[test]
    fn download_with_unknown_total_serialises_null() {
        let ev = StatusEvent::LoadingModel {
            phase: LoadPhase::Download {
                downloaded_bytes: 0,
                total_bytes: None,
                source_url: "https://example.com/x.gguf".into(),
            },
        };
        let s = to_json(ev);
        assert_eq!(s["downloaded_bytes"], 0);
        // `s["total_bytes"]` would also be null for a missing key, so
        // go through `get` to prove the key is present and null.
        assert_eq!(s.get("total_bytes"), Some(&serde_json::Value::Null));
    }

    #[test]
    fn loading_model_checking_local_carries_path() {
        let ev = StatusEvent::LoadingModel {
            phase: LoadPhase::CheckingLocal {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        };
        let s = to_json(ev);
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "checking_local");
        // serde writes a UTF-8 path out as the same string it was
        // built from, so this holds on every platform.
        assert_eq!(s["path"], "/tmp/x.gguf");
    }

    #[test]
    fn verify_and_mmap_carry_path() {
        let verify = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Verify {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        });
        assert_eq!(verify["phase"], "verify");
        assert_eq!(verify["path"], "/tmp/x.gguf");

        let mmap = to_json(StatusEvent::LoadingModel {
            phase: LoadPhase::Mmap {
                path: PathBuf::from("/tmp/x.gguf"),
            },
        });
        assert_eq!(mmap["phase"], "mmap");
        assert_eq!(mmap["path"], "/tmp/x.gguf");
    }

    #[test]
    fn quarantine_carries_both_hashes() {
        let ev = StatusEvent::LoadingModel {
            phase: LoadPhase::Quarantine {
                path: PathBuf::from("/tmp/x.gguf"),
                expected_sha256: "aaaa".into(),
                actual_sha256: "bbbb".into(),
                quarantine_path: PathBuf::from("/tmp/x.gguf.quarantine.20260518T000000Z"),
            },
        };
        let s = to_json(ev);
        assert_eq!(s["status"], "loading_model");
        assert_eq!(s["phase"], "quarantine");
        assert_eq!(s["path"], "/tmp/x.gguf");
        assert_eq!(s["expected_sha256"], "aaaa");
        assert_eq!(s["actual_sha256"], "bbbb");
        assert_eq!(
            s["quarantine_path"],
            "/tmp/x.gguf.quarantine.20260518T000000Z"
        );
    }

    #[test]
    fn restarting_flattens_phase_like_loading_model() {
        let ev = StatusEvent::Restarting {
            phase: LoadPhase::KvCache { n_ctx: 8192 },
        };
        let s = to_json(ev);
        assert_eq!(s["status"], "restarting");
        assert_eq!(s["phase"], "kv_cache");
        assert_eq!(s["n_ctx"], 8192);
    }

    #[test]
    fn capabilities_carries_every_field() {
        let ev = StatusEvent::Capabilities {
            backend: "mock".into(),
            v2: true,
            vision: false,
            audio: false,
            tools: true,
            thinking: false,
            embed: true,
            accelerator: "cpu".into(),
            gpu_layers: 0,
        };
        let s = to_json(ev);
        assert_eq!(s["status"], "capabilities");
        assert_eq!(s["backend"], "mock");
        assert_eq!(s["v2"], true);
        assert_eq!(s["vision"], false);
        assert_eq!(s["audio"], false);
        assert_eq!(s["tools"], true);
        assert_eq!(s["thinking"], false);
        assert_eq!(s["embed"], true);
        assert_eq!(s["accelerator"], "cpu");
        assert_eq!(s["gpu_layers"], 0);
        // Nine fields plus the `status` tag.
        assert_eq!(s.as_object().map(|fields| fields.len()), Some(10));
    }
}
198}