inferd_daemon/config.rs
1//! Daemon CLI configuration.
2//!
3//! M1 keeps the CLI surface deliberately small: one transport choice
4//! (`--tcp` or `--uds`), a lock path, a backend selector, and a queue
5//! depth. The operator-flag matrix expands in M4 along with packaging.
6
7use clap::{Parser, ValueEnum};
8use std::path::PathBuf;
9
10/// Backend adapters the daemon can register at startup.
11///
12/// `LlamaCpp` is gated behind the `llamacpp` cargo feature — default
13/// daemon builds only ship the mock adapter (per ADR 0006: lean core,
14/// extensions are separate concerns). `OpenAiCompat` is gated behind
15/// the `openai` cargo feature — pulled in only when the operator
16/// wants the outbound HTTPS adapter (ADR 0006 cloud carve-out).
17#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
18pub enum BackendKind {
19 /// Deterministic test double — used by integration tests and the
20 /// M1 echo daemon.
21 Mock,
22 /// Local llama.cpp backend via FFI (M2). Requires `--model-path`.
23 #[cfg(feature = "llamacpp")]
24 Llamacpp,
25 /// OpenAI-compatible outbound HTTPS adapter (Phase 5A). Reaches
26 /// any provider speaking the `/v1/chat/completions` wire (OpenAI,
27 /// vLLM, LM Studio, LocalAI, OpenRouter, llama.cpp's HTTP server).
28 /// Requires `--openai-base-url` + `--openai-model`. The API key
29 /// is read from `--openai-api-key` or env (`INFERD_OPENAI_API_KEY`
30 /// then `OPENAI_API_KEY`); pass an empty string to skip the
31 /// `Authorization` header for self-hosted endpoints.
32 #[cfg(feature = "openai")]
33 OpenaiCompat,
34 /// AWS Bedrock-runtime `InvokeModelWithResponseStream` adapter
35 /// (Phase 6B-5). v0.2.0 ships only the Anthropic-on-Bedrock body
36 /// shape — Claude models invoked via Bedrock's pinned
37 /// `anthropic_version: "bedrock-2023-05-31"` payload. Requires
38 /// `--bedrock-region` + `--bedrock-model-id`. Auth resolves from
39 /// `--bedrock-bearer-token` / `AWS_BEARER_TOKEN_BEDROCK` first,
40 /// then the standard `AWS_ACCESS_KEY_ID` /
41 /// `AWS_SECRET_ACCESS_KEY` (+ optional `AWS_SESSION_TOKEN`)
42 /// chain.
43 #[cfg(feature = "bedrock")]
44 BedrockInvoke,
45}
46
47/// Top-level CLI for `inferd-daemon`.
48#[derive(Debug, Parser)]
49#[command(name = "inferd-daemon", version, about = "Local inference daemon")]
50pub struct Cli {
51 /// Backend to load at startup.
52 ///
53 /// When omitted: defer to the config file's `backends:` (or legacy
54 /// `model:` block) if one is present; otherwise fall back to the
55 /// in-memory `mock` backend so `--lock + --tcp/--uds/--pipe` alone
56 /// still boots a dev-mode echo daemon.
57 ///
58 /// When explicit: honour the CLI choice. Passing `--backend mock`
59 /// short-circuits config loading (useful for forcing mock in test
60 /// rigs even when a config file is on disk); any other explicit
61 /// kind is built from CLI flags only — config-file `backends:` are
62 /// ignored in that case so operators get exactly what they asked
63 /// for.
64 #[arg(long, value_enum, env = "INFERD_BACKEND")]
65 pub backend: Option<BackendKind>,
66
67 /// Path to the single-instance lock file. The lock is held for the
68 /// lifetime of the daemon process.
69 #[arg(long, env = "INFERD_LOCK")]
70 pub lock: PathBuf,
71
72 /// Loopback TCP bind address. Mutually exclusive with `--uds` and `--pipe`.
73 #[arg(long, env = "INFERD_TCP", conflicts_with_all = ["uds", "pipe"])]
74 pub tcp: Option<String>,
75
76 /// Unix domain socket path. Mutually exclusive with `--tcp` and `--pipe`. Unix only.
77 #[arg(long, env = "INFERD_UDS", conflicts_with_all = ["tcp", "pipe"])]
78 pub uds: Option<PathBuf>,
79
80 /// Windows named pipe path (e.g. `\\.\pipe\inferd-infer`).
81 /// Mutually exclusive with `--tcp` and `--uds`. Windows only.
82 #[arg(long, env = "INFERD_PIPE", conflicts_with_all = ["tcp", "uds"])]
83 pub pipe: Option<String>,
84
85 /// Group name for the UDS (Unix only). Ignored on other transports.
86 #[arg(long, env = "INFERD_GROUP")]
87 pub group: Option<String>,
88
89 /// Active generations served concurrently. v0.1 invariant is 1; values
90 /// above 1 are reserved for v0.2 continuous-batching backends.
91 #[arg(long, default_value_t = 1, env = "INFERD_ACTIVE_PERMITS")]
92 pub active_permits: usize,
93
94 /// Maximum waiting queue depth. Submits beyond this return
95 /// `code: queue_full` immediately.
96 #[arg(long, default_value_t = 10, env = "INFERD_QUEUE_DEPTH")]
97 pub queue_depth: usize,
98
99 /// Seconds to wait for the backend to report ready before failing
100 /// startup.
101 #[arg(long, default_value_t = 30, env = "INFERD_READY_TIMEOUT_SECS")]
102 pub ready_timeout_secs: u64,
103
104 /// Path to the GGUF model file. Required when `--backend llamacpp`.
105 #[arg(long, env = "INFERD_MODEL_PATH")]
106 pub model_path: Option<PathBuf>,
107
108 /// Optional expected SHA-256 of the model file as a hex string
109 /// (64 chars). When present, the daemon verifies the file before
110 /// loading via `subtle::ConstantTimeEq` (THREAT_MODEL F-5).
111 #[arg(long, env = "INFERD_MODEL_SHA256")]
112 pub model_sha256: Option<String>,
113
114 /// Llama.cpp context window in tokens. Default 8192.
115 #[arg(long, default_value_t = 8192, env = "INFERD_N_CTX")]
116 pub n_ctx: u32,
117
118 /// Llama.cpp GPU layer offload count. 0 = CPU-only. GPU support
119 /// requires the `cuda`/`metal`/`vulkan`/`rocm` cargo feature at
120 /// build time.
121 #[arg(long, default_value_t = 0, env = "INFERD_N_GPU_LAYERS")]
122 pub n_gpu_layers: i32,
123
124 /// Base URL of the upstream OpenAI-compat endpoint, no trailing
125 /// slash and no path (the adapter appends `/v1/chat/completions`).
126 /// Required when `--backend openai-compat`. Examples:
127 /// `https://api.openai.com`, `http://localhost:11434`,
128 /// `https://openrouter.ai`.
129 #[arg(long, env = "INFERD_OPENAI_BASE_URL")]
130 pub openai_base_url: Option<String>,
131
132 /// Bearer token for the OpenAI-compat upstream. Sent as
133 /// `Authorization: Bearer <value>`. Pass an empty string to skip
134 /// the header entirely for self-hosted endpoints. Resolves from
135 /// `--openai-api-key`, then `INFERD_OPENAI_API_KEY`, then
136 /// `OPENAI_API_KEY` (the de-facto env name most providers' SDKs
137 /// already use).
138 #[arg(long, env = "INFERD_OPENAI_API_KEY", hide_env_values = true)]
139 pub openai_api_key: Option<String>,
140
141 /// Upstream model identifier echoed in the request `model` field
142 /// — provider-specific (e.g. `gpt-4o-mini`, `llama3.1:8b`,
143 /// `meta-llama/Meta-Llama-3-70B-Instruct`). Required when
144 /// `--backend openai-compat`.
145 #[arg(long, env = "INFERD_OPENAI_MODEL")]
146 pub openai_model: Option<String>,
147
148 /// Total request timeout for OpenAI-compat calls, in seconds.
149 /// Default 300 (5 minutes) — long enough for a slow first-token
150 /// from a cold cloud model, short enough to surface stuck
151 /// requests rather than hang forever.
152 #[arg(long, default_value_t = 300, env = "INFERD_OPENAI_TIMEOUT_SECS")]
153 pub openai_timeout_secs: u64,
154
155 /// AWS region the Bedrock endpoint lives in, e.g. `us-east-1`,
156 /// `eu-central-1`. Required when `--backend bedrock-invoke`.
157 /// Used for both the endpoint host and SigV4 signing scope.
158 #[arg(long, env = "INFERD_BEDROCK_REGION")]
159 pub bedrock_region: Option<String>,
160
161 /// Bedrock model id (URL-encoded by the adapter), e.g.
162 /// `anthropic.claude-3-5-sonnet-20241022-v2:0`. Required when
163 /// `--backend bedrock-invoke`.
164 #[arg(long, env = "INFERD_BEDROCK_MODEL_ID")]
165 pub bedrock_model_id: Option<String>,
166
167 /// Pre-issued Bedrock bearer token (`AWS_BEARER_TOKEN_BEDROCK`
168 /// shape, AWS rolled this out in 2025-06). When set, the adapter
169 /// sends `Authorization: Bearer <value>` and skips SigV4. When
170 /// unset, the adapter falls back to the standard
171 /// `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` (+ optional
172 /// `AWS_SESSION_TOKEN`) chain via SigV4 signing.
173 #[arg(long, env = "AWS_BEARER_TOKEN_BEDROCK", hide_env_values = true)]
174 pub bedrock_bearer_token: Option<String>,
175
176 /// Override the Bedrock endpoint host. Empty/absent → default
177 /// `bedrock-runtime.<region>.amazonaws.com`. Useful for VPC
178 /// endpoints / integration tests.
179 #[arg(long, env = "INFERD_BEDROCK_ENDPOINT")]
180 pub bedrock_endpoint: Option<String>,
181
182 /// Total request timeout for Bedrock calls, in seconds. Default
183 /// 300 (5 minutes).
184 #[arg(long, default_value_t = 300, env = "INFERD_BEDROCK_TIMEOUT_SECS")]
185 pub bedrock_timeout_secs: u64,
186
187 /// Optional pre-shared API key. When set, TCP clients MUST send
188 /// `{"type":"auth","key":"<this value>"}` as their first NDJSON
189 /// frame on the connection or the daemon closes the connection.
190 /// UDS and named-pipe transports ignore this — kernel-attested
191 /// peer credentials (F-7) do the work there.
192 ///
193 /// Comparison is constant-time. THREAT_MODEL F-8.
194 #[arg(long, env = "INFERD_API_KEY", hide_env_values = true)]
195 pub api_key: Option<String>,
196
197 /// Path to the operator JSON config file. Default
198 /// `~/.inferd/config.json`. When present, fetch + auto-pull are
199 /// driven from it; CLI flags (`--model-path`, `--model-sha256`,
200 /// `--n-ctx`, `--n-gpu-layers`) override config-file values when
201 /// both are supplied. When absent, the daemon falls back to
202 /// CLI-flag-only operation (dev mode).
203 #[arg(long, env = "INFERD_CONFIG")]
204 pub config: Option<PathBuf>,
205
206 /// Admin endpoint path. Defaults per-platform to the path
207 /// documented in `docs/protocol-v1.md` §"Admin endpoint" — e.g.
208 /// `/run/inferd/admin.sock` on Linux, `\\.\pipe\inferd-admin` on
209 /// Windows. Override for tests / non-default deployments.
210 #[arg(long, env = "INFERD_ADMIN_ADDR")]
211 pub admin_addr: Option<PathBuf>,
212
213 /// Enable the v2 inference endpoint per ADR 0015. v2 binds on a
214 /// *separate* socket from v1: `infer.v2.sock` on Unix /
215 /// `\\.\pipe\inferd-infer-v2` on Windows. v1 stays on its own
216 /// socket and is unaffected.
217 ///
218 /// Phase 1B: the v2 endpoint accepts and validates v2 requests
219 /// but returns `Error{code:internal, message:"v2 generation not
220 /// implemented"}` because the Backend trait does not yet expose
221 /// `generate_v2`. Use this to integration-test middleware that
222 /// will speak v2 once Phase 2A lands.
223 #[arg(long, env = "INFERD_V2")]
224 pub v2: bool,
225
226 /// Override the default v2 inference endpoint path.
227 /// Mirrors `--uds` / `--pipe` for v2; on Linux/macOS this is a
228 /// UDS path, on Windows a named-pipe path. Has no effect unless
229 /// `--v2` is also set.
230 #[arg(long, env = "INFERD_V2_ADDR")]
231 pub v2_addr: Option<PathBuf>,
232
233 /// Loopback TCP bind address for the v2 endpoint. Mutually
234 /// exclusive with `--v2-addr`. Useful for tests that don't want
235 /// the platform default (UDS / named pipe). Has no effect
236 /// unless `--v2` is also set.
237 #[arg(long, env = "INFERD_V2_TCP", conflicts_with = "v2_addr")]
238 pub v2_tcp: Option<String>,
239
240 /// Enable the embed inference endpoint per ADR 0017. The embed
241 /// endpoint binds on a *separate* socket from v1/v2:
242 /// `infer.embed.sock` on Unix / `\\.\pipe\inferd-infer-embed`
243 /// on Windows. Has no effect unless the active backend's
244 /// `capabilities().embed` is true (capability-driven binding).
245 #[arg(long, env = "INFERD_EMBED")]
246 pub embed: bool,
247
248 /// Override the default embed inference endpoint path.
249 /// Mirrors `--uds` / `--pipe` for embed; on Linux/macOS this is
250 /// a UDS path, on Windows a named-pipe path. Has no effect
251 /// unless `--embed` is also set.
252 #[arg(long, env = "INFERD_EMBED_ADDR")]
253 pub embed_addr: Option<PathBuf>,
254
255 /// Loopback TCP bind address for the embed endpoint. Mutually
256 /// exclusive with `--embed-addr`. Has no effect unless `--embed`
257 /// is also set.
258 #[arg(long, env = "INFERD_EMBED_TCP", conflicts_with = "embed_addr")]
259 pub embed_tcp: Option<String>,
260}
261
262impl Cli {
263 /// Validate that exactly one transport is selected. clap enforces
264 /// mutual exclusion; this checks the at-least-one part.
265 pub fn require_one_transport(&self) -> Result<(), &'static str> {
266 let count = [self.tcp.is_some(), self.uds.is_some(), self.pipe.is_some()]
267 .iter()
268 .filter(|b| **b)
269 .count();
270 match count {
271 1 => Ok(()),
272 0 => Err("must specify one of --tcp, --uds, --pipe"),
273 _ => Err("--tcp, --uds, --pipe are mutually exclusive"),
274 }
275 }
276}
277
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Argument prefix shared by most tests: binary name, a lock path, and
    /// a loopback TCP transport.
    const TCP_BASE: [&str; 5] = [
        "inferd-daemon",
        "--lock",
        "/tmp/inferd.lock",
        "--tcp",
        "127.0.0.1:0",
    ];

    /// Returns `TCP_BASE` followed by `extra`.
    fn tcp_args(extra: &[&'static str]) -> Vec<&'static str> {
        let mut args = TCP_BASE.to_vec();
        args.extend_from_slice(extra);
        args
    }

    /// Parses `args`, panicking with clap's own message on failure.
    ///
    /// `Cli::parse_from` is deliberately avoided: on a parse error it exits
    /// the process, which would take the whole test binary down instead of
    /// reporting a single failed test.
    fn parse(args: &[&str]) -> Cli {
        match Cli::try_parse_from(args.iter().copied()) {
            Ok(cli) => cli,
            Err(err) => panic!("arguments {:?} should parse: {}", args, err),
        }
    }

    // NOTE: every option on `Cli` also reads an environment variable, so the
    // `is_none()` / default-value assertions below assume the matching
    // `INFERD_*` (and `AWS_BEARER_TOKEN_BEDROCK`) variables are not set in
    // the environment the tests run in.

    /// The smallest valid invocation parses and picks up the defaults.
    #[test]
    fn cli_parses_minimum_required() {
        let cli = parse(&tcp_args(&[]));
        assert!(cli.tcp.is_some());
        assert!(cli.uds.is_none());
        assert_eq!(cli.queue_depth, 10);
        assert_eq!(cli.active_permits, 1);
        cli.require_one_transport().unwrap();
    }

    /// With no transport flag, parsing succeeds but the runtime check fails.
    #[test]
    fn cli_rejects_no_transport() {
        let cli = parse(&["inferd-daemon", "--lock", "/tmp/inferd.lock"]);
        assert!(cli.require_one_transport().is_err());
    }

    #[test]
    fn cli_rejects_both_transports_via_clap() {
        // clap-level mutual exclusion: this should fail to parse, not
        // require_one_transport's runtime check.
        let result = Cli::try_parse_from(tcp_args(&["--uds", "/tmp/inferd.sock"]));
        assert!(result.is_err());
    }

    /// `--pipe` on its own is a complete transport selection.
    #[test]
    fn cli_accepts_pipe_transport() {
        let cli = parse(&[
            "inferd-daemon",
            "--lock",
            "C:/tmp/inferd.lock",
            "--pipe",
            r"\\.\pipe\inferd-test",
        ]);
        assert!(cli.pipe.is_some());
        assert!(cli.uds.is_none());
        assert!(cli.tcp.is_none());
        cli.require_one_transport().unwrap();
    }

    /// `--pipe` together with `--tcp` is rejected by clap itself.
    #[test]
    fn cli_rejects_pipe_with_tcp_via_clap() {
        let result = Cli::try_parse_from(tcp_args(&["--pipe", r"\\.\pipe\inferd-test"]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_command_factory_is_well_formed() {
        // Ensures clap's `#[command]` derives don't conflict; cheap smoke
        // test that catches lots of misconfigurations.
        Cli::command().debug_assert();
    }

    /// `--v2` plus `--v2-tcp` parses and leaves `--v2-addr` unset.
    #[test]
    fn cli_accepts_v2_flag() {
        let cli = parse(&tcp_args(&["--v2", "--v2-tcp", "127.0.0.1:0"]));
        assert!(cli.v2);
        assert!(cli.v2_tcp.is_some());
        assert!(cli.v2_addr.is_none());
    }

    /// `--v2-addr` and `--v2-tcp` conflict at the clap level.
    #[test]
    fn cli_rejects_v2_addr_with_v2_tcp() {
        let result = Cli::try_parse_from(tcp_args(&[
            "--v2",
            "--v2-tcp",
            "127.0.0.1:0",
            "--v2-addr",
            "/tmp/inferd-v2.sock",
        ]));
        assert!(result.is_err());
    }

    /// The v2 endpoint is opt-in.
    #[test]
    fn cli_v2_disabled_by_default() {
        let cli = parse(&tcp_args(&[]));
        assert!(!cli.v2);
    }

    /// `--embed` plus `--embed-tcp` parses and leaves `--embed-addr` unset.
    #[test]
    fn cli_accepts_embed_flag() {
        let cli = parse(&tcp_args(&["--embed", "--embed-tcp", "127.0.0.1:0"]));
        assert!(cli.embed);
        assert!(cli.embed_tcp.is_some());
        assert!(cli.embed_addr.is_none());
    }

    /// `--embed-addr` and `--embed-tcp` conflict at the clap level.
    #[test]
    fn cli_rejects_embed_addr_with_embed_tcp() {
        let result = Cli::try_parse_from(tcp_args(&[
            "--embed",
            "--embed-tcp",
            "127.0.0.1:0",
            "--embed-addr",
            "/tmp/inferd-embed.sock",
        ]));
        assert!(result.is_err());
    }

    /// The embed endpoint is opt-in.
    #[test]
    fn cli_embed_disabled_by_default() {
        let cli = parse(&tcp_args(&[]));
        assert!(!cli.embed);
    }

    /// All OpenAI-compat flags round-trip into their fields.
    #[cfg(feature = "openai")]
    #[test]
    fn cli_accepts_openai_compat_backend() {
        let cli = parse(&tcp_args(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "http://localhost:11434",
            "--openai-model",
            "llama3.1:8b",
            "--openai-api-key",
            "sk-x",
            "--openai-timeout-secs",
            "30",
        ]));
        assert_eq!(cli.backend, Some(BackendKind::OpenaiCompat));
        assert_eq!(
            cli.openai_base_url.as_deref(),
            Some("http://localhost:11434")
        );
        assert_eq!(cli.openai_model.as_deref(), Some("llama3.1:8b"));
        assert_eq!(cli.openai_api_key.as_deref(), Some("sk-x"));
        assert_eq!(cli.openai_timeout_secs, 30);
    }

    /// All Bedrock flags round-trip into their fields.
    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_accepts_bedrock_invoke_backend() {
        let cli = parse(&tcp_args(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-sonnet-20241022-v2:0",
            "--bedrock-bearer-token",
            "abc123",
            "--bedrock-timeout-secs",
            "60",
        ]));
        assert_eq!(cli.backend, Some(BackendKind::BedrockInvoke));
        assert_eq!(cli.bedrock_region.as_deref(), Some("us-east-1"));
        assert_eq!(
            cli.bedrock_model_id.as_deref(),
            Some("anthropic.claude-3-5-sonnet-20241022-v2:0")
        );
        assert_eq!(cli.bedrock_bearer_token.as_deref(), Some("abc123"));
        assert_eq!(cli.bedrock_timeout_secs, 60);
    }

    /// Bedrock timeout falls back to 300 s; optional Bedrock flags stay
    /// unset (given `AWS_BEARER_TOKEN_BEDROCK` / `INFERD_BEDROCK_ENDPOINT`
    /// are absent from the environment).
    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_bedrock_timeout_defaults_to_300() {
        let cli = parse(&tcp_args(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-haiku-20241022-v1:0",
        ]));
        assert_eq!(cli.bedrock_timeout_secs, 300);
        assert!(cli.bedrock_bearer_token.is_none());
        assert!(cli.bedrock_endpoint.is_none());
    }

    /// OpenAI timeout falls back to 300 s; the API key stays unset (given
    /// `INFERD_OPENAI_API_KEY` is absent from the environment).
    #[cfg(feature = "openai")]
    #[test]
    fn cli_openai_timeout_defaults_to_300() {
        let cli = parse(&tcp_args(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "https://api.openai.com",
            "--openai-model",
            "gpt-4o-mini",
        ]));
        assert_eq!(cli.openai_timeout_secs, 300);
        assert!(cli.openai_api_key.is_none());
    }
}