inferd_daemon/config.rs
//! Daemon CLI configuration.
//!
//! M1 keeps the CLI surface deliberately small: one transport choice
//! (`--tcp`, `--uds`, or `--pipe`), a lock path, a backend selector, and
//! a queue depth. The operator-flag matrix expands in M4 along with
//! packaging.
6
7use clap::{Parser, ValueEnum};
8use std::path::PathBuf;
9
/// Backend adapters the daemon can register at startup.
///
/// `Llamacpp` is gated behind the `llamacpp` cargo feature — default
/// daemon builds only ship the mock adapter (per ADR 0006: lean core,
/// extensions are separate concerns). `OpenaiCompat` is gated behind
/// the `openai` cargo feature — pulled in only when the operator
/// wants the outbound HTTPS adapter (ADR 0006 cloud carve-out).
/// `BedrockInvoke` is likewise gated behind the `bedrock` cargo
/// feature.
///
/// On the command line the variants are spelled in kebab-case, e.g.
/// `--backend llamacpp`, `--backend openai-compat`,
/// `--backend bedrock-invoke`.
//
// NOTE: clap's `ValueEnum` derive surfaces the `///` comment on each
// variant as the help text of that possible value, so the variant docs
// below are user-facing strings — reword them deliberately.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum BackendKind {
    /// Deterministic test double — used by integration tests and the
    /// M1 echo daemon.
    // Also the `default_value_t` of `Cli::backend`.
    Mock,
    /// Local llama.cpp backend via FFI (M2). Requires `--model-path`.
    #[cfg(feature = "llamacpp")]
    Llamacpp,
    /// OpenAI-compatible outbound HTTPS adapter (Phase 5A). Reaches
    /// any provider speaking the `/v1/chat/completions` wire (OpenAI,
    /// vLLM, LM Studio, LocalAI, OpenRouter, llama.cpp's HTTP server).
    /// Requires `--openai-base-url` + `--openai-model`. The API key
    /// is read from `--openai-api-key` or env (`INFERD_OPENAI_API_KEY`
    /// then `OPENAI_API_KEY`); pass an empty string to skip the
    /// `Authorization` header for self-hosted endpoints.
    #[cfg(feature = "openai")]
    OpenaiCompat,
    /// AWS Bedrock-runtime `InvokeModelWithResponseStream` adapter
    /// (Phase 6B-5). v0.2.0 ships only the Anthropic-on-Bedrock body
    /// shape — Claude models invoked via Bedrock's pinned
    /// `anthropic_version: "bedrock-2023-05-31"` payload. Requires
    /// `--bedrock-region` + `--bedrock-model-id`. Auth resolves from
    /// `--bedrock-bearer-token` / `AWS_BEARER_TOKEN_BEDROCK` first,
    /// then the standard `AWS_ACCESS_KEY_ID` /
    /// `AWS_SECRET_ACCESS_KEY` (+ optional `AWS_SESSION_TOKEN`)
    /// chain.
    #[cfg(feature = "bedrock")]
    BedrockInvoke,
}
46
// NOTE: clap's derive uses the `///` comments on the fields below as each
// flag's `--help` text, so they are user-facing strings — reword them
// deliberately. Maintainer-only remarks in this struct are therefore
// written as plain `//` comments.
//
// Every field also declares an `env = "…"` fallback, so a value may come
// from the process environment when the flag is absent from argv.
/// Top-level CLI for `inferd-daemon`.
#[derive(Debug, Parser)]
#[command(name = "inferd-daemon", version, about = "Local inference daemon")]
pub struct Cli {
    /// Backend to load at startup.
    #[arg(long, value_enum, default_value_t = BackendKind::Mock, env = "INFERD_BACKEND")]
    pub backend: BackendKind,

    /// Path to the single-instance lock file. The lock is held for the
    /// lifetime of the daemon process.
    // Non-optional and without a default: the only value clap itself
    // insists on (via `--lock` or `INFERD_LOCK`).
    #[arg(long, env = "INFERD_LOCK")]
    pub lock: PathBuf,

    /// Loopback TCP bind address. Mutually exclusive with `--uds` and `--pipe`.
    // Kept as a raw `String`; it is not parsed or checked for being a
    // loopback address at this layer.
    #[arg(long, env = "INFERD_TCP", conflicts_with_all = ["uds", "pipe"])]
    pub tcp: Option<String>,

    /// Unix domain socket path. Mutually exclusive with `--tcp` and `--pipe`. Unix only.
    // Not `cfg`-gated: the flag parses on every platform, so the
    // "Unix only" restriction is not enforced here.
    #[arg(long, env = "INFERD_UDS", conflicts_with_all = ["tcp", "pipe"])]
    pub uds: Option<PathBuf>,

    /// Windows named pipe path (e.g. `\\.\pipe\inferd-infer`).
    /// Mutually exclusive with `--tcp` and `--uds`. Windows only.
    // Not `cfg`-gated either; see the note on `uds`.
    #[arg(long, env = "INFERD_PIPE", conflicts_with_all = ["tcp", "uds"])]
    pub pipe: Option<String>,

    /// Group name for the UDS (Unix only). Ignored on other transports.
    #[arg(long, env = "INFERD_GROUP")]
    pub group: Option<String>,

    /// Active generations served concurrently. v0.1 invariant is 1; values
    /// above 1 are reserved for v0.2 continuous-batching backends.
    // Parsed as a bare `usize`: 0 and values above 1 are accepted by the
    // parser; the invariant described above is not checked here.
    #[arg(long, default_value_t = 1, env = "INFERD_ACTIVE_PERMITS")]
    pub active_permits: usize,

    /// Maximum waiting queue depth. Submits beyond this return
    /// `code: queue_full` immediately.
    #[arg(long, default_value_t = 10, env = "INFERD_QUEUE_DEPTH")]
    pub queue_depth: usize,

    /// Seconds to wait for the backend to report ready before failing
    /// startup.
    #[arg(long, default_value_t = 30, env = "INFERD_READY_TIMEOUT_SECS")]
    pub ready_timeout_secs: u64,

    /// Path to the GGUF model file. Required when `--backend llamacpp`.
    // Optional at parse time; the "required when" rule is not expressed
    // in this attribute.
    #[arg(long, env = "INFERD_MODEL_PATH")]
    pub model_path: Option<PathBuf>,

    /// Optional expected SHA-256 of the model file as a hex string
    /// (64 chars). When present, the daemon verifies the file before
    /// loading via `subtle::ConstantTimeEq` (THREAT_MODEL F-5).
    // Stored as given: no length or hex check is attached to the flag.
    #[arg(long, env = "INFERD_MODEL_SHA256")]
    pub model_sha256: Option<String>,

    /// Llama.cpp context window in tokens. Default 8192.
    #[arg(long, default_value_t = 8192, env = "INFERD_N_CTX")]
    pub n_ctx: u32,

    /// Llama.cpp GPU layer offload count. 0 = CPU-only. GPU support
    /// requires the `cuda`/`metal`/`vulkan`/`rocm` cargo feature at
    /// build time.
    #[arg(long, default_value_t = 0, env = "INFERD_N_GPU_LAYERS")]
    pub n_gpu_layers: i32,

    /// Base URL of the upstream OpenAI-compat endpoint, no trailing
    /// slash and no path (the adapter appends `/v1/chat/completions`).
    /// Required when `--backend openai-compat`. Examples:
    /// `https://api.openai.com`, `http://localhost:11434`,
    /// `https://openrouter.ai`.
    #[arg(long, env = "INFERD_OPENAI_BASE_URL")]
    pub openai_base_url: Option<String>,

    /// Bearer token for the OpenAI-compat upstream. Sent as
    /// `Authorization: Bearer <value>`. Pass an empty string to skip
    /// the header entirely for self-hosted endpoints. Resolves from
    /// `--openai-api-key`, then `INFERD_OPENAI_API_KEY`, then
    /// `OPENAI_API_KEY` (the de-facto env name most providers' SDKs
    /// already use).
    // Only `INFERD_OPENAI_API_KEY` is wired as the clap env fallback.
    // NOTE(review): the `OPENAI_API_KEY` fallback is presumably resolved
    // by whoever consumes this field — confirm against the caller.
    #[arg(long, env = "INFERD_OPENAI_API_KEY", hide_env_values = true)]
    pub openai_api_key: Option<String>,

    /// Upstream model identifier echoed in the request `model` field
    /// — provider-specific (e.g. `gpt-4o-mini`, `llama3.1:8b`,
    /// `meta-llama/Meta-Llama-3-70B-Instruct`). Required when
    /// `--backend openai-compat`.
    #[arg(long, env = "INFERD_OPENAI_MODEL")]
    pub openai_model: Option<String>,

    /// Total request timeout for OpenAI-compat calls, in seconds.
    /// Default 300 (5 minutes) — long enough for a slow first-token
    /// from a cold cloud model, short enough to surface stuck
    /// requests rather than hang forever.
    #[arg(long, default_value_t = 300, env = "INFERD_OPENAI_TIMEOUT_SECS")]
    pub openai_timeout_secs: u64,

    /// AWS region the Bedrock endpoint lives in, e.g. `us-east-1`,
    /// `eu-central-1`. Required when `--backend bedrock-invoke`.
    /// Used for both the endpoint host and SigV4 signing scope.
    #[arg(long, env = "INFERD_BEDROCK_REGION")]
    pub bedrock_region: Option<String>,

    /// Bedrock model id (URL-encoded by the adapter), e.g.
    /// `anthropic.claude-3-5-sonnet-20241022-v2:0`. Required when
    /// `--backend bedrock-invoke`.
    #[arg(long, env = "INFERD_BEDROCK_MODEL_ID")]
    pub bedrock_model_id: Option<String>,

    /// Pre-issued Bedrock bearer token (`AWS_BEARER_TOKEN_BEDROCK`
    /// shape, AWS rolled this out in 2025-06). When set, the adapter
    /// sends `Authorization: Bearer <value>` and skips SigV4. When
    /// unset, the adapter falls back to the standard
    /// `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` (+ optional
    /// `AWS_SESSION_TOKEN`) chain via SigV4 signing.
    // Unlike the other flags, the env fallback carries no `INFERD_`
    // prefix: it reads `AWS_BEARER_TOKEN_BEDROCK` directly.
    #[arg(long, env = "AWS_BEARER_TOKEN_BEDROCK", hide_env_values = true)]
    pub bedrock_bearer_token: Option<String>,

    /// Override the Bedrock endpoint host. Empty/absent → default
    /// `bedrock-runtime.<region>.amazonaws.com`. Useful for VPC
    /// endpoints / integration tests.
    #[arg(long, env = "INFERD_BEDROCK_ENDPOINT")]
    pub bedrock_endpoint: Option<String>,

    /// Total request timeout for Bedrock calls, in seconds. Default
    /// 300 (5 minutes).
    #[arg(long, default_value_t = 300, env = "INFERD_BEDROCK_TIMEOUT_SECS")]
    pub bedrock_timeout_secs: u64,

    /// Optional pre-shared API key. When set, TCP clients MUST send
    /// `{"type":"auth","key":"<this value>"}` as their first NDJSON
    /// frame on the connection or the daemon closes the connection.
    /// UDS and named-pipe transports ignore this — kernel-attested
    /// peer credentials (F-7) do the work there.
    ///
    /// Comparison is constant-time. THREAT_MODEL F-8.
    #[arg(long, env = "INFERD_API_KEY", hide_env_values = true)]
    pub api_key: Option<String>,

    /// Path to the operator JSON config file. Default
    /// `~/.inferd/config.json`. When present, fetch + auto-pull are
    /// driven from it; CLI flags (`--model-path`, `--model-sha256`,
    /// `--n-ctx`, `--n-gpu-layers`) override config-file values when
    /// both are supplied. When absent, the daemon falls back to
    /// CLI-flag-only operation (dev mode).
    // No `default_value` is declared, so an absent flag parses as `None`.
    // NOTE(review): the `~/.inferd/config.json` default is presumably
    // applied by the caller — confirm.
    #[arg(long, env = "INFERD_CONFIG")]
    pub config: Option<PathBuf>,

    /// Admin endpoint path. Defaults per-platform to the path
    /// documented in `docs/protocol-v1.md` §"Admin endpoint" — e.g.
    /// `/run/inferd/admin.sock` on Linux, `\\.\pipe\inferd-admin` on
    /// Windows. Override for tests / non-default deployments.
    // As with `config`, the per-platform default is not set here; an
    // absent flag parses as `None`.
    #[arg(long, env = "INFERD_ADMIN_ADDR")]
    pub admin_addr: Option<PathBuf>,

    /// Enable the v2 inference endpoint per ADR 0015. v2 binds on a
    /// *separate* socket from v1: `infer.v2.sock` on Unix /
    /// `\\.\pipe\inferd-infer-v2` on Windows. v1 stays on its own
    /// socket and is unaffected.
    ///
    /// Phase 1B: the v2 endpoint accepts and validates v2 requests
    /// but returns `Error{code:internal, message:"v2 generation not
    /// implemented"}` because the Backend trait does not yet expose
    /// `generate_v2`. Use this to integration-test middleware that
    /// will speak v2 once Phase 2A lands.
    #[arg(long, env = "INFERD_V2")]
    pub v2: bool,

    /// Override the default v2 inference endpoint path.
    /// Mirrors `--uds` / `--pipe` for v2; on Linux/macOS this is a
    /// UDS path, on Windows a named-pipe path. Has no effect unless
    /// `--v2` is also set.
    // The dependency on `--v2` is documented only; no `requires`
    // relation is declared, so the flag parses on its own.
    #[arg(long, env = "INFERD_V2_ADDR")]
    pub v2_addr: Option<PathBuf>,

    /// Loopback TCP bind address for the v2 endpoint. Mutually
    /// exclusive with `--v2-addr`. Useful for tests that don't want
    /// the platform default (UDS / named pipe). Has no effect
    /// unless `--v2` is also set.
    #[arg(long, env = "INFERD_V2_TCP", conflicts_with = "v2_addr")]
    pub v2_tcp: Option<String>,

    /// Enable the embed inference endpoint per ADR 0017. The embed
    /// endpoint binds on a *separate* socket from v1/v2:
    /// `infer.embed.sock` on Unix / `\\.\pipe\inferd-infer-embed`
    /// on Windows. Has no effect unless the active backend's
    /// `capabilities().embed` is true (capability-driven binding).
    #[arg(long, env = "INFERD_EMBED")]
    pub embed: bool,

    /// Override the default embed inference endpoint path.
    /// Mirrors `--uds` / `--pipe` for embed; on Linux/macOS this is
    /// a UDS path, on Windows a named-pipe path. Has no effect
    /// unless `--embed` is also set.
    // As with `v2_addr`, the dependency on `--embed` is not enforced by
    // the parser.
    #[arg(long, env = "INFERD_EMBED_ADDR")]
    pub embed_addr: Option<PathBuf>,

    /// Loopback TCP bind address for the embed endpoint. Mutually
    /// exclusive with `--embed-addr`. Has no effect unless `--embed`
    /// is also set.
    #[arg(long, env = "INFERD_EMBED_TCP", conflicts_with = "embed_addr")]
    pub embed_tcp: Option<String>,
}
249
250impl Cli {
251 /// Validate that exactly one transport is selected. clap enforces
252 /// mutual exclusion; this checks the at-least-one part.
253 pub fn require_one_transport(&self) -> Result<(), &'static str> {
254 let count = [self.tcp.is_some(), self.uds.is_some(), self.pipe.is_some()]
255 .iter()
256 .filter(|b| **b)
257 .count();
258 match count {
259 1 => Ok(()),
260 0 => Err("must specify one of --tcp, --uds, --pipe"),
261 _ => Err("--tcp, --uds, --pipe are mutually exclusive"),
262 }
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // NOTE(review): every flag on `Cli` declares an `env = "…"` fallback,
    // so these parses also read the ambient process environment. A
    // developer shell exporting e.g. `AWS_BEARER_TOKEN_BEDROCK` or
    // `INFERD_UDS` can change the outcome of the "defaults" tests below.

    /// Smallest argv that parses and selects a transport: program name,
    /// lock path, and a loopback TCP bind on an ephemeral port.
    const TCP_BASE: [&str; 5] = [
        "inferd-daemon",
        "--lock",
        "/tmp/inferd.lock",
        "--tcp",
        "127.0.0.1:0",
    ];

    /// Build an argv consisting of `TCP_BASE` followed by `extra`.
    fn with_tcp_base<'a>(extra: &[&'a str]) -> Vec<&'a str> {
        let mut argv: Vec<&'a str> = TCP_BASE.to_vec();
        argv.extend_from_slice(extra);
        argv
    }

    #[test]
    fn cli_parses_minimum_required() {
        let cli = Cli::parse_from(TCP_BASE);
        assert!(cli.tcp.is_some());
        assert!(cli.uds.is_none());
        assert_eq!(cli.queue_depth, 10);
        assert_eq!(cli.active_permits, 1);
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_no_transport() {
        // Parsing succeeds (only `--lock` is mandatory for clap); the
        // runtime check is what reports the missing transport.
        let cli = Cli::parse_from(["inferd-daemon", "--lock", "/tmp/inferd.lock"]);
        assert!(cli.require_one_transport().is_err());
    }

    #[test]
    fn cli_rejects_both_transports_via_clap() {
        // Mutual exclusion is enforced at the clap level, so this must
        // already fail during parsing rather than in
        // `require_one_transport`.
        let argv = with_tcp_base(&["--uds", "/tmp/inferd.sock"]);
        assert!(Cli::try_parse_from(argv).is_err());
    }

    #[test]
    fn cli_accepts_pipe_transport() {
        let cli = Cli::parse_from([
            "inferd-daemon",
            "--lock",
            "C:/tmp/inferd.lock",
            "--pipe",
            r"\\.\pipe\inferd-test",
        ]);
        assert!(cli.pipe.is_some());
        assert!(cli.uds.is_none());
        assert!(cli.tcp.is_none());
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_pipe_with_tcp_via_clap() {
        let argv = with_tcp_base(&["--pipe", r"\\.\pipe\inferd-test"]);
        assert!(Cli::try_parse_from(argv).is_err());
    }

    #[test]
    fn cli_command_factory_is_well_formed() {
        // Cheap smoke test: clap's own debug assertions catch a wide
        // range of derive misconfigurations.
        Cli::command().debug_assert();
    }

    #[test]
    fn cli_accepts_v2_flag() {
        let cli = Cli::parse_from(with_tcp_base(&["--v2", "--v2-tcp", "127.0.0.1:0"]));
        assert!(cli.v2);
        assert!(cli.v2_tcp.is_some());
        assert!(cli.v2_addr.is_none());
    }

    #[test]
    fn cli_rejects_v2_addr_with_v2_tcp() {
        let argv = with_tcp_base(&[
            "--v2",
            "--v2-tcp",
            "127.0.0.1:0",
            "--v2-addr",
            "/tmp/inferd-v2.sock",
        ]);
        assert!(Cli::try_parse_from(argv).is_err());
    }

    #[test]
    fn cli_v2_disabled_by_default() {
        let cli = Cli::parse_from(TCP_BASE);
        assert!(!cli.v2);
    }

    #[test]
    fn cli_accepts_embed_flag() {
        let cli = Cli::parse_from(with_tcp_base(&["--embed", "--embed-tcp", "127.0.0.1:0"]));
        assert!(cli.embed);
        assert!(cli.embed_tcp.is_some());
        assert!(cli.embed_addr.is_none());
    }

    #[test]
    fn cli_rejects_embed_addr_with_embed_tcp() {
        let argv = with_tcp_base(&[
            "--embed",
            "--embed-tcp",
            "127.0.0.1:0",
            "--embed-addr",
            "/tmp/inferd-embed.sock",
        ]);
        assert!(Cli::try_parse_from(argv).is_err());
    }

    #[test]
    fn cli_embed_disabled_by_default() {
        let cli = Cli::parse_from(TCP_BASE);
        assert!(!cli.embed);
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_accepts_openai_compat_backend() {
        let cli = Cli::parse_from(with_tcp_base(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "http://localhost:11434",
            "--openai-model",
            "llama3.1:8b",
            "--openai-api-key",
            "sk-x",
            "--openai-timeout-secs",
            "30",
        ]));
        assert_eq!(cli.backend, BackendKind::OpenaiCompat);
        assert_eq!(
            cli.openai_base_url.as_deref(),
            Some("http://localhost:11434")
        );
        assert_eq!(cli.openai_model.as_deref(), Some("llama3.1:8b"));
        assert_eq!(cli.openai_api_key.as_deref(), Some("sk-x"));
        assert_eq!(cli.openai_timeout_secs, 30);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_accepts_bedrock_invoke_backend() {
        let cli = Cli::parse_from(with_tcp_base(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-sonnet-20241022-v2:0",
            "--bedrock-bearer-token",
            "abc123",
            "--bedrock-timeout-secs",
            "60",
        ]));
        assert_eq!(cli.backend, BackendKind::BedrockInvoke);
        assert_eq!(cli.bedrock_region.as_deref(), Some("us-east-1"));
        assert_eq!(
            cli.bedrock_model_id.as_deref(),
            Some("anthropic.claude-3-5-sonnet-20241022-v2:0")
        );
        assert_eq!(cli.bedrock_bearer_token.as_deref(), Some("abc123"));
        assert_eq!(cli.bedrock_timeout_secs, 60);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_bedrock_timeout_defaults_to_300() {
        let cli = Cli::parse_from(with_tcp_base(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-haiku-20241022-v1:0",
        ]));
        assert_eq!(cli.bedrock_timeout_secs, 300);
        assert!(cli.bedrock_bearer_token.is_none());
        assert!(cli.bedrock_endpoint.is_none());
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_openai_timeout_defaults_to_300() {
        let cli = Cli::parse_from(with_tcp_base(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "https://api.openai.com",
            "--openai-model",
            "gpt-4o-mini",
        ]));
        assert_eq!(cli.openai_timeout_secs, 300);
        assert!(cli.openai_api_key.is_none());
    }
}