// inferd_daemon/config.rs

//! Daemon CLI configuration.
//!
//! M1 keeps the CLI surface deliberately small: one transport choice
//! (`--tcp` or `--uds`), a lock path, a backend selector, and a queue
//! depth. The operator-flag matrix expands in M4 along with packaging.
6
7use clap::{Parser, ValueEnum};
8use std::path::PathBuf;
9
10/// Backend adapters the daemon can register at startup.
11///
12/// `LlamaCpp` is gated behind the `llamacpp` cargo feature — default
13/// daemon builds only ship the mock adapter (per ADR 0006: lean core,
14/// extensions are separate concerns). `OpenAiCompat` is gated behind
15/// the `openai` cargo feature — pulled in only when the operator
16/// wants the outbound HTTPS adapter (ADR 0006 cloud carve-out).
17#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
18pub enum BackendKind {
19    /// Deterministic test double — used by integration tests and the
20    /// M1 echo daemon.
21    Mock,
22    /// Local llama.cpp backend via FFI (M2). Requires `--model-path`.
23    #[cfg(feature = "llamacpp")]
24    Llamacpp,
25    /// OpenAI-compatible outbound HTTPS adapter (Phase 5A). Reaches
26    /// any provider speaking the `/v1/chat/completions` wire (OpenAI,
27    /// vLLM, LM Studio, LocalAI, OpenRouter, llama.cpp's HTTP server).
28    /// Requires `--openai-base-url` + `--openai-model`. The API key
29    /// is read from `--openai-api-key` or env (`INFERD_OPENAI_API_KEY`
30    /// then `OPENAI_API_KEY`); pass an empty string to skip the
31    /// `Authorization` header for self-hosted endpoints.
32    #[cfg(feature = "openai")]
33    OpenaiCompat,
34    /// AWS Bedrock-runtime `InvokeModelWithResponseStream` adapter
35    /// (Phase 6B-5). v0.2.0 ships only the Anthropic-on-Bedrock body
36    /// shape — Claude models invoked via Bedrock's pinned
37    /// `anthropic_version: "bedrock-2023-05-31"` payload. Requires
38    /// `--bedrock-region` + `--bedrock-model-id`. Auth resolves from
39    /// `--bedrock-bearer-token` / `AWS_BEARER_TOKEN_BEDROCK` first,
40    /// then the standard `AWS_ACCESS_KEY_ID` /
41    /// `AWS_SECRET_ACCESS_KEY` (+ optional `AWS_SESSION_TOKEN`)
42    /// chain.
43    #[cfg(feature = "bedrock")]
44    BedrockInvoke,
45}
46
47/// Top-level CLI for `inferd-daemon`.
48#[derive(Debug, Parser)]
49#[command(name = "inferd-daemon", version, about = "Local inference daemon")]
50pub struct Cli {
51    /// Backend to load at startup.
52    ///
53    /// When omitted: defer to the config file's `backends:` (or legacy
54    /// `model:` block) if one is present; otherwise fall back to the
55    /// in-memory `mock` backend so `--lock + --tcp/--uds/--pipe` alone
56    /// still boots a dev-mode echo daemon.
57    ///
58    /// When explicit: honour the CLI choice. Passing `--backend mock`
59    /// short-circuits config loading (useful for forcing mock in test
60    /// rigs even when a config file is on disk); any other explicit
61    /// kind is built from CLI flags only — config-file `backends:` are
62    /// ignored in that case so operators get exactly what they asked
63    /// for.
64    #[arg(long, value_enum, env = "INFERD_BACKEND")]
65    pub backend: Option<BackendKind>,
66
67    /// Path to the single-instance lock file. The lock is held for the
68    /// lifetime of the daemon process.
69    #[arg(long, env = "INFERD_LOCK")]
70    pub lock: PathBuf,
71
72    /// Loopback TCP bind address. Mutually exclusive with `--uds` and `--pipe`.
73    #[arg(long, env = "INFERD_TCP", conflicts_with_all = ["uds", "pipe"])]
74    pub tcp: Option<String>,
75
76    /// Unix domain socket path. Mutually exclusive with `--tcp` and `--pipe`. Unix only.
77    #[arg(long, env = "INFERD_UDS", conflicts_with_all = ["tcp", "pipe"])]
78    pub uds: Option<PathBuf>,
79
80    /// Windows named pipe path (e.g. `\\.\pipe\inferd-infer`).
81    /// Mutually exclusive with `--tcp` and `--uds`. Windows only.
82    #[arg(long, env = "INFERD_PIPE", conflicts_with_all = ["tcp", "uds"])]
83    pub pipe: Option<String>,
84
85    /// Group name for the UDS (Unix only). Ignored on other transports.
86    #[arg(long, env = "INFERD_GROUP")]
87    pub group: Option<String>,
88
89    /// Active generations served concurrently. v0.1 invariant is 1; values
90    /// above 1 are reserved for v0.2 continuous-batching backends.
91    #[arg(long, default_value_t = 1, env = "INFERD_ACTIVE_PERMITS")]
92    pub active_permits: usize,
93
94    /// Maximum waiting queue depth. Submits beyond this return
95    /// `code: queue_full` immediately.
96    #[arg(long, default_value_t = 10, env = "INFERD_QUEUE_DEPTH")]
97    pub queue_depth: usize,
98
99    /// Seconds to wait for the backend to report ready before failing
100    /// startup.
101    #[arg(long, default_value_t = 30, env = "INFERD_READY_TIMEOUT_SECS")]
102    pub ready_timeout_secs: u64,
103
104    /// Path to the GGUF model file. Required when `--backend llamacpp`.
105    #[arg(long, env = "INFERD_MODEL_PATH")]
106    pub model_path: Option<PathBuf>,
107
108    /// Optional expected SHA-256 of the model file as a hex string
109    /// (64 chars). When present, the daemon verifies the file before
110    /// loading via `subtle::ConstantTimeEq` (THREAT_MODEL F-5).
111    #[arg(long, env = "INFERD_MODEL_SHA256")]
112    pub model_sha256: Option<String>,
113
114    /// Llama.cpp context window in tokens. Default 8192.
115    #[arg(long, default_value_t = 8192, env = "INFERD_N_CTX")]
116    pub n_ctx: u32,
117
118    /// Llama.cpp GPU layer offload count. 0 = CPU-only. GPU support
119    /// requires the `cuda`/`metal`/`vulkan`/`rocm` cargo feature at
120    /// build time.
121    #[arg(long, default_value_t = 0, env = "INFERD_N_GPU_LAYERS")]
122    pub n_gpu_layers: i32,
123
124    /// Base URL of the upstream OpenAI-compat endpoint, no trailing
125    /// slash and no path (the adapter appends `/v1/chat/completions`).
126    /// Required when `--backend openai-compat`. Examples:
127    /// `https://api.openai.com`, `http://localhost:11434`,
128    /// `https://openrouter.ai`.
129    #[arg(long, env = "INFERD_OPENAI_BASE_URL")]
130    pub openai_base_url: Option<String>,
131
132    /// Bearer token for the OpenAI-compat upstream. Sent as
133    /// `Authorization: Bearer <value>`. Pass an empty string to skip
134    /// the header entirely for self-hosted endpoints. Resolves from
135    /// `--openai-api-key`, then `INFERD_OPENAI_API_KEY`, then
136    /// `OPENAI_API_KEY` (the de-facto env name most providers' SDKs
137    /// already use).
138    #[arg(long, env = "INFERD_OPENAI_API_KEY", hide_env_values = true)]
139    pub openai_api_key: Option<String>,
140
141    /// Upstream model identifier echoed in the request `model` field
142    /// — provider-specific (e.g. `gpt-4o-mini`, `llama3.1:8b`,
143    /// `meta-llama/Meta-Llama-3-70B-Instruct`). Required when
144    /// `--backend openai-compat`.
145    #[arg(long, env = "INFERD_OPENAI_MODEL")]
146    pub openai_model: Option<String>,
147
148    /// Total request timeout for OpenAI-compat calls, in seconds.
149    /// Default 300 (5 minutes) — long enough for a slow first-token
150    /// from a cold cloud model, short enough to surface stuck
151    /// requests rather than hang forever.
152    #[arg(long, default_value_t = 300, env = "INFERD_OPENAI_TIMEOUT_SECS")]
153    pub openai_timeout_secs: u64,
154
155    /// AWS region the Bedrock endpoint lives in, e.g. `us-east-1`,
156    /// `eu-central-1`. Required when `--backend bedrock-invoke`.
157    /// Used for both the endpoint host and SigV4 signing scope.
158    #[arg(long, env = "INFERD_BEDROCK_REGION")]
159    pub bedrock_region: Option<String>,
160
161    /// Bedrock model id (URL-encoded by the adapter), e.g.
162    /// `anthropic.claude-3-5-sonnet-20241022-v2:0`. Required when
163    /// `--backend bedrock-invoke`.
164    #[arg(long, env = "INFERD_BEDROCK_MODEL_ID")]
165    pub bedrock_model_id: Option<String>,
166
167    /// Pre-issued Bedrock bearer token (`AWS_BEARER_TOKEN_BEDROCK`
168    /// shape, AWS rolled this out in 2025-06). When set, the adapter
169    /// sends `Authorization: Bearer <value>` and skips SigV4. When
170    /// unset, the adapter falls back to the standard
171    /// `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` (+ optional
172    /// `AWS_SESSION_TOKEN`) chain via SigV4 signing.
173    #[arg(long, env = "AWS_BEARER_TOKEN_BEDROCK", hide_env_values = true)]
174    pub bedrock_bearer_token: Option<String>,
175
176    /// Override the Bedrock endpoint host. Empty/absent → default
177    /// `bedrock-runtime.<region>.amazonaws.com`. Useful for VPC
178    /// endpoints / integration tests.
179    #[arg(long, env = "INFERD_BEDROCK_ENDPOINT")]
180    pub bedrock_endpoint: Option<String>,
181
182    /// Total request timeout for Bedrock calls, in seconds. Default
183    /// 300 (5 minutes).
184    #[arg(long, default_value_t = 300, env = "INFERD_BEDROCK_TIMEOUT_SECS")]
185    pub bedrock_timeout_secs: u64,
186
187    /// Optional pre-shared API key. When set, TCP clients MUST send
188    /// `{"type":"auth","key":"<this value>"}` as their first NDJSON
189    /// frame on the connection or the daemon closes the connection.
190    /// UDS and named-pipe transports ignore this — kernel-attested
191    /// peer credentials (F-7) do the work there.
192    ///
193    /// Comparison is constant-time. THREAT_MODEL F-8.
194    #[arg(long, env = "INFERD_API_KEY", hide_env_values = true)]
195    pub api_key: Option<String>,
196
197    /// Path to the operator JSON config file. Default
198    /// `~/.inferd/config.json`. When present, fetch + auto-pull are
199    /// driven from it; CLI flags (`--model-path`, `--model-sha256`,
200    /// `--n-ctx`, `--n-gpu-layers`) override config-file values when
201    /// both are supplied. When absent, the daemon falls back to
202    /// CLI-flag-only operation (dev mode).
203    #[arg(long, env = "INFERD_CONFIG")]
204    pub config: Option<PathBuf>,
205
206    /// Admin endpoint path. Defaults per-platform to the path
207    /// documented in `docs/protocol-v1.md` §"Admin endpoint" — e.g.
208    /// `/run/inferd/admin.sock` on Linux, `\\.\pipe\inferd-admin` on
209    /// Windows. Override for tests / non-default deployments.
210    #[arg(long, env = "INFERD_ADMIN_ADDR")]
211    pub admin_addr: Option<PathBuf>,
212
213    /// Enable the v2 inference endpoint per ADR 0015. v2 binds on a
214    /// *separate* socket from v1: `infer.v2.sock` on Unix /
215    /// `\\.\pipe\inferd-infer-v2` on Windows. v1 stays on its own
216    /// socket and is unaffected.
217    ///
218    /// Phase 1B: the v2 endpoint accepts and validates v2 requests
219    /// but returns `Error{code:internal, message:"v2 generation not
220    /// implemented"}` because the Backend trait does not yet expose
221    /// `generate_v2`. Use this to integration-test middleware that
222    /// will speak v2 once Phase 2A lands.
223    #[arg(long, env = "INFERD_V2")]
224    pub v2: bool,
225
226    /// Override the default v2 inference endpoint path.
227    /// Mirrors `--uds` / `--pipe` for v2; on Linux/macOS this is a
228    /// UDS path, on Windows a named-pipe path. Has no effect unless
229    /// `--v2` is also set.
230    #[arg(long, env = "INFERD_V2_ADDR")]
231    pub v2_addr: Option<PathBuf>,
232
233    /// Loopback TCP bind address for the v2 endpoint. Mutually
234    /// exclusive with `--v2-addr`. Useful for tests that don't want
235    /// the platform default (UDS / named pipe). Has no effect
236    /// unless `--v2` is also set.
237    #[arg(long, env = "INFERD_V2_TCP", conflicts_with = "v2_addr")]
238    pub v2_tcp: Option<String>,
239
240    /// Enable the embed inference endpoint per ADR 0017. The embed
241    /// endpoint binds on a *separate* socket from v1/v2:
242    /// `infer.embed.sock` on Unix / `\\.\pipe\inferd-infer-embed`
243    /// on Windows. Has no effect unless the active backend's
244    /// `capabilities().embed` is true (capability-driven binding).
245    #[arg(long, env = "INFERD_EMBED")]
246    pub embed: bool,
247
248    /// Override the default embed inference endpoint path.
249    /// Mirrors `--uds` / `--pipe` for embed; on Linux/macOS this is
250    /// a UDS path, on Windows a named-pipe path. Has no effect
251    /// unless `--embed` is also set.
252    #[arg(long, env = "INFERD_EMBED_ADDR")]
253    pub embed_addr: Option<PathBuf>,
254
255    /// Loopback TCP bind address for the embed endpoint. Mutually
256    /// exclusive with `--embed-addr`. Has no effect unless `--embed`
257    /// is also set.
258    #[arg(long, env = "INFERD_EMBED_TCP", conflicts_with = "embed_addr")]
259    pub embed_tcp: Option<String>,
260}
261
262impl Cli {
263    /// Validate that exactly one transport is selected. clap enforces
264    /// mutual exclusion; this checks the at-least-one part.
265    pub fn require_one_transport(&self) -> Result<(), &'static str> {
266        let count = [self.tcp.is_some(), self.uds.is_some(), self.pipe.is_some()]
267            .iter()
268            .filter(|b| **b)
269            .count();
270        match count {
271            1 => Ok(()),
272            0 => Err("must specify one of --tcp, --uds, --pipe"),
273            _ => Err("--tcp, --uds, --pipe are mutually exclusive"),
274        }
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // NOTE(review): every `Cli` option declares an `env = "..."` fallback,
    // so these parses also consult the process environment. The tests
    // assume none of the `INFERD_*` / `AWS_BEARER_TOKEN_BEDROCK`
    // variables are exported where they run.

    /// Ephemeral loopback bind address used wherever a TCP endpoint is needed.
    const LOOPBACK: &str = "127.0.0.1:0";

    /// Builds an argv of program name + mandatory `--lock`, followed by
    /// `extra` verbatim. No transport is selected.
    fn lock_only<'a>(extra: &[&'a str]) -> Vec<&'a str> {
        let mut argv = vec!["inferd-daemon", "--lock", "/tmp/inferd.lock"];
        argv.extend_from_slice(extra);
        argv
    }

    /// Same as [`lock_only`], plus `--tcp 127.0.0.1:0` ahead of `extra` —
    /// the smallest argv that satisfies `require_one_transport`.
    fn over_tcp<'a>(extra: &[&'a str]) -> Vec<&'a str> {
        let mut argv = lock_only(&["--tcp", LOOPBACK]);
        argv.extend_from_slice(extra);
        argv
    }

    #[test]
    fn cli_parses_minimum_required() {
        let cli = Cli::parse_from(over_tcp(&[]));
        assert!(cli.tcp.is_some());
        assert!(cli.uds.is_none());
        assert_eq!(cli.queue_depth, 10);
        assert_eq!(cli.active_permits, 1);
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_no_transport() {
        let cli = Cli::parse_from(lock_only(&[]));
        assert!(cli.require_one_transport().is_err());
    }

    #[test]
    fn cli_rejects_both_transports_via_clap() {
        // clap-level mutual exclusion: this should fail to parse, not
        // require_one_transport's runtime check.
        let result = Cli::try_parse_from(over_tcp(&["--uds", "/tmp/inferd.sock"]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_accepts_pipe_transport() {
        // Spelled out in full: this case uses a Windows-style lock path.
        let cli = Cli::parse_from([
            "inferd-daemon",
            "--lock",
            "C:/tmp/inferd.lock",
            "--pipe",
            r"\\.\pipe\inferd-test",
        ]);
        assert!(cli.pipe.is_some());
        assert!(cli.uds.is_none());
        assert!(cli.tcp.is_none());
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_pipe_with_tcp_via_clap() {
        let result = Cli::try_parse_from(over_tcp(&["--pipe", r"\\.\pipe\inferd-test"]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_command_factory_is_well_formed() {
        // Ensures clap's `#[command]` derives don't conflict; cheap smoke
        // test that catches lots of misconfigurations.
        Cli::command().debug_assert();
    }

    #[test]
    fn cli_accepts_v2_flag() {
        let cli = Cli::parse_from(over_tcp(&["--v2", "--v2-tcp", LOOPBACK]));
        assert!(cli.v2);
        assert!(cli.v2_tcp.is_some());
        assert!(cli.v2_addr.is_none());
    }

    #[test]
    fn cli_rejects_v2_addr_with_v2_tcp() {
        let result = Cli::try_parse_from(over_tcp(&[
            "--v2",
            "--v2-tcp",
            LOOPBACK,
            "--v2-addr",
            "/tmp/inferd-v2.sock",
        ]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_v2_disabled_by_default() {
        let cli = Cli::parse_from(over_tcp(&[]));
        assert!(!cli.v2);
    }

    #[test]
    fn cli_accepts_embed_flag() {
        let cli = Cli::parse_from(over_tcp(&["--embed", "--embed-tcp", LOOPBACK]));
        assert!(cli.embed);
        assert!(cli.embed_tcp.is_some());
        assert!(cli.embed_addr.is_none());
    }

    #[test]
    fn cli_rejects_embed_addr_with_embed_tcp() {
        let result = Cli::try_parse_from(over_tcp(&[
            "--embed",
            "--embed-tcp",
            LOOPBACK,
            "--embed-addr",
            "/tmp/inferd-embed.sock",
        ]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_embed_disabled_by_default() {
        let cli = Cli::parse_from(over_tcp(&[]));
        assert!(!cli.embed);
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_accepts_openai_compat_backend() {
        let cli = Cli::parse_from(over_tcp(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "http://localhost:11434",
            "--openai-model",
            "llama3.1:8b",
            "--openai-api-key",
            "sk-x",
            "--openai-timeout-secs",
            "30",
        ]));
        assert_eq!(cli.backend, Some(BackendKind::OpenaiCompat));
        assert_eq!(
            cli.openai_base_url.as_deref(),
            Some("http://localhost:11434")
        );
        assert_eq!(cli.openai_model.as_deref(), Some("llama3.1:8b"));
        assert_eq!(cli.openai_api_key.as_deref(), Some("sk-x"));
        assert_eq!(cli.openai_timeout_secs, 30);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_accepts_bedrock_invoke_backend() {
        let cli = Cli::parse_from(over_tcp(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-sonnet-20241022-v2:0",
            "--bedrock-bearer-token",
            "abc123",
            "--bedrock-timeout-secs",
            "60",
        ]));
        assert_eq!(cli.backend, Some(BackendKind::BedrockInvoke));
        assert_eq!(cli.bedrock_region.as_deref(), Some("us-east-1"));
        assert_eq!(
            cli.bedrock_model_id.as_deref(),
            Some("anthropic.claude-3-5-sonnet-20241022-v2:0")
        );
        assert_eq!(cli.bedrock_bearer_token.as_deref(), Some("abc123"));
        assert_eq!(cli.bedrock_timeout_secs, 60);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_bedrock_timeout_defaults_to_300() {
        let cli = Cli::parse_from(over_tcp(&[
            "--backend",
            "bedrock-invoke",
            "--bedrock-region",
            "us-east-1",
            "--bedrock-model-id",
            "anthropic.claude-3-5-haiku-20241022-v1:0",
        ]));
        assert_eq!(cli.bedrock_timeout_secs, 300);
        assert!(cli.bedrock_bearer_token.is_none());
        assert!(cli.bedrock_endpoint.is_none());
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_openai_timeout_defaults_to_300() {
        let cli = Cli::parse_from(over_tcp(&[
            "--backend",
            "openai-compat",
            "--openai-base-url",
            "https://api.openai.com",
            "--openai-model",
            "gpt-4o-mini",
        ]));
        assert_eq!(cli.openai_timeout_secs, 300);
        assert!(cli.openai_api_key.is_none());
    }
}