// inferd_daemon/config.rs

//! Daemon CLI configuration.
//!
//! M1 keeps the CLI surface deliberately small: one transport choice
//! (`--tcp`, `--uds`, or `--pipe`), a lock path, a backend selector, and
//! a queue depth. The operator-flag matrix expands in M4 along with
//! packaging.

use clap::{Parser, ValueEnum};
use std::path::PathBuf;

10/// Backend adapters the daemon can register at startup.
11///
12/// `LlamaCpp` is gated behind the `llamacpp` cargo feature — default
13/// daemon builds only ship the mock adapter (per ADR 0006: lean core,
14/// extensions are separate concerns). `OpenAiCompat` is gated behind
15/// the `openai` cargo feature — pulled in only when the operator
16/// wants the outbound HTTPS adapter (ADR 0006 cloud carve-out).
17#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
18pub enum BackendKind {
19    /// Deterministic test double — used by integration tests and the
20    /// M1 echo daemon.
21    Mock,
22    /// Local llama.cpp backend via FFI (M2). Requires `--model-path`.
23    #[cfg(feature = "llamacpp")]
24    Llamacpp,
25    /// OpenAI-compatible outbound HTTPS adapter (Phase 5A). Reaches
26    /// any provider speaking the `/v1/chat/completions` wire (OpenAI,
27    /// vLLM, LM Studio, LocalAI, OpenRouter, llama.cpp's HTTP server).
28    /// Requires `--openai-base-url` + `--openai-model`. The API key
29    /// is read from `--openai-api-key` or env (`INFERD_OPENAI_API_KEY`
30    /// then `OPENAI_API_KEY`); pass an empty string to skip the
31    /// `Authorization` header for self-hosted endpoints.
32    #[cfg(feature = "openai")]
33    OpenaiCompat,
34    /// AWS Bedrock-runtime `InvokeModelWithResponseStream` adapter
35    /// (Phase 6B-5). v0.2.0 ships only the Anthropic-on-Bedrock body
36    /// shape — Claude models invoked via Bedrock's pinned
37    /// `anthropic_version: "bedrock-2023-05-31"` payload. Requires
38    /// `--bedrock-region` + `--bedrock-model-id`. Auth resolves from
39    /// `--bedrock-bearer-token` / `AWS_BEARER_TOKEN_BEDROCK` first,
40    /// then the standard `AWS_ACCESS_KEY_ID` /
41    /// `AWS_SECRET_ACCESS_KEY` (+ optional `AWS_SESSION_TOKEN`)
42    /// chain.
43    #[cfg(feature = "bedrock")]
44    BedrockInvoke,
45}
46
47/// Top-level CLI for `inferd-daemon`.
48#[derive(Debug, Parser)]
49#[command(name = "inferd-daemon", version, about = "Local inference daemon")]
50pub struct Cli {
51    /// Backend to load at startup.
52    #[arg(long, value_enum, default_value_t = BackendKind::Mock, env = "INFERD_BACKEND")]
53    pub backend: BackendKind,
54
55    /// Path to the single-instance lock file. The lock is held for the
56    /// lifetime of the daemon process.
57    #[arg(long, env = "INFERD_LOCK")]
58    pub lock: PathBuf,
59
60    /// Loopback TCP bind address. Mutually exclusive with `--uds` and `--pipe`.
61    #[arg(long, env = "INFERD_TCP", conflicts_with_all = ["uds", "pipe"])]
62    pub tcp: Option<String>,
63
64    /// Unix domain socket path. Mutually exclusive with `--tcp` and `--pipe`. Unix only.
65    #[arg(long, env = "INFERD_UDS", conflicts_with_all = ["tcp", "pipe"])]
66    pub uds: Option<PathBuf>,
67
68    /// Windows named pipe path (e.g. `\\.\pipe\inferd-infer`).
69    /// Mutually exclusive with `--tcp` and `--uds`. Windows only.
70    #[arg(long, env = "INFERD_PIPE", conflicts_with_all = ["tcp", "uds"])]
71    pub pipe: Option<String>,
72
73    /// Group name for the UDS (Unix only). Ignored on other transports.
74    #[arg(long, env = "INFERD_GROUP")]
75    pub group: Option<String>,
76
77    /// Active generations served concurrently. v0.1 invariant is 1; values
78    /// above 1 are reserved for v0.2 continuous-batching backends.
79    #[arg(long, default_value_t = 1, env = "INFERD_ACTIVE_PERMITS")]
80    pub active_permits: usize,
81
82    /// Maximum waiting queue depth. Submits beyond this return
83    /// `code: queue_full` immediately.
84    #[arg(long, default_value_t = 10, env = "INFERD_QUEUE_DEPTH")]
85    pub queue_depth: usize,
86
87    /// Seconds to wait for the backend to report ready before failing
88    /// startup.
89    #[arg(long, default_value_t = 30, env = "INFERD_READY_TIMEOUT_SECS")]
90    pub ready_timeout_secs: u64,
91
92    /// Path to the GGUF model file. Required when `--backend llamacpp`.
93    #[arg(long, env = "INFERD_MODEL_PATH")]
94    pub model_path: Option<PathBuf>,
95
96    /// Optional expected SHA-256 of the model file as a hex string
97    /// (64 chars). When present, the daemon verifies the file before
98    /// loading via `subtle::ConstantTimeEq` (THREAT_MODEL F-5).
99    #[arg(long, env = "INFERD_MODEL_SHA256")]
100    pub model_sha256: Option<String>,
101
102    /// Llama.cpp context window in tokens. Default 8192.
103    #[arg(long, default_value_t = 8192, env = "INFERD_N_CTX")]
104    pub n_ctx: u32,
105
106    /// Llama.cpp GPU layer offload count. 0 = CPU-only. GPU support
107    /// requires the `cuda`/`metal`/`vulkan`/`rocm` cargo feature at
108    /// build time.
109    #[arg(long, default_value_t = 0, env = "INFERD_N_GPU_LAYERS")]
110    pub n_gpu_layers: i32,
111
112    /// Base URL of the upstream OpenAI-compat endpoint, no trailing
113    /// slash and no path (the adapter appends `/v1/chat/completions`).
114    /// Required when `--backend openai-compat`. Examples:
115    /// `https://api.openai.com`, `http://localhost:11434`,
116    /// `https://openrouter.ai`.
117    #[arg(long, env = "INFERD_OPENAI_BASE_URL")]
118    pub openai_base_url: Option<String>,
119
120    /// Bearer token for the OpenAI-compat upstream. Sent as
121    /// `Authorization: Bearer <value>`. Pass an empty string to skip
122    /// the header entirely for self-hosted endpoints. Resolves from
123    /// `--openai-api-key`, then `INFERD_OPENAI_API_KEY`, then
124    /// `OPENAI_API_KEY` (the de-facto env name most providers' SDKs
125    /// already use).
126    #[arg(long, env = "INFERD_OPENAI_API_KEY", hide_env_values = true)]
127    pub openai_api_key: Option<String>,
128
129    /// Upstream model identifier echoed in the request `model` field
130    /// — provider-specific (e.g. `gpt-4o-mini`, `llama3.1:8b`,
131    /// `meta-llama/Meta-Llama-3-70B-Instruct`). Required when
132    /// `--backend openai-compat`.
133    #[arg(long, env = "INFERD_OPENAI_MODEL")]
134    pub openai_model: Option<String>,
135
136    /// Total request timeout for OpenAI-compat calls, in seconds.
137    /// Default 300 (5 minutes) — long enough for a slow first-token
138    /// from a cold cloud model, short enough to surface stuck
139    /// requests rather than hang forever.
140    #[arg(long, default_value_t = 300, env = "INFERD_OPENAI_TIMEOUT_SECS")]
141    pub openai_timeout_secs: u64,
142
143    /// AWS region the Bedrock endpoint lives in, e.g. `us-east-1`,
144    /// `eu-central-1`. Required when `--backend bedrock-invoke`.
145    /// Used for both the endpoint host and SigV4 signing scope.
146    #[arg(long, env = "INFERD_BEDROCK_REGION")]
147    pub bedrock_region: Option<String>,
148
149    /// Bedrock model id (URL-encoded by the adapter), e.g.
150    /// `anthropic.claude-3-5-sonnet-20241022-v2:0`. Required when
151    /// `--backend bedrock-invoke`.
152    #[arg(long, env = "INFERD_BEDROCK_MODEL_ID")]
153    pub bedrock_model_id: Option<String>,
154
155    /// Pre-issued Bedrock bearer token (`AWS_BEARER_TOKEN_BEDROCK`
156    /// shape, AWS rolled this out in 2025-06). When set, the adapter
157    /// sends `Authorization: Bearer <value>` and skips SigV4. When
158    /// unset, the adapter falls back to the standard
159    /// `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` (+ optional
160    /// `AWS_SESSION_TOKEN`) chain via SigV4 signing.
161    #[arg(long, env = "AWS_BEARER_TOKEN_BEDROCK", hide_env_values = true)]
162    pub bedrock_bearer_token: Option<String>,
163
164    /// Override the Bedrock endpoint host. Empty/absent → default
165    /// `bedrock-runtime.<region>.amazonaws.com`. Useful for VPC
166    /// endpoints / integration tests.
167    #[arg(long, env = "INFERD_BEDROCK_ENDPOINT")]
168    pub bedrock_endpoint: Option<String>,
169
170    /// Total request timeout for Bedrock calls, in seconds. Default
171    /// 300 (5 minutes).
172    #[arg(long, default_value_t = 300, env = "INFERD_BEDROCK_TIMEOUT_SECS")]
173    pub bedrock_timeout_secs: u64,
174
175    /// Optional pre-shared API key. When set, TCP clients MUST send
176    /// `{"type":"auth","key":"<this value>"}` as their first NDJSON
177    /// frame on the connection or the daemon closes the connection.
178    /// UDS and named-pipe transports ignore this — kernel-attested
179    /// peer credentials (F-7) do the work there.
180    ///
181    /// Comparison is constant-time. THREAT_MODEL F-8.
182    #[arg(long, env = "INFERD_API_KEY", hide_env_values = true)]
183    pub api_key: Option<String>,
184
185    /// Path to the operator JSON config file. Default
186    /// `~/.inferd/config.json`. When present, fetch + auto-pull are
187    /// driven from it; CLI flags (`--model-path`, `--model-sha256`,
188    /// `--n-ctx`, `--n-gpu-layers`) override config-file values when
189    /// both are supplied. When absent, the daemon falls back to
190    /// CLI-flag-only operation (dev mode).
191    #[arg(long, env = "INFERD_CONFIG")]
192    pub config: Option<PathBuf>,
193
194    /// Admin endpoint path. Defaults per-platform to the path
195    /// documented in `docs/protocol-v1.md` §"Admin endpoint" — e.g.
196    /// `/run/inferd/admin.sock` on Linux, `\\.\pipe\inferd-admin` on
197    /// Windows. Override for tests / non-default deployments.
198    #[arg(long, env = "INFERD_ADMIN_ADDR")]
199    pub admin_addr: Option<PathBuf>,
200
201    /// Enable the v2 inference endpoint per ADR 0015. v2 binds on a
202    /// *separate* socket from v1: `infer.v2.sock` on Unix /
203    /// `\\.\pipe\inferd-infer-v2` on Windows. v1 stays on its own
204    /// socket and is unaffected.
205    ///
206    /// Phase 1B: the v2 endpoint accepts and validates v2 requests
207    /// but returns `Error{code:internal, message:"v2 generation not
208    /// implemented"}` because the Backend trait does not yet expose
209    /// `generate_v2`. Use this to integration-test middleware that
210    /// will speak v2 once Phase 2A lands.
211    #[arg(long, env = "INFERD_V2")]
212    pub v2: bool,
213
214    /// Override the default v2 inference endpoint path.
215    /// Mirrors `--uds` / `--pipe` for v2; on Linux/macOS this is a
216    /// UDS path, on Windows a named-pipe path. Has no effect unless
217    /// `--v2` is also set.
218    #[arg(long, env = "INFERD_V2_ADDR")]
219    pub v2_addr: Option<PathBuf>,
220
221    /// Loopback TCP bind address for the v2 endpoint. Mutually
222    /// exclusive with `--v2-addr`. Useful for tests that don't want
223    /// the platform default (UDS / named pipe). Has no effect
224    /// unless `--v2` is also set.
225    #[arg(long, env = "INFERD_V2_TCP", conflicts_with = "v2_addr")]
226    pub v2_tcp: Option<String>,
227
228    /// Enable the embed inference endpoint per ADR 0017. The embed
229    /// endpoint binds on a *separate* socket from v1/v2:
230    /// `infer.embed.sock` on Unix / `\\.\pipe\inferd-infer-embed`
231    /// on Windows. Has no effect unless the active backend's
232    /// `capabilities().embed` is true (capability-driven binding).
233    #[arg(long, env = "INFERD_EMBED")]
234    pub embed: bool,
235
236    /// Override the default embed inference endpoint path.
237    /// Mirrors `--uds` / `--pipe` for embed; on Linux/macOS this is
238    /// a UDS path, on Windows a named-pipe path. Has no effect
239    /// unless `--embed` is also set.
240    #[arg(long, env = "INFERD_EMBED_ADDR")]
241    pub embed_addr: Option<PathBuf>,
242
243    /// Loopback TCP bind address for the embed endpoint. Mutually
244    /// exclusive with `--embed-addr`. Has no effect unless `--embed`
245    /// is also set.
246    #[arg(long, env = "INFERD_EMBED_TCP", conflicts_with = "embed_addr")]
247    pub embed_tcp: Option<String>,
248}
249
250impl Cli {
251    /// Validate that exactly one transport is selected. clap enforces
252    /// mutual exclusion; this checks the at-least-one part.
253    pub fn require_one_transport(&self) -> Result<(), &'static str> {
254        let count = [self.tcp.is_some(), self.uds.is_some(), self.pipe.is_some()]
255            .iter()
256            .filter(|b| **b)
257            .count();
258        match count {
259            1 => Ok(()),
260            0 => Err("must specify one of --tcp, --uds, --pipe"),
261            _ => Err("--tcp, --uds, --pipe are mutually exclusive"),
262        }
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Program name plus the mandatory `--lock` flag; no transport chosen.
    const LOCK_ONLY: &[&str] = &["inferd-daemon", "--lock", "/tmp/inferd.lock"];

    /// `LOCK_ONLY` plus a loopback TCP transport — the smallest argv
    /// that both parses and passes `require_one_transport`.
    const WITH_TCP: &[&str] = &[
        "inferd-daemon",
        "--lock",
        "/tmp/inferd.lock",
        "--tcp",
        "127.0.0.1:0",
    ];

    /// Build an argv by appending test-specific `extra` arguments to a
    /// shared `base`.
    fn argv<'a>(base: &[&'a str], extra: &[&'a str]) -> Vec<&'a str> {
        base.iter().chain(extra).copied().collect()
    }

    #[test]
    fn cli_parses_minimum_required() {
        let cli = Cli::parse_from(argv(WITH_TCP, &[]));
        assert!(cli.tcp.is_some());
        assert!(cli.uds.is_none());
        assert_eq!(cli.queue_depth, 10);
        assert_eq!(cli.active_permits, 1);
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_no_transport() {
        let cli = Cli::parse_from(argv(LOCK_ONLY, &[]));
        assert!(cli.require_one_transport().is_err());
    }

    #[test]
    fn cli_rejects_both_transports_via_clap() {
        // clap-level mutual exclusion: this should fail to parse, not
        // require_one_transport's runtime check.
        let result = Cli::try_parse_from(argv(WITH_TCP, &["--uds", "/tmp/inferd.sock"]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_accepts_pipe_transport() {
        // Spelled out in full: this case uses a Windows-style lock path
        // and no TCP transport, so neither shared base applies.
        let cli = Cli::parse_from([
            "inferd-daemon",
            "--lock",
            "C:/tmp/inferd.lock",
            "--pipe",
            r"\\.\pipe\inferd-test",
        ]);
        assert!(cli.pipe.is_some());
        assert!(cli.uds.is_none());
        assert!(cli.tcp.is_none());
        cli.require_one_transport().unwrap();
    }

    #[test]
    fn cli_rejects_pipe_with_tcp_via_clap() {
        let result = Cli::try_parse_from(argv(WITH_TCP, &["--pipe", r"\\.\pipe\inferd-test"]));
        assert!(result.is_err());
    }

    #[test]
    fn cli_command_factory_is_well_formed() {
        // Ensures clap's `#[command]` derives don't conflict; cheap smoke
        // test that catches lots of misconfigurations.
        Cli::command().debug_assert();
    }

    #[test]
    fn cli_accepts_v2_flag() {
        let cli = Cli::parse_from(argv(WITH_TCP, &["--v2", "--v2-tcp", "127.0.0.1:0"]));
        assert!(cli.v2);
        assert!(cli.v2_tcp.is_some());
        assert!(cli.v2_addr.is_none());
    }

    #[test]
    fn cli_rejects_v2_addr_with_v2_tcp() {
        let result = Cli::try_parse_from(argv(
            WITH_TCP,
            &[
                "--v2",
                "--v2-tcp",
                "127.0.0.1:0",
                "--v2-addr",
                "/tmp/inferd-v2.sock",
            ],
        ));
        assert!(result.is_err());
    }

    #[test]
    fn cli_v2_disabled_by_default() {
        let cli = Cli::parse_from(argv(WITH_TCP, &[]));
        assert!(!cli.v2);
    }

    #[test]
    fn cli_accepts_embed_flag() {
        let cli = Cli::parse_from(argv(WITH_TCP, &["--embed", "--embed-tcp", "127.0.0.1:0"]));
        assert!(cli.embed);
        assert!(cli.embed_tcp.is_some());
        assert!(cli.embed_addr.is_none());
    }

    #[test]
    fn cli_rejects_embed_addr_with_embed_tcp() {
        let result = Cli::try_parse_from(argv(
            WITH_TCP,
            &[
                "--embed",
                "--embed-tcp",
                "127.0.0.1:0",
                "--embed-addr",
                "/tmp/inferd-embed.sock",
            ],
        ));
        assert!(result.is_err());
    }

    #[test]
    fn cli_embed_disabled_by_default() {
        let cli = Cli::parse_from(argv(WITH_TCP, &[]));
        assert!(!cli.embed);
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_accepts_openai_compat_backend() {
        let cli = Cli::parse_from(argv(
            WITH_TCP,
            &[
                "--backend",
                "openai-compat",
                "--openai-base-url",
                "http://localhost:11434",
                "--openai-model",
                "llama3.1:8b",
                "--openai-api-key",
                "sk-x",
                "--openai-timeout-secs",
                "30",
            ],
        ));
        assert_eq!(cli.backend, BackendKind::OpenaiCompat);
        assert_eq!(
            cli.openai_base_url.as_deref(),
            Some("http://localhost:11434")
        );
        assert_eq!(cli.openai_model.as_deref(), Some("llama3.1:8b"));
        assert_eq!(cli.openai_api_key.as_deref(), Some("sk-x"));
        assert_eq!(cli.openai_timeout_secs, 30);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_accepts_bedrock_invoke_backend() {
        let cli = Cli::parse_from(argv(
            WITH_TCP,
            &[
                "--backend",
                "bedrock-invoke",
                "--bedrock-region",
                "us-east-1",
                "--bedrock-model-id",
                "anthropic.claude-3-5-sonnet-20241022-v2:0",
                "--bedrock-bearer-token",
                "abc123",
                "--bedrock-timeout-secs",
                "60",
            ],
        ));
        assert_eq!(cli.backend, BackendKind::BedrockInvoke);
        assert_eq!(cli.bedrock_region.as_deref(), Some("us-east-1"));
        assert_eq!(
            cli.bedrock_model_id.as_deref(),
            Some("anthropic.claude-3-5-sonnet-20241022-v2:0")
        );
        assert_eq!(cli.bedrock_bearer_token.as_deref(), Some("abc123"));
        assert_eq!(cli.bedrock_timeout_secs, 60);
    }

    #[cfg(feature = "bedrock")]
    #[test]
    fn cli_bedrock_timeout_defaults_to_300() {
        let cli = Cli::parse_from(argv(
            WITH_TCP,
            &[
                "--backend",
                "bedrock-invoke",
                "--bedrock-region",
                "us-east-1",
                "--bedrock-model-id",
                "anthropic.claude-3-5-haiku-20241022-v1:0",
            ],
        ));
        assert_eq!(cli.bedrock_timeout_secs, 300);
        assert!(cli.bedrock_bearer_token.is_none());
        assert!(cli.bedrock_endpoint.is_none());
    }

    #[cfg(feature = "openai")]
    #[test]
    fn cli_openai_timeout_defaults_to_300() {
        let cli = Cli::parse_from(argv(
            WITH_TCP,
            &[
                "--backend",
                "openai-compat",
                "--openai-base-url",
                "https://api.openai.com",
                "--openai-model",
                "gpt-4o-mini",
            ],
        ));
        assert_eq!(cli.openai_timeout_secs, 300);
        assert!(cli.openai_api_key.is_none());
    }
}