Skip to main content

inferd_daemon/
config_file.rs

1//! Operator-facing JSON config file.
2//!
3//! Default location: `~/.inferd/config.json` (Unix) /
4//! `%USERPROFILE%\.inferd\config.json` (Windows). Override via
5//! `--config` CLI flag or `INFERD_CONFIG` env var.
6//!
7//! # Schema (single-backend, legacy)
8//!
9//! ```json
10//! {
11//!   "auto_pull": true,
12//!   "models_home": "~/.local/share/models",
13//!   "model": {
14//!     "name":       "gemma-4-e4b",
15//!     "sha256":     "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
16//!     "size_bytes": 5126304928,
17//!     "source_url": "https://huggingface.co/unsloth/.../resolve/main/...gguf",
18//!     "license":    "apache-2.0"
19//!   },
20//!   "n_ctx":         8192,
21//!   "n_gpu_layers":  0,
22//!   "admin_addr":    "/run/inferd/admin.sock"
23//! }
24//! ```
25//!
26//! # Schema (multi-backend, v0.2+)
27//!
28//! Per ADR 0007, the router walks an *ordered* list of backends; the
29//! first that's `ready()` and not currently circuit-broken serves the
30//! request. The config-file surface mirrors that:
31//!
32//! ```json
33//! {
34//!   "models_home": "~/.local/share/models",
35//!   "backends": [
36//!     {
37//!       "kind": "llamacpp",
38//!       "name": "local-gemma",
39//!       "model": { "name": "gemma-4-e4b", "sha256": "...", "source_url": "https://...gguf" },
40//!       "n_ctx": 8192,
41//!       "n_gpu_layers": 35
42//!     },
43//!     {
44//!       "kind": "openai-compat",
45//!       "name": "anthropic-fallback",
46//!       "base_url": "https://api.anthropic.com",
47//!       "model": "claude-opus-4-7",
48//!       "api_key_env": "ANTHROPIC_API_KEY",
49//!       "timeout_secs": 300
50//!     }
51//!   ]
52//! }
53//! ```
54//!
55//! `backends:` and `model:` are mutually exclusive. When `model:` is
56//! present (legacy single-backend shape), the daemon promotes it to a
57//! one-element `backends:` list with `kind: "llamacpp"` so existing
58//! v0.1.x configs keep working without edits.
59//!
60//! API keys for `openai-compat` are referenced by env-var **name**
61//! via `api_key_env:` — never embedded literally in the file. The
62//! daemon reads the named env at startup. When `api_key_env:` is
63//! absent, the daemon falls back to `INFERD_OPENAI_API_KEY`, then
64//! `OPENAI_API_KEY`, then empty (skips `Authorization` for self-hosted endpoints).
65//!
66//! The `kind:` field is an open-ended tagged union: `llamacpp`,
67//! `openai-compat` and `bedrock-invoke` exist today, and future variants
68//! (`bedrock-converse`, etc.) slot in additively without breaking existing configs.
69//!
70//! Resolution order for the model store (per ADR 0011):
71//!
72//! 1. `models_home` field if set in this config.
73//! 2. `MODELS_HOME` env var.
74//! 3. Platform default (XDG / Application Support / LOCALAPPDATA).
75//!
76//! CLI flags override config-file values when both are present.
77
78use serde::{Deserialize, Serialize};
79use std::fs::File;
80use std::io::{self, BufReader};
81use std::path::{Path, PathBuf};
82
83/// Top-level config-file schema.
84///
85/// Two flavours coexist:
86///
87/// - **Legacy single-backend** — `model:` at the top level, plus
88///   `n_ctx` / `n_gpu_layers`. Implies one `kind: "llamacpp"` backend.
89/// - **Multi-backend** — `backends: [...]` carries an ordered list of
90///   backend entries. Router walks the list per ADR 0007.
91///
92/// The two are mutually exclusive at parse time: setting both is a
93/// validation error. `auto_pull` and `admin_addr` apply to both
94/// flavours.
95#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct ConfigFile {
97    /// When `true` and a `kind: "llamacpp"` model file is absent, the
98    /// daemon downloads it from the entry's `source_url` on startup.
99    /// When `false`, the daemon refuses to start with a clear error
100    /// pointing at the operator's next step. Default: `true`. Applies
101    /// to every llamacpp entry in `backends:`.
102    #[serde(default = "default_auto_pull")]
103    pub auto_pull: bool,
104
105    /// Override for the shared model store root. When unset the
106    /// daemon falls back to `MODELS_HOME` env, then the platform
107    /// default. Tilde-expanded on read.
108    #[serde(default)]
109    pub models_home: Option<PathBuf>,
110
111    /// Legacy single-backend model spec. Deprecated in favour of
112    /// `backends:` but kept for v0.1.x config-file compatibility.
113    /// Mutually exclusive with `backends:`.
114    #[serde(default)]
115    pub model: Option<ModelConfig>,
116
117    /// Llama.cpp context window in tokens. Default: 8192. Used as
118    /// the fallback for legacy `model:` entries; multi-backend
119    /// entries carry their own `n_ctx`.
120    #[serde(default = "default_n_ctx")]
121    pub n_ctx: u32,
122
123    /// Llama.cpp GPU layer offload count. 0 = CPU-only. Default: 0.
124    /// Used as the fallback for legacy `model:` entries; multi-
125    /// backend entries carry their own `n_gpu_layers`.
126    #[serde(default)]
127    pub n_gpu_layers: i32,
128
129    /// Admin socket address. Default: platform-specific path per
130    /// `docs/protocol-v1.md` §"Admin endpoint".
131    #[serde(default)]
132    pub admin_addr: Option<String>,
133
134    /// Ordered list of backends (multi-backend shape). First entry
135    /// is highest priority — the router tries it first, then the
136    /// next, etc. Mutually exclusive with `model:`.
137    #[serde(default)]
138    pub backends: Option<Vec<BackendEntry>>,
139
140    /// Optional listener overrides. Default behaviour is unchanged:
141    /// the operator picks a transport via `--tcp` / `--uds` /
142    /// `--pipe` on the CLI. When `listen:` is present **and** the
143    /// CLI did not pass a transport flag, the daemon binds the
144    /// transports declared here. CLI flags always win when both
145    /// are set. Restart-time only — no config watcher.
146    #[serde(default)]
147    pub listen: Option<ListenConfig>,
148}
149
150/// Operator-declared listener overrides. Every field is optional.
151/// TCP is **off by default** — set `tcp:` (and `tcp_v2:` if running
152/// with v2) to opt in for cross-VM use cases (WSL ↔ Windows host,
153/// podman-on-machine, …) where Unix sockets / named pipes don't
154/// cross the boundary cleanly. Mirrors the security shape of
155/// `openai-compat`: the API key is referenced by env-var **name**,
156/// never embedded literally in the file.
157#[derive(Debug, Clone, Serialize, Deserialize, Default)]
158pub struct ListenConfig {
159    /// Loopback TCP bind address for the v1 inference socket, e.g.
160    /// `"127.0.0.1:9090"` or `"0.0.0.0:9090"`. When unset, no v1
161    /// TCP listener is bound from config (CLI `--tcp` may still
162    /// provide one). v0.1 invariant: CLI mutual exclusion still
163    /// applies — if CLI passes `--uds` / `--pipe`, the config
164    /// `tcp:` is ignored with a one-line warning at startup.
165    #[serde(default)]
166    pub tcp: Option<String>,
167
168    /// Loopback TCP bind address for the v2 inference socket. Has
169    /// no effect unless `--v2` is also set on the CLI.
170    #[serde(default)]
171    pub tcp_v2: Option<String>,
172
173    /// Loopback TCP bind address for the embed socket per ADR 0017.
174    /// Has no effect unless `--embed` is also set on the CLI and the
175    /// active backend advertises `capabilities().embed == true`.
176    #[serde(default)]
177    pub tcp_embed: Option<String>,
178
179    /// **Name** of the env var carrying the pre-shared API key for
180    /// TCP clients (THREAT_MODEL F-8). When set, the daemon reads
181    /// the named env at startup and clients must send
182    /// `{"type":"auth","key":"<value>"}` as their first NDJSON
183    /// frame. UDS and named-pipe transports ignore this — kernel-
184    /// attested peer credentials (F-7) gate those. CLI `--api-key`
185    /// always wins when both are set.
186    #[serde(default)]
187    pub api_key_env: Option<String>,
188}
189
190/// A single backend declaration. Tagged on `kind:` so future
191/// variants (`bedrock-converse`, …) slot in additively. Unknown
192/// kinds are rejected at parse time so operators see a clear error
193/// rather than a silent skip.
194#[derive(Debug, Clone, Serialize, Deserialize)]
195#[serde(tag = "kind", rename_all = "kebab-case")]
196pub enum BackendEntry {
197    /// Local llama.cpp backend over a GGUF file in the shared CAS.
198    Llamacpp(LlamacppEntry),
199    /// Outbound HTTPS adapter for any provider speaking the OpenAI
200    /// Chat Completions wire (OpenAI, Anthropic via the compat layer
201    /// at `api.anthropic.com/v1/`, OpenRouter, vLLM, LM Studio,
202    /// LocalAI, Ollama, llama.cpp's HTTP server).
203    OpenaiCompat(OpenaiCompatEntry),
204    /// AWS Bedrock-runtime
205    /// `InvokeModelWithResponseStream` adapter
206    /// (Phase 6B-5). v0.2.0 ships only the Anthropic-on-Bedrock body
207    /// shape — Claude models invoked via Bedrock's pinned
208    /// `anthropic_version: "bedrock-2023-05-31"` payload.
209    BedrockInvoke(BedrockInvokeEntry),
210}
211
212impl BackendEntry {
213    /// Operator-supplied stable identifier, used in router feedback
214    /// and admin-status events.
215    pub fn name(&self) -> &str {
216        match self {
217            BackendEntry::Llamacpp(e) => &e.name,
218            BackendEntry::OpenaiCompat(e) => &e.name,
219            BackendEntry::BedrockInvoke(e) => &e.name,
220        }
221    }
222}
223
224/// Llamacpp backend entry inside `backends:`.
225#[derive(Debug, Clone, Serialize, Deserialize)]
226pub struct LlamacppEntry {
227    /// Stable operator-facing identifier, e.g. `"local-gemma"`. Used
228    /// in router feedback + admin events. Required to be unique
229    /// across all entries.
230    pub name: String,
231
232    /// Per-entry model spec (CAS layout, ADR 0011).
233    pub model: ModelConfig,
234
235    /// Llama.cpp context window in tokens. Default: 8192.
236    #[serde(default = "default_n_ctx")]
237    pub n_ctx: u32,
238
239    /// Llama.cpp GPU layer offload count. 0 = CPU-only. Default: 0.
240    #[serde(default)]
241    pub n_gpu_layers: i32,
242
243    /// Opt this backend into serving embeddings per ADR 0017. When
244    /// `true`, the adapter allocates a *second* `llama_context`
245    /// configured with `embeddings = true` so embed requests don't
246    /// race the generation context. `capabilities().embed` flips
247    /// `true` accordingly. Default: `false`.
248    #[serde(default)]
249    pub embed: bool,
250
251    /// Pooling strategy for the embedding context, mapped 1:1 to
252    /// llama.cpp's `enum llama_pooling_type`. Most embedding models
253    /// expect `1` (`LLAMA_POOLING_TYPE_MEAN`), which is the default;
254    /// EmbeddingGemma 300M is in this group. Set explicitly only if
255    /// the model documents a different strategy (e.g. `2` =
256    /// `CLS`, `3` = `LAST`). Ignored when `embed = false`.
257    #[serde(default, skip_serializing_if = "Option::is_none")]
258    pub embed_pooling: Option<i32>,
259
260    /// Context window for the dedicated embedding `llama_context`,
261    /// in tokens. Embedding models typically have a smaller window
262    /// than generation models — 2048 is the EmbeddingGemma 300M
263    /// default and is what the adapter uses when this is unset.
264    /// Ignored when `embed = false`.
265    #[serde(default = "default_embed_n_ctx")]
266    pub embed_n_ctx: u32,
267}
268
269/// OpenAI-compat backend entry inside `backends:`.
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct OpenaiCompatEntry {
272    /// Stable operator-facing identifier, e.g. `"anthropic-fallback"`.
273    pub name: String,
274
275    /// Base URL of the upstream, no trailing slash and no path
276    /// (the adapter appends `/v1/chat/completions`). Examples:
277    /// `https://api.openai.com`, `https://api.anthropic.com`,
278    /// `http://localhost:11434`.
279    pub base_url: String,
280
281    /// Upstream model identifier echoed in the request `model` field.
282    /// Provider-specific (e.g. `gpt-4o-mini`, `claude-opus-4-7`,
283    /// `llama3.1:8b`).
284    pub model: String,
285
286    /// **Name** of the env var carrying the bearer token — never the
287    /// literal token. Operators set the env separately so secrets
288    /// stay out of the config file. When unset, the daemon falls
289    /// back to `INFERD_OPENAI_API_KEY`, then `OPENAI_API_KEY`, then
290    /// skips the `Authorization` header (some self-hosted endpoints
291    /// accept unauthenticated traffic).
292    #[serde(default)]
293    pub api_key_env: Option<String>,
294
295    /// Total request timeout in seconds. Default 300 (5 minutes).
296    #[serde(default = "default_openai_timeout_secs")]
297    pub timeout_secs: u64,
298}
299
300/// Bedrock-invoke backend entry inside `backends:`.
301///
302/// Auth precedence at startup (mirrors `openai-compat` env-var-by-name
303/// shape so secrets stay out of the file):
304///
305/// 1. `bearer_token_env: "<NAME>"` — when the named env contains a
306///    non-empty value, the adapter sends `Authorization: Bearer
307///    <value>` and skips SigV4. Mirrors AWS' 2025-06
308///    `AWS_BEARER_TOKEN_BEDROCK` rollout.
309/// 2. SigV4 against the standard AWS credential chain — env vars
310///    `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` (+ optional
311///    `AWS_SESSION_TOKEN`). Cross-account assume-role is out of
312///    scope for v0.2.0; operators set the env vars from their own
313///    session before starting the daemon.
314#[derive(Debug, Clone, Serialize, Deserialize)]
315pub struct BedrockInvokeEntry {
316    /// Stable operator-facing identifier, e.g. `"bedrock-claude"`.
317    pub name: String,
318
319    /// AWS region the Bedrock endpoint lives in, e.g. `"us-east-1"`.
320    /// Used for both the endpoint host and SigV4 signing scope.
321    pub region: String,
322
323    /// Bedrock model id, e.g.
324    /// `"anthropic.claude-3-5-sonnet-20241022-v2:0"`. URL-encoded
325    /// by the adapter.
326    pub model_id: String,
327
328    /// Optional **name** of the env var carrying the Bedrock bearer
329    /// token (`AWS_BEARER_TOKEN_BEDROCK` shape) — never the literal
330    /// token. When the named env is non-empty, bearer auth wins and
331    /// SigV4 is skipped. When unset or empty, the adapter falls back
332    /// to the standard `AWS_ACCESS_KEY_ID` /
333    /// `AWS_SECRET_ACCESS_KEY` chain.
334    #[serde(default)]
335    pub bearer_token_env: Option<String>,
336
337    /// Optional endpoint host override. Empty/absent → default
338    /// `bedrock-runtime.<region>.amazonaws.com`. Useful for VPC
339    /// endpoints / integration tests.
340    #[serde(default)]
341    pub endpoint: Option<String>,
342
343    /// Total request timeout in seconds. Default 300 (5 minutes).
344    #[serde(default = "default_bedrock_timeout_secs")]
345    pub timeout_secs: u64,
346}
347
348/// Per-model entry: pinned URL + pinned SHA-256 + name.
349///
350/// The shape mirrors `fetch::ModelSpec` but as a serde-deserialisable
351/// config-file type. Conversion is straightforward (`From` impl below).
352///
353/// Note: there is no `filename` field. The blob's on-disk location
354/// is derived from its SHA-256 (CAS layout, ADR 0011); the manifest
355/// at `<store>/manifests/<name>.json` is the only place a name maps
356/// to a blob.
357#[derive(Debug, Clone, Serialize, Deserialize)]
358pub struct ModelConfig {
359    /// Stable identifier, e.g. `"gemma-4-e4b"`. Used as the manifest
360    /// filename and the lock-file basename.
361    pub name: String,
362    /// Lowercase hex SHA-256 of the GGUF bytes. Required.
363    pub sha256: String,
364    /// Advisory total size for progress reporting + manifest.
365    #[serde(default)]
366    pub size_bytes: Option<u64>,
367    /// Direct-download HTTPS endpoint. Must be `https://`.
368    pub source_url: String,
369    /// SPDX-style license id when known. Recorded in the manifest.
370    #[serde(default)]
371    pub license: Option<String>,
372}
373
/// Serde default for `ConfigFile::auto_pull`: pulling is on unless the
/// operator turns it off.
fn default_auto_pull() -> bool {
    const AUTO_PULL: bool = true;
    AUTO_PULL
}
377
/// Serde default for the llama.cpp context window (`n_ctx`), in
/// tokens. Shared by the top-level field and by llamacpp entries.
fn default_n_ctx() -> u32 {
    const N_CTX_TOKENS: u32 = 8192;
    N_CTX_TOKENS
}
381
/// Serde default for `LlamacppEntry::embed_n_ctx`, in tokens.
fn default_embed_n_ctx() -> u32 {
    const EMBED_N_CTX_TOKENS: u32 = 2048;
    EMBED_N_CTX_TOKENS
}
385
/// Serde default for `OpenaiCompatEntry::timeout_secs`: five minutes.
fn default_openai_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 300;
    TIMEOUT_SECS
}
389
/// Serde default for `BedrockInvokeEntry::timeout_secs`: five minutes.
fn default_bedrock_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 300;
    TIMEOUT_SECS
}
393
/// Looks up the current user's home directory from the environment:
/// `HOME` on Unix targets, `USERPROFILE` everywhere else. Returns
/// `None` when the variable is not set.
fn home_dir() -> Option<PathBuf> {
    // `cfg!` is resolved at compile time, so only one name is ever used.
    let var_name = if cfg!(unix) { "HOME" } else { "USERPROFILE" };
    let raw = std::env::var_os(var_name)?;
    Some(PathBuf::from(raw))
}
404
405/// Default config-file path: `~/.inferd/config.json` on Unix /
406/// `%USERPROFILE%\.inferd\config.json` on Windows. Honours
407/// `INFERD_CONFIG` for tests and ops.
408pub fn default_config_path() -> PathBuf {
409    if let Ok(p) = std::env::var("INFERD_CONFIG") {
410        return PathBuf::from(p);
411    }
412    let home = home_dir().unwrap_or_else(|| PathBuf::from("."));
413    home.join(".inferd").join("config.json")
414}
415
416/// Errors produced by `ConfigFile::load`.
417#[derive(Debug, thiserror::Error)]
418pub enum ConfigError {
419    /// The config file did not exist at the resolved path.
420    #[error("config file not found: {0}")]
421    NotFound(PathBuf),
422    /// I/O error reading the file.
423    #[error("io reading {path}: {source}")]
424    Io {
425        /// Path that failed.
426        path: PathBuf,
427        /// Underlying I/O error.
428        #[source]
429        source: io::Error,
430    },
431    /// JSON parse failure.
432    #[error("parse {path}: {source}")]
433    Parse {
434        /// Path that failed.
435        path: PathBuf,
436        /// Underlying serde error.
437        #[source]
438        source: serde_json::Error,
439    },
440    /// Validation failure on otherwise-well-formed config.
441    #[error("invalid config: {0}")]
442    Invalid(String),
443}
444
445impl ConfigFile {
446    /// Read + parse + validate a config file at `path`.
447    pub fn load(path: &Path) -> Result<Self, ConfigError> {
448        let file = File::open(path).map_err(|e| {
449            if e.kind() == io::ErrorKind::NotFound {
450                ConfigError::NotFound(path.to_path_buf())
451            } else {
452                ConfigError::Io {
453                    path: path.to_path_buf(),
454                    source: e,
455                }
456            }
457        })?;
458        let reader = BufReader::new(file);
459        let mut cfg: ConfigFile =
460            serde_json::from_reader(reader).map_err(|e| ConfigError::Parse {
461                path: path.to_path_buf(),
462                source: e,
463            })?;
464        cfg.expand_paths();
465        cfg.validate()?;
466        Ok(cfg)
467    }
468
469    fn expand_paths(&mut self) {
470        if let Some(p) = self.models_home.as_ref()
471            && let Some(stripped) = p
472                .to_str()
473                .and_then(|s| s.strip_prefix("~/").or_else(|| s.strip_prefix("~\\")))
474            && let Some(home) = home_dir()
475        {
476            self.models_home = Some(home.join(stripped));
477        }
478    }
479
480    fn validate(&self) -> Result<(), ConfigError> {
481        match (&self.model, &self.backends) {
482            (Some(_), Some(_)) => {
483                return Err(ConfigError::Invalid(
484                    "config: `model` and `backends` are mutually exclusive — \
485                     pick one shape, not both"
486                        .into(),
487                ));
488            }
489            (None, None) => {
490                return Err(ConfigError::Invalid(
491                    "config: must specify either `model` (legacy single-backend) \
492                     or `backends` (multi-backend list)"
493                        .into(),
494                ));
495            }
496            _ => {}
497        }
498        if self.n_ctx == 0 {
499            return Err(ConfigError::Invalid("n_ctx must be > 0".into()));
500        }
501        if let Some(m) = &self.model {
502            validate_model_config(m)?;
503        }
504        if let Some(listen) = &self.listen {
505            if let Some(addr) = &listen.tcp
506                && addr.trim().is_empty()
507            {
508                return Err(ConfigError::Invalid(
509                    "listen.tcp must not be empty when set".into(),
510                ));
511            }
512            if let Some(addr) = &listen.tcp_v2
513                && addr.trim().is_empty()
514            {
515                return Err(ConfigError::Invalid(
516                    "listen.tcp_v2 must not be empty when set".into(),
517                ));
518            }
519            if let Some(addr) = &listen.tcp_embed
520                && addr.trim().is_empty()
521            {
522                return Err(ConfigError::Invalid(
523                    "listen.tcp_embed must not be empty when set".into(),
524                ));
525            }
526        }
527        if let Some(list) = &self.backends {
528            if list.is_empty() {
529                return Err(ConfigError::Invalid(
530                    "backends list must not be empty".into(),
531                ));
532            }
533            let mut seen = std::collections::HashSet::with_capacity(list.len());
534            for entry in list {
535                let name = entry.name();
536                if name.is_empty() {
537                    return Err(ConfigError::Invalid(
538                        "backends[].name must not be empty".into(),
539                    ));
540                }
541                if !seen.insert(name.to_string()) {
542                    return Err(ConfigError::Invalid(format!(
543                        "duplicate backends[].name {name:?} — names must be unique"
544                    )));
545                }
546                match entry {
547                    BackendEntry::Llamacpp(e) => {
548                        validate_model_config(&e.model)?;
549                        if e.n_ctx == 0 {
550                            return Err(ConfigError::Invalid(format!(
551                                "backends[{name:?}].n_ctx must be > 0"
552                            )));
553                        }
554                    }
555                    BackendEntry::OpenaiCompat(e) => {
556                        if e.base_url.trim().is_empty() {
557                            return Err(ConfigError::Invalid(format!(
558                                "backends[{name:?}].base_url must not be empty"
559                            )));
560                        }
561                        if !(e.base_url.starts_with("https://")
562                            || e.base_url.starts_with("http://"))
563                        {
564                            return Err(ConfigError::Invalid(format!(
565                                "backends[{name:?}].base_url must be http:// or https:// \
566                                 (got {:?})",
567                                e.base_url
568                            )));
569                        }
570                        if e.model.trim().is_empty() {
571                            return Err(ConfigError::Invalid(format!(
572                                "backends[{name:?}].model must not be empty"
573                            )));
574                        }
575                        if e.timeout_secs == 0 {
576                            return Err(ConfigError::Invalid(format!(
577                                "backends[{name:?}].timeout_secs must be > 0"
578                            )));
579                        }
580                    }
581                    BackendEntry::BedrockInvoke(e) => {
582                        if e.region.trim().is_empty() {
583                            return Err(ConfigError::Invalid(format!(
584                                "backends[{name:?}].region must not be empty"
585                            )));
586                        }
587                        if e.model_id.trim().is_empty() {
588                            return Err(ConfigError::Invalid(format!(
589                                "backends[{name:?}].model_id must not be empty"
590                            )));
591                        }
592                        if e.timeout_secs == 0 {
593                            return Err(ConfigError::Invalid(format!(
594                                "backends[{name:?}].timeout_secs must be > 0"
595                            )));
596                        }
597                    }
598                }
599            }
600        }
601        Ok(())
602    }
603
604    /// Canonical multi-backend list. When the operator wrote the
605    /// legacy single-backend shape (`model:` at top level), this
606    /// returns a one-element list with `kind: "llamacpp"` so the
607    /// rest of the daemon only ever sees the multi-backend shape.
608    pub fn resolved_backends(&self) -> Vec<BackendEntry> {
609        if let Some(list) = &self.backends {
610            return list.clone();
611        }
612        // Legacy promotion path. `validate()` ensures exactly one of
613        // (`model`, `backends`) is set, so the unwrap is unreachable
614        // for any value that reached this method.
615        let m = self
616            .model
617            .as_ref()
618            .expect("validate() guarantees one of model|backends is set")
619            .clone();
620        vec![BackendEntry::Llamacpp(LlamacppEntry {
621            name: m.name.clone(),
622            model: m,
623            n_ctx: self.n_ctx,
624            n_gpu_layers: self.n_gpu_layers,
625            // Legacy single-model configs predate ADR 0017's embed
626            // surface and stay generation-only. Operators wanting
627            // embeddings migrate to the multi-backend `backends:`
628            // shape.
629            embed: false,
630            embed_pooling: None,
631            embed_n_ctx: default_embed_n_ctx(),
632        })]
633    }
634}
635
636fn validate_model_config(m: &ModelConfig) -> Result<(), ConfigError> {
637    if m.name.is_empty() {
638        return Err(ConfigError::Invalid("model.name must not be empty".into()));
639    }
640    if !m.source_url.starts_with("https://") {
641        return Err(ConfigError::Invalid(format!(
642            "model.source_url must be https:// (got {:?})",
643            m.source_url
644        )));
645    }
646    if m.sha256.len() != 64
647        || !m
648            .sha256
649            .bytes()
650            .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase())
651    {
652        return Err(ConfigError::Invalid(
653            "model.sha256 must be 64 lowercase hex chars".into(),
654        ));
655    }
656    Ok(())
657}
658
659impl From<&ModelConfig> for crate::fetch::ModelSpec {
660    fn from(m: &ModelConfig) -> Self {
661        crate::fetch::ModelSpec {
662            name: m.name.clone(),
663            source_url: m.source_url.clone(),
664            sha256_hex: m.sha256.clone(),
665            size_bytes: m.size_bytes,
666            license: m.license.clone(),
667            source: None,
668        }
669    }
670}
671
672#[cfg(test)]
673mod tests {
674    use super::*;
675    use std::io::Write;
676
677    fn write_config(s: &str) -> tempfile::NamedTempFile {
678        let mut f = tempfile::NamedTempFile::new().unwrap();
679        f.write_all(s.as_bytes()).unwrap();
680        f.flush().unwrap();
681        f
682    }
683
684    fn good_json() -> String {
685        r#"{
686            "auto_pull": true,
687            "models_home": "/tmp/inferd-models-home",
688            "model": {
689                "name": "gemma-4-e4b",
690                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
691                "size_bytes": 5126304928,
692                "source_url": "https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF/resolve/main/gemma-4-E4B-it-UD-Q4_K_XL.gguf",
693                "license": "apache-2.0"
694            },
695            "n_ctx": 8192,
696            "n_gpu_layers": 0
697        }"#
698        .to_string()
699    }
700
701    #[test]
702    fn load_well_formed_config() {
703        let f = write_config(&good_json());
704        let cfg = ConfigFile::load(f.path()).unwrap();
705        let m = cfg.model.as_ref().expect("legacy model present");
706        assert_eq!(m.name, "gemma-4-e4b");
707        assert_eq!(m.size_bytes, Some(5_126_304_928));
708        assert_eq!(m.license.as_deref(), Some("apache-2.0"));
709        assert!(cfg.auto_pull);
710        assert_eq!(cfg.n_ctx, 8192);
711        assert_eq!(
712            cfg.models_home,
713            Some(PathBuf::from("/tmp/inferd-models-home"))
714        );
715    }
716
717    #[test]
718    fn missing_file_returns_not_found() {
719        let path = std::env::temp_dir().join("inferd-config-does-not-exist.json");
720        let _ = std::fs::remove_file(&path);
721        let err = ConfigFile::load(&path).unwrap_err();
722        assert!(matches!(err, ConfigError::NotFound(_)));
723    }
724
725    #[test]
726    fn invalid_json_returns_parse_error() {
727        let f = write_config("{ not valid json");
728        let err = ConfigFile::load(f.path()).unwrap_err();
729        assert!(matches!(err, ConfigError::Parse { .. }));
730    }
731
732    #[test]
733    fn http_url_rejected() {
734        let bad = good_json().replace("https://", "http://");
735        let f = write_config(&bad);
736        let err = ConfigFile::load(f.path()).unwrap_err();
737        match err {
738            ConfigError::Invalid(msg) => assert!(msg.contains("https://")),
739            other => panic!("expected Invalid, got {other:?}"),
740        }
741    }
742
743    #[test]
744    fn uppercase_sha_rejected() {
745        let bad = good_json().replace(
746            "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
747            "30D1E7949597A3446726064E80B876FD1B5CBA4AA6EEC53D27AFA420E731FB36",
748        );
749        let f = write_config(&bad);
750        let err = ConfigFile::load(f.path()).unwrap_err();
751        match err {
752            ConfigError::Invalid(msg) => assert!(msg.contains("lowercase hex")),
753            other => panic!("expected Invalid, got {other:?}"),
754        }
755    }
756
757    #[test]
758    fn short_sha_rejected() {
759        let bad = good_json().replace(
760            "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
761            "30d1e7",
762        );
763        let f = write_config(&bad);
764        let err = ConfigFile::load(f.path()).unwrap_err();
765        assert!(matches!(err, ConfigError::Invalid(_)));
766    }
767
768    #[test]
769    fn defaults_when_optional_fields_missing() {
770        let json = r#"{
771            "model": {
772                "name": "gemma-4-e4b",
773                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
774                "source_url": "https://example.com/x.gguf"
775            }
776        }"#;
777        let f = write_config(json);
778        let cfg = ConfigFile::load(f.path()).unwrap();
779        let m = cfg.model.as_ref().expect("legacy model present");
780        assert!(cfg.auto_pull);
781        assert_eq!(cfg.n_ctx, 8192);
782        assert_eq!(cfg.n_gpu_layers, 0);
783        assert!(m.size_bytes.is_none());
784        assert!(cfg.models_home.is_none());
785        assert!(m.license.is_none());
786    }
787
788    #[test]
789    fn modelconfig_converts_to_fetch_modelspec() {
790        let cfg = ModelConfig {
791            name: "x".into(),
792            sha256: "abc".into(),
793            size_bytes: Some(42),
794            source_url: "https://e/x.gguf".into(),
795            license: Some("mit".into()),
796        };
797        let spec: crate::fetch::ModelSpec = (&cfg).into();
798        assert_eq!(spec.name, "x");
799        assert_eq!(spec.size_bytes, Some(42));
800        assert_eq!(spec.sha256_hex, "abc");
801        assert_eq!(spec.license.as_deref(), Some("mit"));
802    }
803
804    fn good_multi_backend_json() -> String {
805        r#"{
806            "models_home": "/tmp/inferd-models-home",
807            "backends": [
808                {
809                    "kind": "llamacpp",
810                    "name": "local-gemma",
811                    "model": {
812                        "name": "gemma-4-e4b",
813                        "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
814                        "source_url": "https://example.com/gemma.gguf"
815                    },
816                    "n_ctx": 8192,
817                    "n_gpu_layers": 35
818                },
819                {
820                    "kind": "openai-compat",
821                    "name": "anthropic-fallback",
822                    "base_url": "https://api.anthropic.com",
823                    "model": "claude-opus-4-7",
824                    "api_key_env": "ANTHROPIC_API_KEY"
825                }
826            ]
827        }"#
828        .to_string()
829    }
830
831    #[test]
832    fn load_multi_backend_config() {
833        let f = write_config(&good_multi_backend_json());
834        let cfg = ConfigFile::load(f.path()).unwrap();
835        assert!(cfg.model.is_none());
836        let list = cfg.backends.as_ref().expect("backends present");
837        assert_eq!(list.len(), 2);
838        match &list[0] {
839            BackendEntry::Llamacpp(e) => {
840                assert_eq!(e.name, "local-gemma");
841                assert_eq!(e.model.name, "gemma-4-e4b");
842                assert_eq!(e.n_ctx, 8192);
843                assert_eq!(e.n_gpu_layers, 35);
844            }
845            other => panic!("expected llamacpp, got {other:?}"),
846        }
847        match &list[1] {
848            BackendEntry::OpenaiCompat(e) => {
849                assert_eq!(e.name, "anthropic-fallback");
850                assert_eq!(e.base_url, "https://api.anthropic.com");
851                assert_eq!(e.model, "claude-opus-4-7");
852                assert_eq!(e.api_key_env.as_deref(), Some("ANTHROPIC_API_KEY"));
853                assert_eq!(e.timeout_secs, 300);
854            }
855            other => panic!("expected openai-compat, got {other:?}"),
856        }
857    }
858
859    #[test]
860    fn rejects_both_model_and_backends() {
861        let json = r#"{
862            "model": {
863                "name": "gemma-4-e4b",
864                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
865                "source_url": "https://example.com/x.gguf"
866            },
867            "backends": [
868                {
869                    "kind": "openai-compat",
870                    "name": "x",
871                    "base_url": "https://api.openai.com",
872                    "model": "gpt-4o-mini"
873                }
874            ]
875        }"#;
876        let f = write_config(json);
877        let err = ConfigFile::load(f.path()).unwrap_err();
878        match err {
879            ConfigError::Invalid(msg) => assert!(msg.contains("mutually exclusive")),
880            other => panic!("expected Invalid, got {other:?}"),
881        }
882    }
883
884    #[test]
885    fn rejects_neither_model_nor_backends() {
886        let json = r#"{ "auto_pull": true }"#;
887        let f = write_config(json);
888        let err = ConfigFile::load(f.path()).unwrap_err();
889        match err {
890            ConfigError::Invalid(msg) => assert!(msg.contains("must specify either")),
891            other => panic!("expected Invalid, got {other:?}"),
892        }
893    }
894
895    #[test]
896    fn rejects_empty_backends_list() {
897        let json = r#"{ "backends": [] }"#;
898        let f = write_config(json);
899        let err = ConfigFile::load(f.path()).unwrap_err();
900        match err {
901            ConfigError::Invalid(msg) => assert!(msg.contains("must not be empty")),
902            other => panic!("expected Invalid, got {other:?}"),
903        }
904    }
905
906    #[test]
907    fn rejects_duplicate_backend_names() {
908        let json = r#"{
909            "backends": [
910                {
911                    "kind": "openai-compat",
912                    "name": "dup",
913                    "base_url": "https://api.openai.com",
914                    "model": "gpt-4o-mini"
915                },
916                {
917                    "kind": "openai-compat",
918                    "name": "dup",
919                    "base_url": "https://api.anthropic.com",
920                    "model": "claude-opus-4-7"
921                }
922            ]
923        }"#;
924        let f = write_config(json);
925        let err = ConfigFile::load(f.path()).unwrap_err();
926        match err {
927            ConfigError::Invalid(msg) => assert!(msg.contains("duplicate")),
928            other => panic!("expected Invalid, got {other:?}"),
929        }
930    }
931
932    #[test]
933    fn rejects_openai_compat_without_base_url() {
934        let json = r#"{
935            "backends": [
936                {
937                    "kind": "openai-compat",
938                    "name": "x",
939                    "base_url": "",
940                    "model": "gpt-4o-mini"
941                }
942            ]
943        }"#;
944        let f = write_config(json);
945        let err = ConfigFile::load(f.path()).unwrap_err();
946        assert!(matches!(err, ConfigError::Invalid(_)));
947    }
948
949    #[test]
950    fn rejects_openai_compat_with_bad_scheme() {
951        let json = r#"{
952            "backends": [
953                {
954                    "kind": "openai-compat",
955                    "name": "x",
956                    "base_url": "ftp://api.openai.com",
957                    "model": "gpt-4o-mini"
958                }
959            ]
960        }"#;
961        let f = write_config(json);
962        let err = ConfigFile::load(f.path()).unwrap_err();
963        match err {
964            ConfigError::Invalid(msg) => assert!(msg.contains("http")),
965            other => panic!("expected Invalid, got {other:?}"),
966        }
967    }
968
969    #[test]
970    fn accepts_openai_compat_with_localhost_http() {
971        let json = r#"{
972            "backends": [
973                {
974                    "kind": "openai-compat",
975                    "name": "ollama",
976                    "base_url": "http://localhost:11434",
977                    "model": "llama3.1:8b"
978                }
979            ]
980        }"#;
981        let f = write_config(json);
982        let cfg = ConfigFile::load(f.path()).unwrap();
983        assert_eq!(cfg.resolved_backends().len(), 1);
984    }
985
986    #[test]
987    fn rejects_unknown_kind() {
988        let json = r#"{
989            "backends": [
990                {
991                    "kind": "future-thing-not-supported",
992                    "name": "x"
993                }
994            ]
995        }"#;
996        let f = write_config(json);
997        let err = ConfigFile::load(f.path()).unwrap_err();
998        assert!(matches!(err, ConfigError::Parse { .. }));
999    }
1000
1001    #[test]
1002    fn loads_bedrock_invoke_entry() {
1003        let json = r#"{
1004            "backends": [
1005                {
1006                    "kind": "bedrock-invoke",
1007                    "name": "bedrock-claude",
1008                    "region": "us-east-1",
1009                    "model_id": "anthropic.claude-3-5-sonnet-20241022-v2:0",
1010                    "bearer_token_env": "AWS_BEARER_TOKEN_BEDROCK"
1011                }
1012            ]
1013        }"#;
1014        let f = write_config(json);
1015        let cfg = ConfigFile::load(f.path()).unwrap();
1016        let list = cfg.backends.as_ref().unwrap();
1017        assert_eq!(list.len(), 1);
1018        match &list[0] {
1019            BackendEntry::BedrockInvoke(e) => {
1020                assert_eq!(e.name, "bedrock-claude");
1021                assert_eq!(e.region, "us-east-1");
1022                assert_eq!(e.model_id, "anthropic.claude-3-5-sonnet-20241022-v2:0");
1023                assert_eq!(
1024                    e.bearer_token_env.as_deref(),
1025                    Some("AWS_BEARER_TOKEN_BEDROCK")
1026                );
1027                assert!(e.endpoint.is_none());
1028                assert_eq!(e.timeout_secs, 300);
1029            }
1030            other => panic!("expected bedrock-invoke, got {other:?}"),
1031        }
1032    }
1033
1034    #[test]
1035    fn rejects_bedrock_invoke_without_region() {
1036        let json = r#"{
1037            "backends": [
1038                {
1039                    "kind": "bedrock-invoke",
1040                    "name": "x",
1041                    "region": "",
1042                    "model_id": "anthropic.claude-3-5-sonnet-20241022-v2:0"
1043                }
1044            ]
1045        }"#;
1046        let f = write_config(json);
1047        let err = ConfigFile::load(f.path()).unwrap_err();
1048        match err {
1049            ConfigError::Invalid(msg) => assert!(msg.contains("region")),
1050            other => panic!("expected Invalid, got {other:?}"),
1051        }
1052    }
1053
1054    #[test]
1055    fn rejects_bedrock_invoke_without_model_id() {
1056        let json = r#"{
1057            "backends": [
1058                {
1059                    "kind": "bedrock-invoke",
1060                    "name": "x",
1061                    "region": "us-east-1",
1062                    "model_id": ""
1063                }
1064            ]
1065        }"#;
1066        let f = write_config(json);
1067        let err = ConfigFile::load(f.path()).unwrap_err();
1068        match err {
1069            ConfigError::Invalid(msg) => assert!(msg.contains("model_id")),
1070            other => panic!("expected Invalid, got {other:?}"),
1071        }
1072    }
1073
1074    #[test]
1075    fn legacy_model_promotes_to_one_backend() {
1076        let f = write_config(&good_json());
1077        let cfg = ConfigFile::load(f.path()).unwrap();
1078        let resolved = cfg.resolved_backends();
1079        assert_eq!(resolved.len(), 1);
1080        match &resolved[0] {
1081            BackendEntry::Llamacpp(e) => {
1082                assert_eq!(e.name, "gemma-4-e4b");
1083                assert_eq!(e.n_ctx, 8192);
1084                assert_eq!(e.n_gpu_layers, 0);
1085            }
1086            other => panic!("expected llamacpp, got {other:?}"),
1087        }
1088    }
1089
1090    #[test]
1091    fn multi_backend_resolved_passes_through() {
1092        let f = write_config(&good_multi_backend_json());
1093        let cfg = ConfigFile::load(f.path()).unwrap();
1094        let resolved = cfg.resolved_backends();
1095        assert_eq!(resolved.len(), 2);
1096        assert_eq!(resolved[0].name(), "local-gemma");
1097        assert_eq!(resolved[1].name(), "anthropic-fallback");
1098    }
1099
1100    #[test]
1101    fn listen_block_absent_by_default() {
1102        let f = write_config(&good_json());
1103        let cfg = ConfigFile::load(f.path()).unwrap();
1104        assert!(cfg.listen.is_none());
1105    }
1106
1107    #[test]
1108    fn listen_block_carries_tcp_and_api_key_env() {
1109        let json = r#"{
1110            "model": {
1111                "name": "gemma-4-e4b",
1112                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
1113                "source_url": "https://example.com/x.gguf"
1114            },
1115            "listen": {
1116                "tcp": "127.0.0.1:9090",
1117                "tcp_v2": "127.0.0.1:9091",
1118                "api_key_env": "INFERD_TCP_API_KEY"
1119            }
1120        }"#;
1121        let f = write_config(json);
1122        let cfg = ConfigFile::load(f.path()).unwrap();
1123        let listen = cfg.listen.as_ref().expect("listen present");
1124        assert_eq!(listen.tcp.as_deref(), Some("127.0.0.1:9090"));
1125        assert_eq!(listen.tcp_v2.as_deref(), Some("127.0.0.1:9091"));
1126        assert_eq!(listen.api_key_env.as_deref(), Some("INFERD_TCP_API_KEY"));
1127    }
1128
1129    #[test]
1130    fn llamacpp_entry_embed_defaults_off() {
1131        let f = write_config(&good_multi_backend_json());
1132        let cfg = ConfigFile::load(f.path()).unwrap();
1133        let list = cfg.backends.as_ref().unwrap();
1134        match &list[0] {
1135            BackendEntry::Llamacpp(e) => {
1136                assert!(!e.embed);
1137                assert!(e.embed_pooling.is_none());
1138                assert_eq!(e.embed_n_ctx, 2048);
1139            }
1140            other => panic!("expected llamacpp, got {other:?}"),
1141        }
1142    }
1143
1144    #[test]
1145    fn llamacpp_entry_carries_embed_fields() {
1146        let json = r#"{
1147            "backends": [
1148                {
1149                    "kind": "llamacpp",
1150                    "name": "embeddings",
1151                    "embed": true,
1152                    "embed_pooling": 1,
1153                    "embed_n_ctx": 1024,
1154                    "model": {
1155                        "name": "embeddinggemma-300m",
1156                        "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
1157                        "source_url": "https://example.com/embed.gguf"
1158                    }
1159                }
1160            ]
1161        }"#;
1162        let f = write_config(json);
1163        let cfg = ConfigFile::load(f.path()).unwrap();
1164        let list = cfg.backends.as_ref().unwrap();
1165        match &list[0] {
1166            BackendEntry::Llamacpp(e) => {
1167                assert!(e.embed);
1168                assert_eq!(e.embed_pooling, Some(1));
1169                assert_eq!(e.embed_n_ctx, 1024);
1170            }
1171            other => panic!("expected llamacpp, got {other:?}"),
1172        }
1173    }
1174
1175    #[test]
1176    fn legacy_promotion_keeps_embed_off() {
1177        let f = write_config(&good_json());
1178        let cfg = ConfigFile::load(f.path()).unwrap();
1179        let list = cfg.resolved_backends();
1180        match &list[0] {
1181            BackendEntry::Llamacpp(e) => {
1182                assert!(!e.embed);
1183                assert!(e.embed_pooling.is_none());
1184                assert_eq!(e.embed_n_ctx, 2048);
1185            }
1186            other => panic!("expected llamacpp, got {other:?}"),
1187        }
1188    }
1189
1190    #[test]
1191    fn listen_rejects_empty_tcp() {
1192        let json = r#"{
1193            "model": {
1194                "name": "gemma-4-e4b",
1195                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
1196                "source_url": "https://example.com/x.gguf"
1197            },
1198            "listen": { "tcp": "   " }
1199        }"#;
1200        let f = write_config(json);
1201        let err = ConfigFile::load(f.path()).unwrap_err();
1202        match err {
1203            ConfigError::Invalid(msg) => assert!(msg.contains("listen.tcp")),
1204            other => panic!("expected Invalid, got {other:?}"),
1205        }
1206    }
1207}