Skip to main content

engram_server/
config.rs

//! Server backend configuration — chooses LLM and embedding providers at startup.
//!
//! The Engram library ships with multiple LLM backends (`MockLlmClient`,
//! `OllamaLlmClient`, `OpenAiLlmClient`, `AnthropicLlmClient`, `GoogleLlmClient`,
//! `CommandLlmClient`) and two embedding backends (`MockEmbeddingProvider`,
//! `OllamaEmbeddingProvider`). This module lets the server binary pick between
//! them from CLI flags or environment variables, so one Docker image can run in
//! any mode without a rebuild.
//!
//! Defaults favour real providers. `--llm-provider ollama` (the default) uses
//! Ollama at `http://localhost:11434` with `llama3.2` for completions.
//! Pick `openai`, `anthropic`, or `google` and supply an API key to route
//! extraction through a hosted model.
14
15use engram::embedding::{EmbeddingProvider, MockEmbeddingProvider};
16use engram::embedding_ollama::OllamaEmbeddingProvider;
17use engram::llm::{LlmClient, MockLlmClient};
18use engram::llm_anthropic::AnthropicLlmClient;
19use engram::llm_command::CommandLlmClient;
20use engram::llm_google::GoogleLlmClient;
21use engram::llm_ollama::OllamaLlmClient;
22use engram::llm_openai::OpenAiLlmClient;
23
/// Which LLM backend the server uses for fact extraction.
///
/// `Debug` is implemented by hand (directly below the enum) instead of being
/// derived, so that `api_key` values are never written out by `{:?}`.
#[derive(Clone)]
pub enum LlmBackend {
    /// Deterministic mock that always returns `{"facts": []}`.
    /// Use only for tests — `memory_add` will produce zero facts.
    Mock,
    /// Ollama-backed chat-completion client (local, free) — uses Ollama's
    /// native `/api/chat`.
    Ollama { base_url: String, model: String },
    /// Any OpenAI chat-completions-compatible endpoint — OpenAI itself,
    /// Azure OpenAI, Groq, Together, Mistral, DeepSeek, Perplexity, OpenRouter,
    /// Fireworks, vLLM, LM Studio, LocalAI, or Ollama's `/v1` compat layer.
    /// Switch providers by changing `base_url` alone.
    OpenAiCompatible {
        base_url: String,
        api_key: String,
        model: String,
    },
    /// Anthropic Claude via the Messages API.
    Anthropic {
        base_url: String,
        api_key: String,
        model: String,
    },
    /// Google Gemini via the `generateContent` API.
    Google {
        base_url: String,
        api_key: String,
        model: String,
    },
    /// Shell-out extensibility escape hatch. Runs a user-supplied command
    /// per extraction call, writes a JSON request to stdin, reads JSON from
    /// stdout. See `engram::llm_command` for the stdin/stdout contract.
    Command { command: String, timeout_secs: u64 },
}

impl std::fmt::Debug for LlmBackend {
    /// Formats the backend like a derived `Debug` would, except that every
    /// `api_key` field is rendered as the placeholder `"<redacted>"`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Shared formatter for the variants that carry an API key; the key
        /// itself is deliberately not a parameter, so it cannot be printed.
        fn keyed(
            f: &mut std::fmt::Formatter<'_>,
            variant: &str,
            base_url: &str,
            model: &str,
        ) -> std::fmt::Result {
            f.debug_struct(variant)
                .field("base_url", &base_url)
                .field("api_key", &"<redacted>")
                .field("model", &model)
                .finish()
        }

        match self {
            Self::Mock => f.write_str("Mock"),
            Self::Ollama { base_url, model } => f
                .debug_struct("Ollama")
                .field("base_url", base_url)
                .field("model", model)
                .finish(),
            Self::OpenAiCompatible {
                base_url, model, ..
            } => keyed(f, "OpenAiCompatible", base_url, model),
            Self::Anthropic {
                base_url, model, ..
            } => keyed(f, "Anthropic", base_url, model),
            Self::Google {
                base_url, model, ..
            } => keyed(f, "Google", base_url, model),
            Self::Command {
                command,
                timeout_secs,
            } => f
                .debug_struct("Command")
                .field("command", command)
                .field("timeout_secs", timeout_secs)
                .finish(),
        }
    }
}
59
60impl LlmBackend {
61    /// Build a fresh boxed LLM client. `Memory::add_messages` takes
62    /// `Box<dyn LlmClient>` by value, so each extraction call needs its own.
63    pub fn build(&self) -> Box<dyn LlmClient> {
64        match self {
65            Self::Mock => Box::new(MockLlmClient::new(vec![serde_json::json!({"facts": []})])),
66            Self::Ollama { base_url, model } => Box::new(OllamaLlmClient::with_config(
67                base_url.clone(),
68                model.clone(),
69            )),
70            Self::OpenAiCompatible {
71                base_url,
72                api_key,
73                model,
74            } => Box::new(OpenAiLlmClient::with_config(
75                base_url.clone(),
76                api_key.clone(),
77                model.clone(),
78            )),
79            Self::Anthropic {
80                base_url,
81                api_key,
82                model,
83            } => Box::new(AnthropicLlmClient::with_config(
84                base_url.clone(),
85                api_key.clone(),
86                model.clone(),
87            )),
88            Self::Google {
89                base_url,
90                api_key,
91                model,
92            } => Box::new(GoogleLlmClient::with_config(
93                base_url.clone(),
94                api_key.clone(),
95                model.clone(),
96            )),
97            Self::Command {
98                command,
99                timeout_secs,
100            } => Box::new(CommandLlmClient::new(command.clone()).with_timeout(*timeout_secs)),
101        }
102    }
103
104    /// Human-readable description for startup logs. Never includes API keys
105    /// or command contents longer than 60 characters.
106    pub fn describe(&self) -> String {
107        match self {
108            Self::Mock => "mock (returns empty facts)".to_string(),
109            Self::Ollama { base_url, model } => format!("ollama {model} at {base_url}"),
110            Self::OpenAiCompatible {
111                base_url, model, ..
112            } => format!("openai-compatible {model} at {base_url}"),
113            Self::Anthropic {
114                base_url, model, ..
115            } => format!("anthropic {model} at {base_url}"),
116            Self::Google {
117                base_url, model, ..
118            } => format!("google {model} at {base_url}"),
119            Self::Command {
120                command,
121                timeout_secs,
122            } => {
123                let shown: String = command.chars().take(60).collect();
124                format!("command `{shown}` (timeout={timeout_secs}s)")
125            }
126        }
127    }
128}
129
/// Which embedding backend the server uses for vector search.
///
/// Every variant carries `dims`, the vector dimensionality it is configured
/// with.
#[derive(Clone, Debug)]
pub enum EmbeddingBackend {
    /// Deterministic byte-cycled mock. Not semantically meaningful.
    Mock { dims: usize },
    /// Ollama-backed embedding provider.
    Ollama {
        base_url: String,
        model: String,
        dims: usize,
    },
}
142
143impl EmbeddingBackend {
144    /// Build the embedding provider used by `Memory::open`.
145    pub fn build(&self) -> Box<dyn EmbeddingProvider> {
146        match self {
147            Self::Mock { dims } => Box::new(MockEmbeddingProvider::new(*dims)),
148            Self::Ollama {
149                base_url,
150                model,
151                dims,
152            } => Box::new(OllamaEmbeddingProvider::with_config(base_url, model, *dims)),
153        }
154    }
155
156    pub fn dimensions(&self) -> usize {
157        match self {
158            Self::Mock { dims } => *dims,
159            Self::Ollama { dims, .. } => *dims,
160        }
161    }
162
163    pub fn describe(&self) -> String {
164        match self {
165            Self::Mock { dims } => format!("mock ({dims}d)"),
166            Self::Ollama {
167                base_url,
168                model,
169                dims,
170            } => format!("ollama {model} at {base_url} ({dims}d)"),
171        }
172    }
173}
174
175/// Full backend configuration. Parsed once at startup and cloned into the
176/// MCP handler closures and REST `AppState`.
177#[derive(Clone, Debug)]
178pub struct BackendConfig {
179    pub llm: LlmBackend,
180    pub embedding: EmbeddingBackend,
181}
182
183impl BackendConfig {
184    /// Convenience for tests: both backends mocked at 64 dimensions.
185    pub fn mock() -> Self {
186        Self {
187            llm: LlmBackend::Mock,
188            embedding: EmbeddingBackend::Mock { dims: 64 },
189        }
190    }
191
192    /// True if either backend is a mock. Used to print a startup warning.
193    pub fn is_mock(&self) -> bool {
194        matches!(self.llm, LlmBackend::Mock)
195            || matches!(self.embedding, EmbeddingBackend::Mock { .. })
196    }
197}