Skip to main content

innate_core/
settings.rs

1use std::path::{Path, PathBuf};
2
3use serde::{Deserialize, Serialize};
4
5use crate::errors::{InnateError, Result};
6
7pub const SCHEMA_JSONC: &str = include_str!("settings.schema.jsonc");
8
/// Serde default for the `$schema` field: the URL of the published settings schema.
fn default_schema_path() -> String {
    const SCHEMA_URL: &str =
        "https://raw.githubusercontent.com/vima-tech/Innate/main/settings.schema.jsonc";
    SCHEMA_URL.to_owned()
}
12
13// ---------------------------------------------------------------------------
14// Top-level Settings
15// ---------------------------------------------------------------------------
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Settings {
19    /// JSON Schema reference — always present; written alongside settings.json.
20    #[serde(rename = "$schema", default = "default_schema_path")]
21    pub schema: String,
22
23    #[serde(default, skip_serializing_if = "Option::is_none")]
24    pub llm: Option<LlmConfig>,
25
26    #[serde(default, skip_serializing_if = "Option::is_none")]
27    pub embedding: Option<EmbeddingConfig>,
28
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub daemon: Option<DaemonConfig>,
31
32    #[serde(default, skip_serializing_if = "Option::is_none")]
33    pub backup: Option<BackupConfig>,
34}
35
36impl Default for Settings {
37    fn default() -> Self {
38        Self {
39            schema: default_schema_path(),
40            llm: None,
41            embedding: None,
42            daemon: None,
43            backup: None,
44        }
45    }
46}
47
48// ---------------------------------------------------------------------------
49// LLM (generative) config — used by LlmDistiller
50// ---------------------------------------------------------------------------
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct LlmConfig {
54    /// "openai" | "anthropic"
55    pub provider: String,
56
57    #[serde(default, skip_serializing_if = "Option::is_none")]
58    pub base_url: Option<String>,
59
60    pub model_id: String,
61
62    /// API key (env var override: INNATE_LLM_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY)
63    #[serde(default, skip_serializing_if = "Option::is_none")]
64    pub api_key: Option<String>,
65}
66
67impl LlmConfig {
68    /// Resolved API key: settings file → env var fallback.
69    pub fn resolved_api_key(&self) -> Option<String> {
70        if let Some(ref k) = self.api_key {
71            if !k.is_empty() {
72                return Some(k.clone());
73            }
74        }
75        // Generic override
76        if let Ok(k) = std::env::var("INNATE_LLM_API_KEY") {
77            if !k.is_empty() {
78                return Some(k);
79            }
80        }
81        match self.provider.as_str() {
82            "anthropic" => std::env::var("ANTHROPIC_API_KEY")
83                .ok()
84                .filter(|k| !k.is_empty()),
85            _ => std::env::var("OPENAI_API_KEY")
86                .ok()
87                .filter(|k| !k.is_empty()),
88        }
89    }
90
91    pub fn resolved_base_url(&self) -> String {
92        if let Some(ref u) = self.base_url {
93            if !u.is_empty() {
94                return u.trim_end_matches('/').to_string();
95            }
96        }
97        match self.provider.as_str() {
98            "anthropic" => "https://api.anthropic.com".to_string(),
99            _ => "https://api.openai.com/v1".to_string(),
100        }
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Embedding config — used by LlmEmbeddingProvider
106// ---------------------------------------------------------------------------
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct EmbeddingConfig {
110    /// Only "openai" format is supported (Anthropic has no embedding API).
111    #[serde(default = "default_openai")]
112    pub provider: String,
113
114    #[serde(default, skip_serializing_if = "Option::is_none")]
115    pub base_url: Option<String>,
116
117    pub model_id: String,
118
119    #[serde(default, skip_serializing_if = "Option::is_none")]
120    pub api_key: Option<String>,
121
122    /// Embedding output dimension (model-specific; defaults to 1536 for text-embedding-3-small).
123    #[serde(default = "default_embed_dim")]
124    pub dim: usize,
125}
126
/// Serde default for `EmbeddingConfig::provider`.
fn default_openai() -> String {
    String::from("openai")
}
130
/// Serde default for `EmbeddingConfig::dim`.
fn default_embed_dim() -> usize {
    // Dimension documented on the `dim` field for text-embedding-3-small.
    const TEXT_EMBEDDING_3_SMALL_DIM: usize = 1536;
    TEXT_EMBEDDING_3_SMALL_DIM
}
134
135impl EmbeddingConfig {
136    pub fn resolved_api_key(&self) -> Option<String> {
137        if let Some(ref k) = self.api_key {
138            if !k.is_empty() {
139                return Some(k.clone());
140            }
141        }
142        if let Ok(k) = std::env::var("INNATE_LLM_API_KEY") {
143            if !k.is_empty() {
144                return Some(k);
145            }
146        }
147        std::env::var("OPENAI_API_KEY")
148            .ok()
149            .filter(|k| !k.is_empty())
150    }
151
152    pub fn resolved_base_url(&self) -> String {
153        self.base_url
154            .as_deref()
155            .filter(|u| !u.is_empty())
156            .map(|u| u.trim_end_matches('/').to_string())
157            .unwrap_or_else(|| "https://api.openai.com/v1".to_string())
158    }
159}
160
161// ---------------------------------------------------------------------------
162// Daemon config
163// ---------------------------------------------------------------------------
164
165#[derive(Debug, Default, Clone, Serialize, Deserialize)]
166pub struct DaemonConfig {
167    /// Directories the daemon watches for .log files.
168    #[serde(default)]
169    pub watch_dirs: Vec<String>,
170
171    /// Automatically spawn the daemon when the MCP server starts (default: true).
172    #[serde(default = "default_true")]
173    pub auto_start: bool,
174}
175
176fn default_true() -> bool {
177    true
178}
179
180// ---------------------------------------------------------------------------
181// Backup config
182// ---------------------------------------------------------------------------
183
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct BackupConfig {
186    /// Master switch — backup is disabled by default. Set to true to enable.
187    #[serde(default)]
188    pub enable: bool,
189
190    #[serde(default, skip_serializing_if = "Option::is_none")]
191    pub r2: Option<R2Config>,
192
193    /// Auto-backup interval in hours (default: 24).
194    #[serde(default = "default_backup_interval_hours")]
195    pub auto_backup_interval_hours: u64,
196
197    /// Delete backups older than this many days (default: 60).
198    #[serde(default = "default_retention_days")]
199    pub retention_days: u64,
200
201    /// Always keep at least this many backup files regardless of age (default: 5).
202    #[serde(default = "default_min_backups")]
203    pub min_backups: usize,
204}
205
206impl Default for BackupConfig {
207    fn default() -> Self {
208        Self {
209            enable: false,
210            r2: None,
211            auto_backup_interval_hours: default_backup_interval_hours(),
212            retention_days: default_retention_days(),
213            min_backups: default_min_backups(),
214        }
215    }
216}
217
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct R2Config {
220    /// Cloudflare account ID (found in the R2 dashboard URL).
221    pub account_id: String,
222
223    /// R2 bucket name.
224    pub bucket: String,
225
226    /// R2 API token access key ID. Env override: INNATE_R2_ACCESS_KEY_ID.
227    #[serde(default, skip_serializing_if = "Option::is_none")]
228    pub access_key_id: Option<String>,
229
230    /// R2 API token secret access key. Env override: INNATE_R2_SECRET_ACCESS_KEY.
231    #[serde(default, skip_serializing_if = "Option::is_none")]
232    pub secret_access_key: Option<String>,
233
234    /// Optional key prefix (e.g. "innate/"). Default: "".
235    #[serde(default)]
236    pub prefix: String,
237}
238
239impl R2Config {
240    pub fn resolved_access_key_id(&self) -> Option<String> {
241        if let Some(ref k) = self.access_key_id {
242            if !k.is_empty() {
243                return Some(k.clone());
244            }
245        }
246        std::env::var("INNATE_R2_ACCESS_KEY_ID")
247            .ok()
248            .filter(|k| !k.is_empty())
249    }
250
251    pub fn resolved_secret_access_key(&self) -> Option<String> {
252        if let Some(ref k) = self.secret_access_key {
253            if !k.is_empty() {
254                return Some(k.clone());
255            }
256        }
257        std::env::var("INNATE_R2_SECRET_ACCESS_KEY")
258            .ok()
259            .filter(|k| !k.is_empty())
260    }
261}
262
/// Serde default for `BackupConfig::auto_backup_interval_hours`.
fn default_backup_interval_hours() -> u64 {
    const ONE_DAY_IN_HOURS: u64 = 24;
    ONE_DAY_IN_HOURS
}
266
/// Serde default for `BackupConfig::retention_days`.
fn default_retention_days() -> u64 {
    const RETENTION_DAYS: u64 = 60;
    RETENTION_DAYS
}
270
/// Serde default for `BackupConfig::min_backups`.
fn default_min_backups() -> usize {
    const MIN_BACKUPS: usize = 5;
    MIN_BACKUPS
}
274
275// ---------------------------------------------------------------------------
276// Load / save
277// ---------------------------------------------------------------------------
278
279/// Returns `~/.innate/settings.json`.
280pub fn settings_path() -> PathBuf {
281    crate::paths::settings_path()
282}
283
284/// Load settings from `~/.innate/settings.json`.
285///
286/// Fail-closed: **only an absent file** falls back to `Settings::default()`. A
287/// present-but-unreadable or unparseable file returns `Err` rather than silently
288/// degrading to defaults (which would drop the user's LLM/embedding config and
289/// run on the Dummy provider). Callers that open the knowledge base propagate
290/// this error so a corrupt config surfaces instead of producing a Dummy DB.
291pub fn load() -> Result<Settings> {
292    load_from(&settings_path())
293}
294
295pub fn load_from(path: &Path) -> Result<Settings> {
296    let text = match std::fs::read_to_string(path) {
297        Ok(text) => text,
298        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Settings::default()),
299        Err(e) => {
300            return Err(InnateError::Other(format!(
301                "cannot read settings file {}: {e}",
302                path.display()
303            )))
304        }
305    };
306    serde_json::from_str(&text).map_err(|e| {
307        InnateError::Other(format!(
308            "{} is present but could not be parsed ({e}); fix the file \
309             (only an absent settings file falls back to defaults)",
310            path.display()
311        ))
312    })
313}
314
315/// Write settings to `~/.innate/settings.json` with mode 0600.
316pub fn save(settings: &Settings) -> anyhow::Result<()> {
317    let path = settings_path();
318    save_to(settings, &path)
319}
320
321pub fn save_to(settings: &Settings, path: &Path) -> anyhow::Result<()> {
322    if let Some(parent) = path.parent() {
323        std::fs::create_dir_all(parent)?;
324        // Write the schema file alongside settings.json so $schema resolves locally.
325        let schema_path = parent.join("settings.schema.jsonc");
326        let _ = std::fs::write(&schema_path, SCHEMA_JSONC);
327    }
328    let json = serde_json::to_string_pretty(settings)?;
329    std::fs::write(path, &json)?;
330    // 0600 on Unix
331    #[cfg(unix)]
332    {
333        use std::os::unix::fs::PermissionsExt;
334        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))?;
335    }
336    Ok(())
337}
338
339/// Expand `~` at the start of a path string to the home directory.
340pub fn expand_tilde(path: &str) -> String {
341    if path.starts_with("~/") || path == "~" {
342        let home = dirs_next::home_dir()
343            .map(|h| h.display().to_string())
344            .unwrap_or_default();
345        path.replacen('~', &home, 1)
346    } else {
347        path.to_string()
348    }
349}
350
351/// Return expanded watch directories from daemon config.
352pub fn resolved_watch_dirs(settings: &Settings) -> Vec<String> {
353    settings
354        .daemon
355        .as_ref()
356        .map(|d| d.watch_dirs.iter().map(|p| expand_tilde(p)).collect())
357        .unwrap_or_default()
358}
359
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// `load_from` must fall back to defaults only when the file is absent, fail
    /// on a corrupt file, and actually parse the contents of a valid file.
    #[test]
    fn load_is_fail_closed_only_absent_falls_back() {
        // Absent file → default (the one allowed fallback).
        let missing = std::path::Path::new("/nonexistent/innate/settings.json");
        let defaults = load_from(missing)
            .unwrap_or_else(|_| panic!("an absent settings file must fall back to defaults"));
        assert_eq!(defaults.schema, default_schema_path());
        assert!(defaults.llm.is_none());
        assert!(defaults.embedding.is_none());

        // Present-but-corrupt file → Err (no silent degradation to Dummy).
        let mut bad = tempfile::NamedTempFile::new().unwrap();
        bad.write_all(b"{ this is not valid json").unwrap();
        assert!(load_from(bad.path()).is_err());

        // Valid file → parsed. The key is `$schema` (the serde rename); a plain
        // `schema` key would be ignored as unknown and prove nothing about parsing.
        let mut good = tempfile::NamedTempFile::new().unwrap();
        good.write_all(br#"{"$schema":"1"}"#).unwrap();
        let parsed = load_from(good.path())
            .unwrap_or_else(|_| panic!("a valid settings file must parse"));
        assert_eq!(parsed.schema, "1");
    }
}