// archelon_core/user_config.rs
1use std::path::PathBuf;
2
3use indexmap::IndexMap;
4use serde::{Deserialize, Serialize};
5
6use crate::error::{Error, Result};
7
8/// Contents of `$XDG_CONFIG_HOME/archelon/config.toml`.
9///
10/// This is a user-level (host-level) config that controls machine-specific
11/// settings such as caching backends. It is intentionally separate from the
12/// per-journal `.archelon/config.toml` so that the same journal can be shared
13/// across machines with different hardware capabilities.
14#[derive(Debug, Clone, Serialize, Deserialize, Default)]
15pub struct UserConfig {
16 #[serde(default)]
17 pub cache: CacheConfig,
18}
19
20impl UserConfig {
21 /// Canonical path to the user config file.
22 ///
23 /// Resolves to `$XDG_CONFIG_HOME/archelon/config.toml`
24 /// (or `~/.config/archelon/config.toml` when `XDG_CONFIG_HOME` is not set).
25 pub fn path() -> PathBuf {
26 xdg_config_home().join("archelon").join("config.toml")
27 }
28
29 /// Load the user config from disk.
30 ///
31 /// Returns the default config (all fields at their defaults) if the file
32 /// does not exist.
33 pub fn load() -> Result<Self> {
34 let path = Self::path();
35 if !path.exists() {
36 return Ok(UserConfig::default());
37 }
38 let contents = std::fs::read_to_string(&path)?;
39 toml::from_str(&contents).map_err(|e| Error::InvalidConfig(e.to_string()))
40 }
41}
42
43/// Cache-related configuration (`[cache]` section in the user config).
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct CacheConfig {
46 /// Vector DB backend used for approximate (semantic) text search.
47 ///
48 /// Defaults to [`VectorDb::None`], which disables vector search entirely.
49 /// Changing this requires a text embedding provider to also be configured
50 /// in the `[cache.embedding]` section.
51 #[serde(default)]
52 pub vector_db: VectorDb,
53
54 /// Text embedding provider settings.
55 ///
56 /// Required when `vector_db` is not [`VectorDb::None`].
57 /// When `vector_db = "none"` this section is ignored.
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub embedding: Option<EmbeddingConfig>,
60
61 /// Unknown fields preserved for round-trip TOML compatibility.
62 #[serde(flatten)]
63 pub extra: IndexMap<String, toml::Value>,
64}
65
66impl Default for CacheConfig {
67 fn default() -> Self {
68 CacheConfig {
69 vector_db: VectorDb::default(),
70 embedding: None,
71 extra: IndexMap::new(),
72 }
73 }
74}
75
76/// Vector database backend for approximate (semantic) text search.
77///
78/// Select based on what your host machine supports:
79///
80/// | Variant | Description |
81/// |--------------|----------------------------------------------------------|
82/// | `none` | Vector search disabled (default, no extra dependencies) |
83/// | `sqlite_vec` | sqlite-vec extension, integrated with the SQLite cache |
84/// | `lancedb` | LanceDB, suitable for multimodal / larger-scale use |
85#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
86#[serde(rename_all = "snake_case")]
87pub enum VectorDb {
88 /// Vector search is disabled. No embedding model is required.
89 #[default]
90 None,
91 /// Use the sqlite-vec extension, stored inside the existing SQLite cache
92 /// database. Lightweight and requires no additional infrastructure.
93 SqliteVec,
94 /// Use LanceDB for vector storage. More capable and suitable for future
95 /// multimodal embeddings, but requires a separate data directory.
96 #[serde(rename = "lancedb")]
97 LanceDb,
98}
99
100impl VectorDb {
101 /// Human-readable name shown in `config show` output.
102 pub fn as_str(self) -> &'static str {
103 match self {
104 VectorDb::None => "none",
105 VectorDb::SqliteVec => "sqlite_vec",
106 VectorDb::LanceDb => "lancedb",
107 }
108 }
109}
110
111/// Text embedding provider configuration (`[cache.embedding]` subsection).
112#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct EmbeddingConfig {
114 /// Embedding provider identifier.
115 ///
116 /// - `"openai"` — OpenAI-compatible REST API
117 /// - `"ollama"` — local Ollama server
118 /// - `"fastembed"` — local ONNX inference, no server required
119 pub provider: String,
120
121 /// Model name understood by the provider.
122 ///
123 /// - OpenAI: `"text-embedding-3-small"`, `"text-embedding-3-large"`, …
124 /// - Ollama: `"nomic-embed-text"`, `"mxbai-embed-large"`, …
125 /// - fastembed: `"AllMiniLML6V2"` (384), `"BGESmallENV15"` (384),
126 /// `"BGEBaseENV15"` (768), `"NomicEmbedTextV1"` (768), …
127 pub model: String,
128
129 /// Name of the environment variable that holds the API key.
130 ///
131 /// Used by OpenAI-compatible providers. Defaults to `OPENAI_API_KEY` when
132 /// omitted. Not required for local providers such as Ollama.
133 #[serde(skip_serializing_if = "Option::is_none")]
134 pub api_key_env: Option<String>,
135
136 /// Base URL of the embedding API endpoint.
137 ///
138 /// Required for local providers (e.g. `"http://localhost:11434"` for Ollama).
139 /// OpenAI-compatible providers default to the official API endpoint when this
140 /// is omitted.
141 #[serde(skip_serializing_if = "Option::is_none")]
142 pub base_url: Option<String>,
143
144 /// Output vector dimension of the model.
145 ///
146 /// Required when `vector_db = "sqlite_vec"`. Must exactly match the model's
147 /// actual output size; a mismatch will cause the vector table to be recreated.
148 ///
149 /// Common values:
150 /// - `1536` — `text-embedding-3-small` (OpenAI)
151 /// - `3072` — `text-embedding-3-large` (OpenAI)
152 /// - `768` — `nomic-embed-text` (Ollama)
153 #[serde(skip_serializing_if = "Option::is_none")]
154 pub dimension: Option<u32>,
155
156 /// Unknown fields preserved for round-trip TOML compatibility.
157 #[serde(flatten)]
158 pub extra: IndexMap<String, toml::Value>,
159}
160
/// Resolve the XDG config base directory.
///
/// Follows the XDG Base Directory specification: use `$XDG_CONFIG_HOME` when
/// it is set to a non-empty **absolute** path (the spec requires relative
/// values to be considered invalid and ignored), otherwise fall back to
/// `$HOME/.config`. As a last resort (no usable `HOME`, e.g. stripped-down
/// service environments) returns the system temp dir so callers always get
/// *some* writable location.
fn xdg_config_home() -> PathBuf {
    if let Ok(dir) = std::env::var("XDG_CONFIG_HOME") {
        // Spec: "All paths set in these environment variables must be
        // absolute. If an implementation encounters a relative path ... it
        // should consider the path invalid and ignore it."
        if !dir.is_empty() && std::path::Path::new(&dir).is_absolute() {
            return PathBuf::from(dir);
        }
    }
    if let Ok(home) = std::env::var("HOME") {
        return PathBuf::from(home).join(".config");
    }
    std::env::temp_dir()
}