Skip to main content

claw_vector/
config.rs

// config.rs — VectorConfig with full builder pattern, defaults, validation, and env loading.
2use serde::{Deserialize, Serialize};
3use std::{num::NonZeroUsize, path::PathBuf};
4
5use crate::error::{VectorError, VectorResult};
6
7/// Runtime configuration for the claw-vector engine.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct VectorConfig {
10    /// Path to the SQLite database file for vector metadata.
11    pub db_path: PathBuf,
12    /// Directory for HNSW index files and mmap vector files.
13    pub index_dir: PathBuf,
14    /// gRPC endpoint of the Python embedding service (e.g. `"http://localhost:50051"`).
15    pub embedding_service_url: String,
16    /// Default embedding dimensionality (384 = all-MiniLM-L6-v2).
17    pub default_dimensions: usize,
18    /// HNSW `ef_construction` build parameter (higher → better recall, slower build).
19    pub ef_construction: usize,
20    /// HNSW `M` connections parameter (higher → better recall, more memory).
21    pub m_connections: usize,
22    /// HNSW `ef` search parameter (higher → better recall, slower search).
23    pub ef_search: usize,
24    /// Maximum number of vectors per index.
25    pub max_elements: usize,
26    /// Number of embedding LRU cache entries.
27    pub cache_size: usize,
28    /// Maximum number of texts per embedding gRPC call.
29    pub batch_size: usize,
30    /// Timeout for embedding gRPC calls in milliseconds.
31    pub embedding_timeout_ms: u64,
32    /// Number of rayon threads for parallel index operations.
33    pub num_threads: usize,
34    /// Default workspace id used when callers do not provide one explicitly.
35    pub default_workspace_id: String,
36    /// SQLite path for API key storage.
37    pub api_key_store_path: PathBuf,
38    /// Default request budget per workspace in requests/second.
39    pub rate_limit_rps: u32,
40    /// Require authentication for inbound API requests.
41    pub require_auth: bool,
42}
43
44impl Default for VectorConfig {
45    fn default() -> Self {
46        VectorConfig {
47            db_path: PathBuf::from("claw_vector.db"),
48            index_dir: PathBuf::from("claw_vector_indices"),
49            embedding_service_url: "http://localhost:50051".into(),
50            default_dimensions: 384,
51            ef_construction: 200,
52            m_connections: 16,
53            ef_search: 50,
54            max_elements: 1_000_000,
55            cache_size: 10_000,
56            batch_size: 64,
57            embedding_timeout_ms: 5_000,
58            num_threads: std::thread::available_parallelism()
59                .unwrap_or(NonZeroUsize::new(4).unwrap())
60                .get(),
61            default_workspace_id: "default".into(),
62            api_key_store_path: PathBuf::from("claw_vector_auth.db"),
63            rate_limit_rps: 100,
64            require_auth: !cfg!(test),
65        }
66    }
67}
68
69impl VectorConfig {
70    /// Return a new builder initialised with the default configuration.
71    pub fn builder() -> VectorConfigBuilder {
72        VectorConfigBuilder::default()
73    }
74
75    /// Load configuration from environment variables, falling back to defaults.
76    ///
77    /// Recognised variables:
78    /// - `CLAW_VECTOR_DB_PATH`
79    /// - `CLAW_VECTOR_INDEX_DIR`
80    /// - `CLAW_EMBEDDING_URL`
81    /// - `CLAW_DEFAULT_WORKSPACE_ID`
82    /// - `CLAW_API_KEY_STORE_PATH`
83    /// - `CLAW_RATE_LIMIT_RPS`
84    /// - `CLAW_REQUIRE_AUTH`
85    pub fn from_env() -> Self {
86        let mut cfg = VectorConfig::default();
87        if let Ok(v) = std::env::var("CLAW_VECTOR_DB_PATH") {
88            cfg.db_path = PathBuf::from(v);
89        }
90        if let Ok(v) = std::env::var("CLAW_VECTOR_INDEX_DIR") {
91            cfg.index_dir = PathBuf::from(v);
92        }
93        if let Ok(v) = std::env::var("CLAW_EMBEDDING_URL") {
94            cfg.embedding_service_url = v;
95        }
96        if let Ok(v) = std::env::var("CLAW_DEFAULT_WORKSPACE_ID") {
97            cfg.default_workspace_id = v;
98        }
99        if let Ok(v) = std::env::var("CLAW_API_KEY_STORE_PATH") {
100            cfg.api_key_store_path = PathBuf::from(v);
101        }
102        if let Ok(v) = std::env::var("CLAW_RATE_LIMIT_RPS") {
103            if let Ok(parsed) = v.parse::<u32>() {
104                cfg.rate_limit_rps = parsed.max(1);
105            }
106        }
107        if let Ok(v) = std::env::var("CLAW_REQUIRE_AUTH") {
108            cfg.require_auth = matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "yes");
109        }
110        cfg
111    }
112}
113
// ─── Builder ─────────────────────────────────────────────────────────────────
115
116/// Fluent builder for [`VectorConfig`].
117#[derive(Debug, Clone, Default)]
118pub struct VectorConfigBuilder {
119    inner: VectorConfig,
120}
121
122impl VectorConfigBuilder {
123    /// Set the SQLite database path.
124    pub fn db_path(mut self, path: impl Into<PathBuf>) -> Self {
125        self.inner.db_path = path.into();
126        self
127    }
128
129    /// Set the index directory.
130    pub fn index_dir(mut self, dir: impl Into<PathBuf>) -> Self {
131        self.inner.index_dir = dir.into();
132        self
133    }
134
135    /// Set the embedding service gRPC URL.
136    pub fn embedding_service_url(mut self, url: impl Into<String>) -> Self {
137        self.inner.embedding_service_url = url.into();
138        self
139    }
140
141    /// Set the default embedding dimensionality.
142    pub fn default_dimensions(mut self, dims: usize) -> Self {
143        self.inner.default_dimensions = dims;
144        self
145    }
146
147    /// Set the HNSW `ef_construction` parameter.
148    pub fn ef_construction(mut self, ef: usize) -> Self {
149        self.inner.ef_construction = ef;
150        self
151    }
152
153    /// Set the HNSW `M` connections parameter.
154    pub fn m_connections(mut self, m: usize) -> Self {
155        self.inner.m_connections = m;
156        self
157    }
158
159    /// Set the HNSW `ef_search` parameter.
160    pub fn ef_search(mut self, ef: usize) -> Self {
161        self.inner.ef_search = ef;
162        self
163    }
164
165    /// Set the maximum number of vectors per index.
166    pub fn max_elements(mut self, n: usize) -> Self {
167        self.inner.max_elements = n;
168        self
169    }
170
171    /// Set the LRU embedding cache capacity.
172    pub fn cache_size(mut self, n: usize) -> Self {
173        self.inner.cache_size = n;
174        self
175    }
176
177    /// Set the maximum batch size for embedding calls.
178    pub fn batch_size(mut self, n: usize) -> Self {
179        self.inner.batch_size = n;
180        self
181    }
182
183    /// Set the embedding gRPC call timeout in milliseconds.
184    pub fn embedding_timeout_ms(mut self, ms: u64) -> Self {
185        self.inner.embedding_timeout_ms = ms;
186        self
187    }
188
189    /// Set the number of rayon threads.
190    pub fn num_threads(mut self, n: usize) -> Self {
191        self.inner.num_threads = n;
192        self
193    }
194
195    /// Set the default workspace id.
196    pub fn default_workspace_id(mut self, workspace_id: impl Into<String>) -> Self {
197        self.inner.default_workspace_id = workspace_id.into();
198        self
199    }
200
201    /// Set the API key store path.
202    pub fn api_key_store_path(mut self, path: impl Into<PathBuf>) -> Self {
203        self.inner.api_key_store_path = path.into();
204        self
205    }
206
207    /// Set the workspace rate limit in requests per second.
208    pub fn rate_limit_rps(mut self, rps: u32) -> Self {
209        self.inner.rate_limit_rps = rps.max(1);
210        self
211    }
212
213    /// Set whether request authentication is required.
214    pub fn require_auth(mut self, require_auth: bool) -> Self {
215        self.inner.require_auth = require_auth;
216        self
217    }
218
219    /// Validate and return the completed [`VectorConfig`].
220    ///
221    /// # Errors
222    /// - `dimensions` must be ≥ 1
223    /// - `ef_construction` must be ≥ `m_connections`
224    /// - `m_connections` must be ≥ 2
225    pub fn build(self) -> VectorResult<VectorConfig> {
226        let cfg = self.inner;
227        if cfg.default_dimensions < 1 {
228            return Err(VectorError::Config(
229                "default_dimensions must be >= 1".into(),
230            ));
231        }
232        if cfg.m_connections < 2 {
233            return Err(VectorError::Config("m_connections must be >= 2".into()));
234        }
235        if cfg.ef_construction < cfg.m_connections {
236            return Err(VectorError::Config(
237                "ef_construction must be >= m_connections".into(),
238            ));
239        }
240        if cfg.default_workspace_id.trim().is_empty() {
241            return Err(VectorError::Config(
242                "default_workspace_id must not be empty".into(),
243            ));
244        }
245        if cfg.rate_limit_rps == 0 {
246            return Err(VectorError::Config("rate_limit_rps must be > 0".into()));
247        }
248        Ok(cfg)
249    }
250}