sapphire_retrieve/config.rs
1use serde::{Deserialize, Serialize};
2
3use crate::embed::EmbedderConfig;
4
5/// Top-level retrieve configuration (`[retrieve]` section).
6///
7/// Controls which vector database backend to use and, optionally, text
8/// embedding settings for approximate semantic search.
9#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10pub struct RetrieveConfig {
11 /// Vector database backend (default: `none` — vector search disabled).
12 #[serde(default)]
13 pub db: VectorDb,
14 /// Text embedding settings. When absent, embedding is disabled even
15 /// if `db` is set to a non-`none` value.
16 #[serde(default)]
17 pub embedding: Option<EmbeddingConfig>,
18 /// Hybrid search tuning (FTS + semantic merged via Reciprocal Rank Fusion).
19 #[serde(default)]
20 pub hybrid: HybridConfig,
21}
22
23/// Settings for hybrid (FTS + semantic) search merging via Reciprocal Rank
24/// Fusion (RRF).
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct HybridConfig {
27 /// Weight for FTS results in RRF fusion (0.0–1.0, default 0.5).
28 /// The semantic weight is `1.0 - fts_weight`.
29 #[serde(default = "default_fts_weight")]
30 pub fts_weight: f64,
31 /// Constant *k* in the RRF formula: `score = 1 / (k + rank)`.
32 /// Default 60.
33 #[serde(default = "default_rrf_k")]
34 pub rrf_k: u32,
35}
36
/// Serde fallback for `HybridConfig::fts_weight`: an even split between
/// FTS and semantic results.
fn default_fts_weight() -> f64 {
    const EVEN_SPLIT: f64 = 0.5;
    EVEN_SPLIT
}
40
/// Serde fallback for `HybridConfig::rrf_k`, the constant *k* in the RRF
/// formula.
fn default_rrf_k() -> u32 {
    const RRF_K: u32 = 60;
    RRF_K
}
44
45impl Default for HybridConfig {
46 fn default() -> Self {
47 Self {
48 fts_weight: default_fts_weight(),
49 rrf_k: default_rrf_k(),
50 }
51 }
52}
53
54/// Vector database backend for approximate (semantic) text search.
55///
56/// | Variant | Description |
57/// |--------------|----------------------------------------------------------|
58/// | `none` | Vector search disabled (default, no extra dependencies) |
59/// | `sqlite_vec` | sqlite-vec extension, stored inside the SQLite cache DB |
60/// | `lancedb` | LanceDB — suitable for larger-scale / multimodal use |
61#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
62#[serde(rename_all = "snake_case")]
63pub enum VectorDb {
64 /// Vector search is disabled. No embedding model is required.
65 #[default]
66 None,
67 /// sqlite-vec extension stored in the existing SQLite cache database.
68 SqliteVec,
69 /// LanceDB stored in a separate data directory alongside the cache.
70 #[serde(rename = "lancedb")]
71 LanceDb,
72}
73
74impl VectorDb {
75 /// Human-readable name, matching the TOML serialization.
76 pub fn as_str(self) -> &'static str {
77 match self {
78 VectorDb::None => "none",
79 VectorDb::SqliteVec => "sqlite_vec",
80 VectorDb::LanceDb => "lancedb",
81 }
82 }
83}
84
85/// Text embedding provider configuration (`[retrieve.embedding]` subsection).
86#[derive(Debug, Clone, Serialize, Deserialize, Default)]
87pub struct EmbeddingConfig {
88 /// Enable embedding and vector search.
89 #[serde(default)]
90 pub enabled: bool,
91
92 /// Embedding provider identifier: `"openai"`, `"ollama"`, or `"fastembed"`.
93 #[serde(default)]
94 pub provider: String,
95
96 /// Model name understood by the provider.
97 #[serde(default)]
98 pub model: String,
99
100 /// Name of the environment variable holding the API key.
101 /// Used by OpenAI-compatible providers; defaults to `OPENAI_API_KEY`.
102 #[serde(skip_serializing_if = "Option::is_none")]
103 pub api_key_env: Option<String>,
104
105 /// Base URL of the embedding API endpoint.
106 #[serde(skip_serializing_if = "Option::is_none")]
107 pub base_url: Option<String>,
108
109 /// Output vector dimension of the model.
110 /// Required when `db = "sqlite_vec"`.
111 #[serde(skip_serializing_if = "Option::is_none")]
112 pub dimension: Option<u32>,
113}
114
115impl EmbeddingConfig {
116 /// Convert to the runtime [`EmbedderConfig`] used by [`crate::build_embedder`].
117 /// Convert to the runtime [`EmbedderConfig`].
118 ///
119 /// `cache_dir` is left as `None`; callers should set it to the
120 /// app-provided model cache directory before calling [`crate::build_embedder`].
121 pub fn to_embedder_config(&self) -> EmbedderConfig {
122 EmbedderConfig {
123 provider: self.provider.clone(),
124 model: self.model.clone(),
125 api_key_env: self.api_key_env.clone(),
126 base_url: self.base_url.clone(),
127 cache_dir: None,
128 }
129 }
130}