lumosai_vector_postgres/
config.rs

1//! PostgreSQL configuration for vector storage
2
3use std::time::Duration;
4use serde::{Deserialize, Serialize};
5
/// Top-level configuration for the PostgreSQL vector storage backend.
///
/// Bundles the connection URL with the pool, table-naming and performance
/// settings. The `Default` implementation points at
/// `postgresql://localhost/lumos_vector`; `PostgresConfig::new` overrides
/// only the URL and keeps every other default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresConfig {
    /// Database connection URL, e.g. `postgresql://localhost/lumos_vector`.
    pub database_url: String,

    /// Connection pool sizing and timeouts.
    pub pool: PoolConfig,

    /// Schema, table prefix and automatic table/index creation flags.
    pub table: TableConfig,

    /// Batch size, vector index selection and statement settings.
    pub performance: PerformanceConfig,
}
21
/// Sizing and timeout settings for the database connection pool.
///
/// NOTE(review): these values are only stored here; how they are applied is
/// up to the code that builds the pool — confirm against that caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PoolConfig {
    /// Maximum number of connections in the pool (default: 10).
    pub max_connections: u32,

    /// Minimum number of connections in the pool (default: 1).
    pub min_connections: u32,

    /// Connection timeout (default: 30 seconds).
    pub connect_timeout: Duration,

    /// Idle timeout for connections (default: 600 seconds).
    ///
    /// NOTE(review): `None` presumably disables the idle timeout — confirm.
    pub idle_timeout: Option<Duration>,

    /// Maximum lifetime of a connection (default: 1800 seconds).
    ///
    /// NOTE(review): `None` presumably disables the lifetime limit — confirm.
    pub max_lifetime: Option<Duration>,
}
40
/// Naming and automatic-creation settings for the vector tables.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableConfig {
    /// Schema name (default: "public").
    ///
    /// Used as the qualifier in `PostgresConfig::table_name`.
    pub schema: String,

    /// Prefix prepended to table and index names (default: `Some("lumos_")`).
    ///
    /// `None` is treated as an empty prefix when names are built.
    pub table_prefix: Option<String>,

    /// Whether to create tables automatically (default: `true`).
    pub auto_create_tables: bool,

    /// Whether to create indexes automatically (default: `true`).
    pub auto_create_indexes: bool,
}
56
/// Performance-related settings: batching, index selection and statements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceConfig {
    /// Batch size for bulk operations (default: 1000).
    pub batch_size: usize,

    /// Vector index type to use (default: `VectorIndexType::Hnsw`).
    pub index_type: VectorIndexType,

    /// Tuning parameters for every supported index type; only the set that
    /// matches `index_type` is read when SQL is generated.
    pub index_params: IndexParams,

    /// Whether to use prepared statements (default: `true`).
    pub use_prepared_statements: bool,
}
72
/// Vector index types supported by pgvector.
///
/// The variant selects which `CREATE INDEX ... USING <method>` statement and
/// which session-level `SET` statements are generated by the methods on this
/// type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VectorIndexType {
    /// IVFFlat index (`USING ivfflat`) - good for large datasets.
    IvfFlat,
    /// HNSW index (`USING hnsw`) - good for high recall.
    Hnsw,
    /// No index - for small datasets or testing. Produces no index SQL and
    /// no search-parameter statements.
    None,
}
83
/// Tuning parameters for every supported index type.
///
/// Both parameter sets are always present; SQL generation reads only the one
/// that matches the selected `VectorIndexType`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexParams {
    /// Parameters used when the index type is `VectorIndexType::IvfFlat`.
    pub ivf_flat: IvfFlatParams,

    /// Parameters used when the index type is `VectorIndexType::Hnsw`.
    pub hnsw: HnswParams,
}
93
/// IVFFlat index parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IvfFlatParams {
    /// Number of lists (clusters); emitted as `WITH (lists = ...)` when the
    /// index is created (default: 100).
    pub lists: u32,

    /// Number of probes during search; emitted as `SET ivfflat.probes = ...`
    /// (default: 10).
    pub probes: u32,
}
103
/// HNSW index parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswParams {
    /// Maximum number of connections per node; emitted as `m = ...` in the
    /// index `WITH` clause (default: 16).
    pub m: u32,

    /// Size of the dynamic candidate list during construction; emitted as
    /// `ef_construction = ...` in the index `WITH` clause (default: 64).
    pub ef_construction: u32,

    /// Size of the dynamic candidate list during search; emitted as
    /// `SET hnsw.ef_search = ...` (default: 40).
    pub ef_search: u32,
}
116
117impl Default for PostgresConfig {
118    fn default() -> Self {
119        Self {
120            database_url: "postgresql://localhost/lumos_vector".to_string(),
121            pool: PoolConfig::default(),
122            table: TableConfig::default(),
123            performance: PerformanceConfig::default(),
124        }
125    }
126}
127
128impl Default for PoolConfig {
129    fn default() -> Self {
130        Self {
131            max_connections: 10,
132            min_connections: 1,
133            connect_timeout: Duration::from_secs(30),
134            idle_timeout: Some(Duration::from_secs(600)),
135            max_lifetime: Some(Duration::from_secs(1800)),
136        }
137    }
138}
139
140impl Default for TableConfig {
141    fn default() -> Self {
142        Self {
143            schema: "public".to_string(),
144            table_prefix: Some("lumos_".to_string()),
145            auto_create_tables: true,
146            auto_create_indexes: true,
147        }
148    }
149}
150
151impl Default for PerformanceConfig {
152    fn default() -> Self {
153        Self {
154            batch_size: 1000,
155            index_type: VectorIndexType::Hnsw,
156            index_params: IndexParams::default(),
157            use_prepared_statements: true,
158        }
159    }
160}
161
162impl Default for IndexParams {
163    fn default() -> Self {
164        Self {
165            ivf_flat: IvfFlatParams::default(),
166            hnsw: HnswParams::default(),
167        }
168    }
169}
170
171impl Default for IvfFlatParams {
172    fn default() -> Self {
173        Self {
174            lists: 100,
175            probes: 10,
176        }
177    }
178}
179
180impl Default for HnswParams {
181    fn default() -> Self {
182        Self {
183            m: 16,
184            ef_construction: 64,
185            ef_search: 40,
186        }
187    }
188}
189
190impl PostgresConfig {
191    /// Create a new PostgreSQL configuration with database URL
192    pub fn new(database_url: impl Into<String>) -> Self {
193        Self {
194            database_url: database_url.into(),
195            ..Default::default()
196        }
197    }
198    
199    /// Set pool configuration
200    pub fn with_pool(mut self, pool: PoolConfig) -> Self {
201        self.pool = pool;
202        self
203    }
204    
205    /// Set table configuration
206    pub fn with_table(mut self, table: TableConfig) -> Self {
207        self.table = table;
208        self
209    }
210    
211    /// Set performance configuration
212    pub fn with_performance(mut self, performance: PerformanceConfig) -> Self {
213        self.performance = performance;
214        self
215    }
216    
217    /// Get the full table name with schema and prefix
218    pub fn table_name(&self, name: &str) -> String {
219        let prefix = self.table.table_prefix.as_deref().unwrap_or("");
220        format!("{}.{}{}", self.table.schema, prefix, name)
221    }
222    
223    /// Get the index name for a table
224    pub fn index_name(&self, table_name: &str, index_type: &str) -> String {
225        let prefix = self.table.table_prefix.as_deref().unwrap_or("");
226        format!("{}{}_{}_idx", prefix, table_name, index_type)
227    }
228}
229
230impl VectorIndexType {
231    /// Get the SQL for creating this index type
232    pub fn create_index_sql(&self, table_name: &str, index_name: &str, params: &IndexParams) -> String {
233        match self {
234            VectorIndexType::IvfFlat => {
235                format!(
236                    "CREATE INDEX {} ON {} USING ivfflat (embedding vector_cosine_ops) WITH (lists = {})",
237                    index_name, table_name, params.ivf_flat.lists
238                )
239            },
240            VectorIndexType::Hnsw => {
241                format!(
242                    "CREATE INDEX {} ON {} USING hnsw (embedding vector_cosine_ops) WITH (m = {}, ef_construction = {})",
243                    index_name, table_name, params.hnsw.m, params.hnsw.ef_construction
244                )
245            },
246            VectorIndexType::None => String::new(),
247        }
248    }
249    
250    /// Get the SQL for setting search parameters
251    pub fn search_params_sql(&self, params: &IndexParams) -> Vec<String> {
252        match self {
253            VectorIndexType::IvfFlat => {
254                vec![format!("SET ivfflat.probes = {}", params.ivf_flat.probes)]
255            },
256            VectorIndexType::Hnsw => {
257                vec![format!("SET hnsw.ef_search = {}", params.hnsw.ef_search)]
258            },
259            VectorIndexType::None => vec![],
260        }
261    }
262}