velesdb_migrate/
config.rs

1//! Configuration types for velesdb-migrate.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6
7/// Main migration configuration.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct MigrationConfig {
10    /// Source database configuration.
11    pub source: SourceConfig,
12    /// Destination `VelesDB` configuration.
13    pub destination: DestinationConfig,
14    /// Migration options.
15    #[serde(default)]
16    pub options: MigrationOptions,
17}
18
19/// Source database configuration.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21#[serde(tag = "type")]
22pub enum SourceConfig {
23    /// `PostgreSQL` with pgvector extension.
24    #[serde(rename = "pgvector")]
25    PgVector(PgVectorConfig),
26    /// Supabase (pgvector-based).
27    #[serde(rename = "supabase")]
28    Supabase(SupabaseConfig),
29    /// Qdrant vector database.
30    #[serde(rename = "qdrant")]
31    Qdrant(QdrantConfig),
32    /// Pinecone vector database.
33    #[serde(rename = "pinecone")]
34    Pinecone(PineconeConfig),
35    /// Weaviate vector database.
36    #[serde(rename = "weaviate")]
37    Weaviate(WeaviateConfig),
38    /// Milvus vector database.
39    #[serde(rename = "milvus")]
40    Milvus(MilvusConfig),
41    /// `ChromaDB` vector database.
42    #[serde(rename = "chromadb")]
43    ChromaDB(ChromaDBConfig),
44}
45
46/// `PostgreSQL` pgvector configuration.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PgVectorConfig {
49    /// Connection string (postgres://user:pass@host:port/db).
50    pub connection_string: String,
51    /// Table name containing vectors.
52    pub table: String,
53    /// Column name for vector data.
54    #[serde(default = "default_vector_column")]
55    pub vector_column: String,
56    /// Column name for primary key/ID.
57    #[serde(default = "default_id_column")]
58    pub id_column: String,
59    /// Additional columns to include in payload.
60    #[serde(default)]
61    pub payload_columns: Vec<String>,
62    /// Optional WHERE clause for filtering.
63    pub filter: Option<String>,
64}
65
66/// Supabase configuration.
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct SupabaseConfig {
69    /// Supabase project URL.
70    pub url: String,
71    /// Supabase service role key or anon key.
72    pub api_key: String,
73    /// Table name containing vectors.
74    pub table: String,
75    /// Column name for vector data.
76    #[serde(default = "default_vector_column")]
77    pub vector_column: String,
78    /// Column name for primary key/ID.
79    #[serde(default = "default_id_column")]
80    pub id_column: String,
81    /// Additional columns to include in payload.
82    #[serde(default)]
83    pub payload_columns: Vec<String>,
84}
85
86/// Qdrant configuration.
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct QdrantConfig {
89    /// Qdrant server URL.
90    pub url: String,
91    /// Collection name.
92    pub collection: String,
93    /// Optional API key.
94    pub api_key: Option<String>,
95    /// Include payload fields (empty = all).
96    #[serde(default)]
97    pub payload_fields: Vec<String>,
98}
99
100/// Pinecone configuration.
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct PineconeConfig {
103    /// Pinecone API key.
104    pub api_key: String,
105    /// Environment (e.g., "us-east-1-aws").
106    pub environment: String,
107    /// Index name.
108    pub index: String,
109    /// Optional namespace.
110    pub namespace: Option<String>,
111}
112
113/// Weaviate configuration.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct WeaviateConfig {
116    /// Weaviate server URL.
117    pub url: String,
118    /// Class name.
119    pub class_name: String,
120    /// Optional API key.
121    pub api_key: Option<String>,
122    /// Properties to include.
123    #[serde(default)]
124    pub properties: Vec<String>,
125}
126
127/// Milvus configuration.
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct MilvusConfig {
130    /// Milvus server URL.
131    pub url: String,
132    /// Collection name.
133    pub collection: String,
134    /// Optional username.
135    pub username: Option<String>,
136    /// Optional password.
137    pub password: Option<String>,
138}
139
140/// `ChromaDB` configuration.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ChromaDBConfig {
143    /// `ChromaDB` server URL.
144    pub url: String,
145    /// Collection name.
146    pub collection: String,
147    /// Optional tenant.
148    pub tenant: Option<String>,
149    /// Optional database.
150    pub database: Option<String>,
151}
152
153/// Destination `VelesDB` configuration.
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct DestinationConfig {
156    /// Path to `VelesDB` database directory.
157    pub path: PathBuf,
158    /// Collection name (will be created if not exists).
159    pub collection: String,
160    /// Vector dimension (must match source).
161    pub dimension: usize,
162    /// Distance metric.
163    #[serde(default)]
164    pub metric: DistanceMetric,
165    /// Storage mode.
166    #[serde(default)]
167    pub storage_mode: StorageMode,
168}
169
170/// Distance metric for `VelesDB`.
171#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
172#[serde(rename_all = "lowercase")]
173pub enum DistanceMetric {
174    /// Cosine similarity (default). Best for normalized embeddings.
175    #[default]
176    Cosine,
177    /// Euclidean distance. Best for unnormalized embeddings.
178    Euclidean,
179    /// Dot product. Fast but requires normalized vectors.
180    Dot,
181}
182
183/// Storage mode for `VelesDB`.
184#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
185#[serde(rename_all = "lowercase")]
186pub enum StorageMode {
187    /// Full precision (32-bit float). No compression.
188    #[default]
189    Full,
190    /// Scalar quantization (8-bit). 4x compression, ~99% recall.
191    SQ8,
192    /// Binary quantization (1-bit). 32x compression, ~95% recall.
193    Binary,
194}
195
196/// Migration options.
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct MigrationOptions {
199    /// Batch size for extraction and loading.
200    #[serde(default = "default_batch_size")]
201    pub batch_size: usize,
202    /// Enable checkpoint/resume support.
203    #[serde(default = "default_true")]
204    pub checkpoint_enabled: bool,
205    /// Checkpoint file path.
206    pub checkpoint_path: Option<PathBuf>,
207    /// Number of parallel workers.
208    #[serde(default = "default_workers")]
209    pub workers: usize,
210    /// Dry run mode (don't write to destination).
211    #[serde(default)]
212    pub dry_run: bool,
213    /// Field mappings (`source_field` -> `dest_field`).
214    #[serde(default)]
215    pub field_mappings: HashMap<String, String>,
216    /// Continue on errors.
217    #[serde(default)]
218    pub continue_on_error: bool,
219}
220
221impl Default for MigrationOptions {
222    fn default() -> Self {
223        Self {
224            batch_size: default_batch_size(),
225            checkpoint_enabled: true,
226            checkpoint_path: None,
227            workers: default_workers(),
228            dry_run: false,
229            field_mappings: HashMap::new(),
230            continue_on_error: false,
231        }
232    }
233}
234
/// Serde fallback for `vector_column`: the column name `embedding`.
fn default_vector_column() -> String {
    String::from("embedding")
}
238
/// Serde fallback for `id_column`: the column name `id`.
fn default_id_column() -> String {
    String::from("id")
}
242
/// Serde fallback for `batch_size`: a batch size of 1000.
fn default_batch_size() -> usize {
    1_000
}
246
/// Serde fallback for `workers`: four parallel workers.
fn default_workers() -> usize {
    const DEFAULT_WORKERS: usize = 4;
    DEFAULT_WORKERS
}
250
/// Serde fallback for boolean fields that are on unless set otherwise.
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
254
255impl MigrationConfig {
256    /// Load configuration from a YAML file.
257    ///
258    /// # Errors
259    ///
260    /// Returns an error if the file cannot be read or parsed.
261    pub fn from_file(path: &std::path::Path) -> crate::error::Result<Self> {
262        let content = std::fs::read_to_string(path)?;
263        let config: Self = serde_yaml::from_str(&content)?;
264        Ok(config)
265    }
266
267    /// Validate the configuration.
268    ///
269    /// # Errors
270    ///
271    /// Returns an error if the configuration is invalid.
272    pub fn validate(&self) -> crate::error::Result<()> {
273        if self.destination.dimension == 0 {
274            return Err(crate::error::Error::Config(
275                "dimension must be greater than 0".to_string(),
276            ));
277        }
278        if self.destination.collection.is_empty() {
279            return Err(crate::error::Error::Config(
280                "collection name cannot be empty".to_string(),
281            ));
282        }
283        Ok(())
284    }
285}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// The hand-written `Default` impl exposes the expected default values.
    #[test]
    fn test_config_defaults() {
        let MigrationOptions {
            batch_size,
            workers,
            checkpoint_enabled,
            dry_run,
            ..
        } = MigrationOptions::default();

        assert_eq!(batch_size, 1000);
        assert_eq!(workers, 4);
        assert!(checkpoint_enabled);
        assert!(!dry_run);
    }

    /// A destination dimension of zero is rejected by `validate`.
    #[test]
    fn test_config_validate_dimension() {
        let source = SourceConfig::Qdrant(QdrantConfig {
            url: String::from("http://localhost:6333"),
            collection: String::from("test"),
            api_key: None,
            payload_fields: Vec::new(),
        });
        let destination = DestinationConfig {
            path: PathBuf::from("./test_db"),
            collection: String::from("test"),
            dimension: 0,
            metric: DistanceMetric::Cosine,
            storage_mode: StorageMode::Full,
        };
        let config = MigrationConfig {
            source,
            destination,
            options: MigrationOptions::default(),
        };

        assert!(config.validate().is_err());
    }

    /// A minimal YAML document parses, with omitted keys taking defaults.
    #[test]
    fn test_config_yaml_parse() {
        let yaml = r#"
source:
  type: qdrant
  url: http://localhost:6333
  collection: documents
destination:
  path: ./velesdb_data
  collection: docs
  dimension: 768
options:
  batch_size: 500
"#;
        let parsed: MigrationConfig = serde_yaml::from_str(yaml).unwrap();
        let dimension = parsed.destination.dimension;
        let batch_size = parsed.options.batch_size;

        assert_eq!(dimension, 768);
        assert_eq!(batch_size, 500);
    }
}