velesdb_migrate/
config.rs

1//! Configuration types for velesdb-migrate.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6
7/// Main migration configuration.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct MigrationConfig {
10    /// Source database configuration.
11    pub source: SourceConfig,
12    /// Destination `VelesDB` configuration.
13    pub destination: DestinationConfig,
14    /// Migration options.
15    #[serde(default)]
16    pub options: MigrationOptions,
17}
18
19/// Source database configuration.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21#[serde(tag = "type")]
22pub enum SourceConfig {
23    /// `PostgreSQL` with pgvector extension.
24    #[serde(rename = "pgvector")]
25    PgVector(PgVectorConfig),
26    /// Supabase (pgvector-based).
27    #[serde(rename = "supabase")]
28    Supabase(SupabaseConfig),
29    /// Qdrant vector database.
30    #[serde(rename = "qdrant")]
31    Qdrant(QdrantConfig),
32    /// Pinecone vector database.
33    #[serde(rename = "pinecone")]
34    Pinecone(PineconeConfig),
35    /// Weaviate vector database.
36    #[serde(rename = "weaviate")]
37    Weaviate(WeaviateConfig),
38    /// Milvus vector database.
39    #[serde(rename = "milvus")]
40    Milvus(MilvusConfig),
41    /// `ChromaDB` vector database.
42    #[serde(rename = "chromadb")]
43    ChromaDB(ChromaDBConfig),
44    /// JSON file import.
45    #[serde(rename = "json_file")]
46    JsonFile(crate::connectors::json_file::JsonFileConfig),
47    /// CSV file import.
48    #[serde(rename = "csv_file")]
49    CsvFile(crate::connectors::csv_file::CsvFileConfig),
50    /// MongoDB Atlas Vector Search.
51    #[serde(rename = "mongodb")]
52    MongoDB(crate::connectors::mongodb::MongoDBConfig),
53    /// Elasticsearch/OpenSearch with vector search.
54    #[serde(rename = "elasticsearch")]
55    Elasticsearch(crate::connectors::elasticsearch::ElasticsearchConfig),
56    /// Redis Vector Search (Redis Stack).
57    #[serde(rename = "redis")]
58    Redis(crate::connectors::redis::RedisConfig),
59}
60
61/// `PostgreSQL` pgvector configuration.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct PgVectorConfig {
64    /// Connection string (postgres://user:pass@host:port/db).
65    pub connection_string: String,
66    /// Table name containing vectors.
67    pub table: String,
68    /// Column name for vector data.
69    #[serde(default = "default_vector_column")]
70    pub vector_column: String,
71    /// Column name for primary key/ID.
72    #[serde(default = "default_id_column")]
73    pub id_column: String,
74    /// Additional columns to include in payload.
75    #[serde(default)]
76    pub payload_columns: Vec<String>,
77    /// Optional WHERE clause for filtering.
78    pub filter: Option<String>,
79}
80
81/// Supabase configuration.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct SupabaseConfig {
84    /// Supabase project URL.
85    pub url: String,
86    /// Supabase service role key or anon key.
87    pub api_key: String,
88    /// Table name containing vectors.
89    pub table: String,
90    /// Column name for vector data.
91    #[serde(default = "default_vector_column")]
92    pub vector_column: String,
93    /// Column name for primary key/ID.
94    #[serde(default = "default_id_column")]
95    pub id_column: String,
96    /// Additional columns to include in payload.
97    #[serde(default)]
98    pub payload_columns: Vec<String>,
99}
100
101/// Qdrant configuration.
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct QdrantConfig {
104    /// Qdrant server URL.
105    pub url: String,
106    /// Collection name.
107    pub collection: String,
108    /// Optional API key.
109    pub api_key: Option<String>,
110    /// Include payload fields (empty = all).
111    #[serde(default)]
112    pub payload_fields: Vec<String>,
113}
114
115/// Pinecone configuration.
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct PineconeConfig {
118    /// Pinecone API key.
119    pub api_key: String,
120    /// Environment (e.g., "us-east-1-aws").
121    pub environment: String,
122    /// Index name.
123    pub index: String,
124    /// Optional namespace.
125    pub namespace: Option<String>,
126}
127
128/// Weaviate configuration.
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct WeaviateConfig {
131    /// Weaviate server URL.
132    pub url: String,
133    /// Class name.
134    pub class_name: String,
135    /// Optional API key.
136    pub api_key: Option<String>,
137    /// Properties to include.
138    #[serde(default)]
139    pub properties: Vec<String>,
140}
141
142/// Milvus configuration.
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct MilvusConfig {
145    /// Milvus server URL.
146    pub url: String,
147    /// Collection name.
148    pub collection: String,
149    /// Optional username.
150    pub username: Option<String>,
151    /// Optional password.
152    pub password: Option<String>,
153}
154
155/// `ChromaDB` configuration.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct ChromaDBConfig {
158    /// `ChromaDB` server URL.
159    pub url: String,
160    /// Collection name.
161    pub collection: String,
162    /// Optional tenant.
163    pub tenant: Option<String>,
164    /// Optional database.
165    pub database: Option<String>,
166}
167
168/// Destination `VelesDB` configuration.
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct DestinationConfig {
171    /// Path to `VelesDB` database directory.
172    pub path: PathBuf,
173    /// Collection name (will be created if not exists).
174    pub collection: String,
175    /// Vector dimension (must match source).
176    pub dimension: usize,
177    /// Distance metric.
178    #[serde(default)]
179    pub metric: DistanceMetric,
180    /// Storage mode.
181    #[serde(default)]
182    pub storage_mode: StorageMode,
183}
184
185/// Distance metric for `VelesDB`.
186#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
187#[serde(rename_all = "lowercase")]
188pub enum DistanceMetric {
189    /// Cosine similarity (default). Best for normalized embeddings.
190    #[default]
191    Cosine,
192    /// Euclidean distance. Best for unnormalized embeddings.
193    Euclidean,
194    /// Dot product. Fast but requires normalized vectors.
195    Dot,
196}
197
198/// Storage mode for `VelesDB`.
199#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
200#[serde(rename_all = "lowercase")]
201pub enum StorageMode {
202    /// Full precision (32-bit float). No compression.
203    #[default]
204    Full,
205    /// Scalar quantization (8-bit). 4x compression, ~99% recall.
206    SQ8,
207    /// Binary quantization (1-bit). 32x compression, ~95% recall.
208    Binary,
209}
210
211/// Migration options.
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct MigrationOptions {
214    /// Batch size for extraction and loading.
215    #[serde(default = "default_batch_size")]
216    pub batch_size: usize,
217    /// Enable checkpoint/resume support.
218    #[serde(default = "default_true")]
219    pub checkpoint_enabled: bool,
220    /// Checkpoint file path.
221    pub checkpoint_path: Option<PathBuf>,
222    /// Number of parallel workers.
223    #[serde(default = "default_workers")]
224    pub workers: usize,
225    /// Dry run mode (don't write to destination).
226    #[serde(default)]
227    pub dry_run: bool,
228    /// Field mappings (`source_field` -> `dest_field`).
229    #[serde(default)]
230    pub field_mappings: HashMap<String, String>,
231    /// Continue on errors.
232    #[serde(default)]
233    pub continue_on_error: bool,
234}
235
236impl Default for MigrationOptions {
237    fn default() -> Self {
238        Self {
239            batch_size: default_batch_size(),
240            checkpoint_enabled: true,
241            checkpoint_path: None,
242            workers: default_workers(),
243            dry_run: false,
244            field_mappings: HashMap::new(),
245            continue_on_error: false,
246        }
247    }
248}
249
/// Serde default for the vector column name: `embedding`.
fn default_vector_column() -> String {
    String::from("embedding")
}
253
/// Serde default for the primary key/ID column name: `id`.
fn default_id_column() -> String {
    String::from("id")
}
257
/// Serde default for the number of records per batch.
fn default_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 1_000;
    DEFAULT_BATCH_SIZE
}
261
/// Serde default for the number of parallel workers.
fn default_workers() -> usize {
    const DEFAULT_WORKERS: usize = 4;
    DEFAULT_WORKERS
}
265
/// Serde default for boolean options that are enabled unless configured otherwise.
fn default_true() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
269
270impl MigrationConfig {
271    /// Load configuration from a YAML file.
272    ///
273    /// # Errors
274    ///
275    /// Returns an error if the file cannot be read or parsed.
276    pub fn from_file(path: &std::path::Path) -> crate::error::Result<Self> {
277        let content = std::fs::read_to_string(path)?;
278        let config: Self = serde_yaml::from_str(&content)?;
279        Ok(config)
280    }
281
282    /// Validate the configuration.
283    ///
284    /// # Errors
285    ///
286    /// Returns an error if the configuration is invalid.
287    pub fn validate(&self) -> crate::error::Result<()> {
288        if self.destination.dimension == 0 {
289            return Err(crate::error::Error::Config(
290                "dimension must be greater than 0".to_string(),
291            ));
292        }
293        if self.destination.collection.is_empty() {
294            return Err(crate::error::Error::Config(
295                "collection name cannot be empty".to_string(),
296            ));
297        }
298        Ok(())
299    }
300}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a Qdrant-to-`VelesDB` configuration whose destination uses the
    /// given vector dimension; everything else is fixed test data.
    fn qdrant_config_with_dimension(dimension: usize) -> MigrationConfig {
        let source = SourceConfig::Qdrant(QdrantConfig {
            url: String::from("http://localhost:6333"),
            collection: String::from("test"),
            api_key: None,
            payload_fields: Vec::new(),
        });
        let destination = DestinationConfig {
            path: PathBuf::from("./test_db"),
            collection: String::from("test"),
            dimension,
            metric: DistanceMetric::Cosine,
            storage_mode: StorageMode::Full,
        };

        MigrationConfig {
            source,
            destination,
            options: MigrationOptions::default(),
        }
    }

    /// The default options use the documented batch size, worker count and flags.
    #[test]
    fn test_config_defaults() {
        let MigrationOptions {
            batch_size,
            workers,
            checkpoint_enabled,
            dry_run,
            ..
        } = MigrationOptions::default();

        assert_eq!(batch_size, 1000);
        assert_eq!(workers, 4);
        assert!(checkpoint_enabled);
        assert!(!dry_run);
    }

    /// A destination dimension of zero must be rejected by validation.
    #[test]
    fn test_config_validate_dimension() {
        let config = qdrant_config_with_dimension(0);

        assert!(config.validate().is_err());
    }

    /// A minimal YAML document parses, and explicit values override defaults.
    #[test]
    fn test_config_yaml_parse() {
        let yaml = r#"
source:
  type: qdrant
  url: http://localhost:6333
  collection: documents
destination:
  path: ./velesdb_data
  collection: docs
  dimension: 768
options:
  batch_size: 500
"#;
        let parsed: MigrationConfig = serde_yaml::from_str(yaml).unwrap();

        assert_eq!(parsed.destination.dimension, 768);
        assert_eq!(parsed.options.batch_size, 500);
    }
}