1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct MigrationConfig {
10 pub source: SourceConfig,
12 pub destination: DestinationConfig,
14 #[serde(default)]
16 pub options: MigrationOptions,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
21#[serde(tag = "type")]
22pub enum SourceConfig {
23 #[serde(rename = "pgvector")]
25 PgVector(PgVectorConfig),
26 #[serde(rename = "supabase")]
28 Supabase(SupabaseConfig),
29 #[serde(rename = "qdrant")]
31 Qdrant(QdrantConfig),
32 #[serde(rename = "pinecone")]
34 Pinecone(PineconeConfig),
35 #[serde(rename = "weaviate")]
37 Weaviate(WeaviateConfig),
38 #[serde(rename = "milvus")]
40 Milvus(MilvusConfig),
41 #[serde(rename = "chromadb")]
43 ChromaDB(ChromaDBConfig),
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PgVectorConfig {
49 pub connection_string: String,
51 pub table: String,
53 #[serde(default = "default_vector_column")]
55 pub vector_column: String,
56 #[serde(default = "default_id_column")]
58 pub id_column: String,
59 #[serde(default)]
61 pub payload_columns: Vec<String>,
62 pub filter: Option<String>,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct SupabaseConfig {
69 pub url: String,
71 pub api_key: String,
73 pub table: String,
75 #[serde(default = "default_vector_column")]
77 pub vector_column: String,
78 #[serde(default = "default_id_column")]
80 pub id_column: String,
81 #[serde(default)]
83 pub payload_columns: Vec<String>,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct QdrantConfig {
89 pub url: String,
91 pub collection: String,
93 pub api_key: Option<String>,
95 #[serde(default)]
97 pub payload_fields: Vec<String>,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct PineconeConfig {
103 pub api_key: String,
105 pub environment: String,
107 pub index: String,
109 pub namespace: Option<String>,
111}
112
113#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct WeaviateConfig {
116 pub url: String,
118 pub class_name: String,
120 pub api_key: Option<String>,
122 #[serde(default)]
124 pub properties: Vec<String>,
125}
126
127#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct MilvusConfig {
130 pub url: String,
132 pub collection: String,
134 pub username: Option<String>,
136 pub password: Option<String>,
138}
139
140#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ChromaDBConfig {
143 pub url: String,
145 pub collection: String,
147 pub tenant: Option<String>,
149 pub database: Option<String>,
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct DestinationConfig {
156 pub path: PathBuf,
158 pub collection: String,
160 pub dimension: usize,
162 #[serde(default)]
164 pub metric: DistanceMetric,
165 #[serde(default)]
167 pub storage_mode: StorageMode,
168}
169
170#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
172#[serde(rename_all = "lowercase")]
173pub enum DistanceMetric {
174 #[default]
176 Cosine,
177 Euclidean,
179 Dot,
181}
182
183#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
185#[serde(rename_all = "lowercase")]
186pub enum StorageMode {
187 #[default]
189 Full,
190 SQ8,
192 Binary,
194}
195
196#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct MigrationOptions {
199 #[serde(default = "default_batch_size")]
201 pub batch_size: usize,
202 #[serde(default = "default_true")]
204 pub checkpoint_enabled: bool,
205 pub checkpoint_path: Option<PathBuf>,
207 #[serde(default = "default_workers")]
209 pub workers: usize,
210 #[serde(default)]
212 pub dry_run: bool,
213 #[serde(default)]
215 pub field_mappings: HashMap<String, String>,
216 #[serde(default)]
218 pub continue_on_error: bool,
219}
220
221impl Default for MigrationOptions {
222 fn default() -> Self {
223 Self {
224 batch_size: default_batch_size(),
225 checkpoint_enabled: true,
226 checkpoint_path: None,
227 workers: default_workers(),
228 dry_run: false,
229 field_mappings: HashMap::new(),
230 continue_on_error: false,
231 }
232 }
233}
234
/// Serde default for `vector_column` fields: the column name `embedding`.
fn default_vector_column() -> String {
    String::from("embedding")
}
238
/// Serde default for `id_column` fields: the column name `id`.
fn default_id_column() -> String {
    String::from("id")
}
242
/// Serde default for `MigrationOptions::batch_size`.
fn default_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 1_000;
    DEFAULT_BATCH_SIZE
}
246
/// Serde default for `MigrationOptions::workers`.
fn default_workers() -> usize {
    const DEFAULT_WORKERS: usize = 4;
    DEFAULT_WORKERS
}
250
/// Serde default for boolean fields that should be enabled unless explicitly
/// switched off. Needed because `#[serde(default = "...")]` takes a function
/// path, not a literal.
fn default_true() -> bool {
    true
}
254
255impl MigrationConfig {
256 pub fn from_file(path: &std::path::Path) -> crate::error::Result<Self> {
262 let content = std::fs::read_to_string(path)?;
263 let config: Self = serde_yaml::from_str(&content)?;
264 Ok(config)
265 }
266
267 pub fn validate(&self) -> crate::error::Result<()> {
273 if self.destination.dimension == 0 {
274 return Err(crate::error::Error::Config(
275 "dimension must be greater than 0".to_string(),
276 ));
277 }
278 if self.destination.collection.is_empty() {
279 return Err(crate::error::Error::Config(
280 "collection name cannot be empty".to_string(),
281 ));
282 }
283 Ok(())
284 }
285}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// `MigrationOptions::default()` exposes the expected default values.
    #[test]
    fn test_config_defaults() {
        let MigrationOptions {
            batch_size,
            workers,
            checkpoint_enabled,
            dry_run,
            ..
        } = MigrationOptions::default();

        assert_eq!(batch_size, 1000);
        assert_eq!(workers, 4);
        assert!(checkpoint_enabled);
        assert!(!dry_run);
    }

    /// A destination dimension of zero must be rejected by `validate`.
    #[test]
    fn test_config_validate_dimension() {
        let source = SourceConfig::Qdrant(QdrantConfig {
            url: String::from("http://localhost:6333"),
            collection: String::from("test"),
            api_key: None,
            payload_fields: Vec::new(),
        });
        let destination = DestinationConfig {
            path: PathBuf::from("./test_db"),
            collection: String::from("test"),
            dimension: 0,
            metric: DistanceMetric::Cosine,
            storage_mode: StorageMode::Full,
        };
        let config = MigrationConfig {
            source,
            destination,
            options: MigrationOptions::default(),
        };

        assert!(config.validate().is_err());
    }

    /// A minimal YAML document parses, and explicit values override defaults.
    #[test]
    fn test_config_yaml_parse() {
        let yaml = r#"
source:
  type: qdrant
  url: http://localhost:6333
  collection: documents
destination:
  path: ./velesdb_data
  collection: docs
  dimension: 768
options:
  batch_size: 500
"#;
        let parsed = serde_yaml::from_str::<MigrationConfig>(yaml).unwrap();

        assert_eq!(parsed.destination.dimension, 768);
        assert_eq!(parsed.options.batch_size, 500);
    }
}