1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct MigrationConfig {
10 pub source: SourceConfig,
12 pub destination: DestinationConfig,
14 #[serde(default)]
16 pub options: MigrationOptions,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
21#[serde(tag = "type")]
22pub enum SourceConfig {
23 #[serde(rename = "pgvector")]
25 PgVector(PgVectorConfig),
26 #[serde(rename = "supabase")]
28 Supabase(SupabaseConfig),
29 #[serde(rename = "qdrant")]
31 Qdrant(QdrantConfig),
32 #[serde(rename = "pinecone")]
34 Pinecone(PineconeConfig),
35 #[serde(rename = "weaviate")]
37 Weaviate(WeaviateConfig),
38 #[serde(rename = "milvus")]
40 Milvus(MilvusConfig),
41 #[serde(rename = "chromadb")]
43 ChromaDB(ChromaDBConfig),
44 #[serde(rename = "json_file")]
46 JsonFile(crate::connectors::json_file::JsonFileConfig),
47 #[serde(rename = "csv_file")]
49 CsvFile(crate::connectors::csv_file::CsvFileConfig),
50 #[serde(rename = "mongodb")]
52 MongoDB(crate::connectors::mongodb::MongoDBConfig),
53 #[serde(rename = "elasticsearch")]
55 Elasticsearch(crate::connectors::elasticsearch::ElasticsearchConfig),
56 #[serde(rename = "redis")]
58 Redis(crate::connectors::redis::RedisConfig),
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct PgVectorConfig {
64 pub connection_string: String,
66 pub table: String,
68 #[serde(default = "default_vector_column")]
70 pub vector_column: String,
71 #[serde(default = "default_id_column")]
73 pub id_column: String,
74 #[serde(default)]
76 pub payload_columns: Vec<String>,
77 pub filter: Option<String>,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct SupabaseConfig {
84 pub url: String,
86 pub api_key: String,
88 pub table: String,
90 #[serde(default = "default_vector_column")]
92 pub vector_column: String,
93 #[serde(default = "default_id_column")]
95 pub id_column: String,
96 #[serde(default)]
98 pub payload_columns: Vec<String>,
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct QdrantConfig {
104 pub url: String,
106 pub collection: String,
108 pub api_key: Option<String>,
110 #[serde(default)]
112 pub payload_fields: Vec<String>,
113}
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct PineconeConfig {
118 pub api_key: String,
120 pub environment: String,
122 pub index: String,
124 pub namespace: Option<String>,
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct WeaviateConfig {
131 pub url: String,
133 pub class_name: String,
135 pub api_key: Option<String>,
137 #[serde(default)]
139 pub properties: Vec<String>,
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct MilvusConfig {
145 pub url: String,
147 pub collection: String,
149 pub username: Option<String>,
151 pub password: Option<String>,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct ChromaDBConfig {
158 pub url: String,
160 pub collection: String,
162 pub tenant: Option<String>,
164 pub database: Option<String>,
166}
167
168#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct DestinationConfig {
171 pub path: PathBuf,
173 pub collection: String,
175 pub dimension: usize,
177 #[serde(default)]
179 pub metric: DistanceMetric,
180 #[serde(default)]
182 pub storage_mode: StorageMode,
183}
184
185#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
187#[serde(rename_all = "lowercase")]
188pub enum DistanceMetric {
189 #[default]
191 Cosine,
192 Euclidean,
194 Dot,
196}
197
198#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
200#[serde(rename_all = "lowercase")]
201pub enum StorageMode {
202 #[default]
204 Full,
205 SQ8,
207 Binary,
209}
210
211#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct MigrationOptions {
214 #[serde(default = "default_batch_size")]
216 pub batch_size: usize,
217 #[serde(default = "default_true")]
219 pub checkpoint_enabled: bool,
220 pub checkpoint_path: Option<PathBuf>,
222 #[serde(default = "default_workers")]
224 pub workers: usize,
225 #[serde(default)]
227 pub dry_run: bool,
228 #[serde(default)]
230 pub field_mappings: HashMap<String, String>,
231 #[serde(default)]
233 pub continue_on_error: bool,
234}
235
236impl Default for MigrationOptions {
237 fn default() -> Self {
238 Self {
239 batch_size: default_batch_size(),
240 checkpoint_enabled: true,
241 checkpoint_path: None,
242 workers: default_workers(),
243 dry_run: false,
244 field_mappings: HashMap::new(),
245 continue_on_error: false,
246 }
247 }
248}
249
/// Serde default for the `vector_column` fields: `"embedding"`.
fn default_vector_column() -> String {
    String::from("embedding")
}
253
/// Serde default for the `id_column` fields: `"id"`.
fn default_id_column() -> String {
    String::from("id")
}
257
/// Serde default for `MigrationOptions::batch_size`: one thousand.
fn default_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 1_000;
    DEFAULT_BATCH_SIZE
}
261
/// Serde default for `MigrationOptions::workers`: four.
fn default_workers() -> usize {
    const DEFAULT_WORKERS: usize = 4;
    DEFAULT_WORKERS
}
265
/// Serde default for boolean fields that are on unless switched off.
///
/// `#[serde(default = "...")]` takes a function path rather than a literal,
/// hence this one-line helper.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
269
270impl MigrationConfig {
271 pub fn from_file(path: &std::path::Path) -> crate::error::Result<Self> {
277 let content = std::fs::read_to_string(path)?;
278 let config: Self = serde_yaml::from_str(&content)?;
279 Ok(config)
280 }
281
282 pub fn validate(&self) -> crate::error::Result<()> {
288 if self.destination.dimension == 0 {
289 return Err(crate::error::Error::Config(
290 "dimension must be greater than 0".to_string(),
291 ));
292 }
293 if self.destination.collection.is_empty() {
294 return Err(crate::error::Error::Config(
295 "collection name cannot be empty".to_string(),
296 ));
297 }
298 Ok(())
299 }
300}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a Qdrant-sourced configuration whose destination uses the given
    /// collection name and dimension; everything else is fixed or defaulted.
    fn config_with_destination(collection: &str, dimension: usize) -> MigrationConfig {
        MigrationConfig {
            source: SourceConfig::Qdrant(QdrantConfig {
                url: "http://localhost:6333".to_string(),
                collection: "test".to_string(),
                api_key: None,
                payload_fields: vec![],
            }),
            destination: DestinationConfig {
                path: PathBuf::from("./test_db"),
                collection: collection.to_string(),
                dimension,
                metric: DistanceMetric::Cosine,
                storage_mode: StorageMode::Full,
            },
            options: MigrationOptions::default(),
        }
    }

    #[test]
    fn test_config_defaults() {
        let options = MigrationOptions::default();
        assert_eq!(options.batch_size, 1000);
        assert_eq!(options.workers, 4);
        assert!(options.checkpoint_enabled);
        assert!(options.checkpoint_path.is_none());
        assert!(!options.dry_run);
        assert!(options.field_mappings.is_empty());
        assert!(!options.continue_on_error);
    }

    #[test]
    fn test_config_validate_dimension() {
        let result = config_with_destination("test", 0).validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_config_validate_empty_collection() {
        let result = config_with_destination("", 128).validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_config_validate_ok() {
        let result = config_with_destination("test", 128).validate();
        assert!(result.is_ok());
    }

    #[test]
    fn test_config_yaml_parse() {
        let yaml = r#"
source:
  type: qdrant
  url: http://localhost:6333
  collection: documents
destination:
  path: ./velesdb_data
  collection: docs
  dimension: 768
options:
  batch_size: 500
"#;
        let config: MigrationConfig = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.destination.dimension, 768);
        assert_eq!(config.options.batch_size, 500);
    }

    /// Omitting `options` (and the optional destination keys) must yield the
    /// documented defaults rather than a parse error.
    #[test]
    fn test_config_yaml_options_omitted() {
        let yaml = r#"
source:
  type: qdrant
  url: http://localhost:6333
  collection: documents
destination:
  path: ./velesdb_data
  collection: docs
  dimension: 768
"#;
        let config: MigrationConfig = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(config.source, SourceConfig::Qdrant(_)));
        assert!(matches!(config.destination.metric, DistanceMetric::Cosine));
        assert!(matches!(config.destination.storage_mode, StorageMode::Full));
        assert_eq!(config.options.batch_size, 1000);
        assert_eq!(config.options.workers, 4);
        assert!(config.options.checkpoint_enabled);
    }
}