1use ruvector_core::types::{DistanceMetric, HnswConfig, QuantizationConfig};
4use ruvector_core::vector_db::VectorDB;
5use serde::{Deserialize, Serialize};
6
7use crate::error::{CollectionError, Result};
8
/// Configuration describing how a collection stores and indexes its vectors.
///
/// The struct is (de)serializable through serde. Call
/// [`CollectionConfig::validate`] to check the values before use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionConfig {
    /// Number of dimensions of each vector.
    ///
    /// `validate` rejects `0` and anything above `100_000`.
    pub dimensions: usize,

    /// Distance metric handed to the underlying vector database.
    pub distance_metric: DistanceMetric,

    /// Optional HNSW index parameters; checked by `validate` when present.
    pub hnsw_config: Option<HnswConfig>,

    /// Optional quantization configuration handed to the underlying database.
    pub quantization: Option<QuantizationConfig>,

    /// Flag for on-disk payload storage.
    ///
    /// NOTE(review): nothing visible in this file reads this flag (it is not
    /// forwarded to `DbOptions` in `Collection::new`) — confirm where it is
    /// consumed.
    pub on_disk_payload: bool,
}
27
28impl CollectionConfig {
29 pub fn validate(&self) -> Result<()> {
31 if self.dimensions == 0 {
32 return Err(CollectionError::InvalidConfiguration {
33 message: "Dimensions must be greater than 0".to_string(),
34 });
35 }
36
37 if self.dimensions > 100_000 {
38 return Err(CollectionError::InvalidConfiguration {
39 message: "Dimensions exceeds maximum of 100,000".to_string(),
40 });
41 }
42
43 if let Some(ref hnsw_config) = self.hnsw_config {
45 if hnsw_config.m == 0 {
46 return Err(CollectionError::InvalidConfiguration {
47 message: "HNSW M parameter must be greater than 0".to_string(),
48 });
49 }
50
51 if hnsw_config.ef_construction < hnsw_config.m {
52 return Err(CollectionError::InvalidConfiguration {
53 message: "HNSW ef_construction must be >= M".to_string(),
54 });
55 }
56
57 if hnsw_config.ef_search == 0 {
58 return Err(CollectionError::InvalidConfiguration {
59 message: "HNSW ef_search must be greater than 0".to_string(),
60 });
61 }
62 }
63
64 Ok(())
65 }
66
67 pub fn with_dimensions(dimensions: usize) -> Self {
69 Self {
70 dimensions,
71 distance_metric: DistanceMetric::Cosine,
72 hnsw_config: Some(HnswConfig::default()),
73 quantization: Some(QuantizationConfig::Scalar),
74 on_disk_payload: true,
75 }
76 }
77}
78
/// A named vector collection: its configuration, the backing database, and
/// creation/update timestamps.
pub struct Collection {
    /// Name given to the collection at construction.
    pub name: String,

    /// Configuration the collection was created with.
    pub config: CollectionConfig,

    /// Underlying vector database instance.
    pub db: VectorDB,

    /// Creation time, in seconds since the Unix epoch.
    pub created_at: i64,

    /// Last update time, in seconds since the Unix epoch; refreshed by `touch`.
    pub updated_at: i64,
}
96
97impl std::fmt::Debug for Collection {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 f.debug_struct("Collection")
100 .field("name", &self.name)
101 .field("config", &self.config)
102 .field("created_at", &self.created_at)
103 .field("updated_at", &self.updated_at)
104 .field("db", &"<VectorDB>")
105 .finish()
106 }
107}
108
109impl Collection {
110 pub fn new(name: String, config: CollectionConfig, storage_path: String) -> Result<Self> {
112 config.validate()?;
114
115 let db_options = ruvector_core::types::DbOptions {
117 dimensions: config.dimensions,
118 distance_metric: config.distance_metric,
119 storage_path,
120 hnsw_config: config.hnsw_config.clone(),
121 quantization: config.quantization.clone(),
122 };
123
124 let db = VectorDB::new(db_options)?;
125
126 let now = std::time::SystemTime::now()
127 .duration_since(std::time::UNIX_EPOCH)
128 .unwrap()
129 .as_secs() as i64;
130
131 Ok(Self {
132 name,
133 config,
134 db,
135 created_at: now,
136 updated_at: now,
137 })
138 }
139
140 pub fn stats(&self) -> Result<CollectionStats> {
142 let vectors_count = self.db.len()?;
143
144 Ok(CollectionStats {
145 vectors_count,
146 segments_count: 1, disk_size_bytes: 0, ram_size_bytes: 0, })
150 }
151
152 pub fn touch(&mut self) {
154 self.updated_at = std::time::SystemTime::now()
155 .duration_since(std::time::UNIX_EPOCH)
156 .unwrap()
157 .as_secs() as i64;
158 }
159}
160
/// Summary statistics for a collection.
///
/// The struct is (de)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionStats {
    /// Number of vectors in the collection.
    pub vectors_count: usize,

    /// Number of segments.
    pub segments_count: usize,

    /// Size on disk, in bytes.
    pub disk_size_bytes: u64,

    /// Size in RAM, in bytes.
    pub ram_size_bytes: u64,
}
176
177impl CollectionStats {
178 pub fn is_empty(&self) -> bool {
180 self.vectors_count == 0
181 }
182
183 pub fn disk_size_human(&self) -> String {
185 format_bytes(self.disk_size_bytes)
186 }
187
188 pub fn ram_size_human(&self) -> String {
190 format_bytes(self.ram_size_bytes)
191 }
192}
193
/// Renders a byte count as a human-readable string such as `"1.50 KB"`.
///
/// Units step by 1024 from `B` up to `TB`; the value is printed with two
/// decimals. Zero is special-cased as `"0 B"`. Values of 1024 TB or more stay
/// in `TB` because no larger unit is defined.
fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    if bytes == 0 {
        return "0 B".to_string();
    }

    let last_unit = UNITS.len() - 1;
    let mut size = bytes as f64;
    let mut unit_idx = 0;

    while size >= STEP && unit_idx < last_unit {
        size /= STEP;
        unit_idx += 1;
    }

    let mut rendered = format!("{:.2}", size);

    // A value just below the next unit (e.g. 1023.999 KB) rounds up to
    // "1024.00"; move to the next unit so it reads "1.00 MB" instead.
    if rendered == "1024.00" && unit_idx < last_unit {
        size /= STEP;
        unit_idx += 1;
        rendered = format!("{:.2}", size);
    }

    format!("{} {}", rendered, UNITS[unit_idx])
}
212
// Unit tests for configuration validation, collection construction,
// statistics, and byte formatting.
#[cfg(test)]
mod tests {
    use super::*;
    use ruvector_core::types::HnswConfig;

    /// The `with_dimensions` defaults validate; out-of-range dimensions do not.
    #[test]
    fn test_collection_config_validation() {
        // Defaults produced by `with_dimensions` must pass.
        let config = CollectionConfig::with_dimensions(384);
        assert!(config.validate().is_ok());

        // Zero dimensions is rejected.
        let config = CollectionConfig {
            dimensions: 0,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        };
        assert!(config.validate().is_err());

        // Far above the 100_000 limit is rejected.
        let config = CollectionConfig {
            dimensions: 200_000,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        };
        assert!(config.validate().is_err());
    }

    /// Exercises the exact edges of the accepted dimension range.
    #[test]
    fn test_config_validates_at_boundary_dimensions() {
        // Smallest accepted value.
        let config = CollectionConfig {
            dimensions: 1,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: false,
        };
        assert!(config.validate().is_ok());

        // Largest accepted value.
        let config = CollectionConfig {
            dimensions: 100_000,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: false,
        };
        assert!(config.validate().is_ok());

        // One past the maximum is rejected.
        let config = CollectionConfig {
            dimensions: 100_001,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: false,
        };
        assert!(config.validate().is_err());
    }

    /// An HNSW `m` of zero is reported with the "M parameter" message.
    #[test]
    fn test_config_validates_hnsw_m_zero() {
        let config = CollectionConfig {
            dimensions: 128,
            distance_metric: DistanceMetric::Euclidean,
            hnsw_config: Some(HnswConfig {
                m: 0,
                ef_construction: 200,
                ef_search: 100,
                max_elements: 1000,
            }),
            quantization: None,
            on_disk_payload: false,
        };
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("M parameter"));
    }

    /// `ef_construction` smaller than `m` is reported as an error.
    #[test]
    fn test_config_validates_hnsw_ef_construction_less_than_m() {
        let config = CollectionConfig {
            dimensions: 128,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: Some(HnswConfig {
                m: 32,
                // Deliberately smaller than `m` to trigger the check.
                ef_construction: 16,
                ef_search: 100,
                max_elements: 1000,
            }),
            quantization: None,
            on_disk_payload: false,
        };
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("ef_construction"));
    }

    /// An `ef_search` of zero is reported as an error.
    #[test]
    fn test_config_validates_hnsw_ef_search_zero() {
        let config = CollectionConfig {
            dimensions: 128,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: Some(HnswConfig {
                m: 16,
                ef_construction: 200,
                ef_search: 0,
                max_elements: 1000,
            }),
            quantization: None,
            on_disk_payload: false,
        };
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("ef_search"));
    }

    /// A consistent HNSW configuration passes validation.
    #[test]
    fn test_config_valid_hnsw_passes() {
        let config = CollectionConfig {
            dimensions: 64,
            distance_metric: DistanceMetric::DotProduct,
            hnsw_config: Some(HnswConfig {
                m: 16,
                ef_construction: 128,
                ef_search: 50,
                max_elements: 5000,
            }),
            quantization: None,
            on_disk_payload: true,
        };
        assert!(config.validate().is_ok());
    }

    /// `with_dimensions` fills every field with its documented default.
    #[test]
    fn test_with_dimensions_sets_fields() {
        let config = CollectionConfig::with_dimensions(256);
        assert_eq!(config.dimensions, 256);
        assert!(matches!(config.distance_metric, DistanceMetric::Cosine));
        assert!(config.hnsw_config.is_some());
        assert!(config.quantization.is_some());
        assert!(config.on_disk_payload);
    }

    /// A config survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_config_serialization_roundtrip() {
        let config = CollectionConfig::with_dimensions(384);
        let json = serde_json::to_string(&config).expect("serialize");
        let deserialized: CollectionConfig = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.dimensions, 384);
    }

    /// Constructing a collection with a valid config succeeds and sets both
    /// timestamps to the same positive value.
    #[test]
    fn test_collection_new_with_valid_config() {
        // Start from a clean, test-specific directory under the system temp dir.
        let temp = std::env::temp_dir().join("ruvector_test_coll_new_valid");
        let _ = std::fs::remove_dir_all(&temp);
        std::fs::create_dir_all(&temp).unwrap();

        let db_path = temp.join("vectors.db").to_string_lossy().to_string();
        let config = CollectionConfig::with_dimensions(64);
        let coll = Collection::new("test_coll".to_string(), config, db_path);
        assert!(coll.is_ok());

        let coll = coll.unwrap();
        assert_eq!(coll.name, "test_coll");
        assert_eq!(coll.config.dimensions, 64);
        assert!(coll.created_at > 0);
        assert_eq!(coll.created_at, coll.updated_at);

        // Best-effort cleanup; failures are ignored.
        let _ = std::fs::remove_dir_all(&temp);
    }

    /// Construction fails when the config has zero dimensions.
    #[test]
    fn test_collection_new_rejects_zero_dimensions() {
        let temp = std::env::temp_dir().join("ruvector_test_coll_new_zero");
        let _ = std::fs::remove_dir_all(&temp);
        std::fs::create_dir_all(&temp).unwrap();

        let db_path = temp.join("vectors.db").to_string_lossy().to_string();
        let config = CollectionConfig {
            dimensions: 0,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: false,
        };
        let result = Collection::new("bad".to_string(), config, db_path);
        assert!(result.is_err());

        // Best-effort cleanup; failures are ignored.
        let _ = std::fs::remove_dir_all(&temp);
    }

    /// A freshly created collection reports zero vectors.
    #[test]
    fn test_collection_stats_on_empty() {
        let temp = std::env::temp_dir().join("ruvector_test_coll_stats_empty");
        let _ = std::fs::remove_dir_all(&temp);
        std::fs::create_dir_all(&temp).unwrap();

        let db_path = temp.join("vectors.db").to_string_lossy().to_string();
        let config = CollectionConfig::with_dimensions(32);
        let coll = Collection::new("stats_test".to_string(), config, db_path).unwrap();

        let stats = coll.stats().unwrap();
        assert_eq!(stats.vectors_count, 0);
        assert!(stats.is_empty());

        // Best-effort cleanup; failures are ignored.
        let _ = std::fs::remove_dir_all(&temp);
    }

    /// `touch` never moves `updated_at` backwards.
    #[test]
    fn test_collection_touch_updates_timestamp() {
        let temp = std::env::temp_dir().join("ruvector_test_coll_touch");
        let _ = std::fs::remove_dir_all(&temp);
        std::fs::create_dir_all(&temp).unwrap();

        let db_path = temp.join("vectors.db").to_string_lossy().to_string();
        let config = CollectionConfig::with_dimensions(32);
        let mut coll = Collection::new("touch_test".to_string(), config, db_path).unwrap();

        let before = coll.updated_at;
        coll.touch();
        // Timestamps have one-second resolution, so equality is acceptable.
        assert!(coll.updated_at >= before);

        // Best-effort cleanup; failures are ignored.
        let _ = std::fs::remove_dir_all(&temp);
    }

    /// The `Debug` output contains the name and the database placeholder.
    #[test]
    fn test_collection_debug_format() {
        let temp = std::env::temp_dir().join("ruvector_test_coll_debug");
        let _ = std::fs::remove_dir_all(&temp);
        std::fs::create_dir_all(&temp).unwrap();

        let db_path = temp.join("vectors.db").to_string_lossy().to_string();
        let config = CollectionConfig::with_dimensions(16);
        let coll = Collection::new("debug_test".to_string(), config, db_path).unwrap();

        let debug_str = format!("{:?}", coll);
        assert!(debug_str.contains("debug_test"));
        assert!(debug_str.contains("<VectorDB>"));

        // Best-effort cleanup; failures are ignored.
        let _ = std::fs::remove_dir_all(&temp);
    }

    /// `is_empty` depends only on `vectors_count`.
    #[test]
    fn test_collection_stats_is_empty() {
        let stats = CollectionStats {
            vectors_count: 0,
            segments_count: 1,
            disk_size_bytes: 0,
            ram_size_bytes: 0,
        };
        assert!(stats.is_empty());

        let stats = CollectionStats {
            vectors_count: 5,
            segments_count: 1,
            disk_size_bytes: 1024,
            ram_size_bytes: 512,
        };
        assert!(!stats.is_empty());
    }

    /// Human-readable size helpers format whole units with two decimals.
    #[test]
    fn test_collection_stats_human_readable_sizes() {
        let stats = CollectionStats {
            vectors_count: 100,
            segments_count: 1,
            // 1024 * 1024 bytes
            disk_size_bytes: 1048576,
            // 2 * 1024 bytes
            ram_size_bytes: 2048,
        };
        assert_eq!(stats.disk_size_human(), "1.00 MB");
        assert_eq!(stats.ram_size_human(), "2.00 KB");
    }

    /// Zero-byte sizes render as "0 B".
    #[test]
    fn test_collection_stats_zero_bytes_human() {
        let stats = CollectionStats {
            vectors_count: 0,
            segments_count: 0,
            disk_size_bytes: 0,
            ram_size_bytes: 0,
        };
        assert_eq!(stats.disk_size_human(), "0 B");
        assert_eq!(stats.ram_size_human(), "0 B");
    }

    /// Every stats field survives a JSON round trip.
    #[test]
    fn test_collection_stats_serde_roundtrip() {
        let stats = CollectionStats {
            vectors_count: 42,
            segments_count: 3,
            disk_size_bytes: 999,
            ram_size_bytes: 888,
        };
        let json = serde_json::to_string(&stats).unwrap();
        let deserialized: CollectionStats = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.vectors_count, 42);
        assert_eq!(deserialized.segments_count, 3);
        assert_eq!(deserialized.disk_size_bytes, 999);
        assert_eq!(deserialized.ram_size_bytes, 888);
    }

    /// `format_bytes` across the B, KB, MB and GB units.
    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(0), "0 B");
        assert_eq!(format_bytes(512), "512.00 B");
        assert_eq!(format_bytes(1024), "1.00 KB");
        assert_eq!(format_bytes(1536), "1.50 KB");
        assert_eq!(format_bytes(1048576), "1.00 MB");
        assert_eq!(format_bytes(1073741824), "1.00 GB");
    }

    /// `format_bytes` reaches the largest defined unit (1024^4 bytes).
    #[test]
    fn test_format_bytes_terabyte() {
        assert_eq!(format_bytes(1099511627776), "1.00 TB");
    }

    /// Values below 1024 stay in bytes.
    #[test]
    fn test_format_bytes_small_values() {
        assert_eq!(format_bytes(1), "1.00 B");
        assert_eq!(format_bytes(1023), "1023.00 B");
    }

    /// Validation does not depend on which distance metric is chosen.
    #[test]
    fn test_config_all_distance_metrics_validate() {
        for metric in [
            DistanceMetric::Cosine,
            DistanceMetric::Euclidean,
            DistanceMetric::DotProduct,
            DistanceMetric::Manhattan,
        ] {
            let config = CollectionConfig {
                dimensions: 128,
                distance_metric: metric,
                hnsw_config: None,
                quantization: None,
                on_disk_payload: false,
            };
            assert!(config.validate().is_ok(), "Failed for metric {:?}", metric);
        }
    }
}