ruvector_collections/
collection.rs1use ruvector_core::types::{DistanceMetric, HnswConfig, QuantizationConfig};
4use ruvector_core::vector_db::VectorDB;
5use serde::{Deserialize, Serialize};
6
7use crate::error::{CollectionError, Result};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct CollectionConfig {
12 pub dimensions: usize,
14
15 pub distance_metric: DistanceMetric,
17
18 pub hnsw_config: Option<HnswConfig>,
20
21 pub quantization: Option<QuantizationConfig>,
23
24 pub on_disk_payload: bool,
26}
27
28impl CollectionConfig {
29 pub fn validate(&self) -> Result<()> {
31 if self.dimensions == 0 {
32 return Err(CollectionError::InvalidConfiguration {
33 message: "Dimensions must be greater than 0".to_string(),
34 });
35 }
36
37 if self.dimensions > 100_000 {
38 return Err(CollectionError::InvalidConfiguration {
39 message: "Dimensions exceeds maximum of 100,000".to_string(),
40 });
41 }
42
43 if let Some(ref hnsw_config) = self.hnsw_config {
45 if hnsw_config.m == 0 {
46 return Err(CollectionError::InvalidConfiguration {
47 message: "HNSW M parameter must be greater than 0".to_string(),
48 });
49 }
50
51 if hnsw_config.ef_construction < hnsw_config.m {
52 return Err(CollectionError::InvalidConfiguration {
53 message: "HNSW ef_construction must be >= M".to_string(),
54 });
55 }
56
57 if hnsw_config.ef_search == 0 {
58 return Err(CollectionError::InvalidConfiguration {
59 message: "HNSW ef_search must be greater than 0".to_string(),
60 });
61 }
62 }
63
64 Ok(())
65 }
66
67 pub fn with_dimensions(dimensions: usize) -> Self {
69 Self {
70 dimensions,
71 distance_metric: DistanceMetric::Cosine,
72 hnsw_config: Some(HnswConfig::default()),
73 quantization: Some(QuantizationConfig::Scalar),
74 on_disk_payload: true,
75 }
76 }
77}
78
79pub struct Collection {
81 pub name: String,
83
84 pub config: CollectionConfig,
86
87 pub db: VectorDB,
89
90 pub created_at: i64,
92
93 pub updated_at: i64,
95}
96
97impl std::fmt::Debug for Collection {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 f.debug_struct("Collection")
100 .field("name", &self.name)
101 .field("config", &self.config)
102 .field("created_at", &self.created_at)
103 .field("updated_at", &self.updated_at)
104 .field("db", &"<VectorDB>")
105 .finish()
106 }
107}
108
109impl Collection {
110 pub fn new(name: String, config: CollectionConfig, storage_path: String) -> Result<Self> {
112 config.validate()?;
114
115 let db_options = ruvector_core::types::DbOptions {
117 dimensions: config.dimensions,
118 distance_metric: config.distance_metric,
119 storage_path,
120 hnsw_config: config.hnsw_config.clone(),
121 quantization: config.quantization.clone(),
122 };
123
124 let db = VectorDB::new(db_options)?;
125
126 let now = std::time::SystemTime::now()
127 .duration_since(std::time::UNIX_EPOCH)
128 .unwrap()
129 .as_secs() as i64;
130
131 Ok(Self {
132 name,
133 config,
134 db,
135 created_at: now,
136 updated_at: now,
137 })
138 }
139
140 pub fn stats(&self) -> Result<CollectionStats> {
142 let vectors_count = self.db.len()?;
143
144 Ok(CollectionStats {
145 vectors_count,
146 segments_count: 1, disk_size_bytes: 0, ram_size_bytes: 0, })
150 }
151
152 pub fn touch(&mut self) {
154 self.updated_at = std::time::SystemTime::now()
155 .duration_since(std::time::UNIX_EPOCH)
156 .unwrap()
157 .as_secs() as i64;
158 }
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct CollectionStats {
164 pub vectors_count: usize,
166
167 pub segments_count: usize,
169
170 pub disk_size_bytes: u64,
172
173 pub ram_size_bytes: u64,
175}
176
177impl CollectionStats {
178 pub fn is_empty(&self) -> bool {
180 self.vectors_count == 0
181 }
182
183 pub fn disk_size_human(&self) -> String {
185 format_bytes(self.disk_size_bytes)
186 }
187
188 pub fn ram_size_human(&self) -> String {
190 format_bytes(self.ram_size_bytes)
191 }
192}
193
/// Renders a byte count as a string with two decimals and a unit suffix.
///
/// Units step by a factor of 1024 from `B` up to `TB`; anything larger stays
/// in `TB`. Zero is special-cased and rendered as `"0 B"` without decimals.
fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    if bytes == 0 {
        return String::from("0 B");
    }

    let mut scaled = bytes as f64;
    let mut label = UNITS[0];

    // Move up one unit at a time until the value fits or the units run out.
    for &larger_unit in UNITS.iter().skip(1) {
        if scaled < STEP {
            break;
        }
        scaled /= STEP;
        label = larger_unit;
    }

    format!("{:.2} {}", scaled, label)
}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a configuration with no HNSW or quantization settings, so that
    /// `validate` only has the `dimensions` rules to check.
    fn dimensions_only(dimensions: usize) -> CollectionConfig {
        CollectionConfig {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        }
    }

    #[test]
    fn test_collection_config_validation() {
        // The convenience constructor must produce a valid configuration.
        let config = CollectionConfig::with_dimensions(384);
        assert!(config.validate().is_ok());

        // Zero dimensions are rejected.
        assert!(dimensions_only(0).validate().is_err());

        // Dimensions above the maximum are rejected.
        assert!(dimensions_only(200_000).validate().is_err());
    }

    #[test]
    fn test_dimension_bounds_are_inclusive() {
        assert!(dimensions_only(1).validate().is_ok());
        assert!(dimensions_only(100_000).validate().is_ok());
        assert!(dimensions_only(100_001).validate().is_err());
    }

    #[test]
    fn test_hnsw_validation() {
        // M must be greater than zero.
        let mut config = CollectionConfig::with_dimensions(384);
        config
            .hnsw_config
            .as_mut()
            .expect("with_dimensions always sets an HNSW config")
            .m = 0;
        assert!(config.validate().is_err());

        // ef_construction must not be smaller than M (M is non-zero by default).
        let mut config = CollectionConfig::with_dimensions(384);
        config
            .hnsw_config
            .as_mut()
            .expect("with_dimensions always sets an HNSW config")
            .ef_construction = 0;
        assert!(config.validate().is_err());

        // ef_search must be greater than zero.
        let mut config = CollectionConfig::with_dimensions(384);
        config
            .hnsw_config
            .as_mut()
            .expect("with_dimensions always sets an HNSW config")
            .ef_search = 0;
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(0), "0 B");
        assert_eq!(format_bytes(512), "512.00 B");
        assert_eq!(format_bytes(1024), "1.00 KB");
        assert_eq!(format_bytes(1536), "1.50 KB");
        assert_eq!(format_bytes(1048576), "1.00 MB");
        assert_eq!(format_bytes(1073741824), "1.00 GB");
    }

    #[test]
    fn test_format_bytes_unit_boundaries() {
        // Just below a unit boundary stays in the smaller unit.
        assert_eq!(format_bytes(1023), "1023.00 B");
        // TB is the largest unit available.
        assert_eq!(format_bytes(1u64 << 40), "1.00 TB");
    }
}