use ruvector_core::types::{DistanceMetric, HnswConfig, QuantizationConfig};
use ruvector_core::vector_db::VectorDB;
use serde::{Deserialize, Serialize};
use crate::error::{CollectionError, Result};
/// Configuration describing how a collection stores and indexes its vectors.
///
/// Instances are checked by `CollectionConfig::validate`, and `Collection::new`
/// copies the dimension, metric, HNSW and quantization settings into the
/// database options it builds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionConfig {
/// Number of components in every vector. `validate` rejects `0` and any
/// value above 100,000.
pub dimensions: usize,
/// Distance metric handed to the underlying database options.
pub distance_metric: DistanceMetric,
/// Optional HNSW index parameters. When present, `validate` checks `m`,
/// `ef_construction` and `ef_search`; when `None`, those checks are skipped.
pub hnsw_config: Option<HnswConfig>,
/// Optional quantization settings, handed to the underlying database options.
pub quantization: Option<QuantizationConfig>,
/// NOTE(review): presumably selects on-disk payload storage, but nothing in
/// this file reads the flag (it is not copied into `DbOptions`) — confirm
/// against callers.
pub on_disk_payload: bool,
}
impl CollectionConfig {
    /// Checks that the configuration values are usable.
    ///
    /// The checks run in this order and stop at the first failure:
    /// 1. `dimensions` must be greater than 0.
    /// 2. `dimensions` must not exceed 100,000.
    /// 3. If an HNSW configuration is present: `m` must be non-zero,
    ///    `ef_construction` must be at least `m`, and `ef_search` must be
    ///    non-zero.
    ///
    /// # Errors
    ///
    /// Returns `CollectionError::InvalidConfiguration` carrying a message
    /// that names the first rule that was violated.
    pub fn validate(&self) -> Result<()> {
        // Single place that turns a message into the configuration error.
        fn invalid(message: &str) -> Result<()> {
            Err(CollectionError::InvalidConfiguration {
                message: message.to_string(),
            })
        }

        match self.dimensions {
            0 => return invalid("Dimensions must be greater than 0"),
            dims if dims > 100_000 => {
                return invalid("Dimensions exceeds maximum of 100,000");
            }
            _ => {}
        }

        // HNSW parameters are only checked when an index config was supplied.
        if let Some(hnsw) = self.hnsw_config.as_ref() {
            if hnsw.m == 0 {
                return invalid("HNSW M parameter must be greater than 0");
            }
            if hnsw.ef_construction < hnsw.m {
                return invalid("HNSW ef_construction must be >= M");
            }
            if hnsw.ef_search == 0 {
                return invalid("HNSW ef_search must be greater than 0");
            }
        }

        Ok(())
    }

    /// Builds a configuration for vectors of the given size, filling every
    /// other field with a preset: cosine distance, the default HNSW
    /// configuration, scalar quantization, and `on_disk_payload` set to `true`.
    ///
    /// The result is not validated here; call [`CollectionConfig::validate`]
    /// to check it.
    pub fn with_dimensions(dimensions: usize) -> Self {
        let distance_metric = DistanceMetric::Cosine;
        let hnsw_config = Some(HnswConfig::default());
        let quantization = Some(QuantizationConfig::Scalar);

        Self {
            dimensions,
            distance_metric,
            hnsw_config,
            quantization,
            on_disk_payload: true,
        }
    }
}
/// A named vector collection: its configuration, the backing `VectorDB`,
/// and creation/modification timestamps.
pub struct Collection {
/// Name given to the collection when it was created.
pub name: String,
/// Configuration the collection was created with.
pub config: CollectionConfig,
/// Underlying vector database instance.
pub db: VectorDB,
/// Creation time in seconds since the Unix epoch, set by `Collection::new`.
pub created_at: i64,
/// Last-update time in seconds since the Unix epoch; starts equal to
/// `created_at` and is refreshed by `Collection::touch`.
pub updated_at: i64,
}
/// Manual `Debug` implementation: prints the name, configuration and
/// timestamps, and shows the database handle as the fixed placeholder
/// string `"<VectorDB>"` rather than formatting the `db` field itself.
impl std::fmt::Debug for Collection {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Collection");
        out.field("name", &self.name);
        out.field("config", &self.config);
        out.field("created_at", &self.created_at);
        out.field("updated_at", &self.updated_at);
        // Placeholder only; the `db` field is never formatted.
        out.field("db", &"<VectorDB>");
        out.finish()
    }
}
impl Collection {
    /// Creates a collection called `name`, backed by a new `VectorDB`.
    ///
    /// The configuration is validated first. Its dimension, distance metric,
    /// HNSW and quantization settings are then copied into the database
    /// options together with `storage_path`. `on_disk_payload` is not
    /// forwarded. Both timestamps are set to the current time.
    ///
    /// # Errors
    ///
    /// Returns the error from [`CollectionConfig::validate`] when the
    /// configuration is invalid, or the (converted) error from
    /// `VectorDB::new` when the database cannot be created.
    pub fn new(name: String, config: CollectionConfig, storage_path: String) -> Result<Self> {
        config.validate()?;

        let db_options = ruvector_core::types::DbOptions {
            dimensions: config.dimensions,
            distance_metric: config.distance_metric,
            storage_path,
            hnsw_config: config.hnsw_config.clone(),
            quantization: config.quantization.clone(),
        };
        let db = VectorDB::new(db_options)?;

        let now = Self::unix_timestamp_now();
        Ok(Self {
            name,
            config,
            db,
            created_at: now,
            updated_at: now,
        })
    }

    /// Returns statistics for this collection.
    ///
    /// Only `vectors_count` is measured (via the database's `len`). The
    /// segment count is hard-coded to 1 and both size fields to 0.
    ///
    /// # Errors
    ///
    /// Propagates the (converted) error from the database's `len` call.
    pub fn stats(&self) -> Result<CollectionStats> {
        let vectors_count = self.db.len()?;
        Ok(CollectionStats {
            vectors_count,
            // Fixed placeholder values; nothing here measures them.
            segments_count: 1,
            disk_size_bytes: 0,
            ram_size_bytes: 0,
        })
    }

    /// Sets `updated_at` to the current time.
    pub fn touch(&mut self) {
        self.updated_at = Self::unix_timestamp_now();
    }

    /// Current wall-clock time as whole seconds relative to the Unix epoch.
    ///
    /// A clock set before the epoch yields a negative value instead of
    /// panicking, and values outside the `i64` range saturate rather than
    /// wrap.
    fn unix_timestamp_now() -> i64 {
        match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(elapsed) => i64::try_from(elapsed.as_secs()).unwrap_or(i64::MAX),
            // `duration()` reports how far before the epoch the clock is.
            Err(before_epoch) => i64::try_from(before_epoch.duration().as_secs())
                .map_or(i64::MIN, |secs| -secs),
        }
    }
}
/// Summary statistics for a collection, as produced by `Collection::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionStats {
/// Number of vectors, taken from the database's `len` in `Collection::stats`.
pub vectors_count: usize,
/// Number of segments. `Collection::stats` in this file always reports 1.
pub segments_count: usize,
/// Size on disk in bytes. `Collection::stats` in this file always reports 0.
pub disk_size_bytes: u64,
/// Size in RAM in bytes. `Collection::stats` in this file always reports 0.
pub ram_size_bytes: u64,
}
impl CollectionStats {
    /// Returns `true` when the collection holds no vectors.
    pub fn is_empty(&self) -> bool {
        matches!(self.vectors_count, 0)
    }

    /// Disk size rendered as a human-readable string such as `"1.50 KB"`.
    pub fn disk_size_human(&self) -> String {
        let on_disk = self.disk_size_bytes;
        format_bytes(on_disk)
    }

    /// RAM size rendered as a human-readable string such as `"1.50 KB"`.
    pub fn ram_size_human(&self) -> String {
        let in_memory = self.ram_size_bytes;
        format_bytes(in_memory)
    }
}
/// Formats a byte count using 1024-based units from `B` up to `TB`.
///
/// Zero is rendered as `"0 B"`. Every other value is printed with two
/// decimal places followed by the unit (for example `"512.00 B"` or
/// `"1.50 KB"`). Values of 1024 TB or more stay expressed in `TB`.
fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

    if bytes == 0 {
        return "0 B".to_string();
    }

    let mut scaled = bytes as f64;
    let mut label = UNITS[0];
    // Step up one unit at a time until the value drops below 1024 or the
    // largest unit has been reached.
    for &next_label in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        label = next_label;
    }

    format!("{:.2} {}", scaled, label)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a configuration with the given dimension count and neither an
    /// HNSW nor a quantization configuration.
    fn bare_config(dimensions: usize) -> CollectionConfig {
        CollectionConfig {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            hnsw_config: None,
            quantization: None,
            on_disk_payload: true,
        }
    }

    #[test]
    fn test_collection_config_validation() {
        // The preset configuration must be accepted.
        let preset = CollectionConfig::with_dimensions(384);
        assert!(preset.validate().is_ok());

        // Zero dimensions are rejected.
        assert!(bare_config(0).validate().is_err());

        // Dimension counts above the maximum are rejected.
        assert!(bare_config(200_000).validate().is_err());
    }

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512.00 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
            (1073741824, "1.00 GB"),
        ];
        for &(bytes, expected) in cases.iter() {
            assert_eq!(format_bytes(bytes), expected);
        }
    }
}