use std::collections::HashMap;
use std::fmt;
use manifoldb_core::CollectionId;
use serde::{Deserialize, Serialize};
use super::VectorConfig;
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CollectionName(String);
impl CollectionName {
pub fn new(name: impl Into<String>) -> Result<Self, CollectionNameError> {
let name = name.into();
if name.is_empty() {
return Err(CollectionNameError::Empty);
}
if !name.chars().all(|c| c.is_alphanumeric() || c == '_' || c == '-') {
return Err(CollectionNameError::InvalidCharacters(name));
}
if name.len() > 255 {
return Err(CollectionNameError::TooLong(name.len()));
}
Ok(Self(name))
}
#[must_use]
pub fn as_str(&self) -> &str {
&self.0
}
#[must_use]
pub fn into_string(self) -> String {
self.0
}
}
impl fmt::Display for CollectionName {
    /// Writes the collection name verbatim.
    ///
    /// Formatter flags such as width or fill are not applied to the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.0.as_str())
    }
}
impl AsRef<str> for CollectionName {
    /// Borrows the collection name as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
/// Reasons a string is rejected by `CollectionName::new`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum CollectionNameError {
/// The supplied name was the empty string.
#[error("collection name cannot be empty")]
Empty,
/// The name contained a character that is neither alphanumeric, `_` nor `-`.
/// Carries the rejected name.
#[error("collection name '{0}' contains invalid characters (allowed: alphanumeric, underscore, hyphen)")]
InvalidCharacters(String),
/// The name was longer than 255 bytes. Carries the actual length in bytes.
#[error("collection name too long: {0} bytes (maximum: 255)")]
TooLong(usize),
}
/// A collection definition: identity, validated name, named vector
/// configurations, an optional payload schema and creation/update timestamps.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Collection {
/// Identifier of the collection.
id: CollectionId,
/// Validated name of the collection.
name: CollectionName,
/// Vector configurations keyed by vector name.
vectors: HashMap<String, VectorConfig>,
/// Payload schema, if one has been set; `None` for a freshly created collection.
payload_schema: Option<PayloadSchema>,
/// Creation time in whole seconds since the Unix epoch (0 if the system
/// clock reported a time before the epoch when the collection was created).
created_at: u64,
/// Time of the last `add_vector` / successful `remove_vector`, in whole
/// seconds since the Unix epoch; equal to `created_at` right after creation.
updated_at: u64,
}
impl Collection {
    /// Creates an empty collection with no vectors and no payload schema.
    ///
    /// `created_at` and `updated_at` are both set to the same reading of the
    /// system clock (whole seconds since the Unix epoch, or 0 if the clock is
    /// before the epoch).
    pub fn new(id: CollectionId, name: CollectionName) -> Self {
        let timestamp = Self::unix_seconds_now();
        Self {
            id,
            name,
            vectors: HashMap::new(),
            payload_schema: None,
            created_at: timestamp,
            updated_at: timestamp,
        }
    }

    /// Builder-style: registers `config` under `name`, replacing any existing
    /// entry with that name. Does not change `updated_at`.
    #[must_use]
    pub fn with_vector(mut self, name: impl Into<String>, config: VectorConfig) -> Self {
        let key: String = name.into();
        self.vectors.insert(key, config);
        self
    }

    /// Builder-style: merges `vectors` into the collection; entries with an
    /// already-present name overwrite the existing configuration. Does not
    /// change `updated_at`.
    #[must_use]
    pub fn with_vectors(mut self, vectors: HashMap<String, VectorConfig>) -> Self {
        self.vectors.extend(vectors);
        self
    }

    /// Builder-style: sets the payload schema. Does not change `updated_at`.
    #[must_use]
    pub fn with_payload_schema(mut self, schema: PayloadSchema) -> Self {
        self.payload_schema = Some(schema);
        self
    }

    /// Returns the collection identifier.
    #[must_use]
    pub fn id(&self) -> CollectionId {
        self.id
    }

    /// Returns the collection name.
    #[must_use]
    pub fn name(&self) -> &CollectionName {
        &self.name
    }

    /// Returns all vector configurations keyed by vector name.
    #[must_use]
    pub fn vectors(&self) -> &HashMap<String, VectorConfig> {
        &self.vectors
    }

    /// Looks up the configuration registered under `name`, if any.
    #[must_use]
    pub fn get_vector(&self, name: &str) -> Option<&VectorConfig> {
        self.vectors.get(name)
    }

    /// Returns `true` when a vector configuration is registered under `name`.
    #[must_use]
    pub fn has_vector(&self, name: &str) -> bool {
        self.get_vector(name).is_some()
    }

    /// Returns the payload schema, if one has been set.
    #[must_use]
    pub fn payload_schema(&self) -> Option<&PayloadSchema> {
        self.payload_schema.as_ref()
    }

    /// Returns the creation time in seconds since the Unix epoch.
    #[must_use]
    pub fn created_at(&self) -> u64 {
        self.created_at
    }

    /// Returns the last-update time in seconds since the Unix epoch.
    #[must_use]
    pub fn updated_at(&self) -> u64 {
        self.updated_at
    }

    /// Registers `config` under `name` (replacing any existing entry) and
    /// refreshes `updated_at`.
    pub fn add_vector(&mut self, name: impl Into<String>, config: VectorConfig) {
        let key: String = name.into();
        self.vectors.insert(key, config);
        self.touch();
    }

    /// Removes and returns the configuration registered under `name`.
    ///
    /// `updated_at` is refreshed only when something was actually removed;
    /// removing an unknown name returns `None` and leaves the timestamp alone.
    pub fn remove_vector(&mut self, name: &str) -> Option<VectorConfig> {
        let removed = self.vectors.remove(name)?;
        self.touch();
        Some(removed)
    }

    /// Sets `updated_at` to the current clock reading.
    fn touch(&mut self) {
        self.updated_at = Self::unix_seconds_now();
    }

    /// Reads the system clock as whole seconds since the Unix epoch, falling
    /// back to 0 when the clock reports a time before the epoch.
    fn unix_seconds_now() -> u64 {
        match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs(),
            Err(_) => 0,
        }
    }
}
/// Description of the payload fields of a collection: a map of declared field
/// names to their type descriptors plus a flag for undeclared fields.
///
/// `PayloadSchema::new` starts with no declared fields and
/// `allow_extra_fields = true`; `PayloadSchema::strict` starts with no declared
/// fields and `allow_extra_fields = false`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PayloadSchema {
/// Declared fields, keyed by field name.
pub fields: HashMap<String, PayloadFieldType>,
/// Presumably whether a payload may carry fields that are not listed in
/// `fields`; the enforcement is not implemented in this file — TODO confirm
/// against the code that validates payloads.
pub allow_extra_fields: bool,
}
impl PayloadSchema {
    /// Creates a schema with no declared fields and `allow_extra_fields` set
    /// to `true`.
    #[must_use]
    pub fn new() -> Self {
        Self::strict().with_extra_fields(true)
    }

    /// Creates a schema with no declared fields and `allow_extra_fields` set
    /// to `false`.
    #[must_use]
    pub fn strict() -> Self {
        let fields = HashMap::new();
        Self { fields, allow_extra_fields: false }
    }

    /// Builder-style: declares `name` with the given type, replacing any
    /// earlier declaration of the same name.
    #[must_use]
    pub fn with_field(mut self, name: impl Into<String>, field_type: PayloadFieldType) -> Self {
        let key: String = name.into();
        self.fields.insert(key, field_type);
        self
    }

    /// Builder-style: sets the `allow_extra_fields` flag.
    #[must_use]
    pub fn with_extra_fields(self, allow: bool) -> Self {
        Self { allow_extra_fields: allow, ..self }
    }
}
impl Default for PayloadSchema {
fn default() -> Self {
Self::new()
}
}
/// Type descriptor for a single payload field.
///
/// NOTE(review): how the optional bounds below are enforced (inclusive vs.
/// exclusive, bytes vs. characters) is not visible in this file — confirm
/// against the code that validates payloads.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PayloadFieldType {
/// String field with an optional maximum length.
String { max_length: Option<usize> },
/// Integer field with optional lower and upper bounds.
Integer { min: Option<i64>, max: Option<i64> },
/// Floating-point field with optional lower and upper bounds.
Float { min: Option<f64>, max: Option<f64> },
/// Boolean field.
Boolean,
/// Array field whose elements are described by `element_type`, with an
/// optional maximum length.
Array { element_type: Box<PayloadFieldType>, max_length: Option<usize> },
/// Nested object field described by its own schema.
Object { schema: Box<PayloadSchema> },
/// Field with no type descriptor beyond its presence in the schema.
Any,
}
impl PayloadFieldType {
#[must_use]
pub const fn string() -> Self {
Self::String { max_length: None }
}
#[must_use]
pub const fn string_with_max_length(max: usize) -> Self {
Self::String { max_length: Some(max) }
}
#[must_use]
pub const fn integer() -> Self {
Self::Integer { min: None, max: None }
}
#[must_use]
pub const fn integer_range(min: i64, max: i64) -> Self {
Self::Integer { min: Some(min), max: Some(max) }
}
#[must_use]
pub const fn float() -> Self {
Self::Float { min: None, max: None }
}
#[must_use]
pub const fn boolean() -> Self {
Self::Boolean
}
#[must_use]
pub fn array(element_type: PayloadFieldType) -> Self {
Self::Array { element_type: Box::new(element_type), max_length: None }
}
#[must_use]
pub const fn any() -> Self {
Self::Any
}
}
/// Unit tests for collection names, collection builders/mutators and payload
/// schemas.
#[cfg(test)]
mod tests {
    use manifoldb_vector::distance::DistanceMetric;

    use super::*;

    #[test]
    fn test_collection_name_valid() {
        let name = CollectionName::new("my_documents").unwrap();
        assert_eq!(name.as_str(), "my_documents");

        let name2 = CollectionName::new("docs-v2").unwrap();
        assert_eq!(name2.as_str(), "docs-v2");

        let name3 = CollectionName::new("Collection123").unwrap();
        assert_eq!(name3.as_str(), "Collection123");
    }

    #[test]
    fn test_collection_name_empty_fails() {
        let result = CollectionName::new("");
        assert!(matches!(result, Err(CollectionNameError::Empty)));
    }

    #[test]
    fn test_collection_name_invalid_chars_fails() {
        let result = CollectionName::new("my documents");
        assert!(matches!(result, Err(CollectionNameError::InvalidCharacters(_))));

        let result = CollectionName::new("my.documents");
        assert!(matches!(result, Err(CollectionNameError::InvalidCharacters(_))));

        let result = CollectionName::new("my/documents");
        assert!(matches!(result, Err(CollectionNameError::InvalidCharacters(_))));
    }

    // The length limit is 255 bytes: exactly 255 is accepted, 256 is rejected
    // and the error reports the offending length.
    #[test]
    fn test_collection_name_length_boundary() {
        assert!(CollectionName::new("a".repeat(255)).is_ok());

        let result = CollectionName::new("a".repeat(256));
        assert!(matches!(result, Err(CollectionNameError::TooLong(256))));
    }

    #[test]
    fn test_collection_builder() {
        use crate::collection::VectorConfig;

        let collection =
            Collection::new(CollectionId::new(1), CollectionName::new("documents").unwrap())
                .with_vector("dense", VectorConfig::dense(768, DistanceMetric::Cosine))
                .with_vector("sparse", VectorConfig::sparse(30522));

        assert_eq!(collection.name().as_str(), "documents");
        assert_eq!(collection.vectors().len(), 2);
        assert!(collection.has_vector("dense"));
        assert!(collection.has_vector("sparse"));
        assert!(!collection.has_vector("nonexistent"));
    }

    #[test]
    fn test_collection_add_remove_vector() {
        use crate::collection::VectorConfig;

        let mut collection =
            Collection::new(CollectionId::new(1), CollectionName::new("test").unwrap());

        collection.add_vector("dense", VectorConfig::dense(768, DistanceMetric::Cosine));
        assert!(collection.has_vector("dense"));

        let removed = collection.remove_vector("dense");
        assert!(removed.is_some());
        assert!(!collection.has_vector("dense"));
    }

    // Removing a name that was never registered yields `None` and leaves the
    // collection empty.
    #[test]
    fn test_collection_remove_missing_vector() {
        let mut collection =
            Collection::new(CollectionId::new(1), CollectionName::new("test").unwrap());

        assert!(collection.remove_vector("missing").is_none());
        assert!(collection.vectors().is_empty());
    }

    #[test]
    fn test_payload_schema() {
        let schema = PayloadSchema::new()
            .with_field("title", PayloadFieldType::string())
            .with_field("score", PayloadFieldType::float())
            .with_field("tags", PayloadFieldType::array(PayloadFieldType::string()));

        assert_eq!(schema.fields.len(), 3);
        assert!(schema.allow_extra_fields);
    }

    #[test]
    fn test_payload_schema_strict() {
        let schema =
            PayloadSchema::strict().with_field("required_field", PayloadFieldType::string());

        assert!(!schema.allow_extra_fields);
    }
}