use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::enums::{FieldType, IndexType, MetricType};
use crate::error::{VectorDBError, Result};
/// Sparse vector payload represented as a list of `f64` lists.
///
/// NOTE(review): presumably each inner list is an `[index, value]` pair — confirm
/// against the service API before relying on that layout.
pub type SparseVector = Vec<Vec<f64>>;
/// Parameter set for an HNSW index.
///
/// On the wire the fields are named `M` and `efConstruction`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HNSWParams {
/// Serialized under the key `M`.
#[serde(rename = "M")]
pub m: u32,
/// Serialized under the key `efConstruction`.
#[serde(rename = "efConstruction")]
pub ef_construction: u32,
}
impl HNSWParams {
pub fn new(m: u32, ef_construction: u32) -> Self {
Self { m, ef_construction }
}
}
/// Parameter set for an IVF_FLAT index; it carries a single `nlist` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFFLATParams {
/// Serialized under the key `nlist`.
pub nlist: u32,
}
impl IVFFLATParams {
pub fn new(nlist: u32) -> Self {
Self { nlist }
}
}
/// Parameter set for an IVF_PQ index.
///
/// On the wire the fields are named `M` and `nlist`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFPQParams {
/// Serialized under the key `M`.
#[serde(rename = "M")]
pub m: u32,
/// Serialized under the key `nlist`.
pub nlist: u32,
}
impl IVFPQParams {
pub fn new(nlist: u32, m: u32) -> Self {
Self { m, nlist }
}
}
/// Parameter set for an IVF_SQ8 index; it carries a single `nlist` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IVFSQ8Params {
/// Serialized under the key `nlist`.
pub nlist: u32,
}
impl IVFSQ8Params {
pub fn new(nlist: u32) -> Self {
Self { nlist }
}
}
/// IVF_SQ4 parameters; an alias of [`IVFSQ8Params`], so the two are the same type.
pub type IVFSQ4Params = IVFSQ8Params;
/// IVF_SQ16 parameters; an alias of [`IVFSQ8Params`], so the two are the same type.
pub type IVFSQ16Params = IVFSQ8Params;
/// Index-type-specific build parameters.
///
/// The enum is `untagged`: each variant serializes as its bare parameter object,
/// and deserialization picks the **first** variant, in declaration order, whose
/// fields can all be read from the input. Unknown keys are ignored by the
/// parameter structs, so variants that require more keys must be declared
/// before variants that require a subset of them. `IVFPQ` (`M` + `nlist`) is
/// therefore listed ahead of the `nlist`-only variants; otherwise an IVF_PQ
/// payload would be read as `IVFFLAT` and its `M` value silently dropped.
///
/// `IVFFLAT`, `IVFSQ8`, `IVFSQ4` and `IVFSQ16` all have the same `{ nlist }`
/// shape, so a deserialized `{ nlist }` object always comes back as `IVFFLAT`.
/// The remaining variants are only produced when constructed in code.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum IndexParams {
    /// HNSW parameters (`M`, `efConstruction`).
    HNSW(HNSWParams),
    /// IVF_PQ parameters (`M`, `nlist`); must precede the `nlist`-only variants.
    IVFPQ(IVFPQParams),
    /// IVF_FLAT parameters (`nlist`).
    IVFFLAT(IVFFLATParams),
    /// IVF_SQ8 parameters (`nlist`).
    IVFSQ8(IVFSQ8Params),
    /// IVF_SQ4 parameters (`nlist`).
    IVFSQ4(IVFSQ4Params),
    /// IVF_SQ16 parameters (`nlist`).
    IVFSQ16(IVFSQ16Params),
}
/// Fields shared by every index definition: the field name, its data type and
/// an optional index type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexField {
/// Name of the indexed field; serialized as `fieldName`.
#[serde(rename = "fieldName")]
pub name: String,
/// Data type of the field; serialized as `fieldType`.
#[serde(rename = "fieldType")]
pub field_type: FieldType,
/// Index type; serialized as `indexType` and left out of the output when `None`.
#[serde(rename = "indexType", skip_serializing_if = "Option::is_none")]
pub index_type: Option<IndexType>,
}
impl IndexField {
pub fn new(name: impl Into<String>, field_type: FieldType, index_type: Option<IndexType>) -> Self {
Self {
name: name.into(),
field_type,
index_type,
}
}
pub fn is_vector_field(&self) -> bool {
matches!(
self.field_type,
FieldType::Vector | FieldType::BinaryVector | FieldType::Float16Vector | FieldType::BFloat16Vector
)
}
pub fn is_sparse_vector_field(&self) -> bool {
matches!(self.field_type, FieldType::SparseVector)
}
pub fn is_primary_key(&self) -> bool {
matches!(self.index_type, Some(IndexType::PRIMARY_KEY))
}
}
/// Index definition for a vector field.
///
/// The shared [`IndexField`] keys are flattened into the same object as the
/// vector-specific keys. Every vector-specific field is optional and is left
/// out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorIndex {
/// Shared field description; its keys are flattened into this object.
#[serde(flatten)]
pub base: IndexField,
/// Serialized as `dimension`.
#[serde(skip_serializing_if = "Option::is_none")]
pub dimension: Option<u32>,
/// Serialized as `metricType`.
#[serde(rename = "metricType", skip_serializing_if = "Option::is_none")]
pub metric_type: Option<MetricType>,
/// Index-type-specific parameters; serialized as `params`.
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<IndexParams>,
/// Serialized as `indexedCount`. The constructors in this file always set it to `None`;
/// presumably it is filled in from server responses — confirm.
#[serde(rename = "indexedCount", skip_serializing_if = "Option::is_none")]
pub indexed_count: Option<u64>,
}
impl VectorIndex {
pub fn new(
name: impl Into<String>,
dimension: u32,
index_type: IndexType,
metric_type: MetricType,
params: Option<IndexParams>,
) -> Self {
Self {
base: IndexField::new(name, FieldType::Vector, Some(index_type)),
dimension: Some(dimension),
metric_type: Some(metric_type),
params,
indexed_count: None,
}
}
pub fn with_field_type(
name: impl Into<String>,
dimension: u32,
index_type: IndexType,
metric_type: MetricType,
field_type: FieldType,
params: Option<IndexParams>,
) -> Self {
Self {
base: IndexField::new(name, field_type, Some(index_type)),
dimension: Some(dimension),
metric_type: Some(metric_type),
params,
indexed_count: None,
}
}
}
/// Index definition for a non-vector field.
///
/// The shared [`IndexField`] keys are flattened into the same object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilterIndex {
/// Shared field description; its keys are flattened into this object.
#[serde(flatten)]
pub base: IndexField,
/// Serialized as `autoId` and left out of the output when `None`.
/// NOTE(review): the accepted string values are not visible here — confirm against the API.
#[serde(rename = "autoId", skip_serializing_if = "Option::is_none")]
pub auto_id: Option<String>,
}
impl FilterIndex {
pub fn new(name: impl Into<String>, field_type: FieldType, index_type: IndexType) -> Self {
Self {
base: IndexField::new(name, field_type, Some(index_type)),
auto_id: None,
}
}
pub fn with_auto_id(mut self, auto_id: impl Into<String>) -> Self {
self.auto_id = Some(auto_id.into());
self
}
}
/// Index definition for a sparse-vector field.
///
/// The shared [`IndexField`] keys are flattened into the same object, and
/// unlike [`VectorIndex`] the metric type is mandatory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparseIndex {
/// Shared field description; its keys are flattened into this object.
#[serde(flatten)]
pub base: IndexField,
/// Serialized as `metricType`; always present.
#[serde(rename = "metricType")]
pub metric_type: MetricType,
}
impl SparseIndex {
pub fn new(
name: impl Into<String>,
index_type: IndexType,
metric_type: MetricType,
) -> Self {
Self {
base: IndexField::new(name, FieldType::SparseVector, Some(index_type)),
metric_type,
}
}
}
/// Any of the supported index definitions.
///
/// The enum is `untagged`: each variant serializes as its bare struct, and
/// deserialization returns the first variant, in declaration order, that
/// accepts the input.
///
/// NOTE(review): apart from the flattened base, every `VectorIndex` field is
/// optional, so it looks like any object carrying `fieldName` and `fieldType`
/// deserializes as `Vector`, and `Filter`/`Sparse` are only reachable when built
/// in code. Confirm whether deserialization should instead dispatch on `fieldType`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum IndexDefinition {
/// Index on a vector field.
Vector(VectorIndex),
/// Index on a non-vector field.
Filter(FilterIndex),
/// Index on a sparse-vector field.
Sparse(SparseIndex),
}
impl IndexDefinition {
pub fn name(&self) -> &str {
match self {
IndexDefinition::Vector(idx) => &idx.base.name,
IndexDefinition::Filter(idx) => &idx.base.name,
IndexDefinition::Sparse(idx) => &idx.base.name,
}
}
pub fn is_primary_key(&self) -> bool {
match self {
IndexDefinition::Vector(idx) => idx.base.is_primary_key(),
IndexDefinition::Filter(idx) => idx.base.is_primary_key(),
IndexDefinition::Sparse(idx) => idx.base.is_primary_key(),
}
}
}
/// Collection of index definitions keyed by field name.
#[derive(Debug, Clone, Default)]
pub struct Index {
// Definitions keyed by field name; `HashMap` iteration order is unspecified.
indexes: HashMap<String, IndexDefinition>,
// Name of a definition flagged as primary key, when one has been recorded.
primary_field: Option<String>,
}
impl Index {
    /// Creates an empty collection with no primary field.
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers a vector index under its field name.
    ///
    /// # Errors
    /// Returns a server error (code 15000) when an index with the same field
    /// name is already registered; the collection is left unchanged.
    pub fn add_vector_index(&mut self, index: VectorIndex) -> Result<&mut Self> {
        self.insert_definition(IndexDefinition::Vector(index))
    }

    /// Registers a filter index under its field name.
    ///
    /// # Errors
    /// Returns a server error (code 15000) when an index with the same field
    /// name is already registered; the collection is left unchanged.
    pub fn add_filter_index(&mut self, index: FilterIndex) -> Result<&mut Self> {
        self.insert_definition(IndexDefinition::Filter(index))
    }

    /// Registers a sparse index under its field name.
    ///
    /// # Errors
    /// Returns a server error (code 15000) when an index with the same field
    /// name is already registered; the collection is left unchanged.
    pub fn add_sparse_index(&mut self, index: SparseIndex) -> Result<&mut Self> {
        self.insert_definition(IndexDefinition::Sparse(index))
    }

    /// Removes the index registered under `index_name`, if any.
    ///
    /// When the removed name was the recorded primary field, the primary field
    /// is recomputed from the remaining definitions so it never refers to an
    /// index that is no longer present.
    pub fn remove(&mut self, index_name: &str) -> &mut Self {
        self.indexes.remove(index_name);
        if self.primary_field.as_deref() == Some(index_name) {
            self.primary_field = self
                .indexes
                .values()
                .find(|definition| definition.is_primary_key())
                .map(|definition| definition.name().to_owned());
        }
        self
    }

    /// Looks up the definition registered under `name`.
    pub fn get(&self, name: &str) -> Option<&IndexDefinition> {
        self.indexes.get(name)
    }

    /// Returns every registered definition, in unspecified order.
    pub fn list(&self) -> Vec<&IndexDefinition> {
        self.indexes.values().collect()
    }

    /// Returns the name of the recorded primary-key field, if any.
    pub fn primary_field(&self) -> Option<&str> {
        self.primary_field.as_deref()
    }

    /// Returns `true` when no definitions are registered.
    pub fn is_empty(&self) -> bool {
        self.indexes.is_empty()
    }

    /// Shared insertion path for all `add_*` methods: rejects duplicate field
    /// names, records the primary field, then stores the definition.
    fn insert_definition(&mut self, definition: IndexDefinition) -> Result<&mut Self> {
        if self.indexes.contains_key(definition.name()) {
            return Err(VectorDBError::server_error(
                15000,
                "fieldName must exist and be unique",
            ));
        }
        // The name is cloned only after the duplicate check, so the error path
        // does not allocate.
        let name = definition.name().to_owned();
        if definition.is_primary_key() {
            self.primary_field = Some(name.clone());
        }
        self.indexes.insert(name, definition);
        Ok(self)
    }
}
impl Serialize for Index {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.indexes.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for Index {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let indexes: HashMap<String, IndexDefinition> = HashMap::deserialize(deserializer)?;
let primary_field = indexes
.values()
.find(|idx| idx.is_primary_key())
.map(|idx| idx.name().to_string());
Ok(Self {
indexes,
primary_field,
})
}
}