use crate::collection::CollectionType;
use crate::columnar::{ColumnarProfile, DocumentMode};
use crate::vector_ann::VectorQuantization;
use crate::vector_distance::DistanceMetric;
/// Tag naming the engine that acts as a collection's primary.
///
/// The enum is `#[repr(u8)]` with explicit discriminants and derives both the
/// serde and the zerompk (MessagePack) codecs. `Document` is the `Default`.
///
/// NOTE(review): the explicit numbering (including the gap between 4 and 10)
/// suggests these values are persisted or exchanged between processes —
/// confirm before renumbering or reusing a discriminant.
#[repr(u8)]
#[derive(
Debug,
Clone,
Copy,
Default,
PartialEq,
Eq,
Hash,
serde::Serialize,
serde::Deserialize,
zerompk::ToMessagePack,
zerompk::FromMessagePack,
)]
// `non_exhaustive`: code outside this crate must keep a wildcard arm when matching.
#[non_exhaustive]
pub enum PrimaryEngine {
/// Default variant; inferred for schemaless document collections.
#[default]
Document = 0,
/// Inferred for strict-mode document collections.
Strict = 1,
/// Inferred for key-value collections.
KeyValue = 2,
/// Inferred for columnar collections with the plain or timeseries profile.
Columnar = 3,
/// Inferred for columnar collections with the spatial profile.
Spatial = 4,
/// Never produced by `infer_from_collection_type`; presumably chosen
/// explicitly together with a `VectorPrimaryConfig` — confirm against callers.
Vector = 10,
}
impl PrimaryEngine {
    /// Derives the primary engine implied by a collection's declared type.
    ///
    /// Mapping:
    /// - schemaless document -> [`PrimaryEngine::Document`]
    /// - strict document -> [`PrimaryEngine::Strict`]
    /// - key-value -> [`PrimaryEngine::KeyValue`]
    /// - columnar, plain or timeseries profile -> [`PrimaryEngine::Columnar`]
    /// - columnar, spatial profile -> [`PrimaryEngine::Spatial`]
    ///
    /// [`PrimaryEngine::Vector`] is never returned from here.
    pub fn infer_from_collection_type(ct: &CollectionType) -> Self {
        match ct {
            CollectionType::KeyValue(_) => Self::KeyValue,
            CollectionType::Document(mode) => match mode {
                DocumentMode::Schemaless => Self::Document,
                DocumentMode::Strict(_) => Self::Strict,
            },
            CollectionType::Columnar(profile) => match profile {
                // Only the spatial profile gets its own engine; the other
                // columnar profiles share the columnar engine.
                ColumnarProfile::Spatial { .. } => Self::Spatial,
                ColumnarProfile::Plain | ColumnarProfile::Timeseries { .. } => Self::Columnar,
            },
        }
    }
}
/// Settings that accompany a vector primary.
///
/// Serializable through both serde and zerompk (MessagePack). The `Default`
/// implementation in this file yields an empty `vector_field`, `dim = 0`,
/// `m = 16`, `ef_construction = 200`, the cosine metric, the default
/// quantization and no payload indexes.
#[derive(
Debug,
Clone,
PartialEq,
serde::Serialize,
serde::Deserialize,
zerompk::ToMessagePack,
zerompk::FromMessagePack,
)]
pub struct VectorPrimaryConfig {
/// Name of the field holding the vector (presumably a document field name — confirm).
pub vector_field: String,
/// Vector dimensionality. NOTE(review): the default is 0; whether 0 is
/// rejected elsewhere is not visible here.
pub dim: u32,
/// Quantization scheme applied to stored vectors.
pub quantization: VectorQuantization,
/// NOTE(review): looks like the HNSW `M` (per-node link count) — confirm.
pub m: u8,
/// NOTE(review): looks like the HNSW build-time candidate list size — confirm.
pub ef_construction: u16,
/// Distance metric used for comparing vectors.
pub metric: DistanceMetric,
/// Pairs of (field name, index kind); presumably secondary indexes over
/// payload fields — confirm against the consumer.
pub payload_indexes: Vec<(String, PayloadIndexKind)>,
}
impl Default for VectorPrimaryConfig {
    /// Builds the baseline configuration: no vector field name, zero
    /// dimensions, default quantization, `m = 16`, `ef_construction = 200`,
    /// cosine distance and no payload indexes.
    fn default() -> Self {
        // Named constants keep the two numeric tuning defaults easy to find.
        const DEFAULT_M: u8 = 16;
        const DEFAULT_EF_CONSTRUCTION: u16 = 200;

        let quantization = VectorQuantization::default();
        let metric = DistanceMetric::Cosine;

        Self {
            vector_field: String::new(),
            dim: 0,
            quantization,
            m: DEFAULT_M,
            ef_construction: DEFAULT_EF_CONSTRUCTION,
            metric,
            payload_indexes: Vec::new(),
        }
    }
}
/// Kind of index attached to a payload field; used as the second element of
/// each entry in `VectorPrimaryConfig::payload_indexes`.
///
/// Serializable through both serde and zerompk (MessagePack). `Equality` is
/// the `Default`.
#[derive(
Debug,
Clone,
Copy,
Default,
PartialEq,
Eq,
serde::Serialize,
serde::Deserialize,
zerompk::ToMessagePack,
zerompk::FromMessagePack,
)]
// `non_exhaustive`: code outside this crate must keep a wildcard arm when matching.
#[non_exhaustive]
pub enum PayloadIndexKind {
/// Default kind; presumably serves exact-match lookups — confirm.
#[default]
Equality,
/// Presumably serves ordered / interval lookups — confirm.
Range,
/// Presumably specialised for boolean-valued fields — confirm.
Boolean,
}
/// A single predicate over a payload field.
///
/// Serializable through both serde and zerompk (MessagePack).
/// NOTE(review): how atoms are combined (AND/OR) is not visible in this file.
#[derive(
Debug,
Clone,
PartialEq,
serde::Serialize,
serde::Deserialize,
zerompk::ToMessagePack,
zerompk::FromMessagePack,
)]
// `non_exhaustive`: code outside this crate must keep a wildcard arm when matching.
#[non_exhaustive]
pub enum PayloadAtom {
/// A string paired with one value; presumably (field name, value to equal) — confirm.
Eq(String, crate::Value),
/// A string paired with a list of values; presumably (field name, accepted values) — confirm.
In(String, Vec<crate::Value>),
/// Interval predicate on `field`.
Range {
/// Name of the field being constrained.
field: String,
/// Lower bound; presumably `None` means unbounded below — confirm.
low: Option<crate::Value>,
/// Whether `low` itself is part of the interval.
low_inclusive: bool,
/// Upper bound; presumably `None` means unbounded above — confirm.
high: Option<crate::Value>,
/// Whether `high` itself is part of the interval.
high_inclusive: bool,
},
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::{ColumnDef, ColumnType, StrictSchema};

    /// Every `PrimaryEngine` variant, shared by the codec round-trip tests.
    const ALL_ENGINES: [PrimaryEngine; 6] = [
        PrimaryEngine::Document,
        PrimaryEngine::Strict,
        PrimaryEngine::KeyValue,
        PrimaryEngine::Columnar,
        PrimaryEngine::Spatial,
        PrimaryEngine::Vector,
    ];

    /// Checks that `ct` is mapped to `expected` by the inference function.
    #[track_caller]
    fn assert_infers(ct: CollectionType, expected: PrimaryEngine) {
        assert_eq!(PrimaryEngine::infer_from_collection_type(&ct), expected);
    }

    #[test]
    fn primary_engine_default_is_document() {
        let engine = PrimaryEngine::default();
        assert_eq!(engine, PrimaryEngine::Document);
    }

    #[test]
    fn infer_from_collection_type_document_schemaless() {
        assert_infers(CollectionType::document(), PrimaryEngine::Document);
    }

    #[test]
    fn infer_from_collection_type_document_strict() {
        let columns = vec![ColumnDef::required("id", ColumnType::Int64).with_primary_key()];
        let schema = StrictSchema::new(columns).unwrap();
        assert_infers(CollectionType::strict(schema), PrimaryEngine::Strict);
    }

    #[test]
    fn infer_from_collection_type_columnar_plain() {
        assert_infers(CollectionType::columnar(), PrimaryEngine::Columnar);
    }

    #[test]
    fn infer_from_collection_type_columnar_timeseries() {
        assert_infers(CollectionType::timeseries("ts", "1h"), PrimaryEngine::Columnar);
    }

    #[test]
    fn infer_from_collection_type_columnar_spatial() {
        assert_infers(CollectionType::spatial("geom"), PrimaryEngine::Spatial);
    }

    #[test]
    fn infer_from_collection_type_kv() {
        let columns = vec![ColumnDef::required("k", ColumnType::String).with_primary_key()];
        let schema = StrictSchema::new(columns).unwrap();
        assert_infers(CollectionType::kv(schema), PrimaryEngine::KeyValue);
    }

    #[test]
    fn primary_engine_serde_roundtrip() {
        // JSON encode/decode must return the same variant.
        for variant in ALL_ENGINES {
            let encoded = sonic_rs::to_string(&variant).unwrap();
            let decoded: PrimaryEngine = sonic_rs::from_str(&encoded).unwrap();
            assert_eq!(decoded, variant);
        }
    }

    #[test]
    fn primary_engine_msgpack_roundtrip() {
        // MessagePack encode/decode must return the same variant.
        for variant in ALL_ENGINES {
            let encoded = zerompk::to_msgpack_vec(&variant).unwrap();
            let decoded: PrimaryEngine = zerompk::from_msgpack(&encoded).unwrap();
            assert_eq!(decoded, variant);
        }
    }

    #[test]
    fn vector_primary_config_serde_roundtrip() {
        let payload_indexes = vec![
            (String::from("category"), PayloadIndexKind::Equality),
            (String::from("timestamp"), PayloadIndexKind::Range),
        ];
        let original = VectorPrimaryConfig {
            vector_field: String::from("embedding"),
            dim: 1024,
            quantization: VectorQuantization::RaBitQ,
            m: 32,
            ef_construction: 200,
            metric: DistanceMetric::Cosine,
            payload_indexes,
        };

        let encoded = sonic_rs::to_string(&original).unwrap();
        let decoded: VectorPrimaryConfig = sonic_rs::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn vector_primary_config_msgpack_roundtrip() {
        let original = VectorPrimaryConfig {
            vector_field: String::from("vec"),
            dim: 512,
            quantization: VectorQuantization::Bbq,
            m: 16,
            ef_construction: 100,
            metric: DistanceMetric::L2,
            payload_indexes: Vec::new(),
        };

        let encoded = zerompk::to_msgpack_vec(&original).unwrap();
        let decoded: VectorPrimaryConfig = zerompk::from_msgpack(&encoded).unwrap();
        assert_eq!(decoded, original);
    }
}