use std::collections::HashMap;
use std::sync::Mutex;
/// State reported for a collection's artifact.
///
/// `VectorIntrospectionRegistry::update_artifact` treats `Ready` as always
/// search-capable; for every other state it defers to the artifact's
/// `scalar_fallback_active` flag.
///
/// NOTE(review): the per-variant descriptions are inferred from the variant
/// names and the test fixtures in this file — confirm with the producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactState {
    /// No artifact is available.
    Unavailable,
    /// An artifact build is in progress.
    Building,
    /// The artifact is ready.
    Ready,
    /// The artifact build or load failed.
    Failed,
    /// A fallback path is in use instead of the artifact.
    Fallback,
}

impl ArtifactState {
    /// Returns the lowercase label for this state.
    ///
    /// A unit test in this file pins these exact strings, so treat them as
    /// part of the external contract.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Unavailable => "unavailable",
            Self::Building => "building",
            Self::Ready => "ready",
            Self::Failed => "failed",
            Self::Fallback => "fallback",
        }
    }
}
/// Where a collection's vector values come from: a named column or a named
/// payload. Both variants wrap the name as an owned `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VectorSource {
    /// Vector is read from the named column.
    Column(String),
    /// Vector is read from the named payload.
    Payload(String),
}

impl VectorSource {
    /// Returns the wrapped name, whichever variant this is.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Column(name) => name.as_str(),
            Self::Payload(name) => name.as_str(),
        }
    }

    /// Returns the variant's label: `"column"` or `"payload"`.
    pub fn kind_str(&self) -> &'static str {
        match self {
            Self::Column(_) => "column",
            Self::Payload(_) => "payload",
        }
    }
}
/// Kind of index associated with a collection's vectors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorIndexType {
    /// Labelled `"hnsw"`.
    Hnsw,
    /// Labelled `"ivf"`.
    Ivf,
    /// Labelled `"turboquant"`.
    TurboQuant,
    /// Labelled `"turbovec"`.
    TurboVec,
    /// Labelled `"scalar"`.
    Scalar,
}

impl VectorIndexType {
    /// Returns the lowercase label for this index type.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Hnsw => "hnsw",
            Self::Ivf => "ivf",
            Self::TurboQuant => "turboquant",
            Self::TurboVec => "turbovec",
            Self::Scalar => "scalar",
        }
    }
}
/// Distance metric associated with a collection's vectors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DistanceMetric {
    /// Labelled `"cosine"`.
    Cosine,
    /// Labelled `"inner_product"`.
    InnerProduct,
    /// Labelled `"l2"`.
    L2,
}

impl DistanceMetric {
    /// Returns the lowercase, snake_case label for this metric.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Cosine => "cosine",
            Self::InnerProduct => "inner_product",
            Self::L2 => "l2",
        }
    }
}
/// Optional parameter set attached to an artifact via `ArtifactMetadata::params`.
///
/// The derived `Default` yields an empty `family` and `None` for every
/// numeric field.
///
/// NOTE(review): the numeric field meanings below are inferred from their
/// names only — confirm against the code that populates them.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TurboArtifactParams {
/// Family label; the test fixtures in this file use "turboquant" and "turbovec".
pub family: String,
/// Presumably the number of subspaces each vector is split into — TODO confirm.
pub subspaces: Option<u32>,
/// Presumably the width, in bits, of each stored code — TODO confirm.
pub bits_per_code: Option<u32>,
/// Presumably the number of entries in each codebook — TODO confirm.
pub codebook_size: Option<u32>,
}
/// Descriptive metadata for one collection's vector data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VectorMetadata {
/// Collection name; `VectorIntrospectionRegistry::publish` uses it as the map key.
pub collection: String,
/// Where the vector values come from (a named column or a named payload).
pub source: VectorSource,
/// Vector dimensionality — presumably components per vector; TODO confirm.
pub dimensions: u32,
/// Distance metric associated with the collection.
pub metric: DistanceMetric,
/// Index type associated with the collection.
pub index_type: VectorIndexType,
/// Row count — presumably the number of vectors in the collection; TODO confirm what is counted.
pub row_count: u64,
/// Search-capability flag. `publish` stores the caller's value unchanged;
/// `update_artifact` overwrites it based on the new artifact's `state` and
/// `scalar_fallback_active`.
pub search_capable: bool,
}
/// Status record for the artifact belonging to one collection.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArtifactMetadata {
/// Collection name; `VectorIntrospectionRegistry::update_artifact` uses it to
/// find the entry to update.
pub collection: String,
/// Current artifact state.
pub state: ArtifactState,
/// Presumably whether an encoded artifact currently exists — TODO confirm.
/// The registry code in this file stores it but never reads it.
pub encoded_artifact_present: bool,
/// Optional artifact parameters; `None` when none are reported.
pub params: Option<TurboArtifactParams>,
/// When `true`, `update_artifact` marks the collection search-capable even
/// if `state` is not `Ready`.
pub scalar_fallback_active: bool,
/// Rebuild progress — presumably a percentage in 0..=100; the range is not
/// validated anywhere in this file. TODO confirm.
pub rebuild_progress_pct: Option<u8>,
/// Error message associated with the artifact, if any.
pub last_error: Option<String>,
}
/// Owned copy of one registry record, as returned by
/// `VectorIntrospectionRegistry::get` and `VectorIntrospectionRegistry::snapshot`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VectorIntrospection {
/// Clone of the stored vector metadata.
pub vector: VectorMetadata,
/// Clone of the stored artifact metadata.
pub artifact: ArtifactMetadata,
}
/// Private per-collection record held in the registry's map; pairs the vector
/// metadata with the artifact metadata published for the same collection.
#[derive(Debug, Clone)]
struct Entry {
// Vector-side metadata; its `search_capable` flag is rewritten by `update_artifact`.
vector: VectorMetadata,
// Artifact-side metadata; replaced wholesale by `update_artifact`.
artifact: ArtifactMetadata,
}
/// Thread-safe, in-memory table of per-collection introspection records.
///
/// Records are keyed by collection name and guarded by a single mutex, so
/// every method takes `&self`. If an earlier lock holder panicked and
/// poisoned the mutex, the inner map is recovered and used anyway instead of
/// panicking again.
#[derive(Debug, Default)]
pub struct VectorIntrospectionRegistry {
    entries: Mutex<HashMap<String, Entry>>,
}

impl VectorIntrospectionRegistry {
    /// Creates an empty registry.
    pub fn new() -> Self {
        Self {
            entries: Mutex::new(HashMap::new()),
        }
    }

    /// Locks the entry map, recovering the guard if the mutex was poisoned.
    fn lock_entries(&self) -> std::sync::MutexGuard<'_, HashMap<String, Entry>> {
        match self.entries.lock() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// Builds the owned, public view of a stored entry by cloning both halves.
    fn introspection_of(entry: &Entry) -> VectorIntrospection {
        VectorIntrospection {
            vector: entry.vector.clone(),
            artifact: entry.artifact.clone(),
        }
    }

    /// Inserts or replaces the record for `vector.collection`.
    ///
    /// Both arguments are stored as given; `vector.search_capable` is not
    /// recomputed here. In debug builds this asserts that the two collection
    /// names match; release builds skip that check and key the record by
    /// `vector.collection`.
    pub fn publish(&self, vector: VectorMetadata, artifact: ArtifactMetadata) {
        debug_assert_eq!(
            vector.collection, artifact.collection,
            "vector and artifact metadata must agree on collection name"
        );
        let name = vector.collection.clone();
        self.lock_entries().insert(name, Entry { vector, artifact });
    }

    /// Replaces the artifact metadata of an already-published collection and
    /// refreshes its `search_capable` flag.
    ///
    /// Returns `false`, changing nothing, when `artifact.collection` has not
    /// been published; otherwise returns `true`.
    pub fn update_artifact(&self, artifact: ArtifactMetadata) -> bool {
        let mut entries = self.lock_entries();
        let entry = match entries.get_mut(&artifact.collection) {
            Some(found) => found,
            None => return false,
        };

        // `Ready` is always searchable; every other state is searchable only
        // while the scalar fallback is active. The variants are spelled out so
        // that adding a new state forces this decision to be revisited.
        let search_capable = match artifact.state {
            ArtifactState::Ready => true,
            ArtifactState::Unavailable
            | ArtifactState::Building
            | ArtifactState::Failed
            | ArtifactState::Fallback => artifact.scalar_fallback_active,
        };

        entry.vector.search_capable = search_capable;
        entry.artifact = artifact;
        true
    }

    /// Removes the record for `collection`; returns whether one existed.
    pub fn forget(&self, collection: &str) -> bool {
        self.lock_entries().remove(collection).is_some()
    }

    /// Returns owned copies of every record, sorted ascending by collection
    /// name so the output order does not depend on hash-map iteration order.
    pub fn snapshot(&self) -> Vec<VectorIntrospection> {
        let entries = self.lock_entries();
        let mut rows = Vec::with_capacity(entries.len());
        for entry in entries.values() {
            rows.push(Self::introspection_of(entry));
        }
        rows.sort_by(|lhs, rhs| lhs.vector.collection.cmp(&rhs.vector.collection));
        rows
    }

    /// Returns an owned copy of the record for `collection`, if published.
    pub fn get(&self, collection: &str) -> Option<VectorIntrospection> {
        self.lock_entries()
            .get(collection)
            .map(Self::introspection_of)
    }

    /// Returns the number of published collections.
    pub fn len(&self) -> usize {
        self.lock_entries().len()
    }

    /// Returns `true` when no collection is published.
    pub fn is_empty(&self) -> bool {
        self.lock_entries().is_empty()
    }
}
// Unit tests for `VectorIntrospectionRegistry` and the `ArtifactState` labels.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: vector metadata for `collection` with `dim` dimensions, a column
// source named "embedding", cosine metric, TurboQuant index, 1,024 rows, and
// `search_capable` set.
fn ready_vector(collection: &str, dim: u32) -> VectorMetadata {
VectorMetadata {
collection: collection.into(),
source: VectorSource::Column("embedding".into()),
dimensions: dim,
metric: DistanceMetric::Cosine,
index_type: VectorIndexType::TurboQuant,
row_count: 1_024,
search_capable: true,
}
}
// Fixture: artifact metadata for `collection` in the `Ready` state with
// "turboquant" params, no scalar fallback, no rebuild progress, and no error.
fn ready_artifact(collection: &str) -> ArtifactMetadata {
ArtifactMetadata {
collection: collection.into(),
state: ArtifactState::Ready,
encoded_artifact_present: true,
params: Some(TurboArtifactParams {
family: "turboquant".into(),
subspaces: Some(8),
bits_per_code: Some(8),
codebook_size: Some(256),
}),
scalar_fallback_active: false,
rebuild_progress_pct: None,
last_error: None,
}
}
// Publishing a ready collection and reading it back through `get` and
// `snapshot` must return every field unchanged.
#[test]
fn ready_collection_round_trips_through_registry() {
let reg = VectorIntrospectionRegistry::new();
reg.publish(ready_vector("docs", 384), ready_artifact("docs"));
assert_eq!(reg.len(), 1);
let row = reg.get("docs").expect("collection was published");
assert_eq!(row.vector.collection, "docs");
assert_eq!(row.vector.dimensions, 384);
assert_eq!(row.vector.metric, DistanceMetric::Cosine);
assert_eq!(row.vector.index_type, VectorIndexType::TurboQuant);
assert_eq!(row.vector.row_count, 1_024);
assert!(row.vector.search_capable);
assert!(matches!(row.vector.source, VectorSource::Column(ref c) if c == "embedding"));
assert_eq!(row.artifact.state, ArtifactState::Ready);
assert!(row.artifact.encoded_artifact_present);
assert!(!row.artifact.scalar_fallback_active);
assert!(row.artifact.last_error.is_none());
// Moves `params` out of `row`; `row` is not used again below.
let params = row.artifact.params.expect("turbo params present");
assert_eq!(params.family, "turboquant");
assert_eq!(params.subspaces, Some(8));
assert_eq!(params.bits_per_code, Some(8));
assert_eq!(params.codebook_size, Some(256));
let snap = reg.snapshot();
assert_eq!(snap.len(), 1);
assert_eq!(snap[0].vector.collection, "docs");
}
// A collection published as `Unavailable` (not search-capable) becomes
// search-capable once updated to `Fallback` with the scalar fallback active.
#[test]
fn unavailable_then_fallback_states_are_distinguishable() {
let reg = VectorIntrospectionRegistry::new();
let mut vector = ready_vector("embeddings", 128);
vector.row_count = 0;
vector.search_capable = false;
let unavailable = ArtifactMetadata {
collection: "embeddings".into(),
state: ArtifactState::Unavailable,
encoded_artifact_present: false,
params: None,
scalar_fallback_active: false,
rebuild_progress_pct: None,
last_error: None,
};
reg.publish(vector, unavailable);
let row = reg.get("embeddings").unwrap();
assert_eq!(row.artifact.state, ArtifactState::Unavailable);
assert!(!row.artifact.encoded_artifact_present);
assert!(row.artifact.params.is_none());
assert!(!row.vector.search_capable);
let fallback = ArtifactMetadata {
collection: "embeddings".into(),
state: ArtifactState::Fallback,
encoded_artifact_present: true,
params: Some(TurboArtifactParams {
family: "turbovec".into(),
subspaces: Some(4),
bits_per_code: Some(4),
codebook_size: Some(16),
}),
scalar_fallback_active: true,
rebuild_progress_pct: None,
last_error: Some("dimension drift; serving scalar until rebuild".into()),
};
assert!(reg.update_artifact(fallback));
let row = reg.get("embeddings").unwrap();
assert_eq!(row.artifact.state, ArtifactState::Fallback);
assert!(row.artifact.scalar_fallback_active);
assert!(row
.artifact
.last_error
.as_deref()
.unwrap()
.contains("scalar"));
assert!(
row.vector.search_capable,
"scalar fallback keeps SEARCH alive even when the artifact is in Fallback"
);
}
// Walks one collection through Ready -> Building -> Failed -> Ready and checks
// that `search_capable` follows each `update_artifact` call.
#[test]
fn artifact_states_distinct_and_search_capability_tracks_them() {
let reg = VectorIntrospectionRegistry::new();
reg.publish(ready_vector("k", 64), ready_artifact("k"));
let building = ArtifactMetadata {
collection: "k".into(),
state: ArtifactState::Building,
encoded_artifact_present: false,
params: None,
scalar_fallback_active: false,
rebuild_progress_pct: Some(42),
last_error: None,
};
assert!(reg.update_artifact(building));
let row = reg.get("k").unwrap();
assert_eq!(row.artifact.state, ArtifactState::Building);
assert_eq!(row.artifact.rebuild_progress_pct, Some(42));
assert!(
!row.vector.search_capable,
"Building without fallback is not search-capable"
);
let failed = ArtifactMetadata {
collection: "k".into(),
state: ArtifactState::Failed,
encoded_artifact_present: false,
params: None,
scalar_fallback_active: false,
rebuild_progress_pct: None,
last_error: Some("codec error: subspace=3 page=12".into()),
};
assert!(reg.update_artifact(failed));
let row = reg.get("k").unwrap();
assert_eq!(row.artifact.state, ArtifactState::Failed);
assert!(!row.vector.search_capable);
assert_eq!(
row.artifact.last_error.as_deref(),
Some("codec error: subspace=3 page=12")
);
// Returning to `Ready` restores search capability and, because the artifact
// record is replaced wholesale, clears the previous error.
assert!(reg.update_artifact(ready_artifact("k")));
let row = reg.get("k").unwrap();
assert_eq!(row.artifact.state, ArtifactState::Ready);
assert!(row.vector.search_capable);
assert!(row.artifact.last_error.is_none());
}
// `update_artifact` must return `false` and insert nothing when the collection
// was never published.
#[test]
fn update_artifact_no_ops_for_unpublished_collection() {
let reg = VectorIntrospectionRegistry::new();
let orphan = ArtifactMetadata {
collection: "ghost".into(),
state: ArtifactState::Building,
encoded_artifact_present: false,
params: None,
scalar_fallback_active: false,
rebuild_progress_pct: None,
last_error: None,
};
assert!(!reg.update_artifact(orphan));
assert!(reg.is_empty());
}
// `forget` removes exactly the named collection and reports `false` when
// called again for the same name.
#[test]
fn forget_drops_collection() {
let reg = VectorIntrospectionRegistry::new();
reg.publish(ready_vector("a", 8), ready_artifact("a"));
reg.publish(ready_vector("b", 8), ready_artifact("b"));
assert!(reg.forget("a"));
assert!(!reg.forget("a"), "second forget no-ops");
let names: Vec<_> = reg
.snapshot()
.into_iter()
.map(|r| r.vector.collection)
.collect();
assert_eq!(names, vec!["b".to_string()]);
}
// `snapshot` returns rows sorted by collection name regardless of the order in
// which they were published.
#[test]
fn snapshot_is_deterministically_ordered() {
let reg = VectorIntrospectionRegistry::new();
reg.publish(ready_vector("zeta", 8), ready_artifact("zeta"));
reg.publish(ready_vector("alpha", 8), ready_artifact("alpha"));
reg.publish(ready_vector("mu", 8), ready_artifact("mu"));
let names: Vec<_> = reg
.snapshot()
.into_iter()
.map(|r| r.vector.collection)
.collect();
assert_eq!(
names,
vec!["alpha".to_string(), "mu".to_string(), "zeta".to_string()]
);
}
// Pins the exact string label of every `ArtifactState` variant.
#[test]
fn artifact_state_strings_are_stable() {
assert_eq!(ArtifactState::Unavailable.as_str(), "unavailable");
assert_eq!(ArtifactState::Building.as_str(), "building");
assert_eq!(ArtifactState::Ready.as_str(), "ready");
assert_eq!(ArtifactState::Failed.as_str(), "failed");
assert_eq!(ArtifactState::Fallback.as_str(), "fallback");
}
}