dakera-storage 0.10.1

Storage backends for the Dakera AI memory platform
Documentation
use std::sync::Arc;

use async_trait::async_trait;
use common::{NamespaceId, Result, Vector, VectorId};

/// The kinds of index whose serialized form can be handed to an index store.
///
/// The type is a plain, field-less enum and is `Copy`, so it is cheap to pass
/// by value and can be used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IndexType {
    /// Graph-based HNSW index.
    Hnsw,
    /// Index built on Product Quantization.
    Pq,
    /// Inverted File (IVF) index.
    Ivf,
    /// SPFresh index.
    SpFresh,
    /// Inverted index used for full-text search.
    FullText,
}

impl IndexType {
    /// Returns the fixed, lowercase identifier associated with this variant.
    ///
    /// The match is exhaustive on purpose: adding a variant without giving it
    /// a name is a compile error.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Hnsw => "hnsw",
            Self::Pq => "pq",
            Self::Ivf => "ivf",
            Self::SpFresh => "spfresh",
            Self::FullText => "fulltext",
        }
    }
}

/// Storage trait for persisting index data.
///
/// Index payloads are exchanged as raw byte buffers (`Vec<u8>`) and are
/// addressed by the pair (namespace, [`IndexType`]). Implementors must be
/// `Send + Sync`, as required by the supertrait bounds.
///
/// NOTE(review): the byte format of `data` is not defined here — presumably
/// each index serializes itself; confirm against the callers.
#[async_trait]
pub trait IndexStorage: Send + Sync {
    /// Save index data for a namespace.
    ///
    /// Takes ownership of `data` and stores it under `namespace` and
    /// `index_type`.
    // NOTE(review): presumably overwrites an existing entry for the same
    // key — confirm against the implementations.
    async fn save_index(
        &self,
        namespace: &NamespaceId,
        index_type: IndexType,
        data: Vec<u8>,
    ) -> Result<()>;

    /// Load index data for a namespace.
    ///
    /// The payload is wrapped in an `Option`.
    // NOTE(review): `None` presumably means nothing is stored for this
    // (namespace, index type) pair — confirm against the implementations.
    async fn load_index(
        &self,
        namespace: &NamespaceId,
        index_type: IndexType,
    ) -> Result<Option<Vec<u8>>>;

    /// Delete index data for a namespace.
    // NOTE(review): the returned `bool` presumably reports whether an entry
    // was actually removed — confirm against the implementations.
    async fn delete_index(&self, namespace: &NamespaceId, index_type: IndexType) -> Result<bool>;

    /// Check if an index of the given type exists for a namespace.
    async fn index_exists(&self, namespace: &NamespaceId, index_type: IndexType) -> Result<bool>;

    /// List all indexes for a namespace, as the [`IndexType`]s that are present.
    async fn list_indexes(&self, namespace: &NamespaceId) -> Result<Vec<IndexType>>;
}

/// Lets an `Arc`-wrapped store be used wherever an [`IndexStorage`] is expected.
///
/// Every method simply forwards to the wrapped `T`. The `?Sized` bound means
/// unsized targets such as `Arc<dyn IndexStorage>` are covered as well.
#[async_trait]
impl<T: IndexStorage + ?Sized> IndexStorage for Arc<T> {
    /// Forwards to `T::save_index` on the shared value.
    async fn save_index(
        &self,
        namespace: &NamespaceId,
        index_type: IndexType,
        data: Vec<u8>,
    ) -> Result<()> {
        T::save_index(&**self, namespace, index_type, data).await
    }

    /// Forwards to `T::load_index` on the shared value.
    async fn load_index(
        &self,
        namespace: &NamespaceId,
        index_type: IndexType,
    ) -> Result<Option<Vec<u8>>> {
        T::load_index(&**self, namespace, index_type).await
    }

    /// Forwards to `T::delete_index` on the shared value.
    async fn delete_index(&self, namespace: &NamespaceId, index_type: IndexType) -> Result<bool> {
        T::delete_index(&**self, namespace, index_type).await
    }

    /// Forwards to `T::index_exists` on the shared value.
    async fn index_exists(&self, namespace: &NamespaceId, index_type: IndexType) -> Result<bool> {
        T::index_exists(&**self, namespace, index_type).await
    }

    /// Forwards to `T::list_indexes` on the shared value.
    async fn list_indexes(&self, namespace: &NamespaceId) -> Result<Vec<IndexType>> {
        T::list_indexes(&**self, namespace).await
    }
}

/// Core storage abstraction - implementations can be in-memory, S3, etc.
///
/// All operations are asynchronous and, except for the two listing/cleanup
/// methods that span every namespace, are scoped to a single [`NamespaceId`].
/// Implementors must be `Send + Sync`, as required by the supertrait bounds.
#[async_trait]
pub trait VectorStorage: Send + Sync {
    /// Store or update vectors in a namespace.
    ///
    /// Takes ownership of `vectors`.
    // NOTE(review): the returned count is presumably the number of vectors
    // written — confirm against the implementations.
    async fn upsert(&self, namespace: &NamespaceId, vectors: Vec<Vector>) -> Result<usize>;

    /// Get vectors by IDs.
    // NOTE(review): how IDs with no stored vector are handled (skipped vs.
    // error) is not visible here — confirm against the implementations.
    async fn get(&self, namespace: &NamespaceId, ids: &[VectorId]) -> Result<Vec<Vector>>;

    /// Get all vectors in a namespace (for brute-force search).
    async fn get_all(&self, namespace: &NamespaceId) -> Result<Vec<Vector>>;

    /// Delete vectors by IDs.
    // NOTE(review): the returned count is presumably the number of vectors
    // actually removed — confirm against the implementations.
    async fn delete(&self, namespace: &NamespaceId, ids: &[VectorId]) -> Result<usize>;

    /// Check if namespace exists.
    async fn namespace_exists(&self, namespace: &NamespaceId) -> Result<bool>;

    /// Create namespace if it doesn't exist.
    async fn ensure_namespace(&self, namespace: &NamespaceId) -> Result<()>;

    /// Get vector count in namespace.
    async fn count(&self, namespace: &NamespaceId) -> Result<usize>;

    /// Get vector dimension for namespace (`None` if empty).
    async fn dimension(&self, namespace: &NamespaceId) -> Result<Option<usize>>;

    /// List all namespaces known to this store.
    async fn list_namespaces(&self) -> Result<Vec<NamespaceId>>;

    /// Delete a namespace and all its vectors.
    // NOTE(review): the returned `bool` presumably reports whether the
    // namespace existed — confirm against the implementations.
    async fn delete_namespace(&self, namespace: &NamespaceId) -> Result<bool>;

    /// Clean up expired vectors in a namespace.
    /// Returns the number of vectors removed.
    // NOTE(review): what makes a vector "expired" is decided outside this
    // trait (not visible here) — confirm against `Vector` and the backends.
    async fn cleanup_expired(&self, namespace: &NamespaceId) -> Result<usize>;

    /// Clean up expired vectors in all namespaces.
    /// Returns total number of vectors removed.
    async fn cleanup_all_expired(&self) -> Result<usize>;
}

/// Lets an `Arc`-wrapped store be used wherever a [`VectorStorage`] is expected.
///
/// Every method forwards to the wrapped `T`. Because of the `?Sized` bound the
/// impl also applies to trait objects such as `Arc<dyn VectorStorage>`, which
/// is what makes dynamic dispatch through a shared handle possible.
#[async_trait]
impl<T: VectorStorage + ?Sized> VectorStorage for Arc<T> {
    /// Forwards to `T::upsert` on the shared value.
    async fn upsert(&self, namespace: &NamespaceId, vectors: Vec<Vector>) -> Result<usize> {
        T::upsert(&**self, namespace, vectors).await
    }

    /// Forwards to `T::get` on the shared value.
    async fn get(&self, namespace: &NamespaceId, ids: &[VectorId]) -> Result<Vec<Vector>> {
        T::get(&**self, namespace, ids).await
    }

    /// Forwards to `T::get_all` on the shared value.
    async fn get_all(&self, namespace: &NamespaceId) -> Result<Vec<Vector>> {
        T::get_all(&**self, namespace).await
    }

    /// Forwards to `T::delete` on the shared value.
    async fn delete(&self, namespace: &NamespaceId, ids: &[VectorId]) -> Result<usize> {
        T::delete(&**self, namespace, ids).await
    }

    /// Forwards to `T::namespace_exists` on the shared value.
    async fn namespace_exists(&self, namespace: &NamespaceId) -> Result<bool> {
        T::namespace_exists(&**self, namespace).await
    }

    /// Forwards to `T::ensure_namespace` on the shared value.
    async fn ensure_namespace(&self, namespace: &NamespaceId) -> Result<()> {
        T::ensure_namespace(&**self, namespace).await
    }

    /// Forwards to `T::count` on the shared value.
    async fn count(&self, namespace: &NamespaceId) -> Result<usize> {
        T::count(&**self, namespace).await
    }

    /// Forwards to `T::dimension` on the shared value.
    async fn dimension(&self, namespace: &NamespaceId) -> Result<Option<usize>> {
        T::dimension(&**self, namespace).await
    }

    /// Forwards to `T::list_namespaces` on the shared value.
    async fn list_namespaces(&self) -> Result<Vec<NamespaceId>> {
        T::list_namespaces(&**self).await
    }

    /// Forwards to `T::delete_namespace` on the shared value.
    async fn delete_namespace(&self, namespace: &NamespaceId) -> Result<bool> {
        T::delete_namespace(&**self, namespace).await
    }

    /// Forwards to `T::cleanup_expired` on the shared value.
    async fn cleanup_expired(&self, namespace: &NamespaceId) -> Result<usize> {
        T::cleanup_expired(&**self, namespace).await
    }

    /// Forwards to `T::cleanup_all_expired` on the shared value.
    async fn cleanup_all_expired(&self) -> Result<usize> {
        T::cleanup_all_expired(&**self).await
    }
}