enact-core 0.0.2

Core agent runtime for Enact - Graph-Native AI agents
Documentation
//! VectorStore - Semantic memory and RAG
//!
//! The VectorStore provides semantic search capabilities for long-term memory
//! and retrieval-augmented generation (RAG).
//!
//! ## Guarantees
//!
//! - **Consistency**: Eventually consistent
//! - **Authority**: Non-authoritative - for recall only
//! - **Availability**: Search may return stale results
//! - **Durability**: Documents are durable after `upsert()` returns
//!
//! ## Use Cases
//!
//! - Long-term agent memory
//! - Document retrieval for RAG
//! - Semantic search over execution history
//!
//! See also: `docs/TECHNICAL/14-PERSISTENCE-LAYER.md`

use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use super::StorageBackend;

/// A document with embedding for vector storage
///
/// Bundles a caller-chosen ID, the raw text, a pre-computed embedding, and
/// free-form metadata. Derives `Serialize`/`Deserialize`, so it round-trips
/// through serde formats such as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorDocument {
    /// Unique identifier for this document
    pub id: String,
    /// The text content
    pub content: String,
    /// Pre-computed embedding vector
    ///
    /// Dimensions must match the collection's configured dimension.
    /// This type itself performs no length check on the vector.
    pub embedding: Vec<f32>,
    /// Arbitrary metadata for filtering
    ///
    /// Maps a string key to any JSON value.
    pub metadata: HashMap<String, serde_json::Value>,
}

impl VectorDocument {
    /// Build a document from an ID, its text, and a pre-computed embedding.
    ///
    /// The metadata map starts out empty; chain [`Self::with_metadata`] to
    /// populate it. The embedding is stored as given, without validation.
    pub fn new(id: impl Into<String>, content: impl Into<String>, embedding: Vec<f32>) -> Self {
        let id = id.into();
        let content = content.into();
        let metadata = HashMap::new();

        Self {
            id,
            content,
            embedding,
            metadata,
        }
    }

    /// Attach one metadata entry and hand the document back (builder style).
    ///
    /// Inserting under a key that is already present overwrites the earlier value.
    pub fn with_metadata(self, key: impl Into<String>, value: serde_json::Value) -> Self {
        let mut doc = self;
        doc.metadata.insert(key.into(), value);
        doc
    }
}

/// Search result with similarity score
///
/// Element type of the list returned by `VectorStore::search` and
/// `VectorStore::search_with_threshold`.
#[derive(Debug, Clone)]
pub struct VectorSearchResult {
    /// The matched document
    pub document: VectorDocument,
    /// Similarity score (higher = more similar)
    ///
    /// Score semantics depend on the distance metric:
    /// - Cosine: 0.0 to 1.0 (1.0 = identical)
    /// - Euclidean: 0.0 to infinity (0.0 = identical)
    ///
    /// NOTE(review): the Euclidean line describes a raw distance (lower = closer),
    /// which conflicts with "higher = more similar" above and with
    /// `VectorStore::search_with_threshold`, which keeps results whose score is
    /// `>= min_score`. Confirm whether backends are expected to convert distances
    /// into a similarity before filling this field.
    ///
    /// NOTE(review): no range is documented for `DistanceMetric::DotProduct`, and
    /// the Cosine lower bound of 0.0 presumably assumes non-negative or clamped
    /// similarities - TODO confirm.
    pub score: f32,
}

/// Single filter condition
///
/// A key/value pair used by the `any` and `none` lists of `VectorFilter`.
#[derive(Debug, Clone)]
pub struct FilterCondition {
    /// Key to test (presumably a document metadata key - confirm against backends)
    pub key: String,
    /// JSON value the key is compared against; how the comparison is performed
    /// is left to the store implementation
    pub value: serde_json::Value,
}

impl FilterCondition {
    /// Build a condition pairing `key` with the JSON `value` to test against.
    pub fn new(key: impl Into<String>, value: serde_json::Value) -> Self {
        let key = key.into();
        Self { key, value }
    }
}

/// Filter for vector search
///
/// Groups three kinds of conditions: `conditions` (all must match), `any`
/// (at least one must match), and `none` (must not match). The derived
/// `Default` yields a filter with all three groups empty.
#[derive(Debug, Clone, Default)]
pub struct VectorFilter {
    /// Metadata conditions (all must match)
    ///
    /// Stored as a map, so each key holds at most one required value.
    pub conditions: HashMap<String, serde_json::Value>,
    /// Any-of conditions (at least one must match)
    ///
    /// Stored as a list, so the same key may appear more than once.
    pub any: Vec<FilterCondition>,
    /// None-of conditions (must not match)
    ///
    /// Stored as a list, so the same key may appear more than once.
    pub none: Vec<FilterCondition>,
}

impl VectorFilter {
    /// Create a new empty filter
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a condition
    pub fn with_condition(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.conditions.insert(key.into(), value);
        self
    }

    /// Add an any-of condition
    pub fn with_any_condition(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.any.push(FilterCondition::new(key, value));
        self
    }

    /// Add a none-of condition
    pub fn with_none_condition(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.none.push(FilterCondition::new(key, value));
        self
    }

    /// Check if filter is empty
    pub fn is_empty(&self) -> bool {
        self.conditions.is_empty() && self.any.is_empty() && self.none.is_empty()
    }
}

/// VectorStore trait - semantic memory
///
/// Implemented by storage backends that offer vector similarity search for
/// RAG and long-term memory. Every method is async and reports failure
/// through `anyhow::Result`.
#[async_trait]
pub trait VectorStore: StorageBackend {
    // -------------------------------------------------------------------------
    // Collections
    // -------------------------------------------------------------------------

    /// Make sure `collection` exists, creating it when it does not.
    ///
    /// # Arguments
    /// * `collection` - Collection name
    /// * `dimension` - Vector dimension (must match embeddings)
    async fn ensure_collection(&self, collection: &str, dimension: usize) -> anyhow::Result<()>;

    /// Remove a collection together with every document stored in it.
    async fn delete_collection(&self, collection: &str) -> anyhow::Result<()>;

    /// Return the names of all collections.
    async fn list_collections(&self) -> anyhow::Result<Vec<String>>;

    /// Report whether a collection with this name exists.
    async fn collection_exists(&self, collection: &str) -> anyhow::Result<bool>;

    // -------------------------------------------------------------------------
    // Documents
    // -------------------------------------------------------------------------

    /// Insert a document, or replace the stored one that has the same ID.
    async fn upsert(&self, collection: &str, document: VectorDocument) -> anyhow::Result<()>;

    /// Upsert several documents atomically.
    async fn upsert_batch(
        &self,
        collection: &str,
        documents: Vec<VectorDocument>,
    ) -> anyhow::Result<()>;

    /// Look up a document by ID; the `Option` in the return type carries absence.
    async fn get(&self, collection: &str, id: &str) -> anyhow::Result<Option<VectorDocument>>;

    /// Delete the document with the given ID.
    async fn delete(&self, collection: &str, id: &str) -> anyhow::Result<()>;

    /// Delete every document whose ID appears in `ids`.
    async fn delete_batch(&self, collection: &str, ids: &[String]) -> anyhow::Result<()>;

    // -------------------------------------------------------------------------
    // Search
    // -------------------------------------------------------------------------

    /// Find the documents most similar to a query vector.
    ///
    /// # Arguments
    /// * `collection` - Collection to search
    /// * `query_embedding` - Query vector (must match collection dimension)
    /// * `limit` - Maximum number of results
    /// * `filter` - Optional metadata filter
    ///
    /// # Returns
    /// Results ordered by similarity (highest score first)
    async fn search(
        &self,
        collection: &str,
        query_embedding: &[f32],
        limit: usize,
        filter: Option<VectorFilter>,
    ) -> anyhow::Result<Vec<VectorSearchResult>>;

    /// Search, then discard results scoring below `min_score`.
    ///
    /// The default implementation delegates to [`Self::search`] with the same
    /// arguments and keeps only results whose score is `>= min_score`, in the
    /// order `search` returned them. Because the threshold is applied after
    /// `limit`, fewer than `limit` results may come back.
    async fn search_with_threshold(
        &self,
        collection: &str,
        query_embedding: &[f32],
        limit: usize,
        min_score: f32,
        filter: Option<VectorFilter>,
    ) -> anyhow::Result<Vec<VectorSearchResult>> {
        let mut hits = self
            .search(collection, query_embedding, limit, filter)
            .await?;
        // In-place pruning; relative order of the survivors is preserved.
        hits.retain(|hit| hit.score >= min_score);
        Ok(hits)
    }

    // -------------------------------------------------------------------------
    // Utilities
    // -------------------------------------------------------------------------

    /// Count the documents in a collection.
    async fn count(&self, collection: &str) -> anyhow::Result<u64>;

    /// Fetch descriptive information about a collection (dimension, count, etc.).
    async fn collection_info(&self, collection: &str) -> anyhow::Result<CollectionInfo>;
}

/// Information about a collection
///
/// Returned by `VectorStore::collection_info`.
#[derive(Debug, Clone)]
pub struct CollectionInfo {
    /// Collection name
    pub name: String,
    /// Vector dimension
    ///
    /// Same unit as the `dimension` argument of `VectorStore::ensure_collection`.
    pub dimension: usize,
    /// Number of documents
    pub count: u64,
    /// Distance metric used
    pub distance_metric: DistanceMetric,
}

/// Distance metric for similarity calculation
///
/// `DistanceMetric::default()` is `Cosine`, selected by the `#[default]`
/// attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum DistanceMetric {
    /// Cosine similarity (default)
    #[default]
    Cosine,
    /// Euclidean distance
    Euclidean,
    /// Dot product
    DotProduct,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor plus chained `with_metadata` calls populate every field.
    #[test]
    fn test_document_creation() {
        let embedding = vec![0.1, 0.2, 0.3];
        let built = VectorDocument::new("doc1", "Hello world", embedding)
            .with_metadata("source", serde_json::json!("test"))
            .with_metadata("page", serde_json::json!(1));

        assert_eq!(built.id, "doc1");
        assert_eq!(built.content, "Hello world");
        assert_eq!(built.embedding.len(), 3);
        assert_eq!(
            built.metadata.get("source"),
            Some(&serde_json::json!("test"))
        );
        assert_eq!(built.metadata.get("page"), Some(&serde_json::json!(1)));
    }

    /// Each builder method feeds its own condition group.
    #[test]
    fn test_filter_creation() {
        let built = VectorFilter::new()
            .with_condition("tenant_id", serde_json::json!("acme"))
            .with_condition("status", serde_json::json!("active"))
            .with_any_condition("visibility", serde_json::json!("team:legal"))
            .with_none_condition("denyScopes", serde_json::json!("team:blocked"));

        let group_sizes = (built.conditions.len(), built.any.len(), built.none.len());

        assert!(!built.is_empty());
        assert_eq!(group_sizes, (2, 1, 1));
    }

    /// A document survives a JSON string round trip.
    #[test]
    fn test_document_serialization() {
        let original = VectorDocument::new("doc1", "Test content", vec![0.1, 0.2])
            .with_metadata("key", serde_json::json!("value"));

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: VectorDocument = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.id, "doc1");
        assert_eq!(decoded.embedding, vec![0.1, 0.2]);
    }
}