// chaotic_semantic_memory 0.3.6
//
// AI memory systems with hyperdimensional vectors and chaotic reservoirs
// Documentation
//! External embedding model bridge for semantic accuracy.
//!
//! Provides an `EmbeddingProvider` trait with multiple backends:
//! - HDC text encoder (default, semantically blind but fast)
//! - FastEmbed (local ONNX models, opt-in via `embed-fastembed` feature)
//! - OpenAI HTTP API (opt-in via `embed-openai` feature)
//! - Voyage HTTP API (opt-in via `embed-voyage` feature)
//!
//! All backends project native embeddings to HVec10240 via sparse random projection
//! (Achlioptas method), which approximately preserves cosine similarity
//! (Johnson-Lindenstrauss lemma).

mod hdc_text;
mod projection;

#[cfg(feature = "embed-fastembed")]
mod fastembed;
#[cfg(feature = "embed-openai")]
mod remote_openai;
#[cfg(feature = "embed-voyage")]
mod remote_voyage;

pub use hdc_text::HdcTextProvider;
pub use projection::{Projection, ProjectionConfig};

#[cfg(feature = "embed-fastembed")]
pub use fastembed::FastEmbedProvider;
#[cfg(feature = "embed-openai")]
pub use remote_openai::OpenAiProvider;
#[cfg(feature = "embed-voyage")]
pub use remote_voyage::VoyageProvider;

use crate::error::Result;
use crate::hyperdim::HVec10240;

/// Embedding provider trait for text-to-vector conversion.
///
/// Implementations may be:
/// - Local (HDC hash, FastEmbed ONNX)
/// - Remote HTTP (OpenAI, Voyage)
///
/// A provider produces embeddings in its own native dimensionality
/// ([`native_dim`](EmbeddingProvider::native_dim)); mapping them to
/// `HVec10240` is done by [`project`](EmbeddingProvider::project).
///
/// The `Send + Sync` supertraits are what allow a provider to be handed out
/// as a shared `Arc<dyn EmbeddingProvider>`, as [`get_provider`] does.
#[async_trait::async_trait]
pub trait EmbeddingProvider: Send + Sync {
    /// Provider name for logging and CLI.
    fn name(&self) -> &str;

    /// Native embedding dimension before projection.
    ///
    /// NOTE(review): presumably the length of every vector returned by
    /// `embed` / `embed_batch` — confirm against the implementations.
    fn native_dim(&self) -> usize;

    /// Embed a single text string into the provider's native vector space.
    ///
    /// # Errors
    ///
    /// Returns an error when the backend cannot produce an embedding; the
    /// concrete failure modes are defined by each implementation.
    async fn embed(&self, text: &str) -> Result<Vec<f32>>;

    /// Embed multiple texts in batch (more efficient for remote providers).
    ///
    /// NOTE(review): the output is presumably one vector per input, in input
    /// order — the trait itself does not enforce this; verify per backend.
    ///
    /// # Errors
    ///
    /// Returns an error when the backend cannot produce the embeddings.
    async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;

    /// Project a native embedding to HVec10240.
    ///
    /// The default implementation simply delegates to the supplied
    /// `projection` (`projection.project(vec)`); the provider itself holds no
    /// projection state here. Implementations may override it.
    fn project(&self, vec: &[f32], projection: &Projection) -> HVec10240 {
        projection.project(vec)
    }
}

/// Build an embedding provider from a textual spec.
///
/// `name` is either `"provider_name"` or `"provider_name:model_name"`. Only
/// the first `:` acts as the separator, so everything after it (including any
/// further colons) is taken as the model name.
///
/// Recognised provider names:
/// - `"hdc-text"` / `"hdc"` — always available; a model suffix is ignored.
/// - `"fastembed"` — requires the `embed-fastembed` feature.
/// - `"openai"` — requires the `embed-openai` feature; built from the environment.
/// - `"voyage"` — requires the `embed-voyage` feature; built from the environment.
///
/// # Errors
///
/// Returns `MemoryError::Config` when the provider name is unknown or the
/// matching cargo feature is not compiled in, and propagates any error raised
/// by the selected provider's constructor.
pub fn get_provider(name: &str) -> Result<std::sync::Arc<dyn EmbeddingProvider>> {
    use std::sync::Arc;

    // The leading underscore keeps this binding warning-free in builds where
    // every optional backend (the only consumers of the model) is compiled out.
    let (provider_name, _model) = match name.split_once(':') {
        Some((provider, model)) => (provider, Some(model)),
        None => (name, None),
    };

    match provider_name {
        "hdc-text" | "hdc" => Ok(Arc::new(HdcTextProvider::new())),

        "fastembed" => {
            #[cfg(feature = "embed-fastembed")]
            {
                match _model {
                    Some(model) => Ok(Arc::new(FastEmbedProvider::with_model(model)?)),
                    None => Ok(Arc::new(FastEmbedProvider::new()?)),
                }
            }
            #[cfg(not(feature = "embed-fastembed"))]
            Err(crate::error::MemoryError::Config(
                "embed-fastembed feature not enabled".into(),
            ))
        }

        "openai" => {
            #[cfg(feature = "embed-openai")]
            {
                let base = OpenAiProvider::from_env()?;
                // Override the default model only when one was requested.
                let configured = match _model {
                    Some(model) => base.with_model(model),
                    None => base,
                };
                Ok(Arc::new(configured))
            }
            #[cfg(not(feature = "embed-openai"))]
            Err(crate::error::MemoryError::Config(
                "embed-openai feature not enabled".into(),
            ))
        }

        "voyage" => {
            #[cfg(feature = "embed-voyage")]
            {
                let base = VoyageProvider::from_env()?;
                // Override the default model only when one was requested.
                let configured = match _model {
                    Some(model) => base.with_model(model),
                    None => base,
                };
                Ok(Arc::new(configured))
            }
            #[cfg(not(feature = "embed-voyage"))]
            Err(crate::error::MemoryError::Config(
                "embed-voyage feature not enabled".into(),
            ))
        }

        _ => Err(crate::error::MemoryError::Config(format!(
            "unknown embedding provider: {provider_name}"
        ))),
    }
}