trusty-search 0.22.1

Machine-wide hybrid code search service: BM25 + vector + KG, zero cold-start, MCP server
Documentation
//! Embedding abstraction — thin facade over the shared `trusty-embedder` crate.
//!
//! Why: The `Embedder` trait + `FastEmbedder` + `MockEmbedder` previously
//! lived in this crate. They've been moved to the shared `trusty-embedder`
//! crate so trusty-memory and trusty-search ship the same implementation
//! (LRU cache, ORT warmup, deterministic mock). This module keeps the
//! existing in-crate `Embedder` trait shape (`embed(&str)` + `embed_batch(&[&str])`)
//! so the rest of trusty-search compiles unchanged.
//! What: A local `Embedder` trait that mirrors the historic API, plus a
//! blanket-impl adapter that delegates to the shared `trusty_common::embedder::Embedder`.
//! `FastEmbedder` and `EMBED_DIM` are re-exported unconditionally; `MockEmbedder`
//! is re-exported only under `cfg(test)` or the `test-support` feature.
//! Test: existing indexer / concept_cluster tests exercise this surface;
//! shared-crate behaviour is covered upstream in `trusty-embedder`.

use anyhow::Result;
use async_trait::async_trait;

pub use trusty_common::embedder::{FastEmbedder, EMBED_DIM};

#[cfg(any(test, feature = "test-support"))]
pub use trusty_common::embedder::MockEmbedder;

/// In-crate embedder interface that keeps the borrowed-string call shape.
///
/// Why: Existing call sites hand over `&str` / `&[&str]`, whereas the shared
/// `trusty_common::embedder::Embedder` works on `&[String]`. Keeping this
/// trait lets those call sites stay as they are; the copy into owned
/// strings for batches is done once, in the blanket adapter impl, instead of
/// at every caller.
/// What: async `embed(&str)` and `embed_batch(&[&str])`, a synchronous
/// `dimension()`, and `provider()` with a CPU default.
/// Test: exercised indirectly by the `CodeIndexer` tests that run against
/// `MockEmbedder`.
#[async_trait]
pub trait Embedder: Send + Sync {
    /// Embed a single text, yielding one `Vec<f32>` or an error.
    async fn embed(&self, text: &str) -> Result<Vec<f32>>;

    /// Embed a slice of texts in one call, yielding a `Vec<Vec<f32>>` or an
    /// error.
    async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;

    /// Dimension reported by this embedder.
    ///
    /// NOTE(review): presumably the length of every vector returned by
    /// `embed` / `embed_batch` — confirm against the implementations.
    fn dimension(&self) -> usize;

    /// ONNX execution provider this embedder is running on.
    ///
    /// Why: code that only holds a `&dyn Embedder` (e.g. the reindex
    /// pipeline) needs to choose provider-appropriate batch sizes without
    /// reaching around the facade to the shared-crate trait.
    /// What: the default body reports `ExecutionProvider::Cpu`; the blanket
    /// adapter overrides it to forward the shared embedder's own answer.
    /// Test: covered by the public-surface compile check.
    fn provider(&self) -> trusty_common::embedder::ExecutionProvider {
        use trusty_common::embedder::ExecutionProvider;

        ExecutionProvider::Cpu
    }
}

/// Blanket adapter from the shared embedder trait to the in-crate one.
///
/// Why: any type that implements `trusty_common::embedder::Embedder` should
/// be usable wherever the in-crate `Embedder` is expected, without a
/// hand-written impl per type.
/// What: forwards each method to the shared trait (or its `embed_one`
/// helper), copying borrowed batch inputs into owned `String`s first.
#[async_trait]
impl<E: trusty_common::embedder::Embedder> Embedder for E {
    /// Single-text path: passes `text` straight to the shared `embed_one`
    /// helper.
    async fn embed(&self, text: &str) -> Result<Vec<f32>> {
        trusty_common::embedder::embed_one(self, text).await
    }

    /// Batch path: copies every `&str` into an owned `String`, then calls
    /// the shared trait's `embed_batch` on the owned collection.
    async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        let owned_texts = texts
            .iter()
            .copied()
            .map(String::from)
            .collect::<Vec<String>>();
        <E as trusty_common::embedder::Embedder>::embed_batch(self, &owned_texts).await
    }

    /// Forwards to the shared trait's `dimension()`.
    fn dimension(&self) -> usize {
        <E as trusty_common::embedder::Embedder>::dimension(self)
    }

    /// Forwards to the shared trait's `provider()`, replacing the in-crate
    /// trait's CPU default.
    fn provider(&self) -> trusty_common::embedder::ExecutionProvider {
        <E as trusty_common::embedder::Embedder>::provider(self)
    }
}