polyc_embeddings/lib.rs
1//! Provider-agnostic embedding trait for polychrome.
2//!
3//! This crate defines the [`EmbeddingProvider`] trait that every concrete
4//! embedding backend implements. It is the embedding-side analogue of
5//! `polyc-llm`'s [`LlmProvider`](https://docs.rs/polyc-llm): a thin
6//! seam so the tool-search layer can swap backends — pure-Rust static
7//! embeddings (`model2vec-rs`) by default, a heavier transformer
8//! (candle + `EmbeddingGemma`) when shallow matching isn't enough — without
9//! touching its own code.
10//!
11//! Tool retrieval is a shallow-semantic task over short strings (a query vs.
12//! tool name+description), so the default backend is intentionally tiny and
13//! fully deterministic: a fixed lookup table + mean pooling embeds bit-for-bit
14//! identically for a pinned model across hardware, which lines up with the
15//! reproducible prompt-hash discipline elsewhere in the system.
16//!
17//! # Modules
18//!
19//! - [`error`] — the [`EmbeddingError`] bound that [`EmbeddingProvider::Error`]
20//! must satisfy.
21//! - [`erased`] — type erasure to a single [`DynEmbeddingProvider`] trait
22//! object, mirroring `polyc-llm`'s `erased` module.
23
24pub mod erased;
25pub mod error;
26
27use async_trait::async_trait;
28
29pub use erased::{BoxError, DynEmbeddingProvider, ErasedEmbeddingProvider, into_dyn};
30pub use error::EmbeddingError;
31
32/// The seam between the tool-search engine and any concrete embedding backend.
33///
34/// One implementation per backend, dispatched behind a trait object so the
35/// retrieval layer swaps backends without recompiling. The `'static` bound and
36/// [`Send`] + [`Sync`] make providers storable in the control plane and
37/// shareable across tasks. [`Self::Error`] is bounded by [`EmbeddingError`] so
38/// failures are uniform across backends while each keeps its own concrete error.
39///
40/// `model_id` and `dimensions` are pinned, cheap getters: the `model_id` is
41/// recorded in the event log alongside a retrieval set so a result is
42/// reproducible, and `dimensions` lets callers size an index without a probe
43/// embed.
44#[async_trait]
45pub trait EmbeddingProvider: Send + Sync + 'static {
46 /// The provider's concrete error type. Bounded by [`EmbeddingError`].
47 type Error: EmbeddingError;
48
49 /// Stable identifier for the embedding model (e.g. `"potion-base-8M"`).
50 ///
51 /// Pinned per backend instance and recorded next to any retrieval set so
52 /// the result can be reproduced. Two providers with the same `model_id`
53 /// must produce comparable vectors.
54 fn model_id(&self) -> &str;
55
56 /// Dimensionality of the vectors this provider produces.
57 fn dimensions(&self) -> usize;
58
59 /// Embed a batch of texts, returning one vector per input in order.
60 ///
61 /// Batched because the static backend amortises almost nothing per-call but
62 /// callers (indexing a connector's whole tool catalogue) embed many short
63 /// strings at once. Each returned vector has [`Self::dimensions`] elements.
64 ///
65 /// # Errors
66 ///
67 /// Returns [`Self::Error`] if the model cannot embed the batch (e.g. an
68 /// uninitialised model or, for transformer backends, a tokenisation fault).
69 async fn embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, Self::Error>;
70}