// bytesandbrains_core/codec.rs

use crate::embedding::EmbeddingSpace;
use crate::index::OpRef;
3
4/// Encode, decode, and optionally train a compressed embedding representation.
5///
6/// `Codec` is the data-plane counterpart to [`Index`](crate::Index): an index
7/// stores and retrieves vectors, while a codec compresses them. If a codec also
8/// participates in gossip, the gossip protocol wraps the codec and implements
9/// [`OverlayProtocol`](crate::OverlayProtocol) separately — separation of
10/// data-plane from control-plane.
11///
12/// Every operation returns an [`OpRef`] handle rather than a bare result,
13/// mirroring the pattern used by [`Index`](crate::Index). This lets the same
14/// trait describe both in-process codecs (where the handle completes
15/// synchronously) and networked codecs (where the handle tracks a remote RPC).
16///
17/// Like [`Index`](crate::Index), `Codec` includes training and observation
18/// methods directly. Codecs that don't need training should make
19/// [`train`](Codec::train) and [`observe`](Codec::observe) no-ops and return
20/// `true` from [`is_trained`](Codec::is_trained).
21///
22/// ## Associated type families
23///
24/// | Associated type | Operation handle (GAT) | Operations |
25/// |---|---|---|
26/// | [`Encoded`](Codec::Encoded) | [`EncodeRef`](Codec::EncodeRef) | [`encode`](Codec::encode), [`encode_batch`](Codec::encode_batch) |
27/// | — | [`DecodeRef`](Codec::DecodeRef) | [`decode`](Codec::decode), [`decode_batch`](Codec::decode_batch) |
28/// | — | [`TrainRef`](Codec::TrainRef) | [`train`](Codec::train) |
29/// | — | [`ObserveRef`](Codec::ObserveRef) | [`observe`](Codec::observe), [`observe_batch`](Codec::observe_batch) |
30pub trait Codec<S: EmbeddingSpace> {
31    /// The compressed representation produced by [`encode`](Codec::encode).
32    type Encoded: Clone;
33
34    /// Handle to an in-flight encode operation.
35    type EncodeRef<'a>: OpRef where Self: 'a;
36
37    /// Handle to an in-flight decode operation.
38    type DecodeRef<'a>: OpRef where Self: 'a;
39
40    /// Handle to an in-flight train operation.
41    type TrainRef<'a>: OpRef where Self: 'a;
42
43    /// Handle to an in-flight observe operation.
44    type ObserveRef<'a>: OpRef where Self: 'a;
45
46    /// Compress a single embedding.
47    fn encode(&mut self, embedding: &S::EmbeddingData) -> Self::EncodeRef<'_>;
48
49    /// Compress a batch of embeddings.
50    fn encode_batch(&mut self, embeddings: &[S::EmbeddingData]) -> Vec<Self::EncodeRef<'_>>;
51
52    /// Reconstruct an embedding from its compressed form.
53    fn decode(&self, encoded: &Self::Encoded) -> Self::DecodeRef<'_>;
54
55    /// Reconstruct a batch of embeddings from their compressed forms.
56    fn decode_batch(&self, encoded: &[Self::Encoded]) -> Vec<Self::DecodeRef<'_>>;
57
58    /// Returns the fixed byte-size of an encoded vector, if the encoding is
59    /// fixed-size. Variable-length encodings return `None`.
60    fn code_size(&self) -> Option<usize>;
61
62    /// Batch training from a set of embeddings.
63    ///
64    /// Codecs that don't learn from data should make this a no-op and return
65    /// `true` from [`is_trained`](Codec::is_trained).
66    fn train(&mut self, embeddings: &[S::EmbeddingData]) -> Self::TrainRef<'_>;
67
68    /// Online/incremental update from a single observation.
69    fn observe(&mut self, embedding: &S::EmbeddingData) -> Self::ObserveRef<'_>;
70
71    /// Online/incremental update from a batch of observations.
72    fn observe_batch(&mut self, embeddings: &[S::EmbeddingData]) -> Vec<Self::ObserveRef<'_>>;
73
74    /// Whether the codec has been trained and is ready to encode/decode.
75    fn is_trained(&self) -> bool;
76}