bytesandbrains_core/codec.rs
1use crate::embedding::EmbeddingSpace;
2use crate::index::OpRef;
3
4/// Encode, decode, and optionally train a compressed embedding representation.
5///
6/// `Codec` is the data-plane counterpart to [`Index`](crate::Index): an index
7/// stores and retrieves vectors, while a codec compresses them. If a codec also
8/// participates in gossip, the gossip protocol wraps the codec and implements
9/// [`OverlayProtocol`](crate::OverlayProtocol) separately — separation of
10/// data-plane from control-plane.
11///
12/// Every operation returns an [`OpRef`] handle rather than a bare result,
13/// mirroring the pattern used by [`Index`](crate::Index). This lets the same
14/// trait describe both in-process codecs (where the handle completes
15/// synchronously) and networked codecs (where the handle tracks a remote RPC).
16///
17/// Like [`Index`](crate::Index), `Codec` includes training and observation
18/// methods directly. Codecs that don't need training should make
19/// [`train`](Codec::train) and [`observe`](Codec::observe) no-ops and return
20/// `true` from [`is_trained`](Codec::is_trained).
21///
22/// ## Associated type families
23///
24/// | Associated type | Operation handle (GAT) | Operations |
25/// |---|---|---|
26/// | [`Encoded`](Codec::Encoded) | [`EncodeRef`](Codec::EncodeRef) | [`encode`](Codec::encode), [`encode_batch`](Codec::encode_batch) |
27/// | — | [`DecodeRef`](Codec::DecodeRef) | [`decode`](Codec::decode), [`decode_batch`](Codec::decode_batch) |
28/// | — | [`TrainRef`](Codec::TrainRef) | [`train`](Codec::train) |
29/// | — | [`ObserveRef`](Codec::ObserveRef) | [`observe`](Codec::observe), [`observe_batch`](Codec::observe_batch) |
30pub trait Codec<S: EmbeddingSpace> {
31 /// The compressed representation produced by [`encode`](Codec::encode).
32 type Encoded: Clone;
33
34 /// Handle to an in-flight encode operation.
35 type EncodeRef<'a>: OpRef where Self: 'a;
36
37 /// Handle to an in-flight decode operation.
38 type DecodeRef<'a>: OpRef where Self: 'a;
39
40 /// Handle to an in-flight train operation.
41 type TrainRef<'a>: OpRef where Self: 'a;
42
43 /// Handle to an in-flight observe operation.
44 type ObserveRef<'a>: OpRef where Self: 'a;
45
46 /// Compress a single embedding.
47 fn encode(&mut self, embedding: &S::EmbeddingData) -> Self::EncodeRef<'_>;
48
49 /// Compress a batch of embeddings.
50 fn encode_batch(&mut self, embeddings: &[S::EmbeddingData]) -> Vec<Self::EncodeRef<'_>>;
51
52 /// Reconstruct an embedding from its compressed form.
53 fn decode(&self, encoded: &Self::Encoded) -> Self::DecodeRef<'_>;
54
55 /// Reconstruct a batch of embeddings from their compressed forms.
56 fn decode_batch(&self, encoded: &[Self::Encoded]) -> Vec<Self::DecodeRef<'_>>;
57
58 /// Returns the fixed byte-size of an encoded vector, if the encoding is
59 /// fixed-size. Variable-length encodings return `None`.
60 fn code_size(&self) -> Option<usize>;
61
62 /// Batch training from a set of embeddings.
63 ///
64 /// Codecs that don't learn from data should make this a no-op and return
65 /// `true` from [`is_trained`](Codec::is_trained).
66 fn train(&mut self, embeddings: &[S::EmbeddingData]) -> Self::TrainRef<'_>;
67
68 /// Online/incremental update from a single observation.
69 fn observe(&mut self, embedding: &S::EmbeddingData) -> Self::ObserveRef<'_>;
70
71 /// Online/incremental update from a batch of observations.
72 fn observe_batch(&mut self, embeddings: &[S::EmbeddingData]) -> Vec<Self::ObserveRef<'_>>;
73
74 /// Whether the codec has been trained and is ready to encode/decode.
75 fn is_trained(&self) -> bool;
76}