Skip to main content

bcp_types/
embedding_ref.rs

1use crate::error::TypeError;
2use crate::fields::{decode_bytes_value, decode_field_header, encode_bytes_field, skip_field};
3
/// Body of an EMBEDDING_REF block: a reference to a vector embedding.
///
/// The embedding itself is pre-computed and lives in an external store
/// (for example a vector database); this block only carries the data
/// needed to locate it. `source_hash` is a BLAKE3 digest that ties the
/// reference back, content-addressably, to the original content that
/// was embedded.
///
/// Wire layout of the body (every field uses the `Bytes` wire type):
///
/// ```text
/// Field ID   Name          Description
/// --------   -----------   -----------------------
/// 1          vector_id     Vector store identifier
/// 2          source_hash   BLAKE3 content hash
/// 3          model         Embedding model name
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingRefBlock {
    /// Identifier of the vector inside the external store; treated as
    /// opaque bytes.
    pub vector_id: Vec<u8>,
    /// BLAKE3 digest of the source content the embedding was computed from.
    pub source_hash: Vec<u8>,
    /// Embedding model name, e.g. "text-embedding-3-small".
    pub model: String,
}
30
31impl EmbeddingRefBlock {
32    /// Serialize this block's fields into a TLV-encoded body.
33    pub fn encode_body(&self) -> Vec<u8> {
34        let mut buf = Vec::new();
35        encode_bytes_field(&mut buf, 1, &self.vector_id);
36        encode_bytes_field(&mut buf, 2, &self.source_hash);
37        encode_bytes_field(&mut buf, 3, self.model.as_bytes());
38        buf
39    }
40
41    /// Deserialize an EMBEDDING_REF block from a TLV-encoded body.
42    pub fn decode_body(mut buf: &[u8]) -> Result<Self, TypeError> {
43        let mut vector_id: Option<Vec<u8>> = None;
44        let mut source_hash: Option<Vec<u8>> = None;
45        let mut model: Option<String> = None;
46
47        while !buf.is_empty() {
48            let (header, n) = decode_field_header(buf)?;
49            buf = &buf[n..];
50
51            match header.field_id {
52                1 => {
53                    let (data, n) = decode_bytes_value(buf)?;
54                    buf = &buf[n..];
55                    vector_id = Some(data.to_vec());
56                }
57                2 => {
58                    let (data, n) = decode_bytes_value(buf)?;
59                    buf = &buf[n..];
60                    source_hash = Some(data.to_vec());
61                }
62                3 => {
63                    let (data, n) = decode_bytes_value(buf)?;
64                    buf = &buf[n..];
65                    model = Some(String::from_utf8_lossy(data).into_owned());
66                }
67                _ => {
68                    let n = skip_field(buf, header.wire_type)?;
69                    buf = &buf[n..];
70                }
71            }
72        }
73
74        Ok(Self {
75            vector_id: vector_id.ok_or(TypeError::MissingRequiredField { field: "vector_id" })?,
76            source_hash: source_hash.ok_or(TypeError::MissingRequiredField {
77                field: "source_hash",
78            })?,
79            model: model.ok_or(TypeError::MissingRequiredField { field: "model" })?,
80        })
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding a block and decoding the resulting body must yield an
    /// identical block.
    #[test]
    fn roundtrip_embedding_ref() {
        // 32 bytes, matching the length of a BLAKE3 digest.
        let digest = vec![0xAB; 32];
        let original = EmbeddingRefBlock {
            vector_id: b"vec-001-abc".to_vec(),
            source_hash: digest,
            model: "text-embedding-3-small".to_string(),
        };

        let encoded = original.encode_body();
        let restored = EmbeddingRefBlock::decode_body(&encoded).unwrap();

        assert_eq!(restored, original);
    }
}