manifoldb_vector/encoding/
keys.rs1use manifoldb_core::EntityId;
4
5use crate::types::EmbeddingName;
6
7pub const PREFIX_EMBEDDING_SPACE: u8 = 0x10;
9
10pub const PREFIX_EMBEDDING: u8 = 0x11;
12
13pub const PREFIX_SPARSE_EMBEDDING_SPACE: u8 = 0x12;
15
16pub const PREFIX_SPARSE_EMBEDDING: u8 = 0x13;
18
19pub const PREFIX_MULTI_VECTOR_SPACE: u8 = 0x14;
21
22pub const PREFIX_MULTI_VECTOR: u8 = 0x15;
24
25#[must_use]
29pub fn hash_name(name: &str) -> u64 {
30 const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
31 const FNV_PRIME: u64 = 0x0100_0000_01b3;
32
33 let mut hash = FNV_OFFSET;
34 for byte in name.as_bytes() {
35 hash ^= u64::from(*byte);
36 hash = hash.wrapping_mul(FNV_PRIME);
37 }
38 hash
39}
40
41#[must_use]
45pub fn encode_embedding_space_key(name: &EmbeddingName) -> Vec<u8> {
46 let mut key = Vec::with_capacity(9);
47 key.push(PREFIX_EMBEDDING_SPACE);
48 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
49 key
50}
51
52#[must_use]
56pub fn decode_embedding_space_key(key: &[u8]) -> Option<u64> {
57 if key.len() != 9 || key[0] != PREFIX_EMBEDDING_SPACE {
58 return None;
59 }
60 let bytes: [u8; 8] = key[1..9].try_into().ok()?;
61 Some(u64::from_be_bytes(bytes))
62}
63
64#[must_use]
71pub fn encode_embedding_key(name: &EmbeddingName, entity_id: EntityId) -> Vec<u8> {
72 let mut key = Vec::with_capacity(17);
73 key.push(PREFIX_EMBEDDING);
74 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
75 key.extend_from_slice(&entity_id.as_u64().to_be_bytes());
76 key
77}
78
79#[must_use]
84pub fn encode_embedding_prefix(name: &EmbeddingName) -> Vec<u8> {
85 let mut key = Vec::with_capacity(9);
86 key.push(PREFIX_EMBEDDING);
87 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
88 key
89}
90
91#[derive(Debug, Clone, Copy, PartialEq, Eq)]
93pub struct EmbeddingKey {
94 pub name_hash: u64,
96 pub entity_id: EntityId,
98}
99
100#[must_use]
104pub fn decode_embedding_key(key: &[u8]) -> Option<EmbeddingKey> {
105 if key.len() != 17 || key[0] != PREFIX_EMBEDDING {
106 return None;
107 }
108
109 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
110 let entity_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
111
112 Some(EmbeddingKey {
113 name_hash: u64::from_be_bytes(name_hash_bytes),
114 entity_id: EntityId::new(u64::from_be_bytes(entity_id_bytes)),
115 })
116}
117
118#[must_use]
122pub fn decode_embedding_entity_id(key: &[u8]) -> Option<EntityId> {
123 decode_embedding_key(key).map(|k| k.entity_id)
124}
125
126#[must_use]
132pub fn encode_sparse_embedding_space_key(name: &EmbeddingName) -> Vec<u8> {
133 let mut key = Vec::with_capacity(9);
134 key.push(PREFIX_SPARSE_EMBEDDING_SPACE);
135 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
136 key
137}
138
139#[must_use]
143pub fn decode_sparse_embedding_space_key(key: &[u8]) -> Option<u64> {
144 if key.len() != 9 || key[0] != PREFIX_SPARSE_EMBEDDING_SPACE {
145 return None;
146 }
147 let bytes: [u8; 8] = key[1..9].try_into().ok()?;
148 Some(u64::from_be_bytes(bytes))
149}
150
151#[must_use]
155pub fn encode_sparse_embedding_key(name: &EmbeddingName, entity_id: EntityId) -> Vec<u8> {
156 let mut key = Vec::with_capacity(17);
157 key.push(PREFIX_SPARSE_EMBEDDING);
158 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
159 key.extend_from_slice(&entity_id.as_u64().to_be_bytes());
160 key
161}
162
163#[must_use]
165pub fn encode_sparse_embedding_prefix(name: &EmbeddingName) -> Vec<u8> {
166 let mut key = Vec::with_capacity(9);
167 key.push(PREFIX_SPARSE_EMBEDDING);
168 key.extend_from_slice(&hash_name(name.as_str()).to_be_bytes());
169 key
170}
171
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
174pub struct SparseEmbeddingKey {
175 pub name_hash: u64,
177 pub entity_id: EntityId,
179}
180
181#[must_use]
185pub fn decode_sparse_embedding_key(key: &[u8]) -> Option<SparseEmbeddingKey> {
186 if key.len() != 17 || key[0] != PREFIX_SPARSE_EMBEDDING {
187 return None;
188 }
189
190 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
191 let entity_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
192
193 Some(SparseEmbeddingKey {
194 name_hash: u64::from_be_bytes(name_hash_bytes),
195 entity_id: EntityId::new(u64::from_be_bytes(entity_id_bytes)),
196 })
197}
198
199#[must_use]
203pub fn decode_sparse_embedding_entity_id(key: &[u8]) -> Option<EntityId> {
204 decode_sparse_embedding_key(key).map(|k| k.entity_id)
205}
206
207#[cfg(test)]
208mod tests {
209 use super::*;
210
211 #[test]
212 fn embedding_space_key_roundtrip() {
213 let name = EmbeddingName::new("test_space").unwrap();
214 let key = encode_embedding_space_key(&name);
215
216 assert_eq!(key.len(), 9);
217 assert_eq!(key[0], PREFIX_EMBEDDING_SPACE);
218
219 let decoded = decode_embedding_space_key(&key);
220 assert_eq!(decoded, Some(hash_name("test_space")));
221 }
222
223 #[test]
224 fn embedding_key_roundtrip() {
225 let name = EmbeddingName::new("text_embedding").unwrap();
226 let entity_id = EntityId::new(12345);
227
228 let key = encode_embedding_key(&name, entity_id);
229 assert_eq!(key.len(), 17);
230 assert_eq!(key[0], PREFIX_EMBEDDING);
231
232 let decoded = decode_embedding_key(&key).unwrap();
233 assert_eq!(decoded.name_hash, hash_name("text_embedding"));
234 assert_eq!(decoded.entity_id, entity_id);
235 }
236
237 #[test]
238 fn embedding_keys_ordered_by_space_then_entity() {
239 let space1 = EmbeddingName::new("aaa").unwrap();
240 let space2 = EmbeddingName::new("zzz").unwrap();
241
242 let key1 = encode_embedding_key(&space1, EntityId::new(1));
243 let key2 = encode_embedding_key(&space1, EntityId::new(2));
244 let key3 = encode_embedding_key(&space2, EntityId::new(1));
245
246 assert!(key1 < key2);
248
249 let prefix1 = encode_embedding_prefix(&space1);
252 assert!(key1.starts_with(&prefix1));
253 assert!(key2.starts_with(&prefix1));
254 assert!(!key3.starts_with(&prefix1));
255 }
256
257 #[test]
258 fn embedding_prefix_groups_embeddings_by_space() {
259 let name = EmbeddingName::new("my_space").unwrap();
260 let prefix = encode_embedding_prefix(&name);
261
262 for id in [0u64, 1, 100, u64::MAX] {
263 let key = encode_embedding_key(&name, EntityId::new(id));
264 assert!(key.starts_with(&prefix));
265 }
266 }
267
268 #[test]
269 fn decode_invalid_embedding_key() {
270 assert!(decode_embedding_key(&[PREFIX_EMBEDDING_SPACE; 17]).is_none());
272
273 assert!(decode_embedding_key(&[PREFIX_EMBEDDING; 10]).is_none());
275
276 assert!(decode_embedding_key(&[]).is_none());
278 }
279
280 #[test]
281 fn decode_invalid_space_key() {
282 assert!(decode_embedding_space_key(&[PREFIX_EMBEDDING; 9]).is_none());
284
285 assert!(decode_embedding_space_key(&[PREFIX_EMBEDDING_SPACE; 5]).is_none());
287
288 assert!(decode_embedding_space_key(&[]).is_none());
290 }
291
292 #[test]
293 fn key_prefixes_partition_keyspace() {
294 let name = EmbeddingName::new("test").unwrap();
295 let space_key = encode_embedding_space_key(&name);
296 let embedding_key = encode_embedding_key(&name, EntityId::new(1));
297
298 assert!(space_key[0] != embedding_key[0]);
299 }
300
301 #[test]
302 fn hash_consistency() {
303 assert_eq!(hash_name("test"), hash_name("test"));
305
306 assert_ne!(hash_name("test1"), hash_name("test2"));
308 }
309}