manifoldb_vector/encoding/
point_keys.rs1use manifoldb_core::PointId;
26
27pub const PREFIX_COLLECTION: u8 = 0x20;
29
30pub const PREFIX_POINT_PAYLOAD: u8 = 0x21;
32
33pub const PREFIX_POINT_DENSE_VECTOR: u8 = 0x22;
35
36pub const PREFIX_POINT_SPARSE_VECTOR: u8 = 0x23;
38
39pub const PREFIX_POINT_MULTI_VECTOR: u8 = 0x24;
41
42#[must_use]
46pub fn hash_name(name: &str) -> u64 {
47 const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
48 const FNV_PRIME: u64 = 0x0100_0000_01b3;
49
50 let mut hash = FNV_OFFSET;
51 for byte in name.as_bytes() {
52 hash ^= u64::from(*byte);
53 hash = hash.wrapping_mul(FNV_PRIME);
54 }
55 hash
56}
57
58#[must_use]
66pub fn encode_collection_key(collection_name: &str) -> Vec<u8> {
67 let mut key = Vec::with_capacity(9);
68 key.push(PREFIX_COLLECTION);
69 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
70 key
71}
72
73#[must_use]
75pub fn encode_collection_prefix() -> Vec<u8> {
76 vec![PREFIX_COLLECTION]
77}
78
79#[derive(Debug, Clone, Copy, PartialEq, Eq)]
81pub struct CollectionKey {
82 pub name_hash: u64,
84}
85
86#[must_use]
90pub fn decode_collection_key(key: &[u8]) -> Option<CollectionKey> {
91 if key.len() != 9 || key[0] != PREFIX_COLLECTION {
92 return None;
93 }
94 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
95 Some(CollectionKey { name_hash: u64::from_be_bytes(name_hash_bytes) })
96}
97
98#[must_use]
106pub fn encode_point_payload_key(collection_name: &str, point_id: PointId) -> Vec<u8> {
107 let mut key = Vec::with_capacity(17);
108 key.push(PREFIX_POINT_PAYLOAD);
109 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
110 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
111 key
112}
113
114#[must_use]
116pub fn encode_point_payload_prefix(collection_name: &str) -> Vec<u8> {
117 let mut key = Vec::with_capacity(9);
118 key.push(PREFIX_POINT_PAYLOAD);
119 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
120 key
121}
122
123#[derive(Debug, Clone, Copy, PartialEq, Eq)]
125pub struct PointPayloadKey {
126 pub collection_name_hash: u64,
128 pub point_id: PointId,
130}
131
132#[must_use]
136pub fn decode_point_payload_key(key: &[u8]) -> Option<PointPayloadKey> {
137 if key.len() != 17 || key[0] != PREFIX_POINT_PAYLOAD {
138 return None;
139 }
140 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
141 let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
142 Some(PointPayloadKey {
143 collection_name_hash: u64::from_be_bytes(name_hash_bytes),
144 point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
145 })
146}
147
148#[must_use]
150pub fn decode_point_payload_point_id(key: &[u8]) -> Option<PointId> {
151 decode_point_payload_key(key).map(|k| k.point_id)
152}
153
154#[must_use]
162pub fn encode_dense_vector_key(
163 collection_name: &str,
164 point_id: PointId,
165 vector_name: &str,
166) -> Vec<u8> {
167 let mut key = Vec::with_capacity(25);
168 key.push(PREFIX_POINT_DENSE_VECTOR);
169 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
170 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
171 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
172 key
173}
174
175#[must_use]
177pub fn encode_dense_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
178 let mut key = Vec::with_capacity(17);
179 key.push(PREFIX_POINT_DENSE_VECTOR);
180 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
181 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
182 key
183}
184
185#[must_use]
187pub fn encode_dense_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
188 let mut key = Vec::with_capacity(9);
189 key.push(PREFIX_POINT_DENSE_VECTOR);
190 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
191 key
192}
193
194#[derive(Debug, Clone, Copy, PartialEq, Eq)]
196pub struct DenseVectorKey {
197 pub collection_name_hash: u64,
199 pub point_id: PointId,
201 pub vector_name_hash: u64,
203}
204
205#[must_use]
209pub fn decode_dense_vector_key(key: &[u8]) -> Option<DenseVectorKey> {
210 if key.len() != 25 || key[0] != PREFIX_POINT_DENSE_VECTOR {
211 return None;
212 }
213 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
214 let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
215 let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
216 Some(DenseVectorKey {
217 collection_name_hash: u64::from_be_bytes(name_hash_bytes),
218 point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
219 vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
220 })
221}
222
223#[must_use]
231pub fn encode_sparse_vector_key(
232 collection_name: &str,
233 point_id: PointId,
234 vector_name: &str,
235) -> Vec<u8> {
236 let mut key = Vec::with_capacity(25);
237 key.push(PREFIX_POINT_SPARSE_VECTOR);
238 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
239 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
240 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
241 key
242}
243
244#[must_use]
246pub fn encode_sparse_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
247 let mut key = Vec::with_capacity(17);
248 key.push(PREFIX_POINT_SPARSE_VECTOR);
249 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
250 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
251 key
252}
253
254#[must_use]
256pub fn encode_sparse_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
257 let mut key = Vec::with_capacity(9);
258 key.push(PREFIX_POINT_SPARSE_VECTOR);
259 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
260 key
261}
262
263#[derive(Debug, Clone, Copy, PartialEq, Eq)]
265pub struct SparseVectorKey {
266 pub collection_name_hash: u64,
268 pub point_id: PointId,
270 pub vector_name_hash: u64,
272}
273
274#[must_use]
278pub fn decode_sparse_vector_key(key: &[u8]) -> Option<SparseVectorKey> {
279 if key.len() != 25 || key[0] != PREFIX_POINT_SPARSE_VECTOR {
280 return None;
281 }
282 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
283 let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
284 let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
285 Some(SparseVectorKey {
286 collection_name_hash: u64::from_be_bytes(name_hash_bytes),
287 point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
288 vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
289 })
290}
291
292#[must_use]
300pub fn encode_multi_vector_key(
301 collection_name: &str,
302 point_id: PointId,
303 vector_name: &str,
304) -> Vec<u8> {
305 let mut key = Vec::with_capacity(25);
306 key.push(PREFIX_POINT_MULTI_VECTOR);
307 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
308 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
309 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
310 key
311}
312
313#[must_use]
315pub fn encode_multi_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
316 let mut key = Vec::with_capacity(17);
317 key.push(PREFIX_POINT_MULTI_VECTOR);
318 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
319 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
320 key
321}
322
323#[must_use]
325pub fn encode_multi_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
326 let mut key = Vec::with_capacity(9);
327 key.push(PREFIX_POINT_MULTI_VECTOR);
328 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
329 key
330}
331
332#[derive(Debug, Clone, Copy, PartialEq, Eq)]
334pub struct MultiVectorKey {
335 pub collection_name_hash: u64,
337 pub point_id: PointId,
339 pub vector_name_hash: u64,
341}
342
343#[must_use]
347pub fn decode_multi_vector_key(key: &[u8]) -> Option<MultiVectorKey> {
348 if key.len() != 25 || key[0] != PREFIX_POINT_MULTI_VECTOR {
349 return None;
350 }
351 let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
352 let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
353 let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
354 Some(MultiVectorKey {
355 collection_name_hash: u64::from_be_bytes(name_hash_bytes),
356 point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
357 vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
358 })
359}
360
361#[cfg(test)]
362mod tests {
363 use super::*;
364
365 #[test]
366 fn collection_key_roundtrip() {
367 let key = encode_collection_key("documents");
368 assert_eq!(key.len(), 9);
369 assert_eq!(key[0], PREFIX_COLLECTION);
370
371 let decoded = decode_collection_key(&key).unwrap();
372 assert_eq!(decoded.name_hash, hash_name("documents"));
373 }
374
375 #[test]
376 fn point_payload_key_roundtrip() {
377 let collection = "documents";
378 let point_id = PointId::new(42);
379 let key = encode_point_payload_key(collection, point_id);
380
381 assert_eq!(key.len(), 17);
382 assert_eq!(key[0], PREFIX_POINT_PAYLOAD);
383
384 let decoded = decode_point_payload_key(&key).unwrap();
385 assert_eq!(decoded.collection_name_hash, hash_name(collection));
386 assert_eq!(decoded.point_id, point_id);
387 }
388
389 #[test]
390 fn dense_vector_key_roundtrip() {
391 let collection = "documents";
392 let point_id = PointId::new(123);
393 let vector_name = "text_embedding";
394 let key = encode_dense_vector_key(collection, point_id, vector_name);
395
396 assert_eq!(key.len(), 25);
397 assert_eq!(key[0], PREFIX_POINT_DENSE_VECTOR);
398
399 let decoded = decode_dense_vector_key(&key).unwrap();
400 assert_eq!(decoded.collection_name_hash, hash_name(collection));
401 assert_eq!(decoded.point_id, point_id);
402 assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
403 }
404
405 #[test]
406 fn sparse_vector_key_roundtrip() {
407 let collection = "documents";
408 let point_id = PointId::new(456);
409 let vector_name = "sparse_embedding";
410 let key = encode_sparse_vector_key(collection, point_id, vector_name);
411
412 assert_eq!(key.len(), 25);
413 assert_eq!(key[0], PREFIX_POINT_SPARSE_VECTOR);
414
415 let decoded = decode_sparse_vector_key(&key).unwrap();
416 assert_eq!(decoded.collection_name_hash, hash_name(collection));
417 assert_eq!(decoded.point_id, point_id);
418 assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
419 }
420
421 #[test]
422 fn multi_vector_key_roundtrip() {
423 let collection = "documents";
424 let point_id = PointId::new(789);
425 let vector_name = "colbert";
426 let key = encode_multi_vector_key(collection, point_id, vector_name);
427
428 assert_eq!(key.len(), 25);
429 assert_eq!(key[0], PREFIX_POINT_MULTI_VECTOR);
430
431 let decoded = decode_multi_vector_key(&key).unwrap();
432 assert_eq!(decoded.collection_name_hash, hash_name(collection));
433 assert_eq!(decoded.point_id, point_id);
434 assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
435 }
436
437 #[test]
438 fn keys_ordered_within_collection() {
439 let collection = "test";
440
441 let key1 = encode_point_payload_key(collection, PointId::new(1));
442 let key2 = encode_point_payload_key(collection, PointId::new(2));
443 let key3 = encode_point_payload_key(collection, PointId::new(100));
444
445 assert!(key1 < key2);
447 assert!(key2 < key3);
448 }
449
450 #[test]
451 fn prefix_matches_keys() {
452 let collection = "documents";
453 let prefix = encode_point_payload_prefix(collection);
454
455 let key1 = encode_point_payload_key(collection, PointId::new(1));
456 let key2 = encode_point_payload_key(collection, PointId::new(1000));
457
458 assert!(key1.starts_with(&prefix));
460 assert!(key2.starts_with(&prefix));
461
462 let key_other = encode_point_payload_key("other_collection", PointId::new(1));
464 assert!(!key_other.starts_with(&prefix));
465 }
466
467 #[test]
468 fn vector_key_prefix_matches() {
469 let collection = "docs";
470 let point_id = PointId::new(42);
471
472 let prefix = encode_dense_vector_point_prefix(collection, point_id);
473 let key1 = encode_dense_vector_key(collection, point_id, "embedding1");
474 let key2 = encode_dense_vector_key(collection, point_id, "embedding2");
475
476 assert!(key1.starts_with(&prefix));
477 assert!(key2.starts_with(&prefix));
478
479 let key_other = encode_dense_vector_key(collection, PointId::new(99), "embedding1");
481 assert!(!key_other.starts_with(&prefix));
482 }
483
484 #[test]
485 fn decode_invalid_keys() {
486 assert!(decode_collection_key(&[PREFIX_POINT_PAYLOAD; 9]).is_none());
488 assert!(decode_point_payload_key(&[PREFIX_COLLECTION; 17]).is_none());
489 assert!(decode_dense_vector_key(&[PREFIX_POINT_SPARSE_VECTOR; 25]).is_none());
490
491 assert!(decode_collection_key(&[PREFIX_COLLECTION; 5]).is_none());
493 assert!(decode_point_payload_key(&[PREFIX_POINT_PAYLOAD; 10]).is_none());
494 assert!(decode_dense_vector_key(&[PREFIX_POINT_DENSE_VECTOR; 20]).is_none());
495
496 assert!(decode_collection_key(&[]).is_none());
498 assert!(decode_point_payload_key(&[]).is_none());
499 assert!(decode_dense_vector_key(&[]).is_none());
500 }
501}