manifoldb_vector/encoding/
point_keys.rs

1//! Key encoding for point storage.
2//!
3//! This module provides key encoding functions for point storage, supporting
4//! efficient lookups and range scans by collection and point ID.
5//!
6//! # Key Format
7//!
8//! All keys use big-endian encoding for proper sort order in range scans.
9//!
10//! ## Collection metadata
11//! `[PREFIX_COLLECTION][collection_name_hash]`
12//!
13//! ## Point payload
14//! `[PREFIX_POINT_PAYLOAD][collection_name_hash][point_id]`
15//!
16//! ## Dense vectors
17//! `[PREFIX_POINT_DENSE_VECTOR][collection_name_hash][point_id][vector_name_hash]`
18//!
19//! ## Sparse vectors
20//! `[PREFIX_POINT_SPARSE_VECTOR][collection_name_hash][point_id][vector_name_hash]`
21//!
22//! ## Multi-vectors
23//! `[PREFIX_POINT_MULTI_VECTOR][collection_name_hash][point_id][vector_name_hash]`
24
25use manifoldb_core::PointId;
26
27/// Key prefix for collection metadata.
28pub const PREFIX_COLLECTION: u8 = 0x20;
29
30/// Key prefix for point payloads.
31pub const PREFIX_POINT_PAYLOAD: u8 = 0x21;
32
33/// Key prefix for dense vectors.
34pub const PREFIX_POINT_DENSE_VECTOR: u8 = 0x22;
35
36/// Key prefix for sparse vectors.
37pub const PREFIX_POINT_SPARSE_VECTOR: u8 = 0x23;
38
39/// Key prefix for multi-vectors.
40pub const PREFIX_POINT_MULTI_VECTOR: u8 = 0x24;
41
42/// Compute a hash for a name using FNV-1a.
43///
44/// This is the same hash function used for embedding names.
45#[must_use]
46pub fn hash_name(name: &str) -> u64 {
47    const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
48    const FNV_PRIME: u64 = 0x0100_0000_01b3;
49
50    let mut hash = FNV_OFFSET;
51    for byte in name.as_bytes() {
52        hash ^= u64::from(*byte);
53        hash = hash.wrapping_mul(FNV_PRIME);
54    }
55    hash
56}
57
58// ============================================================================
59// Collection keys
60// ============================================================================
61
62/// Encode a key for collection metadata.
63///
64/// Key format: `[PREFIX_COLLECTION][collection_name_hash]`
65#[must_use]
66pub fn encode_collection_key(collection_name: &str) -> Vec<u8> {
67    let mut key = Vec::with_capacity(9);
68    key.push(PREFIX_COLLECTION);
69    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
70    key
71}
72
73/// Encode a prefix for scanning all collections.
74#[must_use]
75pub fn encode_collection_prefix() -> Vec<u8> {
76    vec![PREFIX_COLLECTION]
77}
78
79/// A decoded collection key.
80#[derive(Debug, Clone, Copy, PartialEq, Eq)]
81pub struct CollectionKey {
82    /// The hash of the collection name.
83    pub name_hash: u64,
84}
85
86/// Decode a collection key.
87///
88/// Returns `None` if the key doesn't have the correct format.
89#[must_use]
90pub fn decode_collection_key(key: &[u8]) -> Option<CollectionKey> {
91    if key.len() != 9 || key[0] != PREFIX_COLLECTION {
92        return None;
93    }
94    let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
95    Some(CollectionKey { name_hash: u64::from_be_bytes(name_hash_bytes) })
96}
97
98// ============================================================================
99// Point payload keys
100// ============================================================================
101
102/// Encode a key for a point's payload.
103///
104/// Key format: `[PREFIX_POINT_PAYLOAD][collection_name_hash][point_id]`
105#[must_use]
106pub fn encode_point_payload_key(collection_name: &str, point_id: PointId) -> Vec<u8> {
107    let mut key = Vec::with_capacity(17);
108    key.push(PREFIX_POINT_PAYLOAD);
109    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
110    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
111    key
112}
113
114/// Encode a prefix for scanning all points in a collection.
115#[must_use]
116pub fn encode_point_payload_prefix(collection_name: &str) -> Vec<u8> {
117    let mut key = Vec::with_capacity(9);
118    key.push(PREFIX_POINT_PAYLOAD);
119    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
120    key
121}
122
123/// A decoded point payload key.
124#[derive(Debug, Clone, Copy, PartialEq, Eq)]
125pub struct PointPayloadKey {
126    /// The hash of the collection name.
127    pub collection_name_hash: u64,
128    /// The point ID.
129    pub point_id: PointId,
130}
131
132/// Decode a point payload key.
133///
134/// Returns `None` if the key doesn't have the correct format.
135#[must_use]
136pub fn decode_point_payload_key(key: &[u8]) -> Option<PointPayloadKey> {
137    if key.len() != 17 || key[0] != PREFIX_POINT_PAYLOAD {
138        return None;
139    }
140    let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
141    let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
142    Some(PointPayloadKey {
143        collection_name_hash: u64::from_be_bytes(name_hash_bytes),
144        point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
145    })
146}
147
148/// Decode just the point ID from a point payload key.
149#[must_use]
150pub fn decode_point_payload_point_id(key: &[u8]) -> Option<PointId> {
151    decode_point_payload_key(key).map(|k| k.point_id)
152}
153
154// ============================================================================
155// Dense vector keys
156// ============================================================================
157
158/// Encode a key for a point's dense vector.
159///
160/// Key format: `[PREFIX_POINT_DENSE_VECTOR][collection_name_hash][point_id][vector_name_hash]`
161#[must_use]
162pub fn encode_dense_vector_key(
163    collection_name: &str,
164    point_id: PointId,
165    vector_name: &str,
166) -> Vec<u8> {
167    let mut key = Vec::with_capacity(25);
168    key.push(PREFIX_POINT_DENSE_VECTOR);
169    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
170    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
171    key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
172    key
173}
174
175/// Encode a prefix for scanning all dense vectors for a point.
176#[must_use]
177pub fn encode_dense_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
178    let mut key = Vec::with_capacity(17);
179    key.push(PREFIX_POINT_DENSE_VECTOR);
180    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
181    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
182    key
183}
184
185/// Encode a prefix for scanning all dense vectors in a collection.
186#[must_use]
187pub fn encode_dense_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
188    let mut key = Vec::with_capacity(9);
189    key.push(PREFIX_POINT_DENSE_VECTOR);
190    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
191    key
192}
193
194/// A decoded dense vector key.
195#[derive(Debug, Clone, Copy, PartialEq, Eq)]
196pub struct DenseVectorKey {
197    /// The hash of the collection name.
198    pub collection_name_hash: u64,
199    /// The point ID.
200    pub point_id: PointId,
201    /// The hash of the vector name.
202    pub vector_name_hash: u64,
203}
204
205/// Decode a dense vector key.
206///
207/// Returns `None` if the key doesn't have the correct format.
208#[must_use]
209pub fn decode_dense_vector_key(key: &[u8]) -> Option<DenseVectorKey> {
210    if key.len() != 25 || key[0] != PREFIX_POINT_DENSE_VECTOR {
211        return None;
212    }
213    let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
214    let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
215    let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
216    Some(DenseVectorKey {
217        collection_name_hash: u64::from_be_bytes(name_hash_bytes),
218        point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
219        vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
220    })
221}
222
223// ============================================================================
224// Sparse vector keys
225// ============================================================================
226
227/// Encode a key for a point's sparse vector.
228///
229/// Key format: `[PREFIX_POINT_SPARSE_VECTOR][collection_name_hash][point_id][vector_name_hash]`
230#[must_use]
231pub fn encode_sparse_vector_key(
232    collection_name: &str,
233    point_id: PointId,
234    vector_name: &str,
235) -> Vec<u8> {
236    let mut key = Vec::with_capacity(25);
237    key.push(PREFIX_POINT_SPARSE_VECTOR);
238    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
239    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
240    key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
241    key
242}
243
244/// Encode a prefix for scanning all sparse vectors for a point.
245#[must_use]
246pub fn encode_sparse_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
247    let mut key = Vec::with_capacity(17);
248    key.push(PREFIX_POINT_SPARSE_VECTOR);
249    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
250    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
251    key
252}
253
254/// Encode a prefix for scanning all sparse vectors in a collection.
255#[must_use]
256pub fn encode_sparse_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
257    let mut key = Vec::with_capacity(9);
258    key.push(PREFIX_POINT_SPARSE_VECTOR);
259    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
260    key
261}
262
263/// A decoded sparse vector key.
264#[derive(Debug, Clone, Copy, PartialEq, Eq)]
265pub struct SparseVectorKey {
266    /// The hash of the collection name.
267    pub collection_name_hash: u64,
268    /// The point ID.
269    pub point_id: PointId,
270    /// The hash of the vector name.
271    pub vector_name_hash: u64,
272}
273
274/// Decode a sparse vector key.
275///
276/// Returns `None` if the key doesn't have the correct format.
277#[must_use]
278pub fn decode_sparse_vector_key(key: &[u8]) -> Option<SparseVectorKey> {
279    if key.len() != 25 || key[0] != PREFIX_POINT_SPARSE_VECTOR {
280        return None;
281    }
282    let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
283    let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
284    let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
285    Some(SparseVectorKey {
286        collection_name_hash: u64::from_be_bytes(name_hash_bytes),
287        point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
288        vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
289    })
290}
291
292// ============================================================================
293// Multi-vector keys
294// ============================================================================
295
296/// Encode a key for a point's multi-vector.
297///
298/// Key format: `[PREFIX_POINT_MULTI_VECTOR][collection_name_hash][point_id][vector_name_hash]`
299#[must_use]
300pub fn encode_multi_vector_key(
301    collection_name: &str,
302    point_id: PointId,
303    vector_name: &str,
304) -> Vec<u8> {
305    let mut key = Vec::with_capacity(25);
306    key.push(PREFIX_POINT_MULTI_VECTOR);
307    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
308    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
309    key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
310    key
311}
312
313/// Encode a prefix for scanning all multi-vectors for a point.
314#[must_use]
315pub fn encode_multi_vector_point_prefix(collection_name: &str, point_id: PointId) -> Vec<u8> {
316    let mut key = Vec::with_capacity(17);
317    key.push(PREFIX_POINT_MULTI_VECTOR);
318    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
319    key.extend_from_slice(&point_id.as_u64().to_be_bytes());
320    key
321}
322
323/// Encode a prefix for scanning all multi-vectors in a collection.
324#[must_use]
325pub fn encode_multi_vector_collection_prefix(collection_name: &str) -> Vec<u8> {
326    let mut key = Vec::with_capacity(9);
327    key.push(PREFIX_POINT_MULTI_VECTOR);
328    key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
329    key
330}
331
332/// A decoded multi-vector key.
333#[derive(Debug, Clone, Copy, PartialEq, Eq)]
334pub struct MultiVectorKey {
335    /// The hash of the collection name.
336    pub collection_name_hash: u64,
337    /// The point ID.
338    pub point_id: PointId,
339    /// The hash of the vector name.
340    pub vector_name_hash: u64,
341}
342
343/// Decode a multi-vector key.
344///
345/// Returns `None` if the key doesn't have the correct format.
346#[must_use]
347pub fn decode_multi_vector_key(key: &[u8]) -> Option<MultiVectorKey> {
348    if key.len() != 25 || key[0] != PREFIX_POINT_MULTI_VECTOR {
349        return None;
350    }
351    let name_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
352    let point_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
353    let vector_name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
354    Some(MultiVectorKey {
355        collection_name_hash: u64::from_be_bytes(name_hash_bytes),
356        point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
357        vector_name_hash: u64::from_be_bytes(vector_name_hash_bytes),
358    })
359}
360
361#[cfg(test)]
362mod tests {
363    use super::*;
364
365    #[test]
366    fn collection_key_roundtrip() {
367        let key = encode_collection_key("documents");
368        assert_eq!(key.len(), 9);
369        assert_eq!(key[0], PREFIX_COLLECTION);
370
371        let decoded = decode_collection_key(&key).unwrap();
372        assert_eq!(decoded.name_hash, hash_name("documents"));
373    }
374
375    #[test]
376    fn point_payload_key_roundtrip() {
377        let collection = "documents";
378        let point_id = PointId::new(42);
379        let key = encode_point_payload_key(collection, point_id);
380
381        assert_eq!(key.len(), 17);
382        assert_eq!(key[0], PREFIX_POINT_PAYLOAD);
383
384        let decoded = decode_point_payload_key(&key).unwrap();
385        assert_eq!(decoded.collection_name_hash, hash_name(collection));
386        assert_eq!(decoded.point_id, point_id);
387    }
388
389    #[test]
390    fn dense_vector_key_roundtrip() {
391        let collection = "documents";
392        let point_id = PointId::new(123);
393        let vector_name = "text_embedding";
394        let key = encode_dense_vector_key(collection, point_id, vector_name);
395
396        assert_eq!(key.len(), 25);
397        assert_eq!(key[0], PREFIX_POINT_DENSE_VECTOR);
398
399        let decoded = decode_dense_vector_key(&key).unwrap();
400        assert_eq!(decoded.collection_name_hash, hash_name(collection));
401        assert_eq!(decoded.point_id, point_id);
402        assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
403    }
404
405    #[test]
406    fn sparse_vector_key_roundtrip() {
407        let collection = "documents";
408        let point_id = PointId::new(456);
409        let vector_name = "sparse_embedding";
410        let key = encode_sparse_vector_key(collection, point_id, vector_name);
411
412        assert_eq!(key.len(), 25);
413        assert_eq!(key[0], PREFIX_POINT_SPARSE_VECTOR);
414
415        let decoded = decode_sparse_vector_key(&key).unwrap();
416        assert_eq!(decoded.collection_name_hash, hash_name(collection));
417        assert_eq!(decoded.point_id, point_id);
418        assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
419    }
420
421    #[test]
422    fn multi_vector_key_roundtrip() {
423        let collection = "documents";
424        let point_id = PointId::new(789);
425        let vector_name = "colbert";
426        let key = encode_multi_vector_key(collection, point_id, vector_name);
427
428        assert_eq!(key.len(), 25);
429        assert_eq!(key[0], PREFIX_POINT_MULTI_VECTOR);
430
431        let decoded = decode_multi_vector_key(&key).unwrap();
432        assert_eq!(decoded.collection_name_hash, hash_name(collection));
433        assert_eq!(decoded.point_id, point_id);
434        assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
435    }
436
437    #[test]
438    fn keys_ordered_within_collection() {
439        let collection = "test";
440
441        let key1 = encode_point_payload_key(collection, PointId::new(1));
442        let key2 = encode_point_payload_key(collection, PointId::new(2));
443        let key3 = encode_point_payload_key(collection, PointId::new(100));
444
445        // Points should be ordered by ID within a collection
446        assert!(key1 < key2);
447        assert!(key2 < key3);
448    }
449
450    #[test]
451    fn prefix_matches_keys() {
452        let collection = "documents";
453        let prefix = encode_point_payload_prefix(collection);
454
455        let key1 = encode_point_payload_key(collection, PointId::new(1));
456        let key2 = encode_point_payload_key(collection, PointId::new(1000));
457
458        // All keys in collection should start with the prefix
459        assert!(key1.starts_with(&prefix));
460        assert!(key2.starts_with(&prefix));
461
462        // Different collection should not match
463        let key_other = encode_point_payload_key("other_collection", PointId::new(1));
464        assert!(!key_other.starts_with(&prefix));
465    }
466
467    #[test]
468    fn vector_key_prefix_matches() {
469        let collection = "docs";
470        let point_id = PointId::new(42);
471
472        let prefix = encode_dense_vector_point_prefix(collection, point_id);
473        let key1 = encode_dense_vector_key(collection, point_id, "embedding1");
474        let key2 = encode_dense_vector_key(collection, point_id, "embedding2");
475
476        assert!(key1.starts_with(&prefix));
477        assert!(key2.starts_with(&prefix));
478
479        // Different point should not match
480        let key_other = encode_dense_vector_key(collection, PointId::new(99), "embedding1");
481        assert!(!key_other.starts_with(&prefix));
482    }
483
484    #[test]
485    fn decode_invalid_keys() {
486        // Wrong prefix
487        assert!(decode_collection_key(&[PREFIX_POINT_PAYLOAD; 9]).is_none());
488        assert!(decode_point_payload_key(&[PREFIX_COLLECTION; 17]).is_none());
489        assert!(decode_dense_vector_key(&[PREFIX_POINT_SPARSE_VECTOR; 25]).is_none());
490
491        // Wrong length
492        assert!(decode_collection_key(&[PREFIX_COLLECTION; 5]).is_none());
493        assert!(decode_point_payload_key(&[PREFIX_POINT_PAYLOAD; 10]).is_none());
494        assert!(decode_dense_vector_key(&[PREFIX_POINT_DENSE_VECTOR; 20]).is_none());
495
496        // Empty
497        assert!(decode_collection_key(&[]).is_none());
498        assert!(decode_point_payload_key(&[]).is_none());
499        assert!(decode_dense_vector_key(&[]).is_none());
500    }
501}