manifoldb_vector/encoding/
collection_vector_keys.rs

1//! Key encoding for collection vector storage.
2//!
3//! This module provides key encoding for vectors stored separately from entities.
4//! The key format enables efficient access patterns:
5//!
6//! - Get a specific named vector for an entity in a collection
7//! - Get all vectors for an entity in a collection
8//! - Delete all vectors for an entity (cascade delete)
9//!
10//! # Key Format
11//!
12//! ## Collection vectors
13//! - `0x40` - Collection vector: `[0x40][collection_id][entity_id][vector_name_hash]`
14//!
15//! All numeric values are encoded in big-endian format to preserve sort order.
16
17use manifoldb_core::{CollectionId, EntityId};
18
19use super::hash_name;
20
/// Tag byte (`0x40`) written as the first byte of every collection vector key and scan prefix.
pub const PREFIX_COLLECTION_VECTOR: u8 = 0x40;
23
24/// Encode a key for a collection vector.
25///
26/// Key format: `[PREFIX_COLLECTION_VECTOR][collection_id][entity_id][vector_name_hash]`
27///
28/// This format enables:
29/// - Fast lookup of a specific vector by (collection, entity, name)
30/// - Prefix scan for all vectors of an entity within a collection
31/// - Ordered iteration within each collection
32#[must_use]
33pub fn encode_collection_vector_key(
34    collection_id: CollectionId,
35    entity_id: EntityId,
36    vector_name: &str,
37) -> Vec<u8> {
38    let mut key = Vec::with_capacity(25); // 1 + 8 + 8 + 8
39    key.push(PREFIX_COLLECTION_VECTOR);
40    key.extend_from_slice(&collection_id.as_u64().to_be_bytes());
41    key.extend_from_slice(&entity_id.as_u64().to_be_bytes());
42    key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
43    key
44}
45
46/// Encode a prefix for all vectors of an entity within a collection.
47///
48/// This prefix can be used for range scans to get all vectors for an entity.
49#[must_use]
50pub fn encode_entity_vector_prefix(collection_id: CollectionId, entity_id: EntityId) -> Vec<u8> {
51    let mut key = Vec::with_capacity(17); // 1 + 8 + 8
52    key.push(PREFIX_COLLECTION_VECTOR);
53    key.extend_from_slice(&collection_id.as_u64().to_be_bytes());
54    key.extend_from_slice(&entity_id.as_u64().to_be_bytes());
55    key
56}
57
58/// Encode a prefix for all vectors in a collection.
59///
60/// This prefix can be used for range scans to iterate all vectors in a collection.
61#[must_use]
62pub fn encode_collection_vector_prefix(collection_id: CollectionId) -> Vec<u8> {
63    let mut key = Vec::with_capacity(9); // 1 + 8
64    key.push(PREFIX_COLLECTION_VECTOR);
65    key.extend_from_slice(&collection_id.as_u64().to_be_bytes());
66    key
67}
68
69/// A decoded collection vector key.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct CollectionVectorKey {
72    /// The collection ID.
73    pub collection_id: CollectionId,
74    /// The entity ID.
75    pub entity_id: EntityId,
76    /// The hash of the vector name.
77    pub vector_name_hash: u64,
78}
79
80/// Decode a collection vector key.
81///
82/// Returns `None` if the key doesn't have the correct format.
83#[must_use]
84pub fn decode_collection_vector_key(key: &[u8]) -> Option<CollectionVectorKey> {
85    if key.len() != 25 || key[0] != PREFIX_COLLECTION_VECTOR {
86        return None;
87    }
88
89    let collection_id_bytes: [u8; 8] = key[1..9].try_into().ok()?;
90    let entity_id_bytes: [u8; 8] = key[9..17].try_into().ok()?;
91    let name_hash_bytes: [u8; 8] = key[17..25].try_into().ok()?;
92
93    Some(CollectionVectorKey {
94        collection_id: CollectionId::new(u64::from_be_bytes(collection_id_bytes)),
95        entity_id: EntityId::new(u64::from_be_bytes(entity_id_bytes)),
96        vector_name_hash: u64::from_be_bytes(name_hash_bytes),
97    })
98}
99
100/// Decode the entity ID from a collection vector key.
101///
102/// Returns `None` if the key doesn't have the correct format.
103#[must_use]
104pub fn decode_collection_vector_entity_id(key: &[u8]) -> Option<EntityId> {
105    decode_collection_vector_key(key).map(|k| k.entity_id)
106}
107
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode_collection_vector_key() {
        let collection_id = CollectionId::new(1);
        let entity_id = EntityId::new(42);
        let key = encode_collection_vector_key(collection_id, entity_id, "text_embedding");

        assert_eq!(key.len(), 25);
        assert_eq!(key[0], PREFIX_COLLECTION_VECTOR);

        // Each field must sit at its documented offset, in big-endian form.
        let collection_bytes = collection_id.as_u64().to_be_bytes();
        let entity_bytes = entity_id.as_u64().to_be_bytes();
        let hash_bytes = hash_name("text_embedding").to_be_bytes();
        assert_eq!(&key[1..9], &collection_bytes[..]);
        assert_eq!(&key[9..17], &entity_bytes[..]);
        assert_eq!(&key[17..25], &hash_bytes[..]);
    }

    #[test]
    fn test_decode_collection_vector_key() {
        let collection_id = CollectionId::new(1);
        let entity_id = EntityId::new(42);
        let vector_name = "text_embedding";

        let key = encode_collection_vector_key(collection_id, entity_id, vector_name);
        let decoded = decode_collection_vector_key(&key).unwrap();

        assert_eq!(decoded.collection_id, collection_id);
        assert_eq!(decoded.entity_id, entity_id);
        assert_eq!(decoded.vector_name_hash, hash_name(vector_name));
    }

    #[test]
    fn test_decode_collection_vector_entity_id() {
        let entity_id = EntityId::new(42);
        let key = encode_collection_vector_key(CollectionId::new(7), entity_id, "text");

        // A well-formed key yields the entity ID it was built from.
        assert_eq!(decode_collection_vector_entity_id(&key), Some(entity_id));

        // Anything the full decoder rejects is rejected here as well.
        assert!(decode_collection_vector_entity_id(&key[..17]).is_none());
        assert!(decode_collection_vector_entity_id(&[]).is_none());
    }

    #[test]
    fn test_entity_vector_prefix() {
        let collection_id = CollectionId::new(1);
        let entity_id = EntityId::new(42);

        let prefix = encode_entity_vector_prefix(collection_id, entity_id);
        let key1 = encode_collection_vector_key(collection_id, entity_id, "text");
        let key2 = encode_collection_vector_key(collection_id, entity_id, "image");
        let key3 = encode_collection_vector_key(collection_id, EntityId::new(43), "text");

        assert_eq!(prefix.len(), 17);
        assert!(key1.starts_with(&prefix));
        assert!(key2.starts_with(&prefix));
        assert!(!key3.starts_with(&prefix));
    }

    #[test]
    fn test_collection_vector_prefix() {
        let collection_id = CollectionId::new(1);
        let collection_id_2 = CollectionId::new(2);

        let prefix = encode_collection_vector_prefix(collection_id);
        let key1 = encode_collection_vector_key(collection_id, EntityId::new(1), "text");
        let key2 = encode_collection_vector_key(collection_id, EntityId::new(2), "image");
        let key3 = encode_collection_vector_key(collection_id_2, EntityId::new(1), "text");

        assert_eq!(prefix.len(), 9);
        assert!(key1.starts_with(&prefix));
        assert!(key2.starts_with(&prefix));
        assert!(!key3.starts_with(&prefix));

        // The entity-level prefix is itself an extension of the collection-level prefix.
        let entity_prefix = encode_entity_vector_prefix(collection_id, EntityId::new(1));
        assert!(entity_prefix.starts_with(&prefix));
    }

    #[test]
    fn test_key_ordering() {
        let coll = CollectionId::new(1);

        // Keys should be ordered by entity ID, then by vector name hash
        let key1 = encode_collection_vector_key(coll, EntityId::new(1), "a");
        let key2 = encode_collection_vector_key(coll, EntityId::new(2), "a");

        // Different entities should be ordered
        assert!(key1 < key2);

        // IDs that differ across a byte boundary only sort correctly when the encoding
        // is big-endian; a little-endian encoding would order 256 before 255.
        let key_255 = encode_collection_vector_key(coll, EntityId::new(255), "a");
        let key_256 = encode_collection_vector_key(coll, EntityId::new(256), "a");
        assert!(key_255 < key_256);

        // The collection ID is the most significant field, so it dominates the entity ID.
        let other_coll = encode_collection_vector_key(CollectionId::new(2), EntityId::new(0), "a");
        assert!(key_256 < other_coll);

        // Same entity, different vector names are grouped together under entity prefix
        let key3 = encode_collection_vector_key(coll, EntityId::new(1), "b");
        let prefix = encode_entity_vector_prefix(coll, EntityId::new(1));
        assert!(key1.starts_with(&prefix));
        assert!(key3.starts_with(&prefix));
    }

    #[test]
    fn test_decode_invalid_key() {
        // Too short
        assert!(decode_collection_vector_key(&[PREFIX_COLLECTION_VECTOR; 10]).is_none());

        // Too long
        assert!(decode_collection_vector_key(&[PREFIX_COLLECTION_VECTOR; 26]).is_none());

        // Wrong prefix
        assert!(decode_collection_vector_key(&[0xFF; 25]).is_none());

        // Empty
        assert!(decode_collection_vector_key(&[]).is_none());

        // Scan prefixes carry the right tag byte but are not full keys.
        let coll = CollectionId::new(1);
        let entity_prefix = encode_entity_vector_prefix(coll, EntityId::new(42));
        let collection_prefix = encode_collection_vector_prefix(coll);
        assert!(decode_collection_vector_key(&entity_prefix).is_none());
        assert!(decode_collection_vector_key(&collection_prefix).is_none());
    }
}