Skip to main content

vector/serde/
key.rs

1//! Key encoding/decoding for vector database records.
2//!
3//! All keys use big-endian encoding for lexicographic ordering.
4
5use super::{EncodingError, FieldValue, KEY_VERSION, RecordKey, RecordType, record_type_from_tag};
6use bytes::{BufMut, Bytes, BytesMut};
7use common::BytesRange;
8use common::serde::key_prefix::KeyPrefix;
9use common::serde::terminated_bytes;
10
11/// CollectionMeta key - singleton record storing collection schema.
12///
13/// Key layout: `[version | tag]` (2 bytes)
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct CollectionMetaKey;
16
17impl RecordKey for CollectionMetaKey {
18    const RECORD_TYPE: RecordType = RecordType::CollectionMeta;
19}
20
21impl CollectionMetaKey {
22    pub fn encode(&self) -> Bytes {
23        let mut buf = BytesMut::with_capacity(2);
24        Self::RECORD_TYPE.prefix().write_to(&mut buf);
25        buf.freeze()
26    }
27
28    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
29        if buf.len() < 2 {
30            return Err(EncodingError {
31                message: "Buffer too short for CollectionMetaKey".to_string(),
32            });
33        }
34        validate_key_prefix::<Self>(buf)?;
35        Ok(CollectionMetaKey)
36    }
37}
38
39/// Deletions key - singleton record storing deleted vector IDs bitmap.
40///
41/// Key layout: `[version | tag]` (2 bytes)
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct DeletionsKey;
44
45impl RecordKey for DeletionsKey {
46    const RECORD_TYPE: RecordType = RecordType::Deletions;
47}
48
49impl DeletionsKey {
50    pub fn new() -> Self {
51        Self
52    }
53
54    pub fn encode(&self) -> Bytes {
55        let mut buf = BytesMut::with_capacity(2);
56        Self::RECORD_TYPE.prefix().write_to(&mut buf);
57        buf.freeze()
58    }
59
60    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
61        if buf.len() < 2 {
62            return Err(EncodingError {
63                message: "Buffer too short for DeletionsKey".to_string(),
64            });
65        }
66        validate_key_prefix::<Self>(buf)?;
67        Ok(DeletionsKey)
68    }
69}
70
71impl Default for DeletionsKey {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77/// CentroidChunk key - stores a chunk of cluster centroids.
78///
79/// Key layout: `[version | tag | chunk_id:u32-BE]` (6 bytes)
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct CentroidChunkKey {
82    pub chunk_id: u32,
83}
84
85impl RecordKey for CentroidChunkKey {
86    const RECORD_TYPE: RecordType = RecordType::CentroidChunk;
87}
88
89impl CentroidChunkKey {
90    pub fn new(chunk_id: u32) -> Self {
91        Self { chunk_id }
92    }
93
94    pub fn encode(&self) -> Bytes {
95        let mut buf = BytesMut::with_capacity(6);
96        Self::RECORD_TYPE.prefix().write_to(&mut buf);
97        buf.put_u32(self.chunk_id); // Big-endian
98        buf.freeze()
99    }
100
101    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
102        if buf.len() < 6 {
103            return Err(EncodingError {
104                message: "Buffer too short for CentroidChunkKey".to_string(),
105            });
106        }
107        validate_key_prefix::<Self>(buf)?;
108        let chunk_id = u32::from_be_bytes([buf[2], buf[3], buf[4], buf[5]]);
109        Ok(CentroidChunkKey { chunk_id })
110    }
111
112    /// Returns a range covering all centroid chunk keys.
113    pub fn all_chunks_range() -> BytesRange {
114        let mut buf = BytesMut::with_capacity(2);
115        Self::RECORD_TYPE.prefix().write_to(&mut buf);
116        BytesRange::prefix(buf.freeze())
117    }
118}
119
120/// PostingList key - maps centroid ID to vector IDs.
121///
122/// Key layout: `[version | tag | centroid_id:u64-BE]` (10 bytes)
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct PostingListKey {
125    pub centroid_id: u64,
126}
127
128impl RecordKey for PostingListKey {
129    const RECORD_TYPE: RecordType = RecordType::PostingList;
130}
131
132impl PostingListKey {
133    pub fn new(centroid_id: u64) -> Self {
134        Self { centroid_id }
135    }
136
137    pub fn encode(&self) -> Bytes {
138        let mut buf = BytesMut::with_capacity(10);
139        Self::RECORD_TYPE.prefix().write_to(&mut buf);
140        buf.put_u64(self.centroid_id); // Big-endian
141        buf.freeze()
142    }
143
144    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
145        if buf.len() < 10 {
146            return Err(EncodingError {
147                message: "Buffer too short for PostingListKey".to_string(),
148            });
149        }
150        validate_key_prefix::<Self>(buf)?;
151        let centroid_id = u64::from_be_bytes([
152            buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9],
153        ]);
154        Ok(PostingListKey { centroid_id })
155    }
156
157    /// Returns a range covering all posting list keys.
158    pub fn all_posting_lists_range() -> BytesRange {
159        let mut buf = BytesMut::with_capacity(2);
160        Self::RECORD_TYPE.prefix().write_to(&mut buf);
161        BytesRange::prefix(buf.freeze())
162    }
163}
164
165/// IdDictionary key - maps external string IDs to internal u64 vector IDs.
166///
167/// Key layout: `[version | tag | external_id:TerminatedBytes]` (variable)
168#[derive(Debug, Clone, PartialEq, Eq)]
169pub struct IdDictionaryKey {
170    pub external_id: String,
171}
172
173impl RecordKey for IdDictionaryKey {
174    const RECORD_TYPE: RecordType = RecordType::IdDictionary;
175}
176
177impl IdDictionaryKey {
178    pub fn new(external_id: impl Into<String>) -> Self {
179        Self {
180            external_id: external_id.into(),
181        }
182    }
183
184    pub fn encode(&self) -> Bytes {
185        let mut buf = BytesMut::new();
186        Self::RECORD_TYPE.prefix().write_to(&mut buf);
187        terminated_bytes::serialize(self.external_id.as_bytes(), &mut buf);
188        buf.freeze()
189    }
190
191    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
192        if buf.len() < 3 {
193            // At minimum: version + tag + terminator
194            return Err(EncodingError {
195                message: "Buffer too short for IdDictionaryKey".to_string(),
196            });
197        }
198        validate_key_prefix::<Self>(buf)?;
199
200        let mut slice = &buf[2..];
201        let external_id_bytes =
202            terminated_bytes::deserialize(&mut slice).map_err(|e| EncodingError {
203                message: format!("Failed to decode external_id: {}", e),
204            })?;
205
206        let external_id =
207            String::from_utf8(external_id_bytes.to_vec()).map_err(|e| EncodingError {
208                message: format!("Invalid UTF-8 in external_id: {}", e),
209            })?;
210
211        Ok(IdDictionaryKey { external_id })
212    }
213
214    /// Returns a range covering all ID dictionary keys.
215    pub fn all_ids_range() -> BytesRange {
216        let mut buf = BytesMut::with_capacity(2);
217        Self::RECORD_TYPE.prefix().write_to(&mut buf);
218        BytesRange::prefix(buf.freeze())
219    }
220
221    /// Returns a range covering all IDs with the given prefix.
222    ///
223    /// Note: This creates a range over the serialized key format. The prefix
224    /// is serialized using TerminatedBytes encoding, so the range will include
225    /// all IDs that start with the given string prefix.
226    pub fn prefix_range(prefix: &str) -> BytesRange {
227        let mut buf = BytesMut::new();
228        Self::RECORD_TYPE.prefix().write_to(&mut buf);
229        terminated_bytes::serialize(prefix.as_bytes(), &mut buf);
230        BytesRange::prefix(buf.freeze())
231    }
232}
233
234/// VectorData key - stores raw vector bytes.
235///
236/// Key layout: `[version | tag | vector_id:u64-BE]` (10 bytes)
237#[derive(Debug, Clone, PartialEq, Eq)]
238pub struct VectorDataKey {
239    pub vector_id: u64,
240}
241
242impl RecordKey for VectorDataKey {
243    const RECORD_TYPE: RecordType = RecordType::VectorData;
244}
245
246impl VectorDataKey {
247    pub fn new(vector_id: u64) -> Self {
248        Self { vector_id }
249    }
250
251    pub fn encode(&self) -> Bytes {
252        let mut buf = BytesMut::with_capacity(10);
253        Self::RECORD_TYPE.prefix().write_to(&mut buf);
254        buf.put_u64(self.vector_id); // Big-endian
255        buf.freeze()
256    }
257
258    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
259        if buf.len() < 10 {
260            return Err(EncodingError {
261                message: "Buffer too short for VectorDataKey".to_string(),
262            });
263        }
264        validate_key_prefix::<Self>(buf)?;
265        let vector_id = u64::from_be_bytes([
266            buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9],
267        ]);
268        Ok(VectorDataKey { vector_id })
269    }
270
271    /// Returns a range covering all vector data keys.
272    pub fn all_vectors_range() -> BytesRange {
273        let mut buf = BytesMut::with_capacity(2);
274        Self::RECORD_TYPE.prefix().write_to(&mut buf);
275        BytesRange::prefix(buf.freeze())
276    }
277}
278
279/// MetadataIndex key - inverted index mapping metadata values to vector IDs.
280///
281/// Key layout: `[version | tag | field:TerminatedBytes | value:FieldValue]` (variable)
282#[derive(Debug, Clone, PartialEq)]
283pub struct MetadataIndexKey {
284    pub field: String,
285    pub value: FieldValue,
286}
287
288impl RecordKey for MetadataIndexKey {
289    const RECORD_TYPE: RecordType = RecordType::MetadataIndex;
290}
291
292impl MetadataIndexKey {
293    pub fn new(field: impl Into<String>, value: FieldValue) -> Self {
294        Self {
295            field: field.into(),
296            value,
297        }
298    }
299
300    pub fn encode(&self) -> Bytes {
301        let mut buf = BytesMut::new();
302        Self::RECORD_TYPE.prefix().write_to(&mut buf);
303        terminated_bytes::serialize(self.field.as_bytes(), &mut buf);
304        self.value.encode_sortable(&mut buf);
305        buf.freeze()
306    }
307
308    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
309        if buf.len() < 4 {
310            // Minimum: version + tag + field terminator + value type
311            return Err(EncodingError {
312                message: "Buffer too short for MetadataIndexKey".to_string(),
313            });
314        }
315        validate_key_prefix::<Self>(buf)?;
316
317        let mut slice = &buf[2..];
318
319        let field_bytes = terminated_bytes::deserialize(&mut slice).map_err(|e| EncodingError {
320            message: format!("Failed to decode field: {}", e),
321        })?;
322
323        let field = String::from_utf8(field_bytes.to_vec()).map_err(|e| EncodingError {
324            message: format!("Invalid UTF-8 in field: {}", e),
325        })?;
326
327        let value = FieldValue::decode_sortable(&mut slice)?;
328
329        Ok(MetadataIndexKey { field, value })
330    }
331
332    /// Returns a range covering all metadata index keys for a specific field.
333    pub fn field_range(field: &str) -> BytesRange {
334        let mut buf = BytesMut::new();
335        Self::RECORD_TYPE.prefix().write_to(&mut buf);
336        terminated_bytes::serialize(field.as_bytes(), &mut buf);
337        BytesRange::prefix(buf.freeze())
338    }
339
340    /// Returns a range covering all metadata index keys.
341    pub fn all_indexes_range() -> BytesRange {
342        let mut buf = BytesMut::with_capacity(2);
343        Self::RECORD_TYPE.prefix().write_to(&mut buf);
344        BytesRange::prefix(buf.freeze())
345    }
346}
347
348/// SeqBlock key - singleton record storing sequence allocation state.
349///
350/// Key layout: `[version | tag]` (2 bytes)
351#[derive(Debug, Clone, PartialEq, Eq)]
352pub struct SeqBlockKey;
353
354impl RecordKey for SeqBlockKey {
355    const RECORD_TYPE: RecordType = RecordType::SeqBlock;
356}
357
358impl SeqBlockKey {
359    pub fn encode(&self) -> Bytes {
360        let mut buf = BytesMut::with_capacity(2);
361        Self::RECORD_TYPE.prefix().write_to(&mut buf);
362        buf.freeze()
363    }
364
365    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
366        if buf.len() < 2 {
367            return Err(EncodingError {
368                message: "Buffer too short for SeqBlockKey".to_string(),
369            });
370        }
371        validate_key_prefix::<Self>(buf)?;
372        Ok(SeqBlockKey)
373    }
374}
375
376/// CentroidStats key - per-centroid vector count for rebalance triggers.
377///
378/// Key layout: `[version | tag | centroid_id:u64-BE]` (10 bytes)
379#[derive(Debug, Clone, PartialEq, Eq)]
380pub struct CentroidStatsKey {
381    pub centroid_id: u64,
382}
383
384impl RecordKey for CentroidStatsKey {
385    const RECORD_TYPE: RecordType = RecordType::CentroidStats;
386}
387
388impl CentroidStatsKey {
389    pub fn new(centroid_id: u64) -> Self {
390        Self { centroid_id }
391    }
392
393    pub fn encode(&self) -> Bytes {
394        let mut buf = BytesMut::with_capacity(10);
395        Self::RECORD_TYPE.prefix().write_to(&mut buf);
396        buf.put_u64(self.centroid_id); // Big-endian
397        buf.freeze()
398    }
399
400    pub fn decode(buf: &[u8]) -> Result<Self, EncodingError> {
401        if buf.len() < 10 {
402            return Err(EncodingError {
403                message: "Buffer too short for CentroidStatsKey".to_string(),
404            });
405        }
406        validate_key_prefix::<Self>(buf)?;
407        let centroid_id = u64::from_be_bytes([
408            buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9],
409        ]);
410        Ok(CentroidStatsKey { centroid_id })
411    }
412}
413
414/// Validates the key prefix (version and record tag).
415fn validate_key_prefix<T: RecordKey>(buf: &[u8]) -> Result<(), EncodingError> {
416    let prefix = KeyPrefix::from_bytes_versioned(buf, KEY_VERSION)?;
417    let record_type = record_type_from_tag(prefix.tag())?;
418
419    if record_type != T::RECORD_TYPE {
420        return Err(EncodingError {
421            message: format!(
422                "Invalid record type: expected {:?}, got {:?}",
423                T::RECORD_TYPE,
424                record_type
425            ),
426        });
427    }
428
429    Ok(())
430}
431
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each encoded key sorts strictly before the next one.
    fn assert_strictly_ascending(encoded: &[Bytes]) {
        for pair in encoded.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    // ---- round trips -----------------------------------------------------

    #[test]
    fn should_encode_and_decode_collection_meta_key() {
        let original = CollectionMetaKey;

        let bytes = original.encode();
        let round_tripped = CollectionMetaKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
        assert_eq!(bytes.len(), 2);
    }

    #[test]
    fn should_encode_and_decode_centroid_chunk_key() {
        let original = CentroidChunkKey::new(42);

        let bytes = original.encode();
        let round_tripped = CentroidChunkKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
        assert_eq!(bytes.len(), 6);
    }

    #[test]
    fn should_preserve_centroid_chunk_key_ordering() {
        let encoded: Vec<Bytes> = [1u32, 2, 100]
            .iter()
            .map(|&id| CentroidChunkKey::new(id).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_encode_and_decode_posting_list_key() {
        let original = PostingListKey::new(123);

        let bytes = original.encode();
        let round_tripped = PostingListKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_encode_and_decode_deletions_key() {
        let original = DeletionsKey::new();

        let bytes = original.encode();
        let round_tripped = DeletionsKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_encode_and_decode_id_dictionary_key() {
        let original = IdDictionaryKey::new("my-vector-id");

        let bytes = original.encode();
        let round_tripped = IdDictionaryKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_preserve_id_dictionary_key_ordering() {
        let encoded: Vec<Bytes> = ["aaa", "aab", "bbb"]
            .iter()
            .map(|&id| IdDictionaryKey::new(id).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_encode_and_decode_vector_data_key() {
        let original = VectorDataKey::new(0xDEADBEEF_CAFEBABE);

        let bytes = original.encode();
        let round_tripped = VectorDataKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
        assert_eq!(bytes.len(), 10);
    }

    #[test]
    fn should_preserve_vector_data_key_ordering() {
        let encoded: Vec<Bytes> = [1u64, 2, u64::MAX]
            .iter()
            .map(|&id| VectorDataKey::new(id).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_encode_and_decode_metadata_index_key_string() {
        let original = MetadataIndexKey::new("category", FieldValue::String("shoes".to_string()));

        let bytes = original.encode();
        let round_tripped = MetadataIndexKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_encode_and_decode_metadata_index_key_int64() {
        let original = MetadataIndexKey::new("price", FieldValue::Int64(99));

        let bytes = original.encode();
        let round_tripped = MetadataIndexKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_encode_and_decode_metadata_index_key_float64() {
        let original = MetadataIndexKey::new("score", FieldValue::Float64(1.23));

        let bytes = original.encode();
        let round_tripped = MetadataIndexKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    #[test]
    fn should_encode_and_decode_metadata_index_key_bool() {
        let original = MetadataIndexKey::new("active", FieldValue::Bool(true));

        let bytes = original.encode();
        let round_tripped = MetadataIndexKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
    }

    // ---- sortable value encodings ----------------------------------------

    #[test]
    fn should_preserve_int64_ordering_in_metadata_index() {
        let encoded: Vec<Bytes> = [-100i64, 0, 100]
            .iter()
            .map(|&price| MetadataIndexKey::new("price", FieldValue::Int64(price)).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_preserve_float64_ordering_in_metadata_index() {
        let encoded: Vec<Bytes> = [-1.0f64, 0.0, 1.0]
            .iter()
            .map(|&score| MetadataIndexKey::new("score", FieldValue::Float64(score)).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_encode_and_decode_seq_block_key() {
        let original = SeqBlockKey;

        let bytes = original.encode();
        let round_tripped = SeqBlockKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
        assert_eq!(bytes.len(), 2);
    }

    // ---- prefix validation -----------------------------------------------

    #[test]
    fn should_reject_wrong_record_type() {
        // A key encoded for one record type must not decode as another.
        let bytes = CollectionMetaKey.encode();

        let outcome = SeqBlockKey::decode(&bytes);

        assert!(outcome.is_err());
        let failure = outcome.unwrap_err();
        assert!(failure.message.contains("Invalid record type"));
    }

    #[test]
    fn should_encode_and_decode_centroid_stats_key() {
        let original = CentroidStatsKey::new(42);

        let bytes = original.encode();
        let round_tripped = CentroidStatsKey::decode(&bytes).unwrap();

        assert_eq!(round_tripped, original);
        assert_eq!(bytes.len(), 10);
    }

    #[test]
    fn should_preserve_centroid_stats_key_ordering() {
        let encoded: Vec<Bytes> = [1u64, 2, u64::MAX]
            .iter()
            .map(|&id| CentroidStatsKey::new(id).encode())
            .collect();

        assert_strictly_ascending(&encoded);
    }

    #[test]
    fn should_reject_wrong_version() {
        // 0x99 is the deliberately wrong version byte; the tag itself is valid.
        let mut raw = BytesMut::new();
        raw.put_u8(0x99);
        raw.put_u8(RecordType::CollectionMeta.tag().as_byte());

        let outcome = CollectionMetaKey::decode(&raw);

        assert!(outcome.is_err());
        let failure = outcome.unwrap_err();
        assert!(failure.message.contains("version"));
    }
}