Skip to main content

kora_doc/
key.rs

1//! Binary key encoding and decoding for document and index records.
2//!
3//! Every persisted record in kora-doc is addressed by a compact binary key
4//! whose first byte is a [`KeyTag`] discriminant. The remaining bytes encode
5//! collection IDs, document IDs, field IDs, value hashes, or sequence numbers
6//! in little-endian order.
7//!
8//! ## Key Layouts
9//!
10//! | Tag | Layout | Size | Purpose |
11//! |-----|--------|------|---------|
12//! | `0x01` | `[tag][col:2][doc:4]` | 7 | Hot document |
13//! | `0x02`..`0x05`, `0x20` | `[tag][col:2]` | 3 | Collection metadata / schema / dictionary / registry / CDC head |
14//! | `0x06` | `[tag][col:2][doc:4]` | 7 | Cold-tier document |
//! | `0x10`, `0x12`, `0x13` | `[tag][col:2][field:2][vhash:4][pad:1]` | 10 | Hash / array / unique index bucket (the trailing pad byte is written as zero and ignored on decode) |
16//! | `0x11` | `[tag][col:2][field:2]` | 5 | Sorted index |
17//! | `0x14` | `[tag][col:2][f1:2][f2:2][vhash:4]` | 11 | Compound index bucket |
18//! | `0x21` | `[tag][col:2][seq:8]` | 11 | CDC event |
19//!
20//! All encode/decode functions are symmetric: encoding produces a fixed-size
21//! byte array, and decoding validates length and tag before extracting fields.
22//! Malformed keys surface as [`KeyDecodeError`].
23
24use thiserror::Error;
25
26use crate::registry::{CollectionId, DocId, FieldId};
27
/// Key tags used in compact binary key encoding.
///
/// The tag is stored as the first byte of every encoded key (`tag as u8`),
/// and every decoder compares that byte against the tag it expects before
/// extracting any fields. `#[repr(u8)]` pins each variant to the byte value
/// listed below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum KeyTag {
    /// Packed document key: `[0x01][col:2][doc:4]` (7 bytes).
    Doc = 0x01,
    /// Collection metadata key: `[0x02][col:2]` (3 bytes).
    Collection = 0x02,
    /// Collection schema key: `[0x03][col:2]` (3 bytes).
    Schema = 0x03,
    /// Collection dictionary key: `[0x04][col:2]` (3 bytes).
    Dictionary = 0x04,
    /// Collection registry key: `[0x05][col:2]` (3 bytes).
    Registry = 0x05,
    /// Cold pack key for tiered storage: `[0x06][col:2][doc:4]` (7 bytes).
    ColdDoc = 0x06,
    /// Hash index bucket key: `[0x10][col:2][field:2][vhash:4]` plus one
    /// trailing zero byte (10 bytes).
    HashIndex = 0x10,
    /// Sorted index key: `[0x11][col:2][field:2]` (5 bytes).
    SortedIndex = 0x11,
    /// Array index bucket key: `[0x12][col:2][field:2][vhash:4]` plus one
    /// trailing zero byte (10 bytes).
    ArrayIndex = 0x12,
    /// Unique index bucket key: `[0x13][col:2][field:2][vhash:4]` plus one
    /// trailing zero byte (10 bytes).
    UniqueIndex = 0x13,
    /// Compound index bucket key: `[0x14][col:2][f1:2][f2:2][vhash:4]` (11 bytes).
    CompoundIndex = 0x14,
    /// CDC stream head key: `[0x20][col:2]` (3 bytes).
    CdcHead = 0x20,
    /// CDC event key: `[0x21][col:2][seq:8]` (11 bytes).
    CdcEvent = 0x21,
}
59
/// Binary key decode errors.
///
/// Produced by the `decode_*` functions in this module. The length of the
/// key is checked before its tag byte, so a key that is both the wrong size
/// and carries the wrong tag is reported as [`KeyDecodeError::InvalidLength`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum KeyDecodeError {
    /// Key has wrong length for the expected type.
    ///
    /// Decoders require an exact length match; keys that are too long are
    /// rejected just like keys that are too short.
    #[error("invalid key length: expected {expected}, got {actual}")]
    InvalidLength {
        /// Required key length.
        expected: usize,
        /// Actual key length.
        actual: usize,
    },
    /// Key has an unexpected tag.
    ///
    /// Both fields hold raw tag bytes (the `u8` value of a [`KeyTag`] for
    /// `expected`, and whatever byte 0 of the key contained for `actual`).
    #[error("unexpected key tag: expected 0x{expected:02x}, got 0x{actual:02x}")]
    UnexpectedTag {
        /// Expected tag byte.
        expected: u8,
        /// Actual tag byte.
        actual: u8,
    },
}
80
81/// Encode `[0x01][col:2][doc:4]`.
82#[must_use]
83pub fn encode_doc_key(collection_id: CollectionId, doc_id: DocId) -> [u8; 7] {
84    let mut key = [0u8; 7];
85    key[0] = KeyTag::Doc as u8;
86    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
87    key[3..7].copy_from_slice(&doc_id.to_le_bytes());
88    key
89}
90
91/// Decode `[0x01][col:2][doc:4]`.
92pub fn decode_doc_key(key: &[u8]) -> Result<(CollectionId, DocId), KeyDecodeError> {
93    decode_collection_doc_key(key, KeyTag::Doc)
94}
95
96/// Encode `[0x06][col:2][doc:4]`.
97#[must_use]
98pub fn encode_cold_doc_key(collection_id: CollectionId, doc_id: DocId) -> [u8; 7] {
99    let mut key = [0u8; 7];
100    key[0] = KeyTag::ColdDoc as u8;
101    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
102    key[3..7].copy_from_slice(&doc_id.to_le_bytes());
103    key
104}
105
106/// Decode `[0x06][col:2][doc:4]`.
107pub fn decode_cold_doc_key(key: &[u8]) -> Result<(CollectionId, DocId), KeyDecodeError> {
108    decode_collection_doc_key(key, KeyTag::ColdDoc)
109}
110
111/// Encode `[tag][col:2]` keys (`0x02`..`0x05`, `0x20`).
112#[must_use]
113pub fn encode_collection_key(tag: KeyTag, collection_id: CollectionId) -> [u8; 3] {
114    let mut key = [0u8; 3];
115    key[0] = tag as u8;
116    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
117    key
118}
119
120/// Decode `[tag][col:2]`.
121pub fn decode_collection_key(
122    key: &[u8],
123    expected_tag: KeyTag,
124) -> Result<CollectionId, KeyDecodeError> {
125    ensure_key_shape(key, 3, expected_tag)?;
126    Ok(u16::from_le_bytes([key[1], key[2]]))
127}
128
129/// Encode `[tag][col:2][field:2][vhash:4]`.
130#[must_use]
131pub fn encode_hashed_bucket_key(
132    tag: KeyTag,
133    collection_id: CollectionId,
134    field_id: FieldId,
135    value_hash: u32,
136) -> [u8; 10] {
137    let mut key = [0u8; 10];
138    key[0] = tag as u8;
139    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
140    key[3..5].copy_from_slice(&field_id.to_le_bytes());
141    key[5..9].copy_from_slice(&value_hash.to_le_bytes());
142    key
143}
144
145/// Decode `[tag][col:2][field:2][vhash:4]`.
146pub fn decode_hashed_bucket_key(
147    key: &[u8],
148    expected_tag: KeyTag,
149) -> Result<(CollectionId, FieldId, u32), KeyDecodeError> {
150    ensure_key_shape(key, 10, expected_tag)?;
151    let collection_id = u16::from_le_bytes([key[1], key[2]]);
152    let field_id = u16::from_le_bytes([key[3], key[4]]);
153    let value_hash = u32::from_le_bytes([key[5], key[6], key[7], key[8]]);
154    Ok((collection_id, field_id, value_hash))
155}
156
157/// Encode `[0x11][col:2][field:2]`.
158#[must_use]
159pub fn encode_sorted_index_key(collection_id: CollectionId, field_id: FieldId) -> [u8; 5] {
160    let mut key = [0u8; 5];
161    key[0] = KeyTag::SortedIndex as u8;
162    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
163    key[3..5].copy_from_slice(&field_id.to_le_bytes());
164    key
165}
166
167/// Decode `[0x11][col:2][field:2]`.
168pub fn decode_sorted_index_key(key: &[u8]) -> Result<(CollectionId, FieldId), KeyDecodeError> {
169    ensure_key_shape(key, 5, KeyTag::SortedIndex)?;
170    Ok((
171        u16::from_le_bytes([key[1], key[2]]),
172        u16::from_le_bytes([key[3], key[4]]),
173    ))
174}
175
176/// Encode `[0x14][col:2][f1:2][f2:2][vhash:4]`.
177#[must_use]
178pub fn encode_compound_index_key(
179    collection_id: CollectionId,
180    first_field_id: FieldId,
181    second_field_id: FieldId,
182    value_hash: u32,
183) -> [u8; 11] {
184    let mut key = [0u8; 11];
185    key[0] = KeyTag::CompoundIndex as u8;
186    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
187    key[3..5].copy_from_slice(&first_field_id.to_le_bytes());
188    key[5..7].copy_from_slice(&second_field_id.to_le_bytes());
189    key[7..11].copy_from_slice(&value_hash.to_le_bytes());
190    key
191}
192
193/// Decode `[0x14][col:2][f1:2][f2:2][vhash:4]`.
194pub fn decode_compound_index_key(
195    key: &[u8],
196) -> Result<(CollectionId, FieldId, FieldId, u32), KeyDecodeError> {
197    ensure_key_shape(key, 11, KeyTag::CompoundIndex)?;
198    Ok((
199        u16::from_le_bytes([key[1], key[2]]),
200        u16::from_le_bytes([key[3], key[4]]),
201        u16::from_le_bytes([key[5], key[6]]),
202        u32::from_le_bytes([key[7], key[8], key[9], key[10]]),
203    ))
204}
205
206/// Encode `[0x21][col:2][seq:8]`.
207#[must_use]
208pub fn encode_cdc_event_key(collection_id: CollectionId, sequence: u64) -> [u8; 11] {
209    let mut key = [0u8; 11];
210    key[0] = KeyTag::CdcEvent as u8;
211    key[1..3].copy_from_slice(&collection_id.to_le_bytes());
212    key[3..11].copy_from_slice(&sequence.to_le_bytes());
213    key
214}
215
216/// Decode `[0x21][col:2][seq:8]`.
217pub fn decode_cdc_event_key(key: &[u8]) -> Result<(CollectionId, u64), KeyDecodeError> {
218    ensure_key_shape(key, 11, KeyTag::CdcEvent)?;
219    Ok((
220        u16::from_le_bytes([key[1], key[2]]),
221        u64::from_le_bytes([
222            key[3], key[4], key[5], key[6], key[7], key[8], key[9], key[10],
223        ]),
224    ))
225}
226
227fn decode_collection_doc_key(
228    key: &[u8],
229    expected_tag: KeyTag,
230) -> Result<(CollectionId, DocId), KeyDecodeError> {
231    ensure_key_shape(key, 7, expected_tag)?;
232    let collection_id = u16::from_le_bytes([key[1], key[2]]);
233    let doc_id = u32::from_le_bytes([key[3], key[4], key[5], key[6]]);
234    Ok((collection_id, doc_id))
235}
236
237fn ensure_key_shape(
238    key: &[u8],
239    expected_len: usize,
240    expected_tag: KeyTag,
241) -> Result<(), KeyDecodeError> {
242    if key.len() != expected_len {
243        return Err(KeyDecodeError::InvalidLength {
244            expected: expected_len,
245            actual: key.len(),
246        });
247    }
248    if key[0] != expected_tag as u8 {
249        return Err(KeyDecodeError::UnexpectedTag {
250            expected: expected_tag as u8,
251            actual: key[0],
252        });
253    }
254    Ok(())
255}
256
#[cfg(test)]
mod tests {
    //! Round-trip, byte-layout and rejection tests for every key family.

    use super::*;

    #[test]
    fn round_trip_doc_key() {
        let encoded = encode_doc_key(10, 1234);
        let decoded = decode_doc_key(&encoded).expect("doc key should decode");
        assert_eq!(decoded, (10, 1234));
    }

    #[test]
    fn round_trip_cold_doc_key() {
        let encoded = encode_cold_doc_key(7, 99);
        let decoded = decode_cold_doc_key(&encoded).expect("cold doc key should decode");
        assert_eq!(decoded, (7, 99));
    }

    #[test]
    fn round_trip_collection_key() {
        let tags = [
            KeyTag::Collection,
            KeyTag::Schema,
            KeyTag::Dictionary,
            KeyTag::Registry,
            KeyTag::CdcHead,
        ];
        for &tag in &tags {
            let encoded = encode_collection_key(tag, 0xBEEF);
            assert_eq!(decode_collection_key(&encoded, tag), Ok(0xBEEF));
        }
    }

    #[test]
    fn round_trip_hashed_bucket_key() {
        let encoded = encode_hashed_bucket_key(KeyTag::HashIndex, 4, 9, 0xAABBCCDD);
        let decoded =
            decode_hashed_bucket_key(&encoded, KeyTag::HashIndex).expect("hash key should decode");
        assert_eq!(decoded, (4, 9, 0xAABBCCDD));
    }

    #[test]
    fn round_trip_sorted_index_key() {
        let encoded = encode_sorted_index_key(3, 8);
        let decoded = decode_sorted_index_key(&encoded).expect("sorted key should decode");
        assert_eq!(decoded, (3, 8));
    }

    #[test]
    fn round_trip_compound_index_key() {
        let encoded = encode_compound_index_key(1, 2, 3, 0xDEADBEEF);
        let decoded = decode_compound_index_key(&encoded).expect("compound key should decode");
        assert_eq!(decoded, (1, 2, 3, 0xDEADBEEF));
    }

    #[test]
    fn round_trip_cdc_event_key() {
        let encoded = encode_cdc_event_key(5, u64::MAX - 1);
        let decoded = decode_cdc_event_key(&encoded).expect("cdc event key should decode");
        assert_eq!(decoded, (5, u64::MAX - 1));
    }

    // Pins the persisted byte order so an accidental endianness or offset
    // change is caught even when encode and decode stay mutually consistent.
    #[test]
    fn doc_key_is_little_endian() {
        assert_eq!(
            encode_doc_key(0x0102, 0x0A0B0C0D),
            [0x01, 0x02, 0x01, 0x0D, 0x0C, 0x0B, 0x0A]
        );
    }

    // The hashed bucket key is 10 bytes although its fields fill only 9;
    // the last byte must stay zero for keys to remain stable.
    #[test]
    fn hashed_bucket_key_has_zero_trailing_byte() {
        assert_eq!(
            encode_hashed_bucket_key(KeyTag::HashIndex, 4, 9, 0xAABBCCDD),
            [0x10, 4, 0, 9, 0, 0xDD, 0xCC, 0xBB, 0xAA, 0]
        );
    }

    #[test]
    fn rejects_invalid_tag() {
        let mut encoded = encode_doc_key(1, 2);
        encoded[0] = KeyTag::ColdDoc as u8;
        let err = decode_doc_key(&encoded).expect_err("wrong tag should fail");
        assert_eq!(
            err,
            KeyDecodeError::UnexpectedTag {
                expected: KeyTag::Doc as u8,
                actual: KeyTag::ColdDoc as u8
            }
        );
    }

    #[test]
    fn rejects_invalid_length() {
        let encoded = encode_doc_key(1, 2);
        let err = decode_doc_key(&encoded[..6]).expect_err("short key should fail");
        assert_eq!(
            err,
            KeyDecodeError::InvalidLength {
                expected: 7,
                actual: 6
            }
        );
        assert_eq!(
            decode_doc_key(&[]),
            Err(KeyDecodeError::InvalidLength {
                expected: 7,
                actual: 0
            })
        );
    }

    // Length is validated before the tag, so a short key with a foreign tag
    // is reported as a length error.
    #[test]
    fn length_is_checked_before_tag() {
        let err = decode_doc_key(&[KeyTag::ColdDoc as u8, 0, 0])
            .expect_err("short key with wrong tag should fail");
        assert_eq!(
            err,
            KeyDecodeError::InvalidLength {
                expected: 7,
                actual: 3
            }
        );
    }
}
289}