Skip to main content

icydb_core/db/index/
entry.rs

1use crate::{
2    db::{
3        index::RawIndexKey,
4        store::{StorageKey, StorageKeyEncodeError},
5    },
6    traits::{EntityKind, FieldValue, Storable},
7    value::Value,
8};
9use canic_cdk::structures::storable::Bound;
10use std::{borrow::Cow, collections::BTreeSet};
11use thiserror::Error as ThisError;
12
13///
14/// Constants
15///
16
17const INDEX_ENTRY_LEN_BYTES: usize = 4;
18pub const MAX_INDEX_ENTRY_KEYS: usize = 65_535;
19
20#[allow(clippy::cast_possible_truncation)]
21pub const MAX_INDEX_ENTRY_BYTES: u32 =
22    (INDEX_ENTRY_LEN_BYTES + (MAX_INDEX_ENTRY_KEYS * StorageKey::STORED_SIZE_USIZE)) as u32;
23
24///
25/// IndexEntryCorruption
26///
27
28#[derive(Debug, ThisError)]
29pub enum IndexEntryCorruption {
30    #[error("index entry exceeds max size")]
31    TooLarge { len: usize },
32
33    #[error("index entry missing key count")]
34    MissingLength,
35
36    #[error("index entry key count exceeds limit")]
37    TooManyKeys { count: usize },
38
39    #[error("index entry length does not match key count")]
40    LengthMismatch,
41
42    #[error("index entry contains invalid key bytes")]
43    InvalidKey,
44
45    #[error("index entry contains duplicate key")]
46    DuplicateKey,
47
48    #[error("index entry contains zero keys")]
49    EmptyEntry,
50
51    #[error("unique index entry contains {keys} keys")]
52    NonUniqueEntry { keys: usize },
53
54    #[error("index entry missing expected entity key: {entity_key:?} (index {index_key:?})")]
55    MissingKey {
56        index_key: Box<RawIndexKey>,
57        entity_key: Value,
58    },
59
60    #[error("index entry points at key {indexed_key:?} but stored row key is {row_key:?}")]
61    RowKeyMismatch {
62        indexed_key: Box<Value>,
63        row_key: Box<Value>,
64    },
65}
66
67impl IndexEntryCorruption {
68    #[must_use]
69    pub fn missing_key(index_key: RawIndexKey, entity_key: impl FieldValue) -> Self {
70        Self::MissingKey {
71            index_key: Box::new(index_key),
72            entity_key: entity_key.to_value(),
73        }
74    }
75}
76
77///
78/// IndexEntryEncodeError
79///
80
81#[derive(Debug, ThisError)]
82pub enum IndexEntryEncodeError {
83    #[error("index entry exceeds max keys: {keys} (limit {MAX_INDEX_ENTRY_KEYS})")]
84    TooManyKeys { keys: usize },
85
86    #[error("index entry key encoding failed: {0}")]
87    KeyEncoding(#[from] StorageKeyEncodeError),
88}
89
90impl IndexEntryEncodeError {
91    #[must_use]
92    pub const fn keys(&self) -> usize {
93        match self {
94            Self::TooManyKeys { keys } => *keys,
95            Self::KeyEncoding(_) => 0,
96        }
97    }
98}
99
100///
101/// IndexEntry
102///
103
104#[derive(Clone, Debug)]
105pub struct IndexEntry<E: EntityKind> {
106    ids: BTreeSet<E::Key>,
107}
108
109impl<E: EntityKind> IndexEntry<E> {
110    #[must_use]
111    pub fn new(id: E::Key) -> Self {
112        let mut ids = BTreeSet::new();
113        ids.insert(id);
114        Self { ids }
115    }
116
117    pub fn insert(&mut self, id: E::Key) {
118        self.ids.insert(id);
119    }
120
121    pub fn remove(&mut self, id: E::Key) {
122        self.ids.remove(&id);
123    }
124
125    #[must_use]
126    pub fn contains(&self, id: E::Key) -> bool {
127        self.ids.contains(&id)
128    }
129
130    #[must_use]
131    pub fn is_empty(&self) -> bool {
132        self.ids.is_empty()
133    }
134
135    #[must_use]
136    pub fn len(&self) -> usize {
137        self.ids.len()
138    }
139
140    pub fn iter_ids(&self) -> impl Iterator<Item = E::Key> + '_ {
141        self.ids.iter().copied()
142    }
143
144    #[must_use]
145    pub fn single_id(&self) -> Option<E::Key> {
146        if self.ids.len() == 1 {
147            self.ids.iter().copied().next()
148        } else {
149            None
150        }
151    }
152
153    pub fn try_to_raw(&self) -> Result<RawIndexEntry, IndexEntryEncodeError> {
154        RawIndexEntry::try_from_entry(self)
155    }
156}
157
158///
159/// RawIndexEntry
160///
161
162#[derive(Clone, Debug, Eq, PartialEq)]
163pub struct RawIndexEntry(Vec<u8>);
164
165impl RawIndexEntry {
166    pub fn try_from_entry<E: EntityKind>(
167        entry: &IndexEntry<E>,
168    ) -> Result<Self, IndexEntryEncodeError> {
169        let mut keys = Vec::with_capacity(entry.ids.len());
170        for id in &entry.ids {
171            let value = id.to_value();
172            let key = StorageKey::try_from_value(&value)?;
173            keys.push(key);
174        }
175
176        Self::try_from_keys(keys)
177    }
178
179    pub fn try_decode<E: EntityKind>(&self) -> Result<IndexEntry<E>, IndexEntryCorruption> {
180        let storage_keys = self.decode_keys()?;
181        let mut ids = BTreeSet::new();
182
183        for key in storage_keys {
184            let value = key.as_value();
185            let Some(id) = <E::Key as FieldValue>::from_value(&value) else {
186                return Err(IndexEntryCorruption::InvalidKey);
187            };
188            ids.insert(id);
189        }
190
191        if ids.is_empty() {
192            return Err(IndexEntryCorruption::EmptyEntry);
193        }
194
195        Ok(IndexEntry { ids })
196    }
197
198    pub fn try_from_keys<I>(keys: I) -> Result<Self, IndexEntryEncodeError>
199    where
200        I: IntoIterator<Item = StorageKey>,
201    {
202        let keys: Vec<StorageKey> = keys.into_iter().collect();
203        let count = keys.len();
204
205        if count > MAX_INDEX_ENTRY_KEYS {
206            return Err(IndexEntryEncodeError::TooManyKeys { keys: count });
207        }
208
209        let mut out =
210            Vec::with_capacity(INDEX_ENTRY_LEN_BYTES + count * StorageKey::STORED_SIZE_USIZE);
211
212        let count_u32 =
213            u32::try_from(count).map_err(|_| IndexEntryEncodeError::TooManyKeys { keys: count })?;
214        out.extend_from_slice(&count_u32.to_be_bytes());
215
216        for sk in keys {
217            out.extend_from_slice(&sk.to_bytes()?);
218        }
219
220        Ok(Self(out))
221    }
222
223    pub fn decode_keys(&self) -> Result<Vec<StorageKey>, IndexEntryCorruption> {
224        self.validate()?;
225
226        let bytes = self.0.as_slice();
227
228        let mut len_buf = [0u8; INDEX_ENTRY_LEN_BYTES];
229        len_buf.copy_from_slice(&bytes[..INDEX_ENTRY_LEN_BYTES]);
230        let count = u32::from_be_bytes(len_buf) as usize;
231
232        let mut keys = Vec::with_capacity(count);
233        let mut offset = INDEX_ENTRY_LEN_BYTES;
234
235        for _ in 0..count {
236            let end = offset + StorageKey::STORED_SIZE_USIZE;
237            let sk = StorageKey::try_from(&bytes[offset..end])
238                .map_err(|_| IndexEntryCorruption::InvalidKey)?;
239
240            keys.push(sk);
241            offset = end;
242        }
243
244        Ok(keys)
245    }
246
247    /// Validate the raw index entry structure without binding to an entity.
248    pub fn validate(&self) -> Result<(), IndexEntryCorruption> {
249        let bytes = self.0.as_slice();
250
251        if bytes.len() > MAX_INDEX_ENTRY_BYTES as usize {
252            return Err(IndexEntryCorruption::TooLarge { len: bytes.len() });
253        }
254        if bytes.len() < INDEX_ENTRY_LEN_BYTES {
255            return Err(IndexEntryCorruption::MissingLength);
256        }
257
258        let mut len_buf = [0u8; INDEX_ENTRY_LEN_BYTES];
259        len_buf.copy_from_slice(&bytes[..INDEX_ENTRY_LEN_BYTES]);
260        let count = u32::from_be_bytes(len_buf) as usize;
261
262        if count == 0 {
263            return Err(IndexEntryCorruption::EmptyEntry);
264        }
265        if count > MAX_INDEX_ENTRY_KEYS {
266            return Err(IndexEntryCorruption::TooManyKeys { count });
267        }
268
269        let expected = INDEX_ENTRY_LEN_BYTES
270            + count
271                .checked_mul(StorageKey::STORED_SIZE_USIZE)
272                .ok_or(IndexEntryCorruption::LengthMismatch)?;
273
274        if bytes.len() != expected {
275            return Err(IndexEntryCorruption::LengthMismatch);
276        }
277
278        // Validate each StorageKey structurally and detect duplicates
279        let mut keys = BTreeSet::new();
280        let mut offset = INDEX_ENTRY_LEN_BYTES;
281
282        for _ in 0..count {
283            let end = offset + StorageKey::STORED_SIZE_USIZE;
284
285            let sk = StorageKey::try_from(&bytes[offset..end])
286                .map_err(|_| IndexEntryCorruption::InvalidKey)?;
287
288            if !keys.insert(sk) {
289                return Err(IndexEntryCorruption::DuplicateKey);
290            }
291
292            offset = end;
293        }
294
295        Ok(())
296    }
297
298    #[must_use]
299    pub fn as_bytes(&self) -> &[u8] {
300        &self.0
301    }
302
303    #[must_use]
304    pub const fn len(&self) -> usize {
305        self.0.len()
306    }
307
308    #[must_use]
309    pub const fn is_empty(&self) -> bool {
310        self.0.is_empty()
311    }
312}
313
314impl<E: EntityKind> TryFrom<&IndexEntry<E>> for RawIndexEntry {
315    type Error = IndexEntryEncodeError;
316
317    fn try_from(entry: &IndexEntry<E>) -> Result<Self, Self::Error> {
318        Self::try_from_entry(entry)
319    }
320}
321
322impl Storable for RawIndexEntry {
323    fn to_bytes(&self) -> Cow<'_, [u8]> {
324        Cow::Borrowed(&self.0)
325    }
326
327    fn from_bytes(bytes: Cow<'_, [u8]>) -> Self {
328        Self(bytes.into_owned())
329    }
330
331    fn into_bytes(self) -> Vec<u8> {
332        self.0
333    }
334
335    const BOUND: Bound = Bound::Bounded {
336        max_size: MAX_INDEX_ENTRY_BYTES,
337        is_fixed_size: false,
338    };
339}
340
341///
342/// TESTS
343///
344
345#[cfg(test)]
346mod tests {
347    use crate::{
348        db::{
349            index::{
350                IndexEntryCorruption, MAX_INDEX_ENTRY_BYTES, MAX_INDEX_ENTRY_KEYS, RawIndexEntry,
351            },
352            store::StorageKey,
353        },
354        traits::Storable,
355    };
356    use std::borrow::Cow;
357
358    #[test]
359    fn raw_index_entry_round_trip() {
360        let keys = vec![StorageKey::Int(1), StorageKey::Uint(2)];
361
362        let raw = RawIndexEntry::try_from_keys(keys.clone()).expect("encode index entry");
363        let decoded = raw.decode_keys().expect("decode index entry");
364
365        assert_eq!(decoded.len(), keys.len());
366        assert!(decoded.contains(&StorageKey::Int(1)));
367        assert!(decoded.contains(&StorageKey::Uint(2)));
368    }
369
370    #[test]
371    fn raw_index_entry_roundtrip_via_bytes() {
372        let keys = vec![StorageKey::Int(9), StorageKey::Uint(10)];
373
374        let raw = RawIndexEntry::try_from_keys(keys.clone()).expect("encode index entry");
375        let encoded = Storable::to_bytes(&raw);
376        let raw = RawIndexEntry::from_bytes(encoded);
377        let decoded = raw.decode_keys().expect("decode index entry");
378
379        assert_eq!(decoded.len(), keys.len());
380        assert!(decoded.contains(&StorageKey::Int(9)));
381        assert!(decoded.contains(&StorageKey::Uint(10)));
382    }
383
384    #[test]
385    fn raw_index_entry_rejects_empty() {
386        let bytes = vec![0, 0, 0, 0];
387        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
388        assert!(matches!(
389            raw.decode_keys(),
390            Err(IndexEntryCorruption::EmptyEntry)
391        ));
392    }
393
394    #[test]
395    fn raw_index_entry_rejects_truncated_payload() {
396        let key = StorageKey::Int(1);
397        let mut bytes = Vec::new();
398        bytes.extend_from_slice(&1u32.to_be_bytes());
399        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
400        bytes.truncate(bytes.len() - 1);
401
402        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
403        assert!(matches!(
404            raw.decode_keys(),
405            Err(IndexEntryCorruption::LengthMismatch)
406        ));
407    }
408
409    #[test]
410    fn raw_index_entry_rejects_oversized_payload() {
411        let bytes = vec![0u8; MAX_INDEX_ENTRY_BYTES as usize + 1];
412        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
413        assert!(matches!(
414            raw.decode_keys(),
415            Err(IndexEntryCorruption::TooLarge { .. })
416        ));
417    }
418
419    #[test]
420    #[expect(clippy::cast_possible_truncation)]
421    fn raw_index_entry_rejects_corrupted_length_field() {
422        let count = (MAX_INDEX_ENTRY_KEYS + 1) as u32;
423        let raw = RawIndexEntry::from_bytes(Cow::Owned(count.to_be_bytes().to_vec()));
424        assert!(matches!(
425            raw.decode_keys(),
426            Err(IndexEntryCorruption::TooManyKeys { .. })
427        ));
428    }
429
430    #[test]
431    fn raw_index_entry_rejects_duplicate_keys() {
432        let key = StorageKey::Int(1);
433        let mut bytes = Vec::new();
434        bytes.extend_from_slice(&2u32.to_be_bytes());
435        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
436        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
437
438        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
439        assert!(matches!(
440            raw.decode_keys(),
441            Err(IndexEntryCorruption::DuplicateKey)
442        ));
443    }
444
445    #[test]
446    #[expect(clippy::cast_possible_truncation)]
447    fn raw_index_entry_decode_fuzz_does_not_panic() {
448        const RUNS: u64 = 1_000;
449        const MAX_LEN: usize = 256;
450
451        let mut seed = 0xA5A5_5A5A_u64;
452        for _ in 0..RUNS {
453            seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
454            let len = (seed as usize) % MAX_LEN;
455
456            let mut bytes = vec![0u8; len];
457            for byte in &mut bytes {
458                seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
459                *byte = (seed >> 24) as u8;
460            }
461
462            let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
463            let _ = raw.decode_keys();
464        }
465    }
466}