Skip to main content

icydb_core/db/index/
entry.rs

1use crate::{
2    db::{
3        index::RawIndexKey,
4        store::{StorageKey, StorageKeyEncodeError},
5    },
6    traits::{EntityKind, FieldValue, Storable},
7    value::Value,
8};
9use canic_cdk::structures::storable::Bound;
10use std::{borrow::Cow, collections::BTreeSet};
11use thiserror::Error as ThisError;
12
13///
14/// Constants
15///
16
17const INDEX_ENTRY_LEN_BYTES: usize = 4;
18pub const MAX_INDEX_ENTRY_KEYS: usize = 65_535;
19
20#[expect(clippy::cast_possible_truncation)]
21pub const MAX_INDEX_ENTRY_BYTES: u32 =
22    (INDEX_ENTRY_LEN_BYTES + (MAX_INDEX_ENTRY_KEYS * StorageKey::STORED_SIZE_USIZE)) as u32;
23
24///
25/// IndexEntryCorruption
26///
27
28#[derive(Debug, ThisError)]
29pub enum IndexEntryCorruption {
30    #[error("index entry exceeds max size")]
31    TooLarge { len: usize },
32
33    #[error("index entry missing key count")]
34    MissingLength,
35
36    #[error("index entry key count exceeds limit")]
37    TooManyKeys { count: usize },
38
39    #[error("index entry length does not match key count")]
40    LengthMismatch,
41
42    #[error("index entry contains invalid key bytes")]
43    InvalidKey,
44
45    #[error("index entry contains duplicate key")]
46    DuplicateKey,
47
48    #[error("index entry contains zero keys")]
49    EmptyEntry,
50
51    #[error("unique index entry contains {keys} keys")]
52    NonUniqueEntry { keys: usize },
53
54    #[error("index entry missing expected entity key: {entity_key:?} (index {index_key:?})")]
55    MissingKey {
56        index_key: Box<RawIndexKey>,
57        entity_key: Value,
58    },
59
60    #[error("index entry points at key {indexed_key:?} but stored row key is {row_key:?}")]
61    RowKeyMismatch {
62        indexed_key: Box<Value>,
63        row_key: Box<Value>,
64    },
65}
66
67impl IndexEntryCorruption {
68    #[must_use]
69    pub fn missing_key(index_key: RawIndexKey, entity_key: impl FieldValue) -> Self {
70        Self::MissingKey {
71            index_key: Box::new(index_key),
72            entity_key: entity_key.to_value(),
73        }
74    }
75}
76
77///
78/// IndexEntryEncodeError
79///
80
81#[derive(Debug, ThisError)]
82pub enum IndexEntryEncodeError {
83    #[error("index entry exceeds max keys: {keys} (limit {MAX_INDEX_ENTRY_KEYS})")]
84    TooManyKeys { keys: usize },
85
86    #[error("index entry key encoding failed: {0}")]
87    KeyEncoding(#[from] StorageKeyEncodeError),
88}
89
90impl IndexEntryEncodeError {
91    #[must_use]
92    pub const fn keys(&self) -> usize {
93        match self {
94            Self::TooManyKeys { keys } => *keys,
95            Self::KeyEncoding(_) => 0,
96        }
97    }
98}
99
100///
101/// IndexEntry
102///
103
104#[derive(Clone, Debug)]
105pub struct IndexEntry<E: EntityKind> {
106    ids: BTreeSet<E::Key>,
107}
108
109impl<E: EntityKind> IndexEntry<E> {
110    #[must_use]
111    pub fn new(id: E::Key) -> Self {
112        let mut ids = BTreeSet::new();
113        ids.insert(id);
114        Self { ids }
115    }
116
117    pub fn insert(&mut self, id: E::Key) {
118        self.ids.insert(id);
119    }
120
121    pub fn remove(&mut self, id: E::Key) {
122        self.ids.remove(&id);
123    }
124
125    #[must_use]
126    pub fn contains(&self, id: E::Key) -> bool {
127        self.ids.contains(&id)
128    }
129
130    #[must_use]
131    pub fn is_empty(&self) -> bool {
132        self.ids.is_empty()
133    }
134
135    #[must_use]
136    pub fn len(&self) -> usize {
137        self.ids.len()
138    }
139
140    pub fn iter_ids(&self) -> impl Iterator<Item = E::Key> + '_ {
141        self.ids.iter().copied()
142    }
143
144    #[must_use]
145    pub fn single_id(&self) -> Option<E::Key> {
146        if self.ids.len() == 1 {
147            self.ids.iter().copied().next()
148        } else {
149            None
150        }
151    }
152
153    pub fn try_to_raw(&self) -> Result<RawIndexEntry, IndexEntryEncodeError> {
154        RawIndexEntry::try_from_entry(self)
155    }
156}
157
158///
159/// RawIndexEntry
160///
161
162#[derive(Clone, Debug, Eq, PartialEq)]
163pub struct RawIndexEntry(Vec<u8>);
164
165impl RawIndexEntry {
166    pub fn try_from_entry<E: EntityKind>(
167        entry: &IndexEntry<E>,
168    ) -> Result<Self, IndexEntryEncodeError> {
169        let mut keys = Vec::with_capacity(entry.ids.len());
170        for id in &entry.ids {
171            let value = id.to_value();
172            let key = StorageKey::try_from_value(&value)?;
173            keys.push(key);
174        }
175
176        Self::try_from_keys(keys)
177    }
178
179    pub fn try_decode<E: EntityKind>(&self) -> Result<IndexEntry<E>, IndexEntryCorruption> {
180        let storage_keys = self.decode_keys()?;
181        let mut ids = BTreeSet::new();
182
183        for key in storage_keys {
184            let value = key.as_value();
185            let Some(id) = <E::Key as FieldValue>::from_value(&value) else {
186                return Err(IndexEntryCorruption::InvalidKey);
187            };
188            ids.insert(id);
189        }
190
191        if ids.is_empty() {
192            return Err(IndexEntryCorruption::EmptyEntry);
193        }
194
195        Ok(IndexEntry { ids })
196    }
197
198    pub fn try_from_keys<I>(keys: I) -> Result<Self, IndexEntryEncodeError>
199    where
200        I: IntoIterator<Item = StorageKey>,
201    {
202        let keys: Vec<StorageKey> = keys.into_iter().collect();
203        let count = keys.len();
204
205        if count > MAX_INDEX_ENTRY_KEYS {
206            return Err(IndexEntryEncodeError::TooManyKeys { keys: count });
207        }
208
209        let mut out =
210            Vec::with_capacity(INDEX_ENTRY_LEN_BYTES + count * StorageKey::STORED_SIZE_USIZE);
211
212        let count_u32 =
213            u32::try_from(count).map_err(|_| IndexEntryEncodeError::TooManyKeys { keys: count })?;
214        out.extend_from_slice(&count_u32.to_be_bytes());
215
216        for sk in keys {
217            out.extend_from_slice(&sk.to_bytes()?);
218        }
219
220        Ok(Self(out))
221    }
222
223    pub fn decode_keys(&self) -> Result<Vec<StorageKey>, IndexEntryCorruption> {
224        self.validate()?;
225
226        let bytes = self.0.as_slice();
227
228        let mut len_buf = [0u8; INDEX_ENTRY_LEN_BYTES];
229        len_buf.copy_from_slice(&bytes[..INDEX_ENTRY_LEN_BYTES]);
230        let count = u32::from_be_bytes(len_buf) as usize;
231
232        let mut keys = Vec::with_capacity(count);
233        let mut offset = INDEX_ENTRY_LEN_BYTES;
234
235        for _ in 0..count {
236            let end = offset + StorageKey::STORED_SIZE_USIZE;
237            let sk = StorageKey::try_from(&bytes[offset..end])
238                .map_err(|_| IndexEntryCorruption::InvalidKey)?;
239
240            keys.push(sk);
241            offset = end;
242        }
243
244        Ok(keys)
245    }
246
247    /// Validate the raw index entry structure without binding to an entity.
248    pub fn validate(&self) -> Result<(), IndexEntryCorruption> {
249        let bytes = self.0.as_slice();
250
251        if bytes.len() > MAX_INDEX_ENTRY_BYTES as usize {
252            return Err(IndexEntryCorruption::TooLarge { len: bytes.len() });
253        }
254        if bytes.len() < INDEX_ENTRY_LEN_BYTES {
255            return Err(IndexEntryCorruption::MissingLength);
256        }
257
258        let mut len_buf = [0u8; INDEX_ENTRY_LEN_BYTES];
259        len_buf.copy_from_slice(&bytes[..INDEX_ENTRY_LEN_BYTES]);
260        let count = u32::from_be_bytes(len_buf) as usize;
261
262        if count == 0 {
263            return Err(IndexEntryCorruption::EmptyEntry);
264        }
265        if count > MAX_INDEX_ENTRY_KEYS {
266            return Err(IndexEntryCorruption::TooManyKeys { count });
267        }
268
269        let expected = INDEX_ENTRY_LEN_BYTES
270            + count
271                .checked_mul(StorageKey::STORED_SIZE_USIZE)
272                .ok_or(IndexEntryCorruption::LengthMismatch)?;
273
274        if bytes.len() != expected {
275            return Err(IndexEntryCorruption::LengthMismatch);
276        }
277
278        // Validate each StorageKey structurally and detect duplicates
279        let mut keys = BTreeSet::new();
280        let mut offset = INDEX_ENTRY_LEN_BYTES;
281
282        for _ in 0..count {
283            let end = offset + StorageKey::STORED_SIZE_USIZE;
284
285            let sk = StorageKey::try_from(&bytes[offset..end])
286                .map_err(|_| IndexEntryCorruption::InvalidKey)?;
287
288            if !keys.insert(sk) {
289                return Err(IndexEntryCorruption::DuplicateKey);
290            }
291
292            offset = end;
293        }
294
295        Ok(())
296    }
297
298    #[must_use]
299    pub fn as_bytes(&self) -> &[u8] {
300        &self.0
301    }
302
303    #[must_use]
304    pub const fn len(&self) -> usize {
305        self.0.len()
306    }
307
308    #[must_use]
309    pub const fn is_empty(&self) -> bool {
310        self.0.is_empty()
311    }
312}
313
314impl<E: EntityKind> TryFrom<&IndexEntry<E>> for RawIndexEntry {
315    type Error = IndexEntryEncodeError;
316
317    fn try_from(entry: &IndexEntry<E>) -> Result<Self, Self::Error> {
318        Self::try_from_entry(entry)
319    }
320}
321
322impl Storable for RawIndexEntry {
323    fn to_bytes(&self) -> Cow<'_, [u8]> {
324        Cow::Borrowed(&self.0)
325    }
326
327    fn from_bytes(bytes: Cow<'_, [u8]>) -> Self {
328        Self(bytes.into_owned())
329    }
330
331    fn into_bytes(self) -> Vec<u8> {
332        self.0
333    }
334
335    const BOUND: Bound = Bound::Bounded {
336        max_size: MAX_INDEX_ENTRY_BYTES,
337        is_fixed_size: false,
338    };
339}
340
341///
342/// TESTS
343///
344
345#[cfg(test)]
346mod tests {
347    use super::{IndexEntryCorruption, MAX_INDEX_ENTRY_BYTES, MAX_INDEX_ENTRY_KEYS, RawIndexEntry};
348    use crate::{db::store::StorageKey, traits::Storable};
349    use std::borrow::Cow;
350
351    #[test]
352    fn raw_index_entry_round_trip() {
353        let keys = vec![StorageKey::Int(1), StorageKey::Uint(2)];
354
355        let raw = RawIndexEntry::try_from_keys(keys.clone()).expect("encode index entry");
356        let decoded = raw.decode_keys().expect("decode index entry");
357
358        assert_eq!(decoded.len(), keys.len());
359        assert!(decoded.contains(&StorageKey::Int(1)));
360        assert!(decoded.contains(&StorageKey::Uint(2)));
361    }
362
363    #[test]
364    fn raw_index_entry_roundtrip_via_bytes() {
365        let keys = vec![StorageKey::Int(9), StorageKey::Uint(10)];
366
367        let raw = RawIndexEntry::try_from_keys(keys.clone()).expect("encode index entry");
368        let encoded = Storable::to_bytes(&raw);
369        let raw = RawIndexEntry::from_bytes(encoded);
370        let decoded = raw.decode_keys().expect("decode index entry");
371
372        assert_eq!(decoded.len(), keys.len());
373        assert!(decoded.contains(&StorageKey::Int(9)));
374        assert!(decoded.contains(&StorageKey::Uint(10)));
375    }
376
377    #[test]
378    fn raw_index_entry_rejects_empty() {
379        let bytes = vec![0, 0, 0, 0];
380        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
381        assert!(matches!(
382            raw.decode_keys(),
383            Err(IndexEntryCorruption::EmptyEntry)
384        ));
385    }
386
387    #[test]
388    fn raw_index_entry_rejects_truncated_payload() {
389        let key = StorageKey::Int(1);
390        let mut bytes = Vec::new();
391        bytes.extend_from_slice(&1u32.to_be_bytes());
392        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
393        bytes.truncate(bytes.len() - 1);
394
395        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
396        assert!(matches!(
397            raw.decode_keys(),
398            Err(IndexEntryCorruption::LengthMismatch)
399        ));
400    }
401
402    #[test]
403    fn raw_index_entry_rejects_oversized_payload() {
404        let bytes = vec![0u8; MAX_INDEX_ENTRY_BYTES as usize + 1];
405        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
406        assert!(matches!(
407            raw.decode_keys(),
408            Err(IndexEntryCorruption::TooLarge { .. })
409        ));
410    }
411
412    #[test]
413    #[expect(clippy::cast_possible_truncation)]
414    fn raw_index_entry_rejects_corrupted_length_field() {
415        let count = (MAX_INDEX_ENTRY_KEYS + 1) as u32;
416        let raw = RawIndexEntry::from_bytes(Cow::Owned(count.to_be_bytes().to_vec()));
417        assert!(matches!(
418            raw.decode_keys(),
419            Err(IndexEntryCorruption::TooManyKeys { .. })
420        ));
421    }
422
423    #[test]
424    fn raw_index_entry_rejects_duplicate_keys() {
425        let key = StorageKey::Int(1);
426        let mut bytes = Vec::new();
427        bytes.extend_from_slice(&2u32.to_be_bytes());
428        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
429        bytes.extend_from_slice(&key.to_bytes().expect("encode"));
430
431        let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
432        assert!(matches!(
433            raw.decode_keys(),
434            Err(IndexEntryCorruption::DuplicateKey)
435        ));
436    }
437
438    #[test]
439    #[expect(clippy::cast_possible_truncation)]
440    fn raw_index_entry_decode_fuzz_does_not_panic() {
441        const RUNS: u64 = 1_000;
442        const MAX_LEN: usize = 256;
443
444        let mut seed = 0xA5A5_5A5A_u64;
445        for _ in 0..RUNS {
446            seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
447            let len = (seed as usize) % MAX_LEN;
448
449            let mut bytes = vec![0u8; len];
450            for byte in &mut bytes {
451                seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
452                *byte = (seed >> 24) as u8;
453            }
454
455            let raw = RawIndexEntry::from_bytes(Cow::Owned(bytes));
456            let _ = raw.decode_keys();
457        }
458    }
459}