icydb_core/db/store/
index.rs

1use crate::{
2    Error, IndexSpec, Key, MAX_INDEX_FIELDS, Value,
3    db::{
4        executor::ExecutorError,
5        store::{DataKey, StoreRegistry},
6    },
7    obs::metrics,
8    traits::EntityKind,
9};
10use candid::CandidType;
11use canic::{
12    cdk::structures::{BTreeMap, DefaultMemoryImpl, memory::VirtualMemory},
13    impl_storable_bounded, impl_storable_unbounded,
14    utils::hash::hash_u64,
15};
16use derive_more::{Deref, DerefMut, Display};
17use serde::{Deserialize, Serialize};
18use std::{
19    collections::HashSet,
20    fmt::{self, Display},
21};
22
23///
24/// IndexStoreRegistry
25///
26
27#[derive(Deref, DerefMut)]
28pub struct IndexStoreRegistry(StoreRegistry<IndexStore>);
29
30impl IndexStoreRegistry {
31    #[must_use]
32    #[allow(clippy::new_without_default)]
33    /// Create an empty index store registry.
34    pub fn new() -> Self {
35        Self(StoreRegistry::new())
36    }
37}
38
39///
40/// IndexStore
41///
42
43#[derive(Deref, DerefMut)]
44pub struct IndexStore(BTreeMap<IndexKey, IndexEntry, VirtualMemory<DefaultMemoryImpl>>);
45
46impl IndexStore {
47    #[must_use]
48    /// Initialize an index store with the provided backing memory.
49    pub fn init(memory: VirtualMemory<DefaultMemoryImpl>) -> Self {
50        Self(BTreeMap::init(memory))
51    }
52
53    /// Inserts the given entity into the index defined by `I`.
54    /// - If `I::UNIQUE`, insertion will fail if a conflicting entry already exists.
55    /// - If the entity is missing required fields for this index, insertion is skipped.
56    /// - Insert an entity's index entry, enforcing uniqueness where required.
57    pub fn insert_index_entry<E: EntityKind>(
58        &mut self,
59        entity: &E,
60        index: &IndexSpec,
61    ) -> Result<(), Error> {
62        // Skip if index key can't be built (e.g. optional fields missing)
63        let Some(index_key) = IndexKey::new(entity, index) else {
64            return Ok(());
65        };
66        let key = entity.key();
67
68        if let Some(mut existing) = self.get(&index_key) {
69            if index.unique {
70                // Already present → skip redundant write
71                if existing.contains(&key) {
72                    return Ok(());
73                }
74
75                // Any different existing key violates UNIQUE
76                if !existing.is_empty() {
77                    metrics::with_state_mut(|m| metrics::record_unique_violation_for::<E>(m));
78
79                    return Err(ExecutorError::index_violation(E::PATH, index.fields))?;
80                }
81
82                self.insert(index_key.clone(), IndexEntry::new(index.fields, key));
83            } else {
84                existing.insert_key(key); // <-- add to the set
85                self.insert(index_key.clone(), existing);
86            }
87        } else {
88            self.insert(index_key, IndexEntry::new(index.fields, key));
89        }
90        metrics::with_state_mut(|m| {
91            m.ops.index_inserts += 1;
92            let entry = m.entities.entry(E::PATH.to_string()).or_default();
93            entry.index_inserts = entry.index_inserts.saturating_add(1);
94        });
95
96        Ok(())
97    }
98
99    // remove_index_entry
100    /// Remove an entity's entry for the given index if present.
101    pub fn remove_index_entry<E: EntityKind>(&mut self, entity: &E, index: &IndexSpec) {
102        // Skip if index key can't be built (e.g. optional fields missing)
103        let Some(index_key) = IndexKey::new(entity, index) else {
104            return;
105        };
106
107        if let Some(mut existing) = self.get(&index_key) {
108            existing.remove_key(&entity.key()); // remove from the set
109
110            if existing.is_empty() {
111                self.remove(&index_key);
112            } else {
113                // Move the updated entry back without cloning
114                self.insert(index_key, existing);
115            }
116            metrics::with_state_mut(|m| {
117                m.ops.index_removes += 1;
118                let entry = m.entities.entry(E::PATH.to_string()).or_default();
119                entry.index_removes = entry.index_removes.saturating_add(1);
120            });
121        }
122    }
123
124    #[must_use]
125    /// Resolve data keys for a given index prefix.
126    pub fn resolve_data_values<E: EntityKind>(
127        &self,
128        index: &IndexSpec,
129        prefix: &[Value],
130    ) -> Vec<DataKey> {
131        let mut out = Vec::new();
132
133        for (_, entry) in self.iter_with_hashed_prefix::<E>(index, prefix) {
134            out.extend(entry.keys.iter().map(|&k| DataKey::new::<E>(k)));
135        }
136
137        out
138    }
139
140    /// Sum of bytes used by all index entries.
141    pub fn memory_bytes(&self) -> u64 {
142        self.iter()
143            .map(|entry| u64::from(IndexKey::STORABLE_MAX_SIZE) + entry.value().len() as u64)
144            .sum()
145    }
146
147    /// Internal: iterate entries for this index whose `hashed_values` start with the hashed `prefix`.
148    /// Uses a bounded range for efficient scanning.
149    fn iter_with_hashed_prefix<E: EntityKind>(
150        &self,
151        index: &IndexSpec,
152        prefix: &[Value],
153    ) -> impl Iterator<Item = (IndexKey, IndexEntry)> {
154        let index_id = IndexId::new::<E>(index);
155        let hashed_prefix_opt = Self::index_fingerprints(prefix); // Option<Vec<[u8;16]>>
156
157        // Compute start..end bounds. If the prefix isn't indexable, construct an empty range
158        // (same iterator type) by using identical start==end under the same index_id.
159        let (start_key, end_key) = if let Some(hp) = hashed_prefix_opt {
160            IndexKey::bounds_for_prefix(index_id, hp)
161        } else {
162            (
163                IndexKey {
164                    index_id,
165                    hashed_values: Vec::new(),
166                },
167                IndexKey {
168                    index_id,
169                    hashed_values: Vec::new(),
170                },
171            )
172        };
173
174        self.range(start_key..end_key)
175            .map(|entry| (entry.key().clone(), entry.value()))
176    }
177
178    fn index_fingerprints(values: &[Value]) -> Option<Vec<[u8; 16]>> {
179        // collects to Option<Vec<_>>: None if any element was non-indexable
180        values.iter().map(Value::to_index_fingerprint).collect()
181    }
182}
183
184///
185/// IndexId
186///
187
188#[derive(
189    Clone,
190    Debug,
191    Display,
192    Copy,
193    PartialEq,
194    Eq,
195    Hash,
196    PartialOrd,
197    Ord,
198    CandidType,
199    Serialize,
200    Deserialize,
201)]
202pub struct IndexId(u64);
203
204impl IndexId {
205    #[must_use]
206    /// Deterministic index identifier derived from entity path and field list.
207    pub fn new<E: EntityKind>(index: &IndexSpec) -> Self {
208        Self::from_path_and_fields(E::PATH, index.fields)
209    }
210
211    fn from_path_and_fields(path: &str, fields: &[&str]) -> Self {
212        let cap = path.len() + fields.iter().map(|f| f.len() + 1).sum::<usize>();
213        let mut buffer = Vec::with_capacity(cap);
214
215        // much more efficient than format
216        buffer.extend_from_slice(path.as_bytes());
217        for field in fields {
218            buffer.extend_from_slice(field.as_bytes());
219            buffer.extend_from_slice(b"|");
220        }
221
222        Self(hash_u64(&buffer))
223    }
224
225    #[must_use]
226    /// Worst-case index identifier used for sizing tests.
227    pub fn max_storable() -> Self {
228        Self::from_path_and_fields(
229            "path::to::long::entity::name::Entity",
230            &[
231                "long_field_one",
232                "long_field_two",
233                "long_field_three",
234                "long_field_four",
235            ],
236        )
237    }
238}
239
240///
241/// IndexKey
242///
243
244#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
245pub struct IndexKey {
246    pub index_id: IndexId,
247    pub hashed_values: Vec<[u8; 16]>,
248}
249
250impl IndexKey {
251    // Sized with headroom for worst‑case hashed key payload
252    pub const STORABLE_MAX_SIZE: u32 = 180;
253
254    #[must_use]
255    /// Build an index key from an entity and spec, returning None if a component is missing/non-indexable.
256    pub fn new<E: EntityKind>(entity: &E, index: &IndexSpec) -> Option<Self> {
257        let mut hashed_values = Vec::<[u8; 16]>::with_capacity(index.fields.len());
258
259        // get each value and convert to key
260        for field in index.fields {
261            let value = entity.get_value(field)?;
262            let fp = value.to_index_fingerprint()?; // bail if any component is non-indexable
263
264            hashed_values.push(fp);
265        }
266
267        Some(Self {
268            index_id: IndexId::new::<E>(index),
269            hashed_values,
270        })
271    }
272
273    // max_storable
274    #[must_use]
275    /// Largest representable index key (for sizing).
276    pub fn max_storable() -> Self {
277        Self {
278            index_id: IndexId::max_storable(),
279            hashed_values: (0..MAX_INDEX_FIELDS).map(|_| [u8::MAX; 16]).collect(),
280        }
281    }
282
283    /// Compute the bounded start..end keys for a given hashed prefix under an index id.
284    /// End is exclusive and created by appending a single 0xFF..0xFF block to the prefix.
285    #[must_use]
286    /// The returned range can be used with `BTreeMap::range`.
287    pub fn bounds_for_prefix(index_id: IndexId, mut prefix: Vec<[u8; 16]>) -> (Self, Self) {
288        let start = Self {
289            index_id,
290            hashed_values: prefix.clone(),
291        };
292        prefix.push([0xFF; 16]);
293        let end = Self {
294            index_id,
295            hashed_values: prefix,
296        };
297        (start, end)
298    }
299}
300
301impl Display for IndexKey {
302    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
303        write!(
304            f,
305            "id: {}, values: {}",
306            self.index_id,
307            self.hashed_values.len()
308        )
309    }
310}
311
312impl_storable_bounded!(IndexKey, IndexKey::STORABLE_MAX_SIZE, false);
313
314///
315/// IndexEntry
316///
317
318#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
319pub struct IndexEntry {
320    fields: Vec<String>,
321    keys: HashSet<Key>,
322}
323
324impl IndexEntry {
325    #[must_use]
326    /// Create an index entry with the initial key.
327    pub fn new(fields: &[&str], key: Key) -> Self {
328        let mut key_set = HashSet::with_capacity(1);
329        key_set.insert(key);
330
331        Self {
332            fields: fields.iter().map(ToString::to_string).collect(),
333            keys: key_set,
334        }
335    }
336
337    /// Insert a key into the entry's key set.
338    pub fn insert_key(&mut self, key: Key) {
339        let _ = self.keys.insert(key);
340    }
341
342    /// Removes the key from the set.
343    pub fn remove_key(&mut self, key: &Key) {
344        let _ = self.keys.remove(key);
345    }
346
347    /// Checks if the set contains the key.
348    #[must_use]
349    pub fn contains(&self, key: &Key) -> bool {
350        self.keys.contains(key)
351    }
352
353    /// Returns `true` if the set is empty.
354    #[must_use]
355    pub fn is_empty(&self) -> bool {
356        self.keys.is_empty()
357    }
358
359    /// Returns number of keys in the entry.
360    #[must_use]
361    pub fn len(&self) -> usize {
362        self.keys.len()
363    }
364
365    /// Returns keys as a sorted `Vec<Key>` (useful for serialization/debug).
366    #[must_use]
367    pub fn to_sorted_vec(&self) -> Vec<Key> {
368        let mut keys: Vec<_> = self.keys.iter().copied().collect();
369        keys.sort_unstable(); // uses Ord, fast
370        keys
371    }
372}
373
374impl_storable_unbounded!(IndexEntry);
375
376///
377/// TESTS
378///
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383    use crate::traits::*;
384
385    #[test]
386    fn index_key_max_size_is_bounded() {
387        let index_key = IndexKey::max_storable();
388        let size = Storable::to_bytes(&index_key).len();
389
390        assert!(
391            size <= IndexKey::STORABLE_MAX_SIZE as usize,
392            "serialized IndexKey too large: got {size} bytes (limit {})",
393            IndexKey::STORABLE_MAX_SIZE
394        );
395    }
396
397    #[test]
398    fn index_entry_round_trip() {
399        let original = IndexEntry::new(&["a", "b"], Key::from(1u64));
400        let encoded = Storable::to_bytes(&original);
401        let decoded = IndexEntry::from_bytes(encoded);
402
403        assert_eq!(original.fields, decoded.fields);
404        assert_eq!(original.to_sorted_vec(), decoded.to_sorted_vec());
405    }
406}