milli_core/update/facet/
incremental.rs

1use std::fs::File;
2use std::io::BufReader;
3
4use grenad::Merger;
5use heed::types::{Bytes, DecodeIgnore};
6use heed::{BytesDecode, Error, RoTxn, RwTxn};
7use obkv::KvReader;
8use roaring::RoaringBitmap;
9
10use crate::facet::FacetType;
11use crate::heed_codec::facet::{
12    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
13};
14use crate::heed_codec::BytesRefCodec;
15use crate::search::facet::get_highest_level;
16use crate::update::del_add::DelAdd;
17use crate::update::index_documents::valid_lmdb_key;
18use crate::update::MergeDeladdCboRoaringBitmaps;
19use crate::{CboRoaringBitmapCodec, Index, Result};
20
21/// Enum used as a return value for the facet incremental indexing.
22///
23/// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
24///   an effect on the number of keys in that level. Therefore, it did not increase the number of children
25///   of the parent node.
26///
27/// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
28///   in the addition of a new key in that level, and that therefore the number of children
29///   of the parent node should be incremented.
30///
31/// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
32///   number of keys in the level. For example, removing a document id from the facet value `3` could
33///   cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
34///   entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
35///   then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
36///
37/// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
38///   bounds of the keys of the level. For example, removing a document id from the facet value
39///   `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
40///   in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
41///   In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
42///   its left bound as well.
43///
44/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`.
45///   This case is reachable when a document id is removed from a sub-level node but is still present in another one.
46///   For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0`
47///   but should remain in the group node [1..4] in `level 1`.
48enum ModificationResult {
49    InPlace,
50    Expand,
51    Insert,
52    Reduce { next: Option<Vec<u8>> },
53    Remove { next: Option<Vec<u8>> },
54    Nothing,
55}
56
57/// Algorithm to incrementally insert and delete elememts into the
58/// `facet_id_(string/f64)_docids` databases.
59pub struct FacetsUpdateIncremental {
60    inner: FacetsUpdateIncrementalInner,
61    delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
62}
63
64impl FacetsUpdateIncremental {
65    pub fn new(
66        index: &Index,
67        facet_type: FacetType,
68        delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
69        group_size: u8,
70        min_level_size: u8,
71        max_group_size: u8,
72    ) -> Self {
73        FacetsUpdateIncremental {
74            inner: FacetsUpdateIncrementalInner {
75                db: match facet_type {
76                    FacetType::String => index
77                        .facet_id_string_docids
78                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
79                    FacetType::Number => index
80                        .facet_id_f64_docids
81                        .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
82                },
83                group_size,
84                max_group_size,
85                min_level_size,
86            },
87            delta_data,
88        }
89    }
90
91    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
92    pub fn execute(self, wtxn: &mut RwTxn<'_>) -> crate::Result<()> {
93        let mut current_field_id = None;
94        let mut facet_level_may_be_updated = false;
95        let mut iter = self.delta_data.into_stream_merger_iter()?;
96        while let Some((key, value)) = iter.next()? {
97            if !valid_lmdb_key(key) {
98                continue;
99            }
100
101            let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
102                .map_err(heed::Error::Encoding)?;
103
104            if facet_level_may_be_updated && current_field_id.is_some_and(|fid| fid != key.field_id)
105            {
106                // Only add or remove a level after making all the field modifications.
107                self.inner.add_or_delete_level(wtxn, current_field_id.unwrap())?;
108                facet_level_may_be_updated = false;
109            }
110            current_field_id = Some(key.field_id);
111
112            let value = KvReader::from_slice(value);
113            let docids_to_delete = value
114                .get(DelAdd::Deletion)
115                .map(CboRoaringBitmapCodec::bytes_decode)
116                .map(|o| o.map_err(heed::Error::Encoding))
117                .transpose()?;
118
119            let docids_to_add = value
120                .get(DelAdd::Addition)
121                .map(CboRoaringBitmapCodec::bytes_decode)
122                .map(|o| o.map_err(heed::Error::Encoding))
123                .transpose()?;
124
125            let level_size_changed = self.inner.modify(
126                wtxn,
127                key.field_id,
128                key.left_bound,
129                docids_to_add.as_ref(),
130                docids_to_delete.as_ref(),
131            )?;
132
133            if level_size_changed {
134                // if a node has been added or removed from the highest level,
135                // we may have to update the facet level.
136                facet_level_may_be_updated = true;
137            }
138        }
139
140        if let Some(field_id) = current_field_id {
141            if facet_level_may_be_updated {
142                self.inner.add_or_delete_level(wtxn, field_id)?;
143            }
144        }
145
146        Ok(())
147    }
148}
149
150/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
151pub struct FacetsUpdateIncrementalInner {
152    pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
153    pub group_size: u8,
154    pub min_level_size: u8,
155    pub max_group_size: u8,
156}
157impl FacetsUpdateIncrementalInner {
158    /// Find the `FacetGroupKey`/`FacetGroupValue` in the database that
159    /// should be used to insert the new `facet_value` for the given `field_id` and `level`
160    /// where `level` must be strictly greater than 0.
161    ///
162    /// For example, when inserting the facet value `4`, there are two possibilities:
163    ///
164    /// 1. We find a key whose lower bound is 3 followed by a key whose lower bound is 6. Therefore,
165    ///    we know that the implicit range of the first key is 3..6, which contains 4.
166    ///    So the new facet value belongs in that first key/value pair.
167    ///
168    /// 2. The first key of the level has a lower bound of `5`. We return this key/value pair
169    ///    but will need to change the lowerbound of this key to `4` in order to insert this facet value.
170    fn find_insertion_key_value(
171        &self,
172        field_id: u16,
173        level: u8,
174        facet_value: &[u8],
175        txn: &RoTxn<'_>,
176    ) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
177        assert!(level > 0);
178        match self.db.get_lower_than_or_equal_to(
179            txn,
180            &FacetGroupKey { field_id, level, left_bound: facet_value },
181        )? {
182            Some((key, value)) => {
183                if key.level != level {
184                    let mut prefix = vec![];
185                    prefix.extend_from_slice(&field_id.to_be_bytes());
186                    prefix.push(level);
187
188                    let mut iter = self
189                        .db
190                        .remap_types::<Bytes, FacetGroupValueCodec>()
191                        .prefix_iter(txn, prefix.as_slice())?;
192                    let (key_bytes, value) = iter.next().unwrap()?;
193                    Ok((
194                        FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
195                            .map_err(Error::Encoding)?
196                            .into_owned(),
197                        value,
198                    ))
199                } else {
200                    Ok((key.into_owned(), value))
201                }
202            }
203            None => {
204                // We checked that the level is > 0
205                // Since all keys of level 1 are greater than those of level 0,
206                // we are guaranteed that db.get_lower_than_or_equal_to(key) exists
207                panic!()
208            }
209        }
210    }
211
212    /// Insert the given facet value and corresponding document ids in the level 0 of the database
213    ///
214    /// ## Return
215    /// See documentation of `insert_in_level`
216    fn modify_in_level_0(
217        &self,
218        txn: &mut RwTxn<'_>,
219        field_id: u16,
220        facet_value: &[u8],
221        add_docids: Option<&RoaringBitmap>,
222        del_docids: Option<&RoaringBitmap>,
223    ) -> Result<ModificationResult> {
224        let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
225
226        let old_value = self.db.get(txn, &key)?;
227        match (old_value, add_docids, del_docids) {
228            // Addition + deletion on an existing value
229            (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), Some(del_docids)) => {
230                let value = FacetGroupValue { bitmap: (bitmap - del_docids) | add_docids, size: 1 };
231                self.db.put(txn, &key, &value)?;
232                Ok(ModificationResult::InPlace)
233            }
234            // Addition on an existing value
235            (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), None) => {
236                let value = FacetGroupValue { bitmap: bitmap | add_docids, size: 1 };
237                self.db.put(txn, &key, &value)?;
238                Ok(ModificationResult::InPlace)
239            }
240            // Addition of a new value (ignore deletion)
241            (None, Some(add_docids), _) => {
242                let value = FacetGroupValue { bitmap: add_docids.clone(), size: 1 };
243                self.db.put(txn, &key, &value)?;
244                Ok(ModificationResult::Insert)
245            }
246            // Deletion on an existing value, fully delete the key if the resulted value is empty.
247            (Some(FacetGroupValue { mut bitmap, .. }), None, Some(del_docids)) => {
248                bitmap -= del_docids;
249                if bitmap.is_empty() {
250                    // Full deletion
251                    let mut next_key = None;
252                    if let Some((next, _)) =
253                        self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
254                    {
255                        if next.field_id == field_id && next.level == 0 {
256                            next_key = Some(next.left_bound.to_vec());
257                        }
258                    }
259                    self.db.delete(txn, &key)?;
260                    Ok(ModificationResult::Remove { next: next_key })
261                } else {
262                    // Partial deletion
263                    let value = FacetGroupValue { bitmap, size: 1 };
264                    self.db.put(txn, &key, &value)?;
265                    Ok(ModificationResult::InPlace)
266                }
267            }
268            // Otherwise do nothing (None + no addition + deletion == Some + no addition + no deletion == Nothing),
269            // may be unreachable at some point.
270            (None, None, _) | (Some(_), None, None) => Ok(ModificationResult::Nothing),
271        }
272    }
273
274    /// Split a level node into two balanced nodes.
275    ///
276    /// # Return
277    /// Returns `ModificationResult::Insert` if the split is successful.
278    fn split_group(
279        &self,
280        txn: &mut RwTxn<'_>,
281        field_id: u16,
282        level: u8,
283        insertion_key: FacetGroupKey<Vec<u8>>,
284        insertion_value: FacetGroupValue,
285    ) -> Result<ModificationResult> {
286        let size_left = insertion_value.size / 2;
287        let size_right = insertion_value.size - size_left;
288
289        let level_below = level - 1;
290
291        let start_key = FacetGroupKey {
292            field_id,
293            level: level_below,
294            left_bound: insertion_key.left_bound.as_slice(),
295        };
296
297        let mut iter =
298            self.db.range(txn, &(start_key..))?.take((size_left as usize) + (size_right as usize));
299
300        let group_left = {
301            let mut values_left = RoaringBitmap::new();
302
303            let mut i = 0;
304            for next in iter.by_ref() {
305                let (_key, value) = next?;
306                i += 1;
307                values_left |= &value.bitmap;
308                if i == size_left {
309                    break;
310                }
311            }
312
313            let key =
314                FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
315            let value = FacetGroupValue { size: size_left, bitmap: values_left };
316            (key, value)
317        };
318
319        let group_right = {
320            let (
321                FacetGroupKey { left_bound: right_left_bound, .. },
322                FacetGroupValue { bitmap: mut values_right, .. },
323            ) = iter.next().unwrap()?;
324
325            for next in iter.by_ref() {
326                let (_, value) = next?;
327                values_right |= &value.bitmap;
328            }
329
330            let key = FacetGroupKey { field_id, level, left_bound: right_left_bound.to_vec() };
331            let value = FacetGroupValue { size: size_right, bitmap: values_right };
332            (key, value)
333        };
334        drop(iter);
335
336        let _ = self.db.delete(txn, &insertion_key.as_ref())?;
337
338        self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?;
339        self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?;
340
341        Ok(ModificationResult::Insert)
342    }
343
344    /// Remove the docids still present in the related sub-level nodes from the del_docids.
345    ///
346    /// This process is needed to avoid removing docids from a group node where the docid is present in several sub-nodes.
347    fn trim_del_docids<'a>(
348        &self,
349        txn: &mut RwTxn<'_>,
350        field_id: u16,
351        level: u8,
352        insertion_key: &FacetGroupKey<Vec<u8>>,
353        insertion_value_size: usize,
354        del_docids: &'a RoaringBitmap,
355    ) -> Result<std::borrow::Cow<'a, RoaringBitmap>> {
356        let level_below = level - 1;
357
358        let start_key = FacetGroupKey {
359            field_id,
360            level: level_below,
361            left_bound: insertion_key.left_bound.as_slice(),
362        };
363
364        let mut del_docids = std::borrow::Cow::Borrowed(del_docids);
365        let iter = self.db.range(txn, &(start_key..))?.take(insertion_value_size);
366        for next in iter {
367            let (_, value) = next?;
368            // if a sublevel bitmap as common docids with del_docids,
369            // then these docids shouldn't be removed and so, remove them from the deletion list.
370            if !value.bitmap.is_disjoint(&del_docids) {
371                *del_docids.to_mut() -= value.bitmap;
372            }
373        }
374
375        Ok(del_docids)
376    }
377
378    /// Modify the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
379    /// This function works recursively.
380    ///
381    /// ## Return
382    /// Returns the effect of modifying the facet value to the database on the given `level`.
383    ///
384    fn modify_in_level(
385        &self,
386        txn: &mut RwTxn<'_>,
387        field_id: u16,
388        level: u8,
389        facet_value: &[u8],
390        add_docids: Option<&RoaringBitmap>,
391        del_docids: Option<&RoaringBitmap>,
392    ) -> Result<ModificationResult> {
393        if level == 0 {
394            return self.modify_in_level_0(txn, field_id, facet_value, add_docids, del_docids);
395        }
396
397        let result =
398            self.modify_in_level(txn, field_id, level - 1, facet_value, add_docids, del_docids)?;
399        // level below inserted an element
400
401        if let ModificationResult::Nothing = result {
402            // if the previous level has not been modified,
403            // early return ModificationResult::Nothing.
404            return Ok(ModificationResult::Nothing);
405        }
406
407        let (insertion_key, insertion_value) =
408            self.find_insertion_key_value(field_id, level, facet_value, txn)?;
409        let insertion_value_size = insertion_value.size as usize;
410
411        let mut insertion_value_was_modified = false;
412        let mut updated_value = insertion_value;
413
414        if let ModificationResult::Insert = result {
415            // if a key has been inserted in the sub-level raise the value size.
416            updated_value.size += 1;
417            insertion_value_was_modified = true;
418        } else if let ModificationResult::Remove { .. } = result {
419            if updated_value.size <= 1 {
420                // if the only remaining node is the one to delete,
421                // delete the key instead and early return.
422                let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
423                assert!(is_deleted);
424                return Ok(result);
425            } else {
426                // Reduce the value size
427                updated_value.size -= 1;
428                insertion_value_was_modified = true;
429            }
430        }
431
432        let (insertion_key, insertion_key_modification) =
433            if let ModificationResult::InPlace = result {
434                (insertion_key, ModificationResult::InPlace)
435            } else {
436                // Inserting or deleting the facet value in the level below resulted in the creation
437                // of a new key. Therefore, it may be the case that we need to modify the left bound of the
438                // insertion key (see documentation of `find_insertion_key_value` for an example of when that
439                // could happen).
440                let mut new_insertion_key = insertion_key.clone();
441                let mut key_modification = ModificationResult::InPlace;
442
443                if let ModificationResult::Remove { next } | ModificationResult::Reduce { next } =
444                    result
445                {
446                    // if the deleted facet_value is the left_bound of the current node,
447                    // the left_bound should be updated reducing the current node.
448                    let reduced_range = facet_value == insertion_key.left_bound;
449                    if reduced_range {
450                        new_insertion_key.left_bound = next.clone().unwrap();
451                        key_modification = ModificationResult::Reduce { next };
452                    }
453                } else if facet_value < insertion_key.left_bound.as_slice() {
454                    // if the added facet_value is the under the left_bound of the current node,
455                    // the left_bound should be updated expanding the current node.
456                    new_insertion_key.left_bound = facet_value.to_vec();
457                    key_modification = ModificationResult::Expand;
458                }
459
460                if matches!(
461                    key_modification,
462                    ModificationResult::Expand | ModificationResult::Reduce { .. }
463                ) {
464                    // if the node should be updated, delete it, it will be recreated using a new key later.
465                    let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
466                    assert!(is_deleted);
467                }
468                (new_insertion_key, key_modification)
469            };
470
471        if updated_value.size < self.max_group_size {
472            // If there are docids to delete, trim them avoiding unexpected removal.
473            if let Some(del_docids) = del_docids
474                .map(|ids| {
475                    self.trim_del_docids(
476                        txn,
477                        field_id,
478                        level,
479                        &insertion_key,
480                        insertion_value_size,
481                        ids,
482                    )
483                })
484                .transpose()?
485                .filter(|ids| !ids.is_empty())
486            {
487                updated_value.bitmap -= &*del_docids;
488                insertion_value_was_modified = true;
489            }
490
491            if let Some(add_docids) = add_docids {
492                updated_value.bitmap |= add_docids;
493                insertion_value_was_modified = true;
494            }
495
496            if insertion_value_was_modified
497                || matches!(
498                    insertion_key_modification,
499                    ModificationResult::Expand | ModificationResult::Reduce { .. }
500                )
501            {
502                // if any modification occurred, insert it in the database.
503                self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
504                Ok(insertion_key_modification)
505            } else {
506                // this case is reachable when a docid is removed from a sub-level node but is still present in another one.
507                // For instance, a document containing 2 and 3, if 2 is removed, the docid should remain in the group node [1..4].
508                Ok(ModificationResult::Nothing)
509            }
510        } else {
511            // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
512            // Therefore it must be split into two nodes.
513            self.split_group(txn, field_id, level, insertion_key, updated_value)
514        }
515    }
516
517    /// Modify the given facet value and corresponding document ids in the database.
518    /// If no more document ids correspond to the facet value, delete it completely.
519    ///
520    /// ## Return
521    /// Returns `true` if some tree-nodes of the highest level have been removed or added implying a potential
522    /// addition or deletion of a facet level.
523    /// Otherwise returns `false` if the tree-nodes have been modified in place.
524    pub fn modify(
525        &self,
526        txn: &mut RwTxn<'_>,
527        field_id: u16,
528        facet_value: &[u8],
529        add_docids: Option<&RoaringBitmap>,
530        del_docids: Option<&RoaringBitmap>,
531    ) -> Result<bool> {
532        if add_docids.is_none_or(RoaringBitmap::is_empty)
533            && del_docids.is_none_or(RoaringBitmap::is_empty)
534        {
535            return Ok(false);
536        }
537
538        let highest_level = get_highest_level(txn, self.db, field_id)?;
539
540        let result = self.modify_in_level(
541            txn,
542            field_id,
543            highest_level,
544            facet_value,
545            add_docids,
546            del_docids,
547        )?;
548        match result {
549            ModificationResult::InPlace
550            | ModificationResult::Expand
551            | ModificationResult::Nothing
552            | ModificationResult::Reduce { .. } => Ok(false),
553            ModificationResult::Insert | ModificationResult::Remove { .. } => Ok(true),
554        }
555    }
556
557    /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
558    /// If it has, we must build an addition level above it.
559    /// Then check whether the highest level is under `min_level_size`.
560    /// If it has, we must remove the complete level.
561    pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn<'_>, field_id: u16) -> Result<()> {
562        let highest_level = get_highest_level(txn, self.db, field_id)?;
563        let mut highest_level_prefix = vec![];
564        highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
565        highest_level_prefix.push(highest_level);
566
567        let size_highest_level =
568            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
569
570        if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
571            self.add_level(txn, field_id, highest_level, &highest_level_prefix, size_highest_level)
572        } else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
573            self.delete_level(txn, &highest_level_prefix)
574        } else {
575            Ok(())
576        }
577    }
578
579    /// Delete a level.
580    fn delete_level(&self, txn: &mut RwTxn<'_>, highest_level_prefix: &[u8]) -> Result<()> {
581        let mut to_delete = vec![];
582        let mut iter =
583            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
584        for el in iter.by_ref() {
585            let (k, _) = el?;
586            to_delete.push(
587                FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
588                    .map_err(Error::Encoding)?
589                    .into_owned(),
590            );
591        }
592        drop(iter);
593        for k in to_delete {
594            self.db.delete(txn, &k.as_ref())?;
595        }
596        Ok(())
597    }
598
599    /// Build an additional level for the field id.
600    fn add_level(
601        &self,
602        txn: &mut RwTxn<'_>,
603        field_id: u16,
604        highest_level: u8,
605        highest_level_prefix: &[u8],
606        size_highest_level: usize,
607    ) -> Result<()> {
608        let mut groups_iter = self
609            .db
610            .remap_types::<Bytes, FacetGroupValueCodec>()
611            .prefix_iter(txn, highest_level_prefix)?;
612
613        let nbr_new_groups = size_highest_level / self.group_size as usize;
614        let nbr_leftover_elements = size_highest_level % self.group_size as usize;
615
616        let mut to_add = vec![];
617        for _ in 0..nbr_new_groups {
618            let mut first_key = None;
619            let mut values = RoaringBitmap::new();
620            for _ in 0..self.group_size {
621                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
622                let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
623                    .map_err(Error::Encoding)?;
624
625                if first_key.is_none() {
626                    first_key = Some(key_i);
627                }
628                values |= value_i.bitmap;
629            }
630            let key = FacetGroupKey {
631                field_id,
632                level: highest_level + 1,
633                left_bound: first_key.unwrap().left_bound,
634            };
635            let value = FacetGroupValue { size: self.group_size, bitmap: values };
636            to_add.push((key.into_owned(), value));
637        }
638        // now we add the rest of the level, in case its size is > group_size * min_level_size
639        // this can indeed happen if the min_level_size parameter changes between two calls to `insert`
640        if nbr_leftover_elements > 0 {
641            let mut first_key = None;
642            let mut values = RoaringBitmap::new();
643            for _ in 0..nbr_leftover_elements {
644                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
645                let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
646                    .map_err(Error::Encoding)?;
647
648                if first_key.is_none() {
649                    first_key = Some(key_i);
650                }
651                values |= value_i.bitmap;
652            }
653            let key = FacetGroupKey {
654                field_id,
655                level: highest_level + 1,
656                left_bound: first_key.unwrap().left_bound,
657            };
658            // Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size`
659            // when it is created above.
660            let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values };
661            to_add.push((key.into_owned(), value));
662        }
663
664        drop(groups_iter);
665        for (key, value) in to_add {
666            self.db.put(txn, &key.as_ref(), &value)?;
667        }
668        Ok(())
669    }
670}
671
672impl FacetGroupKey<&[u8]> {
673    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
674        FacetGroupKey {
675            field_id: self.field_id,
676            level: self.level,
677            left_bound: self.left_bound.to_vec(),
678        }
679    }
680}
681
682impl FacetGroupKey<Vec<u8>> {
683    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
684        FacetGroupKey {
685            field_id: self.field_id,
686            level: self.level,
687            left_bound: self.left_bound.as_slice(),
688        }
689    }
690}
691
692#[cfg(test)]
693mod tests {
694    use rand::seq::SliceRandom;
695    use rand::{Rng, SeedableRng};
696    use roaring::RoaringBitmap;
697
698    use crate::heed_codec::facet::OrderedF64Codec;
699    use crate::heed_codec::StrRefCodec;
700    use crate::milli_snap;
701    use crate::update::facet::test_helpers::FacetIndex;
702
703    #[test]
704    fn append() {
705        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
706        for i in 0..256u16 {
707            let mut bitmap = RoaringBitmap::new();
708            bitmap.insert(i as u32);
709            let mut txn = index.env.write_txn().unwrap();
710            index.insert(&mut txn, 0, &(i as f64), &bitmap);
711            txn.commit().unwrap();
712        }
713        let txn = index.env.read_txn().unwrap();
714        index.verify_structure_validity(&txn, 0);
715        txn.commit().unwrap();
716        milli_snap!(format!("{index}"));
717    }
718    #[test]
719    fn many_field_ids_append() {
720        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
721        for i in 0..256u16 {
722            let mut bitmap = RoaringBitmap::new();
723            bitmap.insert(i as u32);
724            let mut txn = index.env.write_txn().unwrap();
725            index.insert(&mut txn, 0, &(i as f64), &bitmap);
726            txn.commit().unwrap();
727        }
728        for i in 0..256u16 {
729            let mut bitmap = RoaringBitmap::new();
730            bitmap.insert(i as u32);
731            let mut txn = index.env.write_txn().unwrap();
732            index.insert(&mut txn, 2, &(i as f64), &bitmap);
733            txn.commit().unwrap();
734        }
735        for i in 0..256u16 {
736            let mut bitmap = RoaringBitmap::new();
737            bitmap.insert(i as u32);
738            let mut txn = index.env.write_txn().unwrap();
739            index.insert(&mut txn, 1, &(i as f64), &bitmap);
740            txn.commit().unwrap();
741        }
742        let txn = index.env.read_txn().unwrap();
743        index.verify_structure_validity(&txn, 0);
744        index.verify_structure_validity(&txn, 1);
745        index.verify_structure_validity(&txn, 2);
746        txn.commit().unwrap();
747        milli_snap!(format!("{index}"));
748    }
749    #[test]
750    fn many_field_ids_prepend() {
751        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
752        for i in (0..256).rev() {
753            let mut bitmap = RoaringBitmap::new();
754            bitmap.insert(i as u32);
755            let mut txn = index.env.write_txn().unwrap();
756            index.insert(&mut txn, 0, &(i as f64), &bitmap);
757            txn.commit().unwrap();
758        }
759        for i in (0..256).rev() {
760            let mut bitmap = RoaringBitmap::new();
761            bitmap.insert(i as u32);
762            let mut txn = index.env.write_txn().unwrap();
763            index.insert(&mut txn, 2, &(i as f64), &bitmap);
764            txn.commit().unwrap();
765        }
766        for i in (0..256).rev() {
767            let mut bitmap = RoaringBitmap::new();
768            bitmap.insert(i as u32);
769            let mut txn = index.env.write_txn().unwrap();
770            index.insert(&mut txn, 1, &(i as f64), &bitmap);
771            txn.commit().unwrap();
772        }
773        let txn = index.env.read_txn().unwrap();
774        index.verify_structure_validity(&txn, 0);
775        index.verify_structure_validity(&txn, 1);
776        index.verify_structure_validity(&txn, 2);
777        txn.commit().unwrap();
778        milli_snap!(format!("{index}"));
779    }
780
781    #[test]
782    fn prepend() {
783        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
784        let mut txn = index.env.write_txn().unwrap();
785
786        for i in (0..256).rev() {
787            let mut bitmap = RoaringBitmap::new();
788            bitmap.insert(i);
789            index.insert(&mut txn, 0, &(i as f64), &bitmap);
790        }
791
792        index.verify_structure_validity(&txn, 0);
793        txn.commit().unwrap();
794        milli_snap!(format!("{index}"));
795    }
796
797    #[test]
798    fn shuffled() {
799        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
800        let mut txn = index.env.write_txn().unwrap();
801
802        let mut keys = (0..256).collect::<Vec<_>>();
803        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
804        keys.shuffle(&mut rng);
805
806        for key in keys {
807            let mut bitmap = RoaringBitmap::new();
808            bitmap.insert(key);
809            index.insert(&mut txn, 0, &(key as f64), &bitmap);
810        }
811        index.verify_structure_validity(&txn, 0);
812        txn.commit().unwrap();
813        milli_snap!(format!("{index}"));
814    }
815
816    #[test]
817    fn merge_values() {
818        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
819        let mut txn = index.env.write_txn().unwrap();
820
821        let mut keys = (0..256).collect::<Vec<_>>();
822        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
823        keys.shuffle(&mut rng);
824
825        for key in keys {
826            let mut bitmap = RoaringBitmap::new();
827            bitmap.insert(key);
828            bitmap.insert(rng.gen_range(256..512));
829            index.verify_structure_validity(&txn, 0);
830            index.insert(&mut txn, 0, &(key as f64), &bitmap);
831        }
832
833        index.verify_structure_validity(&txn, 0);
834        txn.commit().unwrap();
835        milli_snap!(format!("{index}"));
836    }
837
838    #[test]
839    fn delete_from_end() {
840        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
841        let mut txn = index.env.write_txn().unwrap();
842        for i in 0..256 {
843            let mut bitmap = RoaringBitmap::new();
844            bitmap.insert(i);
845            index.verify_structure_validity(&txn, 0);
846            index.insert(&mut txn, 0, &(i as f64), &bitmap);
847        }
848
849        for i in (200..256).rev() {
850            index.verify_structure_validity(&txn, 0);
851            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
852        }
853        index.verify_structure_validity(&txn, 0);
854        txn.commit().unwrap();
855        milli_snap!(format!("{index}"), 200);
856        let mut txn = index.env.write_txn().unwrap();
857
858        for i in (150..200).rev() {
859            index.verify_structure_validity(&txn, 0);
860            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
861        }
862        index.verify_structure_validity(&txn, 0);
863        txn.commit().unwrap();
864        milli_snap!(format!("{index}"), 150);
865        let mut txn = index.env.write_txn().unwrap();
866        for i in (100..150).rev() {
867            index.verify_structure_validity(&txn, 0);
868            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
869        }
870        index.verify_structure_validity(&txn, 0);
871        txn.commit().unwrap();
872        milli_snap!(format!("{index}"), 100);
873        let mut txn = index.env.write_txn().unwrap();
874        for i in (17..100).rev() {
875            index.verify_structure_validity(&txn, 0);
876            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
877        }
878        index.verify_structure_validity(&txn, 0);
879        txn.commit().unwrap();
880        milli_snap!(format!("{index}"), 17);
881        let mut txn = index.env.write_txn().unwrap();
882        for i in (15..17).rev() {
883            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
884        }
885        index.verify_structure_validity(&txn, 0);
886        txn.commit().unwrap();
887        milli_snap!(format!("{index}"), 15);
888        let mut txn = index.env.write_txn().unwrap();
889        for i in (0..15).rev() {
890            index.verify_structure_validity(&txn, 0);
891            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
892        }
893        index.verify_structure_validity(&txn, 0);
894        txn.commit().unwrap();
895        milli_snap!(format!("{index}"), 0);
896    }
897
898    #[test]
899    fn delete_from_start() {
900        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
901        let mut txn = index.env.write_txn().unwrap();
902
903        for i in 0..256 {
904            let mut bitmap = RoaringBitmap::new();
905            bitmap.insert(i);
906            index.verify_structure_validity(&txn, 0);
907            index.insert(&mut txn, 0, &(i as f64), &bitmap);
908        }
909
910        for i in 0..128 {
911            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
912        }
913        index.verify_structure_validity(&txn, 0);
914        txn.commit().unwrap();
915        milli_snap!(format!("{index}"), 127);
916        let mut txn = index.env.write_txn().unwrap();
917        for i in 128..216 {
918            index.verify_structure_validity(&txn, 0);
919            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
920        }
921        index.verify_structure_validity(&txn, 0);
922        txn.commit().unwrap();
923        milli_snap!(format!("{index}"), 215);
924        let mut txn = index.env.write_txn().unwrap();
925        for i in 216..256 {
926            index.verify_structure_validity(&txn, 0);
927            index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
928        }
929        index.verify_structure_validity(&txn, 0);
930        txn.commit().unwrap();
931        milli_snap!(format!("{index}"), 255);
932    }
933
934    #[test]
935    #[allow(clippy::needless_range_loop)]
936    fn delete_shuffled() {
937        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
938        let mut txn = index.env.write_txn().unwrap();
939        for i in 0..256 {
940            let mut bitmap = RoaringBitmap::new();
941            bitmap.insert(i);
942            index.verify_structure_validity(&txn, 0);
943            index.insert(&mut txn, 0, &(i as f64), &bitmap);
944        }
945
946        let mut keys = (0..256).collect::<Vec<_>>();
947        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
948        keys.shuffle(&mut rng);
949
950        for i in 0..128 {
951            let key = keys[i];
952            index.verify_structure_validity(&txn, 0);
953            index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
954        }
955        index.verify_structure_validity(&txn, 0);
956        txn.commit().unwrap();
957        milli_snap!(format!("{index}"), 127);
958        let mut txn = index.env.write_txn().unwrap();
959        for i in 128..216 {
960            let key = keys[i];
961            index.verify_structure_validity(&txn, 0);
962            index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
963        }
964        index.verify_structure_validity(&txn, 0);
965        txn.commit().unwrap();
966        let mut txn = index.env.write_txn().unwrap();
967        milli_snap!(format!("{index}"), 215);
968        for i in 216..256 {
969            let key = keys[i];
970            index.verify_structure_validity(&txn, 0);
971            index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
972        }
973        index.verify_structure_validity(&txn, 0);
974        txn.commit().unwrap();
975        milli_snap!(format!("{index}"), 255);
976    }
977
978    #[test]
979    fn in_place_level0_insert() {
980        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
981        let mut txn = index.env.write_txn().unwrap();
982
983        let mut keys = (0..16).collect::<Vec<_>>();
984        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
985        keys.shuffle(&mut rng);
986        for i in 0..4 {
987            for &key in keys.iter() {
988                let mut bitmap = RoaringBitmap::new();
989                bitmap.insert(rng.gen_range(i * 256..(i + 1) * 256));
990                index.verify_structure_validity(&txn, 0);
991                index.insert(&mut txn, 0, &(key as f64), &bitmap);
992            }
993        }
994        index.verify_structure_validity(&txn, 0);
995        txn.commit().unwrap();
996        milli_snap!(format!("{index}"));
997    }
998
999    #[test]
1000    fn in_place_level0_delete() {
1001        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
1002        let mut txn = index.env.write_txn().unwrap();
1003
1004        let mut keys = (0..64).collect::<Vec<_>>();
1005        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
1006        keys.shuffle(&mut rng);
1007
1008        for &key in keys.iter() {
1009            let mut bitmap = RoaringBitmap::new();
1010            bitmap.insert(key);
1011            bitmap.insert(key + 100);
1012            index.verify_structure_validity(&txn, 0);
1013
1014            index.insert(&mut txn, 0, &(key as f64), &bitmap);
1015        }
1016        index.verify_structure_validity(&txn, 0);
1017        txn.commit().unwrap();
1018        milli_snap!(format!("{index}"), "before_delete");
1019
1020        let mut txn = index.env.write_txn().unwrap();
1021
1022        for &key in keys.iter() {
1023            index.verify_structure_validity(&txn, 0);
1024            index.delete_single_docid(&mut txn, 0, &(key as f64), key + 100);
1025        }
1026        index.verify_structure_validity(&txn, 0);
1027        txn.commit().unwrap();
1028        milli_snap!(format!("{index}"), "after_delete");
1029    }
1030
1031    #[test]
1032    fn shuffle_merge_string_and_delete() {
1033        let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
1034        let mut txn = index.env.write_txn().unwrap();
1035
1036        let mut keys = (1000..1064).collect::<Vec<_>>();
1037        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
1038        keys.shuffle(&mut rng);
1039
1040        for &key in keys.iter() {
1041            let mut bitmap = RoaringBitmap::new();
1042            bitmap.insert(key);
1043            bitmap.insert(key + 100);
1044            index.verify_structure_validity(&txn, 0);
1045            index.insert(&mut txn, 0, &format!("{key:x}").as_str(), &bitmap);
1046        }
1047        index.verify_structure_validity(&txn, 0);
1048        txn.commit().unwrap();
1049        milli_snap!(format!("{index}"), "before_delete");
1050
1051        let mut txn = index.env.write_txn().unwrap();
1052
1053        for &key in keys.iter() {
1054            index.verify_structure_validity(&txn, 0);
1055            index.delete_single_docid(&mut txn, 0, &format!("{key:x}").as_str(), key + 100);
1056        }
1057        index.verify_structure_validity(&txn, 0);
1058        txn.commit().unwrap();
1059        milli_snap!(format!("{index}"), "after_delete");
1060    }
1061}