milli_core/update/
clear_documents.rs

1use heed::RwTxn;
2use roaring::RoaringBitmap;
3use time::OffsetDateTime;
4
5use crate::{FieldDistribution, Index, Result};
6
7pub struct ClearDocuments<'t, 'i> {
8    wtxn: &'t mut RwTxn<'i>,
9    index: &'i Index,
10}
11
12impl<'t, 'i> ClearDocuments<'t, 'i> {
13    pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
14        ClearDocuments { wtxn, index }
15    }
16
17    #[tracing::instrument(
18        level = "trace",
19        skip(self),
20        target = "indexing::documents",
21        name = "clear_documents"
22    )]
23    pub fn execute(self) -> Result<u64> {
24        self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
25        let Index {
26            env: _env,
27            main: _main,
28            external_documents_ids,
29            word_docids,
30            exact_word_docids,
31            word_prefix_docids,
32            exact_word_prefix_docids,
33            word_pair_proximity_docids,
34            word_position_docids,
35            word_fid_docids,
36            field_id_word_count_docids,
37            word_prefix_position_docids,
38            word_prefix_fid_docids,
39            facet_id_f64_docids,
40            facet_id_string_docids,
41            facet_id_normalized_string_strings,
42            facet_id_string_fst,
43            facet_id_exists_docids,
44            facet_id_is_null_docids,
45            facet_id_is_empty_docids,
46            field_id_docid_facet_f64s,
47            field_id_docid_facet_strings,
48            vector_arroy,
49            embedder_category_id: _,
50            documents,
51        } = self.index;
52
53        let empty_roaring = RoaringBitmap::default();
54
55        // We retrieve the number of documents ids that we are deleting.
56        let number_of_documents = self.index.number_of_documents(self.wtxn)?;
57
58        // We clean some of the main engine datastructures.
59        self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
60        self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
61        self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
62        self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
63        self.index.delete_geo_rtree(self.wtxn)?;
64        self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
65
66        // Remove all user-provided bits from the configs
67        let mut configs = self.index.embedding_configs(self.wtxn)?;
68        for config in configs.iter_mut() {
69            config.user_provided.clear();
70        }
71        self.index.put_embedding_configs(self.wtxn, configs)?;
72
73        // Clear the other databases.
74        external_documents_ids.clear(self.wtxn)?;
75        word_docids.clear(self.wtxn)?;
76        exact_word_docids.clear(self.wtxn)?;
77        word_prefix_docids.clear(self.wtxn)?;
78        exact_word_prefix_docids.clear(self.wtxn)?;
79        word_pair_proximity_docids.clear(self.wtxn)?;
80        word_position_docids.clear(self.wtxn)?;
81        word_fid_docids.clear(self.wtxn)?;
82        field_id_word_count_docids.clear(self.wtxn)?;
83        word_prefix_position_docids.clear(self.wtxn)?;
84        word_prefix_fid_docids.clear(self.wtxn)?;
85        facet_id_f64_docids.clear(self.wtxn)?;
86        facet_id_normalized_string_strings.clear(self.wtxn)?;
87        facet_id_string_fst.clear(self.wtxn)?;
88        facet_id_exists_docids.clear(self.wtxn)?;
89        facet_id_is_null_docids.clear(self.wtxn)?;
90        facet_id_is_empty_docids.clear(self.wtxn)?;
91        facet_id_string_docids.clear(self.wtxn)?;
92        field_id_docid_facet_f64s.clear(self.wtxn)?;
93        field_id_docid_facet_strings.clear(self.wtxn)?;
94        // vector
95        vector_arroy.clear(self.wtxn)?;
96
97        documents.clear(self.wtxn)?;
98
99        Ok(number_of_documents)
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::RESERVED_GEO_FIELD_NAME;
    use crate::index::tests::TempIndex;

    /// Adds three documents (one of them carrying a geo point), clears the index within the
    /// same write transaction and, once committed, checks that every structure reset by
    /// `ClearDocuments::execute` is empty.
    #[test]
    fn clear_documents() {
        let index = TempIndex::new();

        let mut wtxn = index.write_txn().unwrap();
        index
            .add_documents_using_wtxn(&mut wtxn, documents!([
                { "id": 0, "name": "kevin", "age": 20 },
                { "id": 1, "name": "kevina" },
                { "id": 2, "name": "benoit", "country": "France", RESERVED_GEO_FIELD_NAME: { "lng": 42, "lat": 35 } }
            ]))
            .unwrap();

        // Clear all documents from the database: the three documents added above must be reported.
        let builder = ClearDocuments::new(&mut wtxn, &index);
        assert_eq!(builder.execute().unwrap(), 3);
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // Clearing the documents does not touch the fields ids map.
        // The value is 7 because there is `[id, name, age, country, _geo, _geo.lng, _geo.lat]`.
        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 7);

        // Entries of the main database.
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 0);
        assert!(index.words_fst(&rtxn).unwrap().is_empty());
        assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
        assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
        assert!(index.documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.field_distribution(&rtxn).unwrap().is_empty());
        assert!(index.geo_rtree(&rtxn).unwrap().is_none());
        assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());

        // Word-related databases.
        assert!(index.word_docids.is_empty(&rtxn).unwrap());
        assert!(index.exact_word_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.exact_word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_position_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_fid_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_position_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_fid_docids.is_empty(&rtxn).unwrap());

        // Facet-related databases.
        assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_normalized_string_strings.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_fst.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_exists_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_is_null_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_is_empty_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_strings.is_empty(&rtxn).unwrap());

        // Vector store and documents.
        assert!(index.vector_arroy.is_empty(&rtxn).unwrap());
        assert!(index.documents.is_empty(&rtxn).unwrap());
    }
}