milli_core/update/
clear_documents.rs1use heed::RwTxn;
2use roaring::RoaringBitmap;
3use time::OffsetDateTime;
4
5use crate::{FieldDistribution, Index, Result};
6
7pub struct ClearDocuments<'t, 'i> {
8 wtxn: &'t mut RwTxn<'i>,
9 index: &'i Index,
10}
11
12impl<'t, 'i> ClearDocuments<'t, 'i> {
13 pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
14 ClearDocuments { wtxn, index }
15 }
16
17 #[tracing::instrument(
18 level = "trace",
19 skip(self),
20 target = "indexing::documents",
21 name = "clear_documents"
22 )]
23 pub fn execute(self) -> Result<u64> {
24 self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
25 let Index {
26 env: _env,
27 main: _main,
28 external_documents_ids,
29 word_docids,
30 exact_word_docids,
31 word_prefix_docids,
32 exact_word_prefix_docids,
33 word_pair_proximity_docids,
34 word_position_docids,
35 word_fid_docids,
36 field_id_word_count_docids,
37 word_prefix_position_docids,
38 word_prefix_fid_docids,
39 facet_id_f64_docids,
40 facet_id_string_docids,
41 facet_id_normalized_string_strings,
42 facet_id_string_fst,
43 facet_id_exists_docids,
44 facet_id_is_null_docids,
45 facet_id_is_empty_docids,
46 field_id_docid_facet_f64s,
47 field_id_docid_facet_strings,
48 vector_arroy,
49 embedder_category_id: _,
50 documents,
51 } = self.index;
52
53 let empty_roaring = RoaringBitmap::default();
54
55 let number_of_documents = self.index.number_of_documents(self.wtxn)?;
57
58 self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
60 self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
61 self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
62 self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
63 self.index.delete_geo_rtree(self.wtxn)?;
64 self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
65
66 let mut configs = self.index.embedding_configs(self.wtxn)?;
68 for config in configs.iter_mut() {
69 config.user_provided.clear();
70 }
71 self.index.put_embedding_configs(self.wtxn, configs)?;
72
73 external_documents_ids.clear(self.wtxn)?;
75 word_docids.clear(self.wtxn)?;
76 exact_word_docids.clear(self.wtxn)?;
77 word_prefix_docids.clear(self.wtxn)?;
78 exact_word_prefix_docids.clear(self.wtxn)?;
79 word_pair_proximity_docids.clear(self.wtxn)?;
80 word_position_docids.clear(self.wtxn)?;
81 word_fid_docids.clear(self.wtxn)?;
82 field_id_word_count_docids.clear(self.wtxn)?;
83 word_prefix_position_docids.clear(self.wtxn)?;
84 word_prefix_fid_docids.clear(self.wtxn)?;
85 facet_id_f64_docids.clear(self.wtxn)?;
86 facet_id_normalized_string_strings.clear(self.wtxn)?;
87 facet_id_string_fst.clear(self.wtxn)?;
88 facet_id_exists_docids.clear(self.wtxn)?;
89 facet_id_is_null_docids.clear(self.wtxn)?;
90 facet_id_is_empty_docids.clear(self.wtxn)?;
91 facet_id_string_docids.clear(self.wtxn)?;
92 field_id_docid_facet_f64s.clear(self.wtxn)?;
93 field_id_docid_facet_strings.clear(self.wtxn)?;
94 vector_arroy.clear(self.wtxn)?;
96
97 documents.clear(self.wtxn)?;
98
99 Ok(number_of_documents)
100 }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::RESERVED_GEO_FIELD_NAME;
    use crate::index::tests::TempIndex;

    /// Indexes three documents, clears the index, and checks that the
    /// document-derived data is gone while the fields ids map is kept.
    #[test]
    fn clear_documents() {
        let index = TempIndex::new();

        let mut wtxn = index.write_txn().unwrap();
        index
            .add_documents_using_wtxn(&mut wtxn, documents!([
                { "id": 0, "name": "kevin", "age": 20 },
                { "id": 1, "name": "kevina" },
                { "id": 2, "name": "benoit", "country": "France", RESERVED_GEO_FIELD_NAME: { "lng": 42, "lat": 35 } }
            ]))
            .unwrap();

        // Clearing reports how many documents were present beforehand.
        let cleared = ClearDocuments::new(&mut wtxn, &index).execute().unwrap();
        assert_eq!(cleared, 3);
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // The fields ids map survives the clear: it still holds 7 entries
        // (presumably the top-level fields plus the nested geo sub-fields; confirm).
        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 7);

        // Index-level structures are back to their empty state.
        assert!(index.words_fst(&rtxn).unwrap().is_empty());
        assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
        assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
        assert!(index.documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.field_distribution(&rtxn).unwrap().is_empty());
        assert!(index.geo_rtree(&rtxn).unwrap().is_none());
        assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());

        // The individual databases no longer contain any entry.
        assert!(index.word_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_strings.is_empty(&rtxn).unwrap());
        assert!(index.documents.is_empty(&rtxn).unwrap());
    }
}