milli_core/
index.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
3use std::fs::File;
4use std::path::Path;
5
6use heed::types::*;
7use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
8use indexmap::IndexMap;
9use roaring::RoaringBitmap;
10use rstar::RTree;
11use serde::{Deserialize, Serialize};
12
13use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
14use crate::database_stats::DatabaseStats;
15use crate::documents::PrimaryKey;
16use crate::error::{InternalError, UserError};
17use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
18use crate::fields_ids_map::FieldsIdsMap;
19use crate::heed_codec::facet::{
20    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
21    FieldIdCodec, OrderedF64Codec,
22};
23use crate::heed_codec::version::VersionCodec;
24use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
25use crate::order_by_map::OrderByMap;
26use crate::proximity::ProximityPrecision;
27use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
28use crate::{
29    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
30    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
31    FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
32    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
33    BEU64,
34};
35
/// Default minimum word length associated with one typo.
///
/// NOTE(review): meaning inferred from the name only; the code consuming this
/// constant is outside this chunk — confirm.
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
/// Default minimum word length associated with two typos.
///
/// NOTE(review): meaning inferred from the name only; the code consuming this
/// constant is outside this chunk — confirm.
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
38
/// Keys under which individual values are stored in the `main` database.
///
/// These strings are persisted on disk: changing any of them makes previously
/// written values unreachable. The grouping below is by name only and carries
/// no semantic meaning for the code.
pub mod main_key {
    // Index metadata.
    pub const VERSION_KEY: &str = "version";
    pub const CREATED_AT_KEY: &str = "created-at";
    pub const UPDATED_AT_KEY: &str = "updated-at";
    pub const PRIMARY_KEY_KEY: &str = "primary-key";

    // Documents and fields bookkeeping.
    pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
    pub const DOCUMENTS_STATS: &str = "documents_stats";
    pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
    pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
    pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";

    // Attribute-level settings.
    pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
    pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
    pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
    pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
    pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
    pub const HIDDEN_FACETED_FIELDS_KEY: &str = "hidden-faceted-fields";
    pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
    pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
    pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";

    // Ranking, search and pagination settings.
    pub const CRITERIA_KEY: &str = "criteria";
    pub const PROXIMITY_PRECISION: &str = "proximity-precision";
    pub const SEARCH_CUTOFF: &str = "search_cutoff";
    pub const FACET_SEARCH: &str = "facet_search";
    pub const PREFIX_SEARCH: &str = "prefix_search";
    pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
    pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
    pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";

    // Typo settings.
    pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
    pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
    pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
    pub const EXACT_WORDS: &str = "exact-words";
    pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms";

    // Tokenization and vocabulary.
    pub const STOP_WORDS_KEY: &str = "stop-words";
    pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
    pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
    pub const DICTIONARY_KEY: &str = "dictionary";
    pub const SYNONYMS_KEY: &str = "synonyms";
    pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
    pub const WORDS_FST_KEY: &str = "words-fst";
    pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";

    // Geo.
    pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
    pub const GEO_RTREE_KEY: &str = "geo-rtree";

    // Vectors.
    pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
}
83
/// Names of the databases living inside the index environment.
///
/// These strings are persisted on disk; renaming one makes the corresponding
/// database unreachable. The grouping below is by name only.
pub mod db_name {
    // Core stores.
    pub const MAIN: &str = "main";
    pub const DOCUMENTS: &str = "documents";
    pub const EXTERNAL_DOCUMENTS_IDS: &str = "external-documents-ids";

    // Word-level postings.
    pub const WORD_DOCIDS: &str = "word-docids";
    pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
    pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
    pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
    pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
    pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
    pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
    pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
    pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
    pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
    pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";

    // Facets.
    pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
    pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
    pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
    pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings";
    pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
    pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
    pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";

    // Vectors.
    pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
    pub const VECTOR_ARROY: &str = "vector-arroy";
}
/// Upper bound on the number of named databases, passed to `max_dbs` before the
/// environment is opened (see `Index::new_with_creation_dates` and `Index::rollback`).
///
/// Must stay at least as large as the number of databases created when opening
/// an index; `db_name` currently declares 25 names.
const NUMBER_OF_DBS: u32 = 25;
112
/// Handle to a single index: an LMDB environment together with typed handles
/// to each of the named databases it contains.
///
/// The type is `Clone`; `prepare_for_closing` documents that the environment is
/// only closed once every copy has been dropped.
#[derive(Clone)]
pub struct Index {
    /// The LMDB environment which this index is associated with.
    pub(crate) env: heed::Env<WithoutTls>,

    /// Contains many different types (e.g. the fields ids map).
    /// Values are accessed by remapping the codecs per key (see `main_key`).
    pub(crate) main: Database<Unspecified, Unspecified>,

    /// Maps the external documents ids to the internal document ids.
    pub external_documents_ids: Database<Str, BEU32>,

    /// A word and all the documents ids containing the word.
    pub word_docids: Database<Str, CboRoaringBitmapCodec>,

    /// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
    pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,

    /// A prefix of word and all the documents ids containing this prefix.
    pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,

    /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
    pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,

    /// Maps the proximity between a pair of words to all the docids where this relation appears.
    pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,

    /// Maps the word and the position to the docids that correspond to it.
    pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
    /// Maps the word and the field id to the docids that correspond to it.
    pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

    /// Maps the field id and the word count to the docids that correspond to it.
    pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
    /// Maps the word prefix and a position to all the docids where the prefix appears at the position.
    pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
    /// Maps the word prefix and a field id to all the docids where the prefix appears inside the field.
    pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

    /// Maps the facet field id to the docids for which this field exists.
    pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
    /// Maps the facet field id to the docids for which this field is set as null.
    pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
    /// Maps the facet field id to the docids for which this field is considered empty.
    pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,

    /// Maps the facet field id and ranges of numbers to the docids that correspond to them.
    pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
    /// Maps the facet field id and ranges of strings to the docids that correspond to them.
    pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
    /// Maps the facet field id of the normalized-for-search string facets to their original versions.
    pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
    /// Maps the facet field id of the string facets to an FST containing all the facets values.
    pub facet_id_string_fst: Database<BEU16, FstSetCodec>,

    /// Maps the document id, the facet field id and the numbers.
    pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
    /// Maps the document id, the facet field id and the strings.
    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,

    /// Maps an embedder name to its id in the arroy store.
    pub embedder_category_id: Database<Str, U8>,
    /// Vector store based on arroy™.
    pub vector_arroy: arroy::Database<Unspecified>,

    /// Maps the document id to the document as an obkv store.
    pub(crate) documents: Database<BEU32, ObkvCodec>,
}
180
181impl Index {
182    pub fn new_with_creation_dates<P: AsRef<Path>>(
183        mut options: heed::EnvOpenOptions<WithoutTls>,
184        path: P,
185        created_at: time::OffsetDateTime,
186        updated_at: time::OffsetDateTime,
187        creation: bool,
188    ) -> Result<Index> {
189        use db_name::*;
190
191        options.max_dbs(NUMBER_OF_DBS);
192
193        let env = unsafe { options.open(path) }?;
194        let mut wtxn = env.write_txn()?;
195        let main = env.database_options().name(MAIN).create(&mut wtxn)?;
196        let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
197        let external_documents_ids =
198            env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
199        let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
200        let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
201        let exact_word_prefix_docids =
202            env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
203        let word_pair_proximity_docids =
204            env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
205        let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
206        let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
207        let field_id_word_count_docids =
208            env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
209        let word_prefix_position_docids =
210            env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
211        let word_prefix_fid_docids =
212            env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
213        let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
214        let facet_id_string_docids =
215            env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
216        let facet_id_normalized_string_strings =
217            env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?;
218        let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
219        let facet_id_exists_docids =
220            env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
221        let facet_id_is_null_docids =
222            env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
223        let facet_id_is_empty_docids =
224            env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
225        let field_id_docid_facet_f64s =
226            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
227        let field_id_docid_facet_strings =
228            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
229        // vector stuff
230        let embedder_category_id =
231            env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
232        let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
233
234        let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
235
236        let this = Index {
237            env: env.clone(),
238            main,
239            external_documents_ids,
240            word_docids,
241            exact_word_docids,
242            word_prefix_docids,
243            exact_word_prefix_docids,
244            word_pair_proximity_docids,
245            word_position_docids,
246            word_fid_docids,
247            word_prefix_position_docids,
248            word_prefix_fid_docids,
249            field_id_word_count_docids,
250            facet_id_f64_docids,
251            facet_id_string_docids,
252            facet_id_normalized_string_strings,
253            facet_id_string_fst,
254            facet_id_exists_docids,
255            facet_id_is_null_docids,
256            facet_id_is_empty_docids,
257            field_id_docid_facet_f64s,
258            field_id_docid_facet_strings,
259            vector_arroy,
260            embedder_category_id,
261            documents,
262        };
263        if this.get_version(&wtxn)?.is_none() && creation {
264            this.put_version(
265                &mut wtxn,
266                (constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
267            )?;
268        }
269        wtxn.commit()?;
270
271        Index::set_creation_dates(&this.env, this.main, created_at, updated_at)?;
272
273        Ok(this)
274    }
275
276    pub fn new<P: AsRef<Path>>(
277        options: heed::EnvOpenOptions<WithoutTls>,
278        path: P,
279        creation: bool,
280    ) -> Result<Index> {
281        let now = time::OffsetDateTime::now_utc();
282        Self::new_with_creation_dates(options, path, now, now, creation)
283    }
284
285    /// Attempts to rollback the index at `path` to the version specified by `requested_version`.
286    pub fn rollback<P: AsRef<Path>>(
287        mut options: heed::EnvOpenOptions<WithoutTls>,
288        path: P,
289        requested_version: (u32, u32, u32),
290    ) -> Result<RollbackOutcome> {
291        options.max_dbs(NUMBER_OF_DBS);
292
293        // optimistically check if the index is already at the requested version.
294        let env = unsafe { options.open(path.as_ref()) }?;
295        let rtxn = env.read_txn()?;
296        let Some(main) = env.database_options().name(db_name::MAIN).open(&rtxn)? else {
297            return Err(crate::Error::InternalError(crate::InternalError::DatabaseMissingEntry {
298                db_name: db_name::MAIN,
299                key: None,
300            }));
301        };
302        let rollback_version =
303            main.remap_types::<Str, VersionCodec>().get(&rtxn, main_key::VERSION_KEY)?;
304        if rollback_version == Some(requested_version) {
305            return Ok(RollbackOutcome::NoRollback);
306        }
307
308        // explicitly drop the environment before reopening it.
309        drop(rtxn);
310        drop(env);
311
312        // really need to rollback then...
313        unsafe { options.flags(heed::EnvFlags::PREV_SNAPSHOT) };
314        let env = unsafe { options.open(path) }?;
315        let mut wtxn = env.write_txn()?;
316        let Some(main) = env.database_options().name(db_name::MAIN).open(&wtxn)? else {
317            return Err(crate::Error::InternalError(crate::InternalError::DatabaseMissingEntry {
318                db_name: db_name::MAIN,
319                key: None,
320            }));
321        };
322
323        let main = main.remap_key_type::<Str>();
324
325        let Some(rollback_version) =
326            main.remap_data_type::<VersionCodec>().get(&wtxn, main_key::VERSION_KEY)?
327        else {
328            return Ok(RollbackOutcome::VersionMismatch {
329                requested_version,
330                rollback_version: None,
331            });
332        };
333
334        if requested_version != rollback_version {
335            return Ok(RollbackOutcome::VersionMismatch {
336                requested_version,
337                rollback_version: Some(rollback_version),
338            });
339        }
340
341        // this is a bit of a trick to force a change in the index
342        // which is necessary to actually discard the next snapshot, replacing it with this transaction.
343        let now = time::OffsetDateTime::now_utc();
344        main.remap_data_type::<SerdeJson<OffsetDateTime>>().put(
345            &mut wtxn,
346            main_key::UPDATED_AT_KEY,
347            &OffsetDateTime(now),
348        )?;
349
350        wtxn.commit()?;
351
352        Ok(RollbackOutcome::Rollback)
353    }
354
355    fn set_creation_dates(
356        env: &heed::Env<WithoutTls>,
357        main: Database<Unspecified, Unspecified>,
358        created_at: time::OffsetDateTime,
359        updated_at: time::OffsetDateTime,
360    ) -> heed::Result<()> {
361        let mut txn = env.write_txn()?;
362        // The db was just created, we update its metadata with the relevant information.
363        let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
364        if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
365            main.put(&mut txn, main_key::UPDATED_AT_KEY, &OffsetDateTime(updated_at))?;
366            main.put(&mut txn, main_key::CREATED_AT_KEY, &OffsetDateTime(created_at))?;
367            txn.commit()?;
368        }
369        Ok(())
370    }
371
    /// Creates a write transaction on the index environment.
    ///
    /// The returned [`RwTxn`] is what the `put_*` and `delete_*` methods of this
    /// type expect as their `wtxn` argument.
    pub fn write_txn(&self) -> heed::Result<RwTxn<'_>> {
        self.env.write_txn()
    }
376
    /// Creates a read transaction on the index environment.
    ///
    /// The returned [`RoTxn`] borrows from `self` and is what the read accessors
    /// of this type expect as their `rtxn` argument.
    pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> {
        self.env.read_txn()
    }
381
    /// Creates a `'static` read transaction, which can be used to read the index
    /// without keeping a reference to it.
    ///
    /// The environment handle is cloned and handed over to the transaction.
    pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> {
        self.env.clone().static_read_txn()
    }
386
    /// Returns the canonicalized path where the heed `Env` of this `Index` lives.
    ///
    /// The value is whatever the underlying environment reports through `Env::path`.
    pub fn path(&self) -> &Path {
        self.env.path()
    }
391
392    /// Returns the size used by the index without the cached pages.
393    pub fn used_size(&self) -> Result<u64> {
394        Ok(self.env.non_free_pages_size()?)
395    }
396
397    /// Returns the real size used by the index.
398    pub fn on_disk_size(&self) -> Result<u64> {
399        Ok(self.env.real_disk_size()?)
400    }
401
402    /// Returns the map size the underlying environment was opened with, in bytes.
403    ///
404    /// This value does not represent the current on-disk size of the index.
405    ///
406    /// This value is the maximum between the map size passed during the opening of the index
407    /// and the on-disk size of the index at the time of opening.
408    pub fn map_size(&self) -> usize {
409        self.env.info().map_size
410    }
411
412    pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
413        self.env.copy_to_file(file, option).map_err(Into::into)
414    }
415
416    pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
417        self.env.copy_to_path(path, option).map_err(Into::into)
418    }
419
420    /// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
421    /// multiple threads can wait on this event.
422    ///
423    /// Make sure that you drop all the copies of `Index`es you have, env closing are triggered
424    /// when all references are dropped, the last one will eventually close the environment.
425    pub fn prepare_for_closing(self) -> heed::EnvClosingEvent {
426        self.env.prepare_for_closing()
427    }
428
429    /* version */
430
431    /// Writes the version of the database.
432    pub(crate) fn put_version(
433        &self,
434        wtxn: &mut RwTxn<'_>,
435        (major, minor, patch): (u32, u32, u32),
436    ) -> heed::Result<()> {
437        self.main.remap_types::<Str, VersionCodec>().put(
438            wtxn,
439            main_key::VERSION_KEY,
440            &(major, minor, patch),
441        )
442    }
443
444    /// Get the version of the database. `None` if it was never set.
445    pub fn get_version(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<(u32, u32, u32)>> {
446        self.main.remap_types::<Str, VersionCodec>().get(rtxn, main_key::VERSION_KEY)
447    }
448
449    /* documents ids */
450
451    /// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
452    pub(crate) fn put_documents_ids(
453        &self,
454        wtxn: &mut RwTxn<'_>,
455        docids: &RoaringBitmap,
456    ) -> heed::Result<()> {
457        self.main.remap_types::<Str, RoaringBitmapCodec>().put(
458            wtxn,
459            main_key::DOCUMENTS_IDS_KEY,
460            docids,
461        )
462    }
463
464    /// Returns the internal documents ids.
465    pub fn documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
466        Ok(self
467            .main
468            .remap_types::<Str, RoaringBitmapCodec>()
469            .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
470            .unwrap_or_default())
471    }
472
473    /// Returns the number of documents indexed in the database.
474    pub fn number_of_documents(&self, rtxn: &RoTxn<'_>) -> Result<u64> {
475        let count = self
476            .main
477            .remap_types::<Str, RoaringBitmapLenCodec>()
478            .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
479        Ok(count.unwrap_or_default())
480    }
481
482    /// Writes the stats of the documents database.
483    pub fn put_documents_stats(
484        &self,
485        wtxn: &mut RwTxn<'_>,
486        stats: DatabaseStats,
487    ) -> heed::Result<()> {
488        self.main.remap_types::<Str, SerdeJson<DatabaseStats>>().put(
489            wtxn,
490            main_key::DOCUMENTS_STATS,
491            &stats,
492        )
493    }
494
495    /// Returns the stats of the documents database.
496    pub fn documents_stats(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<DatabaseStats>> {
497        self.main
498            .remap_types::<Str, SerdeJson<DatabaseStats>>()
499            .get(rtxn, main_key::DOCUMENTS_STATS)
500    }
501
502    /* primary key */
503
504    /// Writes the documents primary key, this is the field name that is used to store the id.
505    pub(crate) fn put_primary_key(
506        &self,
507        wtxn: &mut RwTxn<'_>,
508        primary_key: &str,
509    ) -> heed::Result<()> {
510        self.set_updated_at(wtxn, &time::OffsetDateTime::now_utc())?;
511        self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
512    }
513
514    /// Deletes the primary key of the documents, this can be done to reset indexes settings.
515    pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
516        self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
517    }
518
519    /// Returns the documents primary key, `None` if it hasn't been defined.
520    pub fn primary_key<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<&'t str>> {
521        self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
522    }
523
524    /* external documents ids */
525
526    /// Returns the external documents ids map which associate the external ids
527    /// with the internal ids (i.e. `u32`).
528    pub fn external_documents_ids(&self) -> ExternalDocumentsIds {
529        ExternalDocumentsIds::new(self.external_documents_ids)
530    }
531
532    /* fields ids map */
533
534    /// Writes the fields ids map which associate the documents keys with an internal field id
535    /// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
536    pub(crate) fn put_fields_ids_map(
537        &self,
538        wtxn: &mut RwTxn<'_>,
539        map: &FieldsIdsMap,
540    ) -> heed::Result<()> {
541        self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
542            wtxn,
543            main_key::FIELDS_IDS_MAP_KEY,
544            map,
545        )
546    }
547
548    /// Returns the fields ids map which associate the documents keys with an internal field id
549    /// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
550    pub fn fields_ids_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldsIdsMap> {
551        Ok(self
552            .main
553            .remap_types::<Str, SerdeJson<FieldsIdsMap>>()
554            .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
555            .unwrap_or_default())
556    }
557
558    /// Returns the fields ids map with metadata.
559    ///
560    /// This structure is not yet stored in the index, and is generated on the fly.
561    pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
562        Ok(FieldIdMapWithMetadata::new(
563            self.fields_ids_map(rtxn)?,
564            MetadataBuilder::from_index(self, rtxn)?,
565        ))
566    }
567
568    /* fieldids weights map */
569    // This maps the fields ids to their weights.
570    // Their weights is defined by the ordering of the searchable attributes.
571
572    /// Writes the fieldids weights map which associates the field ids to their weights
573    pub(crate) fn put_fieldids_weights_map(
574        &self,
575        wtxn: &mut RwTxn<'_>,
576        map: &FieldidsWeightsMap,
577    ) -> heed::Result<()> {
578        self.main.remap_types::<Str, SerdeJson<_>>().put(
579            wtxn,
580            main_key::FIELDIDS_WEIGHTS_MAP_KEY,
581            map,
582        )
583    }
584
585    /// Get the fieldids weights map which associates the field ids to their weights
586    pub fn fieldids_weights_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldidsWeightsMap> {
587        self.main
588            .remap_types::<Str, SerdeJson<_>>()
589            .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
590            .map(Ok)
591            .unwrap_or_else(|| {
592                Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
593                    &self.fields_ids_map(rtxn)?,
594                ))
595            })
596    }
597
598    /// Delete the fieldsids weights map
599    pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
600        self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
601    }
602
603    pub fn max_searchable_attribute_weight(&self, rtxn: &RoTxn<'_>) -> Result<Option<Weight>> {
604        let user_defined_searchable_fields = self.user_defined_searchable_fields(rtxn)?;
605        if let Some(user_defined_searchable_fields) = user_defined_searchable_fields {
606            if !user_defined_searchable_fields.contains(&"*") {
607                return Ok(Some(user_defined_searchable_fields.len().saturating_sub(1) as Weight));
608            }
609        }
610
611        Ok(None)
612    }
613
614    pub fn searchable_fields_and_weights<'a>(
615        &self,
616        rtxn: &'a RoTxn<'a>,
617    ) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
618        let fid_map = self.fields_ids_map(rtxn)?;
619        let weight_map = self.fieldids_weights_map(rtxn)?;
620        let searchable = self.searchable_fields(rtxn)?;
621
622        searchable
623            .into_iter()
624            .map(|field| -> Result<_> {
625                let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
626                    field_name: field.to_string(),
627                    process: "searchable_fields_and_weights",
628                })?;
629                let weight = weight_map
630                    .weight(fid)
631                    .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
632
633                Ok((field, fid, weight))
634            })
635            .collect()
636    }
637
638    /* geo rtree */
639
640    /// Writes the provided `rtree` which associates coordinates to documents ids.
641    pub(crate) fn put_geo_rtree(
642        &self,
643        wtxn: &mut RwTxn<'_>,
644        rtree: &RTree<GeoPoint>,
645    ) -> heed::Result<()> {
646        self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
647            wtxn,
648            main_key::GEO_RTREE_KEY,
649            rtree,
650        )
651    }
652
653    /// Delete the `rtree` which associates coordinates to documents ids.
654    pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
655        self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
656    }
657
658    /// Returns the `rtree` which associates coordinates to documents ids.
659    pub fn geo_rtree(&self, rtxn: &RoTxn<'_>) -> Result<Option<RTree<GeoPoint>>> {
660        match self
661            .main
662            .remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
663            .get(rtxn, main_key::GEO_RTREE_KEY)?
664        {
665            Some(rtree) => Ok(Some(rtree)),
666            None => Ok(None),
667        }
668    }
669
670    /* geo faceted */
671
672    /// Writes the documents ids that are faceted with a _geo field.
673    pub(crate) fn put_geo_faceted_documents_ids(
674        &self,
675        wtxn: &mut RwTxn<'_>,
676        docids: &RoaringBitmap,
677    ) -> heed::Result<()> {
678        self.main.remap_types::<Str, RoaringBitmapCodec>().put(
679            wtxn,
680            main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
681            docids,
682        )
683    }
684
685    /// Delete the documents ids that are faceted with a _geo field.
686    pub(crate) fn delete_geo_faceted_documents_ids(
687        &self,
688        wtxn: &mut RwTxn<'_>,
689    ) -> heed::Result<bool> {
690        self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
691    }
692
693    /// Retrieve all the documents ids that are faceted with a _geo field.
694    pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
695        match self
696            .main
697            .remap_types::<Str, RoaringBitmapCodec>()
698            .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
699        {
700            Some(docids) => Ok(docids),
701            None => Ok(RoaringBitmap::new()),
702        }
703    }
704    /* field distribution */
705
706    /// Writes the field distribution which associates every field name with
707    /// the number of times it occurs in the documents.
708    pub(crate) fn put_field_distribution(
709        &self,
710        wtxn: &mut RwTxn<'_>,
711        distribution: &FieldDistribution,
712    ) -> heed::Result<()> {
713        self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
714            wtxn,
715            main_key::FIELD_DISTRIBUTION_KEY,
716            distribution,
717        )
718    }
719
720    /// Returns the field distribution which associates every field name with
721    /// the number of times it occurs in the documents.
722    pub fn field_distribution(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldDistribution> {
723        Ok(self
724            .main
725            .remap_types::<Str, SerdeJson<FieldDistribution>>()
726            .get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
727            .unwrap_or_default())
728    }
729
730    /* displayed fields */
731
732    /// Writes the fields that must be displayed in the defined order.
733    /// There must be not be any duplicate field id.
734    pub(crate) fn put_displayed_fields(
735        &self,
736        wtxn: &mut RwTxn<'_>,
737        fields: &[&str],
738    ) -> heed::Result<()> {
739        self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
740            wtxn,
741            main_key::DISPLAYED_FIELDS_KEY,
742            &fields,
743        )
744    }
745
746    /// Deletes the displayed fields ids, this will make the engine to display
747    /// all the documents attributes in the order of the `FieldsIdsMap`.
748    pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
749        self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
750    }
751
752    /// Returns the displayed fields in the order they were set by the user. If it returns
753    /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
754    pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<Vec<&'t str>>> {
755        self.main
756            .remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
757            .get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
758    }
759
760    /// Identical to `displayed_fields`, but returns the ids instead.
761    pub fn displayed_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Option<Vec<FieldId>>> {
762        match self.displayed_fields(rtxn)? {
763            Some(fields) => {
764                let fields_ids_map = self.fields_ids_map(rtxn)?;
765                let mut fields_ids = Vec::new();
766                for name in fields.into_iter() {
767                    if let Some(field_id) = fields_ids_map.id(name) {
768                        fields_ids.push(field_id);
769                    }
770                }
771                Ok(Some(fields_ids))
772            }
773            None => Ok(None),
774        }
775    }
776
777    /* remove hidden fields */
778    pub fn remove_hidden_fields(
779        &self,
780        rtxn: &RoTxn<'_>,
781        fields: impl IntoIterator<Item = impl AsRef<str>>,
782    ) -> Result<(BTreeSet<String>, bool)> {
783        let mut valid_fields =
784            fields.into_iter().map(|f| f.as_ref().to_string()).collect::<BTreeSet<String>>();
785
786        let fields_len = valid_fields.len();
787
788        if let Some(dn) = self.displayed_fields(rtxn)? {
789            let displayable_names = dn.iter().map(|s| s.to_string()).collect();
790            valid_fields = &valid_fields & &displayable_names;
791        }
792
793        let hidden_fields = fields_len > valid_fields.len();
794        Ok((valid_fields, hidden_fields))
795    }
796
797    /* searchable fields */
798
799    /// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
800    pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
801        &self,
802        wtxn: &mut RwTxn<'_>,
803        user_fields: &[&str],
804        fields_ids_map: &FieldIdMapWithMetadata,
805    ) -> Result<()> {
806        // We can write the user defined searchable fields as-is.
807        self.put_user_defined_searchable_fields(wtxn, user_fields)?;
808
809        let mut weights = FieldidsWeightsMap::default();
810
811        // Now we generate the real searchable fields:
812        let mut real_fields = Vec::new();
813        for (id, field_from_map, metadata) in fields_ids_map.iter() {
814            if let Some(weight) = metadata.searchable_weight() {
815                real_fields.push(field_from_map);
816                weights.insert(id, weight);
817            }
818        }
819
820        self.put_searchable_fields(wtxn, &real_fields)?;
821        self.put_fieldids_weights_map(wtxn, &weights)?;
822
823        Ok(())
824    }
825
826    pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
827        let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
828        let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
829        self.delete_fieldids_weights_map(wtxn)?;
830        Ok(did_delete_searchable || did_delete_user_defined)
831    }
832
833    /// Writes the searchable fields, when this list is specified, only these are indexed.
834    fn put_searchable_fields(&self, wtxn: &mut RwTxn<'_>, fields: &[&str]) -> heed::Result<()> {
835        self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
836            wtxn,
837            main_key::SEARCHABLE_FIELDS_KEY,
838            &fields,
839        )
840    }
841
842    /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
843    fn delete_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
844        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
845    }
846
847    /// Returns the searchable fields, those are the fields that are indexed,
848    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Vec<Cow<'t, str>>> {
849        self.main
850            .remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
851            .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
852            .map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
853            .unwrap_or_else(|| {
854                Ok(self
855                    .fields_ids_map(rtxn)?
856                    .names()
857                    .filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
858                    .map(|field| Cow::Owned(field.to_string()))
859                    .collect())
860            })
861    }
862
863    /// Identical to `searchable_fields`, but returns the ids instead.
864    pub fn searchable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Vec<FieldId>> {
865        let fields = self.searchable_fields(rtxn)?;
866        let fields_ids_map = self.fields_ids_map(rtxn)?;
867        let mut fields_ids = Vec::new();
868        for name in fields {
869            if let Some(field_id) = fields_ids_map.id(&name) {
870                fields_ids.push(field_id);
871            }
872        }
873        Ok(fields_ids)
874    }
875
876    /// Writes the searchable fields, when this list is specified, only these are indexed.
877    pub(crate) fn put_user_defined_searchable_fields(
878        &self,
879        wtxn: &mut RwTxn<'_>,
880        fields: &[&str],
881    ) -> heed::Result<()> {
882        self.main.remap_types::<Str, SerdeBincode<_>>().put(
883            wtxn,
884            main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
885            &fields,
886        )
887    }
888
889    /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
890    pub(crate) fn delete_user_defined_searchable_fields(
891        &self,
892        wtxn: &mut RwTxn<'_>,
893    ) -> heed::Result<bool> {
894        self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
895    }
896
897    /// Returns the user defined searchable fields.
898    pub fn user_defined_searchable_fields<'t>(
899        &self,
900        rtxn: &'t RoTxn<'t>,
901    ) -> heed::Result<Option<Vec<&'t str>>> {
902        self.main
903            .remap_types::<Str, SerdeBincode<Vec<_>>>()
904            .get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
905    }
906
907    /// Identical to `user_defined_searchable_fields`, but returns ids instead.
908    pub fn user_defined_searchable_fields_ids(
909        &self,
910        rtxn: &RoTxn<'_>,
911    ) -> Result<Option<Vec<FieldId>>> {
912        match self.user_defined_searchable_fields(rtxn)? {
913            Some(fields) => {
914                let fields_ids_map = self.fields_ids_map(rtxn)?;
915                let mut fields_ids = Vec::new();
916                for name in fields {
917                    if let Some(field_id) = fields_ids_map.id(name) {
918                        fields_ids.push(field_id);
919                    }
920                }
921                Ok(Some(fields_ids))
922            }
923            None => Ok(None),
924        }
925    }
926
927    /* filterable fields */
928
929    /// Writes the filterable attributes rules in the database.
930    pub(crate) fn put_filterable_attributes_rules(
931        &self,
932        wtxn: &mut RwTxn<'_>,
933        fields: &[FilterableAttributesRule],
934    ) -> heed::Result<()> {
935        self.main.remap_types::<Str, SerdeJson<_>>().put(
936            wtxn,
937            main_key::FILTERABLE_FIELDS_KEY,
938            &fields,
939        )
940    }
941
942    /// Deletes the filterable attributes rules in the database.
943    pub(crate) fn delete_filterable_attributes_rules(
944        &self,
945        wtxn: &mut RwTxn<'_>,
946    ) -> heed::Result<bool> {
947        self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
948    }
949
950    /// Returns the filterable attributes rules.
951    pub fn filterable_attributes_rules(
952        &self,
953        rtxn: &RoTxn<'_>,
954    ) -> heed::Result<Vec<FilterableAttributesRule>> {
955        Ok(self
956            .main
957            .remap_types::<Str, SerdeJson<_>>()
958            .get(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
959            .unwrap_or_default())
960    }
961
962    /* sortable fields */
963
964    /// Writes the sortable fields names in the database.
965    pub(crate) fn put_sortable_fields(
966        &self,
967        wtxn: &mut RwTxn<'_>,
968        fields: &HashSet<String>,
969    ) -> heed::Result<()> {
970        self.main.remap_types::<Str, SerdeJson<_>>().put(
971            wtxn,
972            main_key::SORTABLE_FIELDS_KEY,
973            fields,
974        )
975    }
976
977    /// Deletes the sortable fields ids in the database.
978    pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
979        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
980    }
981
982    /// Returns the sortable fields names.
983    pub fn sortable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
984        Ok(self
985            .main
986            .remap_types::<Str, SerdeJson<_>>()
987            .get(rtxn, main_key::SORTABLE_FIELDS_KEY)?
988            .unwrap_or_default())
989    }
990
991    /// Identical to `sortable_fields`, but returns ids instead.
992    pub fn sortable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
993        let fields = self.sortable_fields(rtxn)?;
994        let fields_ids_map = self.fields_ids_map(rtxn)?;
995        Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
996    }
997
998    /// Returns true if the geo feature is enabled.
999    pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1000        let geo_filter = self.is_geo_filtering_enabled(rtxn)?;
1001        let geo_sortable = self.is_geo_sorting_enabled(rtxn)?;
1002        Ok(geo_filter || geo_sortable)
1003    }
1004
1005    /// Returns true if the geo sorting feature is enabled.
1006    pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1007        let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
1008        Ok(geo_sortable)
1009    }
1010
1011    /// Returns true if the geo filtering feature is enabled.
1012    pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1013        let geo_filter =
1014            self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo());
1015        Ok(geo_filter)
1016    }
1017
1018    pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
1019        let asc_desc_fields = self
1020            .criteria(rtxn)?
1021            .into_iter()
1022            .filter_map(|criterion| match criterion {
1023                Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
1024                _otherwise => None,
1025            })
1026            .collect();
1027
1028        Ok(asc_desc_fields)
1029    }
1030
1031    /* faceted documents ids */
1032
1033    /// Retrieve all the documents which contain this field id set as null
1034    pub fn null_faceted_documents_ids(
1035        &self,
1036        rtxn: &RoTxn<'_>,
1037        field_id: FieldId,
1038    ) -> heed::Result<RoaringBitmap> {
1039        match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
1040            Some(docids) => Ok(docids),
1041            None => Ok(RoaringBitmap::new()),
1042        }
1043    }
1044
1045    /// Retrieve all the documents which contain this field id and that is considered empty
1046    pub fn empty_faceted_documents_ids(
1047        &self,
1048        rtxn: &RoTxn<'_>,
1049        field_id: FieldId,
1050    ) -> heed::Result<RoaringBitmap> {
1051        match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
1052            Some(docids) => Ok(docids),
1053            None => Ok(RoaringBitmap::new()),
1054        }
1055    }
1056
1057    /// Retrieve all the documents which contain this field id
1058    pub fn exists_faceted_documents_ids(
1059        &self,
1060        rtxn: &RoTxn<'_>,
1061        field_id: FieldId,
1062    ) -> heed::Result<RoaringBitmap> {
1063        match self.facet_id_exists_docids.get(rtxn, &field_id)? {
1064            Some(docids) => Ok(docids),
1065            None => Ok(RoaringBitmap::new()),
1066        }
1067    }
1068
1069    /* distinct field */
1070
1071    pub(crate) fn put_distinct_field(
1072        &self,
1073        wtxn: &mut RwTxn<'_>,
1074        distinct_field: &str,
1075    ) -> heed::Result<()> {
1076        self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
1077    }
1078
1079    pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn<'_>) -> heed::Result<Option<&'a str>> {
1080        self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
1081    }
1082
1083    pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1084        self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
1085    }
1086
1087    /* criteria */
1088
1089    pub(crate) fn put_criteria(
1090        &self,
1091        wtxn: &mut RwTxn<'_>,
1092        criteria: &[Criterion],
1093    ) -> heed::Result<()> {
1094        self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
1095            wtxn,
1096            main_key::CRITERIA_KEY,
1097            &criteria,
1098        )
1099    }
1100
1101    pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1102        self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
1103    }
1104
1105    pub fn criteria(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<Criterion>> {
1106        match self
1107            .main
1108            .remap_types::<Str, SerdeJson<Vec<Criterion>>>()
1109            .get(rtxn, main_key::CRITERIA_KEY)?
1110        {
1111            Some(criteria) => Ok(criteria),
1112            None => Ok(default_criteria()),
1113        }
1114    }
1115
1116    /* words fst */
1117
1118    /// Writes the FST which is the words dictionary of the engine.
1119    pub(crate) fn put_words_fst<A: AsRef<[u8]>>(
1120        &self,
1121        wtxn: &mut RwTxn<'_>,
1122        fst: &fst::Set<A>,
1123    ) -> heed::Result<()> {
1124        self.main.remap_types::<Str, Bytes>().put(
1125            wtxn,
1126            main_key::WORDS_FST_KEY,
1127            fst.as_fst().as_bytes(),
1128        )
1129    }
1130
1131    /// Returns the FST which is the words dictionary of the engine.
1132    pub fn words_fst<'t>(&self, rtxn: &'t RoTxn<'_>) -> Result<fst::Set<Cow<'t, [u8]>>> {
1133        match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
1134            Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
1135            None => Ok(fst::Set::default().map_data(Cow::Owned)?),
1136        }
1137    }
1138
1139    /* stop words */
1140
1141    pub(crate) fn put_stop_words<A: AsRef<[u8]>>(
1142        &self,
1143        wtxn: &mut RwTxn<'_>,
1144        fst: &fst::Set<A>,
1145    ) -> heed::Result<()> {
1146        self.main.remap_types::<Str, Bytes>().put(
1147            wtxn,
1148            main_key::STOP_WORDS_KEY,
1149            fst.as_fst().as_bytes(),
1150        )
1151    }
1152
1153    pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1154        self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
1155    }
1156
1157    pub fn stop_words<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<Option<fst::Set<&'t [u8]>>> {
1158        match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
1159            Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
1160            None => Ok(None),
1161        }
1162    }
1163
1164    /* non separator tokens */
1165
1166    pub(crate) fn put_non_separator_tokens(
1167        &self,
1168        wtxn: &mut RwTxn<'_>,
1169        set: &BTreeSet<String>,
1170    ) -> heed::Result<()> {
1171        self.main.remap_types::<Str, SerdeBincode<_>>().put(
1172            wtxn,
1173            main_key::NON_SEPARATOR_TOKENS_KEY,
1174            set,
1175        )
1176    }
1177
1178    pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1179        self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
1180    }
1181
1182    pub fn non_separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1183        Ok(self
1184            .main
1185            .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1186            .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?)
1187    }
1188
1189    /* separator tokens */
1190
1191    pub(crate) fn put_separator_tokens(
1192        &self,
1193        wtxn: &mut RwTxn<'_>,
1194        set: &BTreeSet<String>,
1195    ) -> heed::Result<()> {
1196        self.main.remap_types::<Str, SerdeBincode<_>>().put(
1197            wtxn,
1198            main_key::SEPARATOR_TOKENS_KEY,
1199            set,
1200        )
1201    }
1202
1203    pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1204        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
1205    }
1206
1207    pub fn separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1208        Ok(self
1209            .main
1210            .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1211            .get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
1212    }
1213
1214    /* separators easing method */
1215
1216    pub fn allowed_separators(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1217        let default_separators =
1218            charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
1219        let mut separators: Option<BTreeSet<_>> = None;
1220        if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
1221            separator_tokens.extend(default_separators.clone());
1222            separators = Some(separator_tokens);
1223        }
1224
1225        if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
1226            separators = separators
1227                .or_else(|| Some(default_separators.collect()))
1228                .map(|separators| &separators - &non_separator_tokens);
1229        }
1230
1231        Ok(separators)
1232    }
1233
1234    /* dictionary */
1235
1236    pub(crate) fn put_dictionary(
1237        &self,
1238        wtxn: &mut RwTxn<'_>,
1239        set: &BTreeSet<String>,
1240    ) -> heed::Result<()> {
1241        self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
1242    }
1243
1244    pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1245        self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
1246    }
1247
1248    pub fn dictionary(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1249        Ok(self
1250            .main
1251            .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1252            .get(rtxn, main_key::DICTIONARY_KEY)?)
1253    }
1254
1255    /* synonyms */
1256
1257    pub(crate) fn put_synonyms(
1258        &self,
1259        wtxn: &mut RwTxn<'_>,
1260        synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
1261        user_defined_synonyms: &BTreeMap<String, Vec<String>>,
1262    ) -> heed::Result<()> {
1263        self.main.remap_types::<Str, SerdeBincode<_>>().put(
1264            wtxn,
1265            main_key::SYNONYMS_KEY,
1266            synonyms,
1267        )?;
1268        self.main.remap_types::<Str, SerdeBincode<_>>().put(
1269            wtxn,
1270            main_key::USER_DEFINED_SYNONYMS_KEY,
1271            user_defined_synonyms,
1272        )
1273    }
1274
1275    pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1276        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
1277        self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
1278    }
1279
1280    pub fn user_defined_synonyms(
1281        &self,
1282        rtxn: &RoTxn<'_>,
1283    ) -> heed::Result<BTreeMap<String, Vec<String>>> {
1284        Ok(self
1285            .main
1286            .remap_types::<Str, SerdeBincode<_>>()
1287            .get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
1288            .unwrap_or_default())
1289    }
1290
1291    pub fn synonyms(
1292        &self,
1293        rtxn: &RoTxn<'_>,
1294    ) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
1295        Ok(self
1296            .main
1297            .remap_types::<Str, SerdeBincode<_>>()
1298            .get(rtxn, main_key::SYNONYMS_KEY)?
1299            .unwrap_or_default())
1300    }
1301
1302    pub fn words_synonyms<S: AsRef<str>>(
1303        &self,
1304        rtxn: &RoTxn<'_>,
1305        words: &[S],
1306    ) -> heed::Result<Option<Vec<Vec<String>>>> {
1307        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
1308        Ok(self.synonyms(rtxn)?.remove(&words))
1309    }
1310
1311    /* words prefixes fst */
1312
1313    /// Writes the FST which is the words prefixes dictionary of the engine.
1314    pub(crate) fn put_words_prefixes_fst<A: AsRef<[u8]>>(
1315        &self,
1316        wtxn: &mut RwTxn<'_>,
1317        fst: &fst::Set<A>,
1318    ) -> heed::Result<()> {
1319        self.main.remap_types::<Str, Bytes>().put(
1320            wtxn,
1321            main_key::WORDS_PREFIXES_FST_KEY,
1322            fst.as_fst().as_bytes(),
1323        )
1324    }
1325
1326    pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1327        self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
1328    }
1329
1330    /// Returns the FST which is the words prefixes dictionary of the engine.
1331    pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
1332        match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
1333            Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
1334            None => Ok(fst::Set::default().map_data(Cow::Owned)?),
1335        }
1336    }
1337
1338    /* word documents count */
1339
1340    /// Returns the number of documents ids associated with the given word,
1341    /// it is much faster than deserializing the bitmap and getting the length of it.
1342    pub fn word_documents_count(&self, rtxn: &RoTxn<'_>, word: &str) -> heed::Result<Option<u64>> {
1343        self.word_docids.remap_data_type::<RoaringBitmapLenCodec>().get(rtxn, word)
1344    }
1345
1346    /* documents */
1347
1348    /// Returns a document by using the document id.
1349    pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
1350        self.documents
1351            .get(rtxn, &id)?
1352            .ok_or(UserError::UnknownInternalDocumentId { document_id: id })
1353            .map_err(Into::into)
1354    }
1355
1356    /// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
1357    pub fn iter_documents<'a, 't: 'a>(
1358        &'a self,
1359        rtxn: &'t RoTxn<'t>,
1360        ids: impl IntoIterator<Item = DocumentId> + 'a,
1361    ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
1362        Ok(ids.into_iter().map(move |id| {
1363            let kv = self
1364                .documents
1365                .get(rtxn, &id)?
1366                .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
1367            Ok((id, kv))
1368        }))
1369    }
1370
1371    /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
1372    pub fn documents<'t>(
1373        &self,
1374        rtxn: &'t RoTxn<'t>,
1375        ids: impl IntoIterator<Item = DocumentId>,
1376    ) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
1377        self.iter_documents(rtxn, ids)?.collect()
1378    }
1379
1380    /// Returns an iterator over all the documents in the index.
1381    pub fn all_documents<'a, 't: 'a>(
1382        &'a self,
1383        rtxn: &'t RoTxn<'t>,
1384    ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
1385        self.iter_documents(rtxn, self.documents_ids(rtxn)?)
1386    }
1387
1388    pub fn external_id_of<'a, 't: 'a>(
1389        &'a self,
1390        rtxn: &'t RoTxn<'t>,
1391        ids: impl IntoIterator<Item = DocumentId> + 'a,
1392    ) -> Result<impl IntoIterator<Item = Result<String>> + 'a> {
1393        let fields = self.fields_ids_map(rtxn)?;
1394
1395        // uses precondition "never called on an empty index"
1396        let primary_key = self.primary_key(rtxn)?.ok_or(InternalError::DatabaseMissingEntry {
1397            db_name: db_name::MAIN,
1398            key: Some(main_key::PRIMARY_KEY_KEY),
1399        })?;
1400        let primary_key = PrimaryKey::new(primary_key, &fields).ok_or_else(|| {
1401            InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldName {
1402                field_name: primary_key.to_owned(),
1403                process: "external_id_of",
1404            })
1405        })?;
1406        Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
1407            let (_docid, obkv) = entry?;
1408            match primary_key.document_id(obkv, &fields)? {
1409                Ok(document_id) => Ok(document_id),
1410                Err(_) => Err(InternalError::DocumentsError(
1411                    crate::documents::Error::InvalidDocumentFormat,
1412                )
1413                .into()),
1414            }
1415        }))
1416    }
1417
1418    pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> FacetDistribution<'a> {
1419        FacetDistribution::new(rtxn, self)
1420    }
1421
1422    pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
1423        Search::new(rtxn, self)
1424    }
1425
1426    /// Returns the index creation time.
1427    pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
1428        Ok(self
1429            .main
1430            .remap_types::<Str, SerdeJson<OffsetDateTime>>()
1431            .get(rtxn, main_key::CREATED_AT_KEY)?
1432            .ok_or(InternalError::DatabaseMissingEntry {
1433                db_name: db_name::MAIN,
1434                key: Some(main_key::CREATED_AT_KEY),
1435            })?
1436            .0)
1437    }
1438
1439    /// Returns the index last updated time.
1440    pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
1441        Ok(self
1442            .main
1443            .remap_types::<Str, SerdeJson<OffsetDateTime>>()
1444            .get(rtxn, main_key::UPDATED_AT_KEY)?
1445            .ok_or(InternalError::DatabaseMissingEntry {
1446                db_name: db_name::MAIN,
1447                key: Some(main_key::UPDATED_AT_KEY),
1448            })?
1449            .0)
1450    }
1451
1452    pub(crate) fn set_updated_at(
1453        &self,
1454        wtxn: &mut RwTxn<'_>,
1455        time: &time::OffsetDateTime,
1456    ) -> heed::Result<()> {
1457        self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
1458            wtxn,
1459            main_key::UPDATED_AT_KEY,
1460            &OffsetDateTime(*time),
1461        )
1462    }
1463
1464    pub fn authorize_typos(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
1465        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1466        // identify 0 as being false, and anything else as true. The absence of a value is true,
1467        // because by default, we authorize typos.
1468        match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
1469            Some(0) => Ok(false),
1470            _ => Ok(true),
1471        }
1472    }
1473
1474    pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn<'_>, flag: bool) -> heed::Result<()> {
1475        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1476        // identify 0 as being false, and anything else as true. The absence of a value is true,
1477        // because by default, we authorize typos.
1478        self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
1479
1480        Ok(())
1481    }
1482
1483    pub fn min_word_len_one_typo(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
1484        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1485        // identify 0 as being false, and anything else as true. The absence of a value is true,
1486        // because by default, we authorize typos.
1487        Ok(self
1488            .main
1489            .remap_types::<Str, U8>()
1490            .get(txn, main_key::ONE_TYPO_WORD_LEN)?
1491            .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
1492    }
1493
1494    pub(crate) fn put_min_word_len_one_typo(
1495        &self,
1496        txn: &mut RwTxn<'_>,
1497        val: u8,
1498    ) -> heed::Result<()> {
1499        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1500        // identify 0 as being false, and anything else as true. The absence of a value is true,
1501        // because by default, we authorize typos.
1502        self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
1503        Ok(())
1504    }
1505
1506    pub fn min_word_len_two_typos(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
1507        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1508        // identify 0 as being false, and anything else as true. The absence of a value is true,
1509        // because by default, we authorize typos.
1510        Ok(self
1511            .main
1512            .remap_types::<Str, U8>()
1513            .get(txn, main_key::TWO_TYPOS_WORD_LEN)?
1514            .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
1515    }
1516
1517    pub(crate) fn put_min_word_len_two_typos(
1518        &self,
1519        txn: &mut RwTxn<'_>,
1520        val: u8,
1521    ) -> heed::Result<()> {
1522        // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
1523        // identify 0 as being false, and anything else as true. The absence of a value is true,
1524        // because by default, we authorize typos.
1525        self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
1526        Ok(())
1527    }
1528
1529    /// List the words on which typo are not allowed
1530    pub fn exact_words<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
1531        match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
1532            Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
1533            None => Ok(None),
1534        }
1535    }
1536
1537    pub(crate) fn put_exact_words<A: AsRef<[u8]>>(
1538        &self,
1539        txn: &mut RwTxn<'_>,
1540        words: &fst::Set<A>,
1541    ) -> Result<()> {
1542        self.main.remap_types::<Str, Bytes>().put(
1543            txn,
1544            main_key::EXACT_WORDS,
1545            words.as_fst().as_bytes(),
1546        )?;
1547        Ok(())
1548    }
1549
1550    /// Returns the exact attributes: attributes for which typo is disallowed.
1551    pub fn exact_attributes<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Vec<&'t str>> {
1552        Ok(self
1553            .main
1554            .remap_types::<Str, SerdeBincode<Vec<&str>>>()
1555            .get(txn, main_key::EXACT_ATTRIBUTES)?
1556            .unwrap_or_default())
1557    }
1558
1559    /// Returns the list of exact attributes field ids.
1560    pub fn exact_attributes_ids(&self, txn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
1561        let attrs = self.exact_attributes(txn)?;
1562        let fid_map = self.fields_ids_map(txn)?;
1563        Ok(attrs.iter().filter_map(|attr| fid_map.id(attr)).collect())
1564    }
1565
1566    /// Writes the exact attributes to the database.
1567    pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn<'_>, attrs: &[&str]) -> Result<()> {
1568        self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
1569            txn,
1570            main_key::EXACT_ATTRIBUTES,
1571            &attrs,
1572        )?;
1573        Ok(())
1574    }
1575
1576    /// Clears the exact attributes from the store.
1577    pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1578        self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
1579    }
1580
1581    pub fn max_values_per_facet(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
1582        self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
1583    }
1584
1585    pub(crate) fn put_max_values_per_facet(
1586        &self,
1587        txn: &mut RwTxn<'_>,
1588        val: u64,
1589    ) -> heed::Result<()> {
1590        self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
1591    }
1592
1593    pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1594        self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
1595    }
1596
1597    pub fn sort_facet_values_by(&self, txn: &RoTxn<'_>) -> heed::Result<OrderByMap> {
1598        let orders = self
1599            .main
1600            .remap_types::<Str, SerdeJson<OrderByMap>>()
1601            .get(txn, main_key::SORT_FACET_VALUES_BY)?
1602            .unwrap_or_default();
1603        Ok(orders)
1604    }
1605
1606    pub(crate) fn put_sort_facet_values_by(
1607        &self,
1608        txn: &mut RwTxn<'_>,
1609        val: &OrderByMap,
1610    ) -> heed::Result<()> {
1611        self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
1612    }
1613
1614    pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1615        self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
1616    }
1617
1618    pub fn pagination_max_total_hits(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
1619        self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
1620    }
1621
1622    pub(crate) fn put_pagination_max_total_hits(
1623        &self,
1624        txn: &mut RwTxn<'_>,
1625        val: u64,
1626    ) -> heed::Result<()> {
1627        self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
1628    }
1629
1630    pub(crate) fn delete_pagination_max_total_hits(
1631        &self,
1632        txn: &mut RwTxn<'_>,
1633    ) -> heed::Result<bool> {
1634        self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
1635    }
1636
1637    pub fn proximity_precision(&self, txn: &RoTxn<'_>) -> heed::Result<Option<ProximityPrecision>> {
1638        self.main
1639            .remap_types::<Str, SerdeBincode<ProximityPrecision>>()
1640            .get(txn, main_key::PROXIMITY_PRECISION)
1641    }
1642
1643    pub(crate) fn put_proximity_precision(
1644        &self,
1645        txn: &mut RwTxn<'_>,
1646        val: ProximityPrecision,
1647    ) -> heed::Result<()> {
1648        self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
1649            txn,
1650            main_key::PROXIMITY_PRECISION,
1651            &val,
1652        )
1653    }
1654
1655    pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1656        self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
1657    }
1658
1659    pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result<Option<PrefixSearch>> {
1660        self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().get(txn, main_key::PREFIX_SEARCH)
1661    }
1662
1663    pub(crate) fn put_prefix_search(
1664        &self,
1665        txn: &mut RwTxn<'_>,
1666        val: PrefixSearch,
1667    ) -> heed::Result<()> {
1668        self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().put(
1669            txn,
1670            main_key::PREFIX_SEARCH,
1671            &val,
1672        )
1673    }
1674
1675    pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1676        self.main.remap_key_type::<Str>().delete(txn, main_key::PREFIX_SEARCH)
1677    }
1678
1679    pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
1680        self.main
1681            .remap_types::<Str, SerdeBincode<bool>>()
1682            .get(txn, main_key::FACET_SEARCH)
1683            .map(|v| v.unwrap_or(true))
1684    }
1685
1686    pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> {
1687        self.main.remap_types::<Str, SerdeBincode<bool>>().put(txn, main_key::FACET_SEARCH, &val)
1688    }
1689
1690    pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1691        self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
1692    }
1693
1694    pub fn localized_attributes_rules(
1695        &self,
1696        rtxn: &RoTxn<'_>,
1697    ) -> heed::Result<Option<Vec<LocalizedAttributesRule>>> {
1698        self.main
1699            .remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>()
1700            .get(rtxn, main_key::LOCALIZED_ATTRIBUTES_RULES)
1701    }
1702
1703    pub(crate) fn put_localized_attributes_rules(
1704        &self,
1705        txn: &mut RwTxn<'_>,
1706        val: Vec<LocalizedAttributesRule>,
1707    ) -> heed::Result<()> {
1708        self.main.remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>().put(
1709            txn,
1710            main_key::LOCALIZED_ATTRIBUTES_RULES,
1711            &val,
1712        )
1713    }
1714
1715    pub(crate) fn delete_localized_attributes_rules(
1716        &self,
1717        txn: &mut RwTxn<'_>,
1718    ) -> heed::Result<bool> {
1719        self.main.remap_key_type::<Str>().delete(txn, main_key::LOCALIZED_ATTRIBUTES_RULES)
1720    }
1721
1722    /// Put the embedding configs:
1723    /// 1. The name of the embedder
1724    /// 2. The configuration option for this embedder
1725    /// 3. The list of documents with a user provided embedding
1726    pub(crate) fn put_embedding_configs(
1727        &self,
1728        wtxn: &mut RwTxn<'_>,
1729        configs: Vec<IndexEmbeddingConfig>,
1730    ) -> heed::Result<()> {
1731        self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put(
1732            wtxn,
1733            main_key::EMBEDDING_CONFIGS,
1734            &configs,
1735        )
1736    }
1737
1738    pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1739        self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
1740    }
1741
1742    pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> {
1743        Ok(self
1744            .main
1745            .remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>()
1746            .get(rtxn, main_key::EMBEDDING_CONFIGS)?
1747            .unwrap_or_default())
1748    }
1749
1750    pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
1751        self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
1752    }
1753
1754    pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> {
1755        Ok(self.main.remap_types::<Str, BEU64>().get(rtxn, main_key::SEARCH_CUTOFF)?)
1756    }
1757
1758    pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1759        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
1760    }
1761
1762    pub fn embeddings(
1763        &self,
1764        rtxn: &RoTxn<'_>,
1765        docid: DocumentId,
1766    ) -> Result<BTreeMap<String, Vec<Embedding>>> {
1767        let mut res = BTreeMap::new();
1768        let embedding_configs = self.embedding_configs(rtxn)?;
1769        for config in embedding_configs {
1770            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
1771            let reader =
1772                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
1773            let embeddings = reader.item_vectors(rtxn, docid)?;
1774            res.insert(config.name.to_owned(), embeddings);
1775        }
1776        Ok(res)
1777    }
1778
1779    pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
1780        let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
1781        Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
1782    }
1783
1784    pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
1785        let mut stats = ArroyStats::default();
1786        let embedding_configs = self.embedding_configs(rtxn)?;
1787        for config in embedding_configs {
1788            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
1789            let reader =
1790                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
1791            reader.aggregate_stats(rtxn, &mut stats)?;
1792        }
1793        Ok(stats)
1794    }
1795
1796    /// Check if the word is indexed in the index.
1797    ///
1798    /// This function checks if the word is indexed in the index by looking at the word_docids and exact_word_docids.
1799    ///
1800    /// # Arguments
1801    ///
1802    /// * `rtxn`: The read transaction.
1803    /// * `word`: The word to check.
1804    pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
1805        Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
1806            || self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
1807    }
1808
1809    /// Returns the sizes in bytes of each of the index database at the given rtxn.
1810    pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
1811        let Self {
1812            env: _,
1813            main,
1814            external_documents_ids,
1815            word_docids,
1816            exact_word_docids,
1817            word_prefix_docids,
1818            exact_word_prefix_docids,
1819            word_pair_proximity_docids,
1820            word_position_docids,
1821            word_fid_docids,
1822            word_prefix_position_docids,
1823            word_prefix_fid_docids,
1824            field_id_word_count_docids,
1825            facet_id_f64_docids,
1826            facet_id_string_docids,
1827            facet_id_normalized_string_strings,
1828            facet_id_string_fst,
1829            facet_id_exists_docids,
1830            facet_id_is_null_docids,
1831            facet_id_is_empty_docids,
1832            field_id_docid_facet_f64s,
1833            field_id_docid_facet_strings,
1834            vector_arroy,
1835            embedder_category_id,
1836            documents,
1837        } = self;
1838
1839        fn compute_size(stats: DatabaseStat) -> usize {
1840            let DatabaseStat {
1841                page_size,
1842                depth: _,
1843                branch_pages,
1844                leaf_pages,
1845                overflow_pages,
1846                entries: _,
1847            } = stats;
1848
1849            (branch_pages + leaf_pages + overflow_pages) * page_size as usize
1850        }
1851
1852        let mut sizes = IndexMap::new();
1853        sizes.insert("main", main.stat(rtxn).map(compute_size)?);
1854        sizes
1855            .insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
1856        sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
1857        sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
1858        sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
1859        sizes.insert(
1860            "exact_word_prefix_docids",
1861            exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
1862        );
1863        sizes.insert(
1864            "word_pair_proximity_docids",
1865            word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
1866        );
1867        sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
1868        sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
1869        sizes.insert(
1870            "word_prefix_position_docids",
1871            word_prefix_position_docids.stat(rtxn).map(compute_size)?,
1872        );
1873        sizes
1874            .insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
1875        sizes.insert(
1876            "field_id_word_count_docids",
1877            field_id_word_count_docids.stat(rtxn).map(compute_size)?,
1878        );
1879        sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
1880        sizes
1881            .insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
1882        sizes.insert(
1883            "facet_id_normalized_string_strings",
1884            facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
1885        );
1886        sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
1887        sizes
1888            .insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
1889        sizes.insert(
1890            "facet_id_is_null_docids",
1891            facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
1892        );
1893        sizes.insert(
1894            "facet_id_is_empty_docids",
1895            facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
1896        );
1897        sizes.insert(
1898            "field_id_docid_facet_f64s",
1899            field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
1900        );
1901        sizes.insert(
1902            "field_id_docid_facet_strings",
1903            field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
1904        );
1905        sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
1906        sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
1907        sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
1908
1909        Ok(sizes)
1910    }
1911}
1912
1913#[derive(Debug, Deserialize, Serialize)]
1914pub struct IndexEmbeddingConfig {
1915    pub name: String,
1916    pub config: EmbeddingConfig,
1917    pub user_provided: RoaringBitmap,
1918}
1919
1920#[derive(Debug, Deserialize, Serialize)]
1921pub struct PrefixSettings {
1922    pub prefix_count_threshold: usize,
1923    pub max_prefix_length: usize,
1924    pub compute_prefixes: PrefixSearch,
1925}
1926
1927#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
1928#[serde(rename_all = "camelCase")]
1929pub enum PrefixSearch {
1930    #[default]
1931    IndexingTime,
1932    Disabled,
1933}
1934
/// The outcome of a rollback attempt.
#[derive(Debug)]
pub enum RollbackOutcome {
    /// The rollback cannot reach the requested version.
    VersionMismatch {
        /// The version that was asked for, as `(major, minor, patch)`.
        requested_version: (u32, u32, u32),
        /// The version a rollback could reach, when it is known.
        rollback_version: Option<(u32, u32, u32)>,
    },
    /// The rollback completed.
    Rollback,
    /// No rollback was necessary.
    NoRollback,
}

impl RollbackOutcome {
    /// Returns `true` for every outcome except a version mismatch.
    pub fn succeeded(&self) -> bool {
        match self {
            RollbackOutcome::Rollback | RollbackOutcome::NoRollback => true,
            RollbackOutcome::VersionMismatch { .. } => false,
        }
    }
}

impl std::fmt::Display for RollbackOutcome {
    /// Writes a human-readable description of the outcome.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RollbackOutcome::VersionMismatch {
                requested_version: (req_major, req_minor, req_patch),
                rollback_version: Some((major, minor, patch)),
            } => write!(
                f,
                "cannot rollback to the requested version\n  - note: requested version is v{}.{}.{}\n  - note: only possible to rollback to v{}.{}.{}",
                req_major, req_minor, req_patch, major, minor, patch
            ),
            RollbackOutcome::VersionMismatch {
                requested_version: (req_major, req_minor, req_patch),
                rollback_version: None,
            } => write!(
                f,
                "cannot rollback to the requested version\n  - note: requested version is v{}.{}.{}\n  - note: only possible to rollback to an unknown version",
                req_major, req_minor, req_patch
            ),
            RollbackOutcome::Rollback => f.write_str("rollback complete"),
            RollbackOutcome::NoRollback => f.write_str("no rollback necessary"),
        }
    }
}
1963
1964#[derive(Serialize, Deserialize)]
1965#[serde(transparent)]
1966struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);
1967
1968#[cfg(test)]
1969#[path = "test_index.rs"]
1970pub(crate) mod tests;