1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
3use std::fs::File;
4use std::path::Path;
5
6use heed::types::*;
7use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
8use indexmap::IndexMap;
9use roaring::RoaringBitmap;
10use rstar::RTree;
11use serde::{Deserialize, Serialize};
12
13use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
14use crate::database_stats::DatabaseStats;
15use crate::documents::PrimaryKey;
16use crate::error::{InternalError, UserError};
17use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
18use crate::fields_ids_map::FieldsIdsMap;
19use crate::heed_codec::facet::{
20 FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
21 FieldIdCodec, OrderedF64Codec,
22};
23use crate::heed_codec::version::VersionCodec;
24use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
25use crate::order_by_map::OrderByMap;
26use crate::proximity::ProximityPrecision;
27use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
28use crate::{
29 default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
30 FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
31 FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
32 Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
33 BEU64,
34};
35
/// Default minimum word length associated with the "one typo" setting.
// NOTE(review): presumably the word length from which one typo is tolerated — confirm against
// the typo-tolerance code, which is not visible here.
36pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
/// Default minimum word length associated with the "two typos" setting.
// NOTE(review): presumably the word length from which two typos are tolerated — confirm.
37pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
38
/// Keys of the entries stored in the `main` database of an [`Index`].
///
/// Each constant is used as the (string-encoded) key of a `get`/`put`/`delete` on
/// `Index::main`, the value codec being chosen at each call site.
///
/// These strings are persisted keys: the naming is not uniform (most use hyphens,
/// the more recent-looking ones use underscores) but changing any of them would
/// make previously written entries unreachable.
39pub mod main_key {
40 pub const VERSION_KEY: &str = "version";
41 pub const CRITERIA_KEY: &str = "criteria";
42 pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
43 pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
44 pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
45 pub const HIDDEN_FACETED_FIELDS_KEY: &str = "hidden-faceted-fields";
46 pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
47 pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
48 pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
49 pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
50 pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
51 pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
52 pub const GEO_RTREE_KEY: &str = "geo-rtree";
53 pub const PRIMARY_KEY_KEY: &str = "primary-key";
54 pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
55 pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
56 pub const STOP_WORDS_KEY: &str = "stop-words";
57 pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
58 pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
59 pub const DICTIONARY_KEY: &str = "dictionary";
60 pub const SYNONYMS_KEY: &str = "synonyms";
61 pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
62 pub const WORDS_FST_KEY: &str = "words-fst";
63 pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
64 pub const CREATED_AT_KEY: &str = "created-at";
65 pub const UPDATED_AT_KEY: &str = "updated-at";
66 pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
67 pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
68 pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
69 pub const EXACT_WORDS: &str = "exact-words";
70 pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
71 pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
72 pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
73 pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
74 pub const PROXIMITY_PRECISION: &str = "proximity-precision";
75 pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
76 pub const SEARCH_CUTOFF: &str = "search_cutoff";
77 pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
78 pub const FACET_SEARCH: &str = "facet_search";
79 pub const PREFIX_SEARCH: &str = "prefix_search";
80 pub const DOCUMENTS_STATS: &str = "documents_stats";
81 pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms";
82}
83
/// Names of the named databases living in the index environment.
///
/// `Index::new_with_creation_dates` passes these to `create_database` /
/// `database_options().name(..)`. The module declares 25 names; in the code visible
/// here `DOCID_WORD_POSITIONS` is declared but not opened by that constructor.
///
/// These strings identify on-disk databases and must stay stable.
84pub mod db_name {
85 pub const MAIN: &str = "main";
86 pub const WORD_DOCIDS: &str = "word-docids";
87 pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
88 pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
89 pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
90 pub const EXTERNAL_DOCUMENTS_IDS: &str = "external-documents-ids";
91 pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
92 pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
93 pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
94 pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
95 pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
96 pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
97 pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
98 pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
99 pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
100 pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
101 pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
102 pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
103 pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings";
104 pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
105 pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
106 pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
107 pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
108 pub const VECTOR_ARROY: &str = "vector-arroy";
109 pub const DOCUMENTS: &str = "documents";
110}
/// Value handed to `EnvOpenOptions::max_dbs` before the environment is opened.
///
/// It currently equals the number of names declared in [`db_name`] (25); keep the
/// two in sync when a database is added.
111const NUMBER_OF_DBS: u32 = 25;
112
/// Handle on an index: the heed environment plus one typed handle per named database.
///
/// The struct is `Clone`; every field is either the environment handle or a
/// database handle, all created in `Index::new_with_creation_dates`.
// NOTE(review): the per-field descriptions below are derived from the field names,
// the `db_name` constants and the key/value codecs only — the exact semantics of each
// database are defined by the indexing code, which is not visible here.
113#[derive(Clone)]
114pub struct Index {
    /// The heed environment every database below belongs to.
115 pub(crate) env: heed::Env<WithoutTls>,
117
    /// Untyped database holding heterogeneous entries, addressed with the keys of
    /// [`main_key`] and remapped to a concrete codec at each access.
118 pub(crate) main: Database<Unspecified, Unspecified>,
120
    /// String keys to big-endian `u32` values (presumably external → internal document id).
121 pub external_documents_ids: Database<Str, BEU32>,
123
    /// Word-like string keys to compressed roaring bitmaps of document ids.
124 pub word_docids: Database<Str, CboRoaringBitmapCodec>,
126
127 pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
129
130 pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
132
133 pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
135
    /// Keyed by a `(u8, str, str)` triple (see `U8StrStrCodec`).
136 pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
138
    /// The four databases below are keyed by a `(str, u16)` pair (see `StrBEU16Codec`).
139 pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
141 pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
143
144 pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
146 pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
148 pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
150
    /// Per-field-id bitmaps, read by `exists_faceted_documents_ids`,
    /// `null_faceted_documents_ids` and `empty_faceted_documents_ids`.
151 pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
153 pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
155 pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
157
    /// Facet databases keyed by facet group keys (numbers, resp. strings).
158 pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
160 pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
162 pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
164 pub facet_id_string_fst: Database<BEU16, FstSetCodec>,
166
167 pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
169 pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
171
    /// Embedder name (string) to a `u8` category id.
172 pub embedder_category_id: Database<Str, U8>,
    /// Database handed to `arroy` for vector storage.
174 pub vector_arroy: arroy::Database<Unspecified>,
176
    /// Big-endian `u32` keys to obkv-encoded values (presumably internal id → document).
177 pub(crate) documents: Database<BEU32, ObkvCodec>,
179}
180
181impl Index {
/// Opens the environment at `path`, creates/opens every named database and
/// returns the resulting [`Index`].
///
/// * `options` — heed open options; `max_dbs` is overridden with `NUMBER_OF_DBS`.
/// * `created_at` / `updated_at` — dates stored in the `main` database, but only
///   when no creation date is stored yet (see `set_creation_dates`).
/// * `creation` — when `true` and no version entry exists, the current crate
///   version (`constants::VERSION_*`) is written in the same transaction that
///   creates the databases.
///
/// # Errors
///
/// Propagates any heed error raised while opening the environment, creating a
/// database, or committing.
182 pub fn new_with_creation_dates<P: AsRef<Path>>(
183 mut options: heed::EnvOpenOptions<WithoutTls>,
184 path: P,
185 created_at: time::OffsetDateTime,
186 updated_at: time::OffsetDateTime,
187 creation: bool,
188 ) -> Result<Index> {
189 use db_name::*;
190
191 options.max_dbs(NUMBER_OF_DBS);
192
        // NOTE(review): `EnvOpenOptions::open` is `unsafe`; the invariant that makes this
        // call sound is not stated anywhere in the visible code — confirm and record it
        // in a `// SAFETY:` comment.
193 let env = unsafe { options.open(path) }?;
        // All databases are created inside one write transaction, committed below.
194 let mut wtxn = env.write_txn()?;
195 let main = env.database_options().name(MAIN).create(&mut wtxn)?;
196 let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
197 let external_documents_ids =
198 env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
199 let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
200 let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
201 let exact_word_prefix_docids =
202 env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
203 let word_pair_proximity_docids =
204 env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
205 let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
206 let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
207 let field_id_word_count_docids =
208 env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
209 let word_prefix_position_docids =
210 env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
211 let word_prefix_fid_docids =
212 env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
213 let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
214 let facet_id_string_docids =
215 env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
216 let facet_id_normalized_string_strings =
217 env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?;
218 let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
219 let facet_id_exists_docids =
220 env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
221 let facet_id_is_null_docids =
222 env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
223 let facet_id_is_empty_docids =
224 env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
225 let field_id_docid_facet_f64s =
226 env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
227 let field_id_docid_facet_strings =
228 env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
229 let embedder_category_id =
231 env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
232 let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
233
234 let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
235
236 let this = Index {
237 env: env.clone(),
238 main,
239 external_documents_ids,
240 word_docids,
241 exact_word_docids,
242 word_prefix_docids,
243 exact_word_prefix_docids,
244 word_pair_proximity_docids,
245 word_position_docids,
246 word_fid_docids,
247 word_prefix_position_docids,
248 word_prefix_fid_docids,
249 field_id_word_count_docids,
250 facet_id_f64_docids,
251 facet_id_string_docids,
252 facet_id_normalized_string_strings,
253 facet_id_string_fst,
254 facet_id_exists_docids,
255 facet_id_is_null_docids,
256 facet_id_is_empty_docids,
257 field_id_docid_facet_f64s,
258 field_id_docid_facet_strings,
259 vector_arroy,
260 embedder_category_id,
261 documents,
262 };
        // Only stamp the version on a freshly created index: an existing version
        // entry is never overwritten here, and nothing is written when `creation`
        // is false.
263 if this.get_version(&wtxn)?.is_none() && creation {
264 this.put_version(
265 &mut wtxn,
266 (constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
267 )?;
268 }
269 wtxn.commit()?;
270
        // The dates are written in a second, separate write transaction.
271 Index::set_creation_dates(&this.env, this.main, created_at, updated_at)?;
272
273 Ok(this)
274 }
275
276 pub fn new<P: AsRef<Path>>(
277 options: heed::EnvOpenOptions<WithoutTls>,
278 path: P,
279 creation: bool,
280 ) -> Result<Index> {
281 let now = time::OffsetDateTime::now_utc();
282 Self::new_with_creation_dates(options, path, now, now, creation)
283 }
284
/// Attempts to roll the index stored at `path` back to its previous snapshot.
///
/// Steps, as implemented below:
/// 1. Open the environment normally and read the stored version. If it already
///    equals `requested_version`, return `RollbackOutcome::NoRollback`.
/// 2. Otherwise close that environment and reopen it with the `PREV_SNAPSHOT` flag.
/// 3. Read the version visible through that snapshot. If it is missing or differs
///    from `requested_version`, return `RollbackOutcome::VersionMismatch` without
///    committing anything.
/// 4. Otherwise refresh the `updated-at` entry, commit, and return
///    `RollbackOutcome::Rollback`.
///
/// # Errors
///
/// Returns `InternalError::DatabaseMissingEntry` when the `main` database cannot be
/// opened, and propagates heed errors.
285 pub fn rollback<P: AsRef<Path>>(
287 mut options: heed::EnvOpenOptions<WithoutTls>,
288 path: P,
289 requested_version: (u32, u32, u32),
290 ) -> Result<RollbackOutcome> {
291 options.max_dbs(NUMBER_OF_DBS);
292
        // NOTE(review): `open` is `unsafe`; the invariant relied upon is not visible
        // here — confirm and document it with a `// SAFETY:` comment.
293 let env = unsafe { options.open(path.as_ref()) }?;
295 let rtxn = env.read_txn()?;
296 let Some(main) = env.database_options().name(db_name::MAIN).open(&rtxn)? else {
297 return Err(crate::Error::InternalError(crate::InternalError::DatabaseMissingEntry {
298 db_name: db_name::MAIN,
299 key: None,
300 }));
301 };
302 let rollback_version =
303 main.remap_types::<Str, VersionCodec>().get(&rtxn, main_key::VERSION_KEY)?;
        // Already at the requested version: nothing to roll back.
304 if rollback_version == Some(requested_version) {
305 return Ok(RollbackOutcome::NoRollback);
306 }
307
        // The transaction and the environment are dropped explicitly before the
        // environment is reopened with different flags.
308 drop(rtxn);
310 drop(env);
311
        // NOTE(review): setting flags is `unsafe` in heed; no SAFETY justification is
        // visible here — confirm.
312 unsafe { options.flags(heed::EnvFlags::PREV_SNAPSHOT) };
314 let env = unsafe { options.open(path) }?;
315 let mut wtxn = env.write_txn()?;
316 let Some(main) = env.database_options().name(db_name::MAIN).open(&wtxn)? else {
317 return Err(crate::Error::InternalError(crate::InternalError::DatabaseMissingEntry {
318 db_name: db_name::MAIN,
319 key: None,
320 }));
321 };
322
323 let main = main.remap_key_type::<Str>();
324
        // Version as seen through the previous snapshot.
325 let Some(rollback_version) =
326 main.remap_data_type::<VersionCodec>().get(&wtxn, main_key::VERSION_KEY)?
327 else {
328 return Ok(RollbackOutcome::VersionMismatch {
329 requested_version,
330 rollback_version: None,
331 });
332 };
333
        // Returning here drops `wtxn` without committing.
334 if requested_version != rollback_version {
335 return Ok(RollbackOutcome::VersionMismatch {
336 requested_version,
337 rollback_version: Some(rollback_version),
338 });
339 }
340
341 let now = time::OffsetDateTime::now_utc();
344 main.remap_data_type::<SerdeJson<OffsetDateTime>>().put(
345 &mut wtxn,
346 main_key::UPDATED_AT_KEY,
347 &OffsetDateTime(now),
348 )?;
349
350 wtxn.commit()?;
351
352 Ok(RollbackOutcome::Rollback)
353 }
354
355 fn set_creation_dates(
356 env: &heed::Env<WithoutTls>,
357 main: Database<Unspecified, Unspecified>,
358 created_at: time::OffsetDateTime,
359 updated_at: time::OffsetDateTime,
360 ) -> heed::Result<()> {
361 let mut txn = env.write_txn()?;
362 let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
364 if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
365 main.put(&mut txn, main_key::UPDATED_AT_KEY, &OffsetDateTime(updated_at))?;
366 main.put(&mut txn, main_key::CREATED_AT_KEY, &OffsetDateTime(created_at))?;
367 txn.commit()?;
368 }
369 Ok(())
370 }
371
372 pub fn write_txn(&self) -> heed::Result<RwTxn<'_>> {
374 self.env.write_txn()
375 }
376
377 pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> {
379 self.env.read_txn()
380 }
381
382 pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> {
384 self.env.clone().static_read_txn()
385 }
386
387 pub fn path(&self) -> &Path {
389 self.env.path()
390 }
391
392 pub fn used_size(&self) -> Result<u64> {
394 Ok(self.env.non_free_pages_size()?)
395 }
396
397 pub fn on_disk_size(&self) -> Result<u64> {
399 Ok(self.env.real_disk_size()?)
400 }
401
402 pub fn map_size(&self) -> usize {
409 self.env.info().map_size
410 }
411
412 pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
413 self.env.copy_to_file(file, option).map_err(Into::into)
414 }
415
416 pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
417 self.env.copy_to_path(path, option).map_err(Into::into)
418 }
419
420 pub fn prepare_for_closing(self) -> heed::EnvClosingEvent {
426 self.env.prepare_for_closing()
427 }
428
429 pub(crate) fn put_version(
433 &self,
434 wtxn: &mut RwTxn<'_>,
435 (major, minor, patch): (u32, u32, u32),
436 ) -> heed::Result<()> {
437 self.main.remap_types::<Str, VersionCodec>().put(
438 wtxn,
439 main_key::VERSION_KEY,
440 &(major, minor, patch),
441 )
442 }
443
444 pub fn get_version(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<(u32, u32, u32)>> {
446 self.main.remap_types::<Str, VersionCodec>().get(rtxn, main_key::VERSION_KEY)
447 }
448
449 pub(crate) fn put_documents_ids(
453 &self,
454 wtxn: &mut RwTxn<'_>,
455 docids: &RoaringBitmap,
456 ) -> heed::Result<()> {
457 self.main.remap_types::<Str, RoaringBitmapCodec>().put(
458 wtxn,
459 main_key::DOCUMENTS_IDS_KEY,
460 docids,
461 )
462 }
463
464 pub fn documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
466 Ok(self
467 .main
468 .remap_types::<Str, RoaringBitmapCodec>()
469 .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
470 .unwrap_or_default())
471 }
472
473 pub fn number_of_documents(&self, rtxn: &RoTxn<'_>) -> Result<u64> {
475 let count = self
476 .main
477 .remap_types::<Str, RoaringBitmapLenCodec>()
478 .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
479 Ok(count.unwrap_or_default())
480 }
481
482 pub fn put_documents_stats(
484 &self,
485 wtxn: &mut RwTxn<'_>,
486 stats: DatabaseStats,
487 ) -> heed::Result<()> {
488 self.main.remap_types::<Str, SerdeJson<DatabaseStats>>().put(
489 wtxn,
490 main_key::DOCUMENTS_STATS,
491 &stats,
492 )
493 }
494
495 pub fn documents_stats(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<DatabaseStats>> {
497 self.main
498 .remap_types::<Str, SerdeJson<DatabaseStats>>()
499 .get(rtxn, main_key::DOCUMENTS_STATS)
500 }
501
502 pub(crate) fn put_primary_key(
506 &self,
507 wtxn: &mut RwTxn<'_>,
508 primary_key: &str,
509 ) -> heed::Result<()> {
510 self.set_updated_at(wtxn, &time::OffsetDateTime::now_utc())?;
511 self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
512 }
513
514 pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
516 self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
517 }
518
519 pub fn primary_key<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<&'t str>> {
521 self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
522 }
523
524 pub fn external_documents_ids(&self) -> ExternalDocumentsIds {
529 ExternalDocumentsIds::new(self.external_documents_ids)
530 }
531
532 pub(crate) fn put_fields_ids_map(
537 &self,
538 wtxn: &mut RwTxn<'_>,
539 map: &FieldsIdsMap,
540 ) -> heed::Result<()> {
541 self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
542 wtxn,
543 main_key::FIELDS_IDS_MAP_KEY,
544 map,
545 )
546 }
547
548 pub fn fields_ids_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldsIdsMap> {
551 Ok(self
552 .main
553 .remap_types::<Str, SerdeJson<FieldsIdsMap>>()
554 .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
555 .unwrap_or_default())
556 }
557
558 pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
562 Ok(FieldIdMapWithMetadata::new(
563 self.fields_ids_map(rtxn)?,
564 MetadataBuilder::from_index(self, rtxn)?,
565 ))
566 }
567
568 pub(crate) fn put_fieldids_weights_map(
574 &self,
575 wtxn: &mut RwTxn<'_>,
576 map: &FieldidsWeightsMap,
577 ) -> heed::Result<()> {
578 self.main.remap_types::<Str, SerdeJson<_>>().put(
579 wtxn,
580 main_key::FIELDIDS_WEIGHTS_MAP_KEY,
581 map,
582 )
583 }
584
585 pub fn fieldids_weights_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldidsWeightsMap> {
587 self.main
588 .remap_types::<Str, SerdeJson<_>>()
589 .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
590 .map(Ok)
591 .unwrap_or_else(|| {
592 Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
593 &self.fields_ids_map(rtxn)?,
594 ))
595 })
596 }
597
598 pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
600 self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
601 }
602
603 pub fn max_searchable_attribute_weight(&self, rtxn: &RoTxn<'_>) -> Result<Option<Weight>> {
604 let user_defined_searchable_fields = self.user_defined_searchable_fields(rtxn)?;
605 if let Some(user_defined_searchable_fields) = user_defined_searchable_fields {
606 if !user_defined_searchable_fields.contains(&"*") {
607 return Ok(Some(user_defined_searchable_fields.len().saturating_sub(1) as Weight));
608 }
609 }
610
611 Ok(None)
612 }
613
614 pub fn searchable_fields_and_weights<'a>(
615 &self,
616 rtxn: &'a RoTxn<'a>,
617 ) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
618 let fid_map = self.fields_ids_map(rtxn)?;
619 let weight_map = self.fieldids_weights_map(rtxn)?;
620 let searchable = self.searchable_fields(rtxn)?;
621
622 searchable
623 .into_iter()
624 .map(|field| -> Result<_> {
625 let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
626 field_name: field.to_string(),
627 process: "searchable_fields_and_weights",
628 })?;
629 let weight = weight_map
630 .weight(fid)
631 .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
632
633 Ok((field, fid, weight))
634 })
635 .collect()
636 }
637
638 pub(crate) fn put_geo_rtree(
642 &self,
643 wtxn: &mut RwTxn<'_>,
644 rtree: &RTree<GeoPoint>,
645 ) -> heed::Result<()> {
646 self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
647 wtxn,
648 main_key::GEO_RTREE_KEY,
649 rtree,
650 )
651 }
652
653 pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
655 self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
656 }
657
658 pub fn geo_rtree(&self, rtxn: &RoTxn<'_>) -> Result<Option<RTree<GeoPoint>>> {
660 match self
661 .main
662 .remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
663 .get(rtxn, main_key::GEO_RTREE_KEY)?
664 {
665 Some(rtree) => Ok(Some(rtree)),
666 None => Ok(None),
667 }
668 }
669
670 pub(crate) fn put_geo_faceted_documents_ids(
674 &self,
675 wtxn: &mut RwTxn<'_>,
676 docids: &RoaringBitmap,
677 ) -> heed::Result<()> {
678 self.main.remap_types::<Str, RoaringBitmapCodec>().put(
679 wtxn,
680 main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
681 docids,
682 )
683 }
684
685 pub(crate) fn delete_geo_faceted_documents_ids(
687 &self,
688 wtxn: &mut RwTxn<'_>,
689 ) -> heed::Result<bool> {
690 self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
691 }
692
693 pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
695 match self
696 .main
697 .remap_types::<Str, RoaringBitmapCodec>()
698 .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
699 {
700 Some(docids) => Ok(docids),
701 None => Ok(RoaringBitmap::new()),
702 }
703 }
704 pub(crate) fn put_field_distribution(
709 &self,
710 wtxn: &mut RwTxn<'_>,
711 distribution: &FieldDistribution,
712 ) -> heed::Result<()> {
713 self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
714 wtxn,
715 main_key::FIELD_DISTRIBUTION_KEY,
716 distribution,
717 )
718 }
719
720 pub fn field_distribution(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldDistribution> {
723 Ok(self
724 .main
725 .remap_types::<Str, SerdeJson<FieldDistribution>>()
726 .get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
727 .unwrap_or_default())
728 }
729
730 pub(crate) fn put_displayed_fields(
735 &self,
736 wtxn: &mut RwTxn<'_>,
737 fields: &[&str],
738 ) -> heed::Result<()> {
739 self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
740 wtxn,
741 main_key::DISPLAYED_FIELDS_KEY,
742 &fields,
743 )
744 }
745
746 pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
749 self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
750 }
751
752 pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<Vec<&'t str>>> {
755 self.main
756 .remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
757 .get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
758 }
759
760 pub fn displayed_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Option<Vec<FieldId>>> {
762 match self.displayed_fields(rtxn)? {
763 Some(fields) => {
764 let fields_ids_map = self.fields_ids_map(rtxn)?;
765 let mut fields_ids = Vec::new();
766 for name in fields.into_iter() {
767 if let Some(field_id) = fields_ids_map.id(name) {
768 fields_ids.push(field_id);
769 }
770 }
771 Ok(Some(fields_ids))
772 }
773 None => Ok(None),
774 }
775 }
776
777 pub fn remove_hidden_fields(
779 &self,
780 rtxn: &RoTxn<'_>,
781 fields: impl IntoIterator<Item = impl AsRef<str>>,
782 ) -> Result<(BTreeSet<String>, bool)> {
783 let mut valid_fields =
784 fields.into_iter().map(|f| f.as_ref().to_string()).collect::<BTreeSet<String>>();
785
786 let fields_len = valid_fields.len();
787
788 if let Some(dn) = self.displayed_fields(rtxn)? {
789 let displayable_names = dn.iter().map(|s| s.to_string()).collect();
790 valid_fields = &valid_fields & &displayable_names;
791 }
792
793 let hidden_fields = fields_len > valid_fields.len();
794 Ok((valid_fields, hidden_fields))
795 }
796
797 pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
801 &self,
802 wtxn: &mut RwTxn<'_>,
803 user_fields: &[&str],
804 fields_ids_map: &FieldIdMapWithMetadata,
805 ) -> Result<()> {
806 self.put_user_defined_searchable_fields(wtxn, user_fields)?;
808
809 let mut weights = FieldidsWeightsMap::default();
810
811 let mut real_fields = Vec::new();
813 for (id, field_from_map, metadata) in fields_ids_map.iter() {
814 if let Some(weight) = metadata.searchable_weight() {
815 real_fields.push(field_from_map);
816 weights.insert(id, weight);
817 }
818 }
819
820 self.put_searchable_fields(wtxn, &real_fields)?;
821 self.put_fieldids_weights_map(wtxn, &weights)?;
822
823 Ok(())
824 }
825
826 pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
827 let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
828 let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
829 self.delete_fieldids_weights_map(wtxn)?;
830 Ok(did_delete_searchable || did_delete_user_defined)
831 }
832
833 fn put_searchable_fields(&self, wtxn: &mut RwTxn<'_>, fields: &[&str]) -> heed::Result<()> {
835 self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
836 wtxn,
837 main_key::SEARCHABLE_FIELDS_KEY,
838 &fields,
839 )
840 }
841
842 fn delete_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
844 self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
845 }
846
847 pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Vec<Cow<'t, str>>> {
849 self.main
850 .remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
851 .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
852 .map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
853 .unwrap_or_else(|| {
854 Ok(self
855 .fields_ids_map(rtxn)?
856 .names()
857 .filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
858 .map(|field| Cow::Owned(field.to_string()))
859 .collect())
860 })
861 }
862
863 pub fn searchable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Vec<FieldId>> {
865 let fields = self.searchable_fields(rtxn)?;
866 let fields_ids_map = self.fields_ids_map(rtxn)?;
867 let mut fields_ids = Vec::new();
868 for name in fields {
869 if let Some(field_id) = fields_ids_map.id(&name) {
870 fields_ids.push(field_id);
871 }
872 }
873 Ok(fields_ids)
874 }
875
876 pub(crate) fn put_user_defined_searchable_fields(
878 &self,
879 wtxn: &mut RwTxn<'_>,
880 fields: &[&str],
881 ) -> heed::Result<()> {
882 self.main.remap_types::<Str, SerdeBincode<_>>().put(
883 wtxn,
884 main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
885 &fields,
886 )
887 }
888
889 pub(crate) fn delete_user_defined_searchable_fields(
891 &self,
892 wtxn: &mut RwTxn<'_>,
893 ) -> heed::Result<bool> {
894 self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
895 }
896
897 pub fn user_defined_searchable_fields<'t>(
899 &self,
900 rtxn: &'t RoTxn<'t>,
901 ) -> heed::Result<Option<Vec<&'t str>>> {
902 self.main
903 .remap_types::<Str, SerdeBincode<Vec<_>>>()
904 .get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
905 }
906
907 pub fn user_defined_searchable_fields_ids(
909 &self,
910 rtxn: &RoTxn<'_>,
911 ) -> Result<Option<Vec<FieldId>>> {
912 match self.user_defined_searchable_fields(rtxn)? {
913 Some(fields) => {
914 let fields_ids_map = self.fields_ids_map(rtxn)?;
915 let mut fields_ids = Vec::new();
916 for name in fields {
917 if let Some(field_id) = fields_ids_map.id(name) {
918 fields_ids.push(field_id);
919 }
920 }
921 Ok(Some(fields_ids))
922 }
923 None => Ok(None),
924 }
925 }
926
927 pub(crate) fn put_filterable_attributes_rules(
931 &self,
932 wtxn: &mut RwTxn<'_>,
933 fields: &[FilterableAttributesRule],
934 ) -> heed::Result<()> {
935 self.main.remap_types::<Str, SerdeJson<_>>().put(
936 wtxn,
937 main_key::FILTERABLE_FIELDS_KEY,
938 &fields,
939 )
940 }
941
942 pub(crate) fn delete_filterable_attributes_rules(
944 &self,
945 wtxn: &mut RwTxn<'_>,
946 ) -> heed::Result<bool> {
947 self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
948 }
949
950 pub fn filterable_attributes_rules(
952 &self,
953 rtxn: &RoTxn<'_>,
954 ) -> heed::Result<Vec<FilterableAttributesRule>> {
955 Ok(self
956 .main
957 .remap_types::<Str, SerdeJson<_>>()
958 .get(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
959 .unwrap_or_default())
960 }
961
962 pub(crate) fn put_sortable_fields(
966 &self,
967 wtxn: &mut RwTxn<'_>,
968 fields: &HashSet<String>,
969 ) -> heed::Result<()> {
970 self.main.remap_types::<Str, SerdeJson<_>>().put(
971 wtxn,
972 main_key::SORTABLE_FIELDS_KEY,
973 fields,
974 )
975 }
976
977 pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
979 self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
980 }
981
982 pub fn sortable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
984 Ok(self
985 .main
986 .remap_types::<Str, SerdeJson<_>>()
987 .get(rtxn, main_key::SORTABLE_FIELDS_KEY)?
988 .unwrap_or_default())
989 }
990
991 pub fn sortable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
993 let fields = self.sortable_fields(rtxn)?;
994 let fields_ids_map = self.fields_ids_map(rtxn)?;
995 Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
996 }
997
998 pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1000 let geo_filter = self.is_geo_filtering_enabled(rtxn)?;
1001 let geo_sortable = self.is_geo_sorting_enabled(rtxn)?;
1002 Ok(geo_filter || geo_sortable)
1003 }
1004
1005 pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1007 let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
1008 Ok(geo_sortable)
1009 }
1010
1011 pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
1013 let geo_filter =
1014 self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo());
1015 Ok(geo_filter)
1016 }
1017
1018 pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
1019 let asc_desc_fields = self
1020 .criteria(rtxn)?
1021 .into_iter()
1022 .filter_map(|criterion| match criterion {
1023 Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
1024 _otherwise => None,
1025 })
1026 .collect();
1027
1028 Ok(asc_desc_fields)
1029 }
1030
1031 pub fn null_faceted_documents_ids(
1035 &self,
1036 rtxn: &RoTxn<'_>,
1037 field_id: FieldId,
1038 ) -> heed::Result<RoaringBitmap> {
1039 match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
1040 Some(docids) => Ok(docids),
1041 None => Ok(RoaringBitmap::new()),
1042 }
1043 }
1044
1045 pub fn empty_faceted_documents_ids(
1047 &self,
1048 rtxn: &RoTxn<'_>,
1049 field_id: FieldId,
1050 ) -> heed::Result<RoaringBitmap> {
1051 match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
1052 Some(docids) => Ok(docids),
1053 None => Ok(RoaringBitmap::new()),
1054 }
1055 }
1056
1057 pub fn exists_faceted_documents_ids(
1059 &self,
1060 rtxn: &RoTxn<'_>,
1061 field_id: FieldId,
1062 ) -> heed::Result<RoaringBitmap> {
1063 match self.facet_id_exists_docids.get(rtxn, &field_id)? {
1064 Some(docids) => Ok(docids),
1065 None => Ok(RoaringBitmap::new()),
1066 }
1067 }
1068
1069 pub(crate) fn put_distinct_field(
1072 &self,
1073 wtxn: &mut RwTxn<'_>,
1074 distinct_field: &str,
1075 ) -> heed::Result<()> {
1076 self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
1077 }
1078
1079 pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn<'_>) -> heed::Result<Option<&'a str>> {
1080 self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
1081 }
1082
1083 pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1084 self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
1085 }
1086
1087 pub(crate) fn put_criteria(
1090 &self,
1091 wtxn: &mut RwTxn<'_>,
1092 criteria: &[Criterion],
1093 ) -> heed::Result<()> {
1094 self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
1095 wtxn,
1096 main_key::CRITERIA_KEY,
1097 &criteria,
1098 )
1099 }
1100
1101 pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1102 self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
1103 }
1104
1105 pub fn criteria(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<Criterion>> {
1106 match self
1107 .main
1108 .remap_types::<Str, SerdeJson<Vec<Criterion>>>()
1109 .get(rtxn, main_key::CRITERIA_KEY)?
1110 {
1111 Some(criteria) => Ok(criteria),
1112 None => Ok(default_criteria()),
1113 }
1114 }
1115
1116 pub(crate) fn put_words_fst<A: AsRef<[u8]>>(
1120 &self,
1121 wtxn: &mut RwTxn<'_>,
1122 fst: &fst::Set<A>,
1123 ) -> heed::Result<()> {
1124 self.main.remap_types::<Str, Bytes>().put(
1125 wtxn,
1126 main_key::WORDS_FST_KEY,
1127 fst.as_fst().as_bytes(),
1128 )
1129 }
1130
1131 pub fn words_fst<'t>(&self, rtxn: &'t RoTxn<'_>) -> Result<fst::Set<Cow<'t, [u8]>>> {
1133 match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
1134 Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
1135 None => Ok(fst::Set::default().map_data(Cow::Owned)?),
1136 }
1137 }
1138
1139 pub(crate) fn put_stop_words<A: AsRef<[u8]>>(
1142 &self,
1143 wtxn: &mut RwTxn<'_>,
1144 fst: &fst::Set<A>,
1145 ) -> heed::Result<()> {
1146 self.main.remap_types::<Str, Bytes>().put(
1147 wtxn,
1148 main_key::STOP_WORDS_KEY,
1149 fst.as_fst().as_bytes(),
1150 )
1151 }
1152
1153 pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1154 self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
1155 }
1156
1157 pub fn stop_words<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<Option<fst::Set<&'t [u8]>>> {
1158 match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
1159 Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
1160 None => Ok(None),
1161 }
1162 }
1163
1164 pub(crate) fn put_non_separator_tokens(
1167 &self,
1168 wtxn: &mut RwTxn<'_>,
1169 set: &BTreeSet<String>,
1170 ) -> heed::Result<()> {
1171 self.main.remap_types::<Str, SerdeBincode<_>>().put(
1172 wtxn,
1173 main_key::NON_SEPARATOR_TOKENS_KEY,
1174 set,
1175 )
1176 }
1177
1178 pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1179 self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
1180 }
1181
1182 pub fn non_separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1183 Ok(self
1184 .main
1185 .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1186 .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?)
1187 }
1188
1189 pub(crate) fn put_separator_tokens(
1192 &self,
1193 wtxn: &mut RwTxn<'_>,
1194 set: &BTreeSet<String>,
1195 ) -> heed::Result<()> {
1196 self.main.remap_types::<Str, SerdeBincode<_>>().put(
1197 wtxn,
1198 main_key::SEPARATOR_TOKENS_KEY,
1199 set,
1200 )
1201 }
1202
1203 pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1204 self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
1205 }
1206
1207 pub fn separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1208 Ok(self
1209 .main
1210 .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1211 .get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
1212 }
1213
1214 pub fn allowed_separators(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1217 let default_separators =
1218 charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
1219 let mut separators: Option<BTreeSet<_>> = None;
1220 if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
1221 separator_tokens.extend(default_separators.clone());
1222 separators = Some(separator_tokens);
1223 }
1224
1225 if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
1226 separators = separators
1227 .or_else(|| Some(default_separators.collect()))
1228 .map(|separators| &separators - &non_separator_tokens);
1229 }
1230
1231 Ok(separators)
1232 }
1233
1234 pub(crate) fn put_dictionary(
1237 &self,
1238 wtxn: &mut RwTxn<'_>,
1239 set: &BTreeSet<String>,
1240 ) -> heed::Result<()> {
1241 self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
1242 }
1243
1244 pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1245 self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
1246 }
1247
1248 pub fn dictionary(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
1249 Ok(self
1250 .main
1251 .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
1252 .get(rtxn, main_key::DICTIONARY_KEY)?)
1253 }
1254
1255 pub(crate) fn put_synonyms(
1258 &self,
1259 wtxn: &mut RwTxn<'_>,
1260 synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
1261 user_defined_synonyms: &BTreeMap<String, Vec<String>>,
1262 ) -> heed::Result<()> {
1263 self.main.remap_types::<Str, SerdeBincode<_>>().put(
1264 wtxn,
1265 main_key::SYNONYMS_KEY,
1266 synonyms,
1267 )?;
1268 self.main.remap_types::<Str, SerdeBincode<_>>().put(
1269 wtxn,
1270 main_key::USER_DEFINED_SYNONYMS_KEY,
1271 user_defined_synonyms,
1272 )
1273 }
1274
1275 pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1276 self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
1277 self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
1278 }
1279
1280 pub fn user_defined_synonyms(
1281 &self,
1282 rtxn: &RoTxn<'_>,
1283 ) -> heed::Result<BTreeMap<String, Vec<String>>> {
1284 Ok(self
1285 .main
1286 .remap_types::<Str, SerdeBincode<_>>()
1287 .get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
1288 .unwrap_or_default())
1289 }
1290
1291 pub fn synonyms(
1292 &self,
1293 rtxn: &RoTxn<'_>,
1294 ) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
1295 Ok(self
1296 .main
1297 .remap_types::<Str, SerdeBincode<_>>()
1298 .get(rtxn, main_key::SYNONYMS_KEY)?
1299 .unwrap_or_default())
1300 }
1301
1302 pub fn words_synonyms<S: AsRef<str>>(
1303 &self,
1304 rtxn: &RoTxn<'_>,
1305 words: &[S],
1306 ) -> heed::Result<Option<Vec<Vec<String>>>> {
1307 let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
1308 Ok(self.synonyms(rtxn)?.remove(&words))
1309 }
1310
1311 pub(crate) fn put_words_prefixes_fst<A: AsRef<[u8]>>(
1315 &self,
1316 wtxn: &mut RwTxn<'_>,
1317 fst: &fst::Set<A>,
1318 ) -> heed::Result<()> {
1319 self.main.remap_types::<Str, Bytes>().put(
1320 wtxn,
1321 main_key::WORDS_PREFIXES_FST_KEY,
1322 fst.as_fst().as_bytes(),
1323 )
1324 }
1325
1326 pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1327 self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
1328 }
1329
1330 pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
1332 match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
1333 Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
1334 None => Ok(fst::Set::default().map_data(Cow::Owned)?),
1335 }
1336 }
1337
1338 pub fn word_documents_count(&self, rtxn: &RoTxn<'_>, word: &str) -> heed::Result<Option<u64>> {
1343 self.word_docids.remap_data_type::<RoaringBitmapLenCodec>().get(rtxn, word)
1344 }
1345
1346 pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
1350 self.documents
1351 .get(rtxn, &id)?
1352 .ok_or(UserError::UnknownInternalDocumentId { document_id: id })
1353 .map_err(Into::into)
1354 }
1355
1356 pub fn iter_documents<'a, 't: 'a>(
1358 &'a self,
1359 rtxn: &'t RoTxn<'t>,
1360 ids: impl IntoIterator<Item = DocumentId> + 'a,
1361 ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
1362 Ok(ids.into_iter().map(move |id| {
1363 let kv = self
1364 .documents
1365 .get(rtxn, &id)?
1366 .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
1367 Ok((id, kv))
1368 }))
1369 }
1370
1371 pub fn documents<'t>(
1373 &self,
1374 rtxn: &'t RoTxn<'t>,
1375 ids: impl IntoIterator<Item = DocumentId>,
1376 ) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
1377 self.iter_documents(rtxn, ids)?.collect()
1378 }
1379
1380 pub fn all_documents<'a, 't: 'a>(
1382 &'a self,
1383 rtxn: &'t RoTxn<'t>,
1384 ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
1385 self.iter_documents(rtxn, self.documents_ids(rtxn)?)
1386 }
1387
1388 pub fn external_id_of<'a, 't: 'a>(
1389 &'a self,
1390 rtxn: &'t RoTxn<'t>,
1391 ids: impl IntoIterator<Item = DocumentId> + 'a,
1392 ) -> Result<impl IntoIterator<Item = Result<String>> + 'a> {
1393 let fields = self.fields_ids_map(rtxn)?;
1394
1395 let primary_key = self.primary_key(rtxn)?.ok_or(InternalError::DatabaseMissingEntry {
1397 db_name: db_name::MAIN,
1398 key: Some(main_key::PRIMARY_KEY_KEY),
1399 })?;
1400 let primary_key = PrimaryKey::new(primary_key, &fields).ok_or_else(|| {
1401 InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldName {
1402 field_name: primary_key.to_owned(),
1403 process: "external_id_of",
1404 })
1405 })?;
1406 Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
1407 let (_docid, obkv) = entry?;
1408 match primary_key.document_id(obkv, &fields)? {
1409 Ok(document_id) => Ok(document_id),
1410 Err(_) => Err(InternalError::DocumentsError(
1411 crate::documents::Error::InvalidDocumentFormat,
1412 )
1413 .into()),
1414 }
1415 }))
1416 }
1417
1418 pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> FacetDistribution<'a> {
1419 FacetDistribution::new(rtxn, self)
1420 }
1421
1422 pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
1423 Search::new(rtxn, self)
1424 }
1425
1426 pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
1428 Ok(self
1429 .main
1430 .remap_types::<Str, SerdeJson<OffsetDateTime>>()
1431 .get(rtxn, main_key::CREATED_AT_KEY)?
1432 .ok_or(InternalError::DatabaseMissingEntry {
1433 db_name: db_name::MAIN,
1434 key: Some(main_key::CREATED_AT_KEY),
1435 })?
1436 .0)
1437 }
1438
1439 pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
1441 Ok(self
1442 .main
1443 .remap_types::<Str, SerdeJson<OffsetDateTime>>()
1444 .get(rtxn, main_key::UPDATED_AT_KEY)?
1445 .ok_or(InternalError::DatabaseMissingEntry {
1446 db_name: db_name::MAIN,
1447 key: Some(main_key::UPDATED_AT_KEY),
1448 })?
1449 .0)
1450 }
1451
1452 pub(crate) fn set_updated_at(
1453 &self,
1454 wtxn: &mut RwTxn<'_>,
1455 time: &time::OffsetDateTime,
1456 ) -> heed::Result<()> {
1457 self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
1458 wtxn,
1459 main_key::UPDATED_AT_KEY,
1460 &OffsetDateTime(*time),
1461 )
1462 }
1463
1464 pub fn authorize_typos(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
1465 match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
1469 Some(0) => Ok(false),
1470 _ => Ok(true),
1471 }
1472 }
1473
1474 pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn<'_>, flag: bool) -> heed::Result<()> {
1475 self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
1479
1480 Ok(())
1481 }
1482
1483 pub fn min_word_len_one_typo(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
1484 Ok(self
1488 .main
1489 .remap_types::<Str, U8>()
1490 .get(txn, main_key::ONE_TYPO_WORD_LEN)?
1491 .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
1492 }
1493
1494 pub(crate) fn put_min_word_len_one_typo(
1495 &self,
1496 txn: &mut RwTxn<'_>,
1497 val: u8,
1498 ) -> heed::Result<()> {
1499 self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
1503 Ok(())
1504 }
1505
1506 pub fn min_word_len_two_typos(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
1507 Ok(self
1511 .main
1512 .remap_types::<Str, U8>()
1513 .get(txn, main_key::TWO_TYPOS_WORD_LEN)?
1514 .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
1515 }
1516
1517 pub(crate) fn put_min_word_len_two_typos(
1518 &self,
1519 txn: &mut RwTxn<'_>,
1520 val: u8,
1521 ) -> heed::Result<()> {
1522 self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
1526 Ok(())
1527 }
1528
1529 pub fn exact_words<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
1531 match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
1532 Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
1533 None => Ok(None),
1534 }
1535 }
1536
1537 pub(crate) fn put_exact_words<A: AsRef<[u8]>>(
1538 &self,
1539 txn: &mut RwTxn<'_>,
1540 words: &fst::Set<A>,
1541 ) -> Result<()> {
1542 self.main.remap_types::<Str, Bytes>().put(
1543 txn,
1544 main_key::EXACT_WORDS,
1545 words.as_fst().as_bytes(),
1546 )?;
1547 Ok(())
1548 }
1549
1550 pub fn exact_attributes<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Vec<&'t str>> {
1552 Ok(self
1553 .main
1554 .remap_types::<Str, SerdeBincode<Vec<&str>>>()
1555 .get(txn, main_key::EXACT_ATTRIBUTES)?
1556 .unwrap_or_default())
1557 }
1558
1559 pub fn exact_attributes_ids(&self, txn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
1561 let attrs = self.exact_attributes(txn)?;
1562 let fid_map = self.fields_ids_map(txn)?;
1563 Ok(attrs.iter().filter_map(|attr| fid_map.id(attr)).collect())
1564 }
1565
1566 pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn<'_>, attrs: &[&str]) -> Result<()> {
1568 self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
1569 txn,
1570 main_key::EXACT_ATTRIBUTES,
1571 &attrs,
1572 )?;
1573 Ok(())
1574 }
1575
1576 pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1578 self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
1579 }
1580
1581 pub fn max_values_per_facet(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
1582 self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
1583 }
1584
1585 pub(crate) fn put_max_values_per_facet(
1586 &self,
1587 txn: &mut RwTxn<'_>,
1588 val: u64,
1589 ) -> heed::Result<()> {
1590 self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
1591 }
1592
1593 pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1594 self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
1595 }
1596
1597 pub fn sort_facet_values_by(&self, txn: &RoTxn<'_>) -> heed::Result<OrderByMap> {
1598 let orders = self
1599 .main
1600 .remap_types::<Str, SerdeJson<OrderByMap>>()
1601 .get(txn, main_key::SORT_FACET_VALUES_BY)?
1602 .unwrap_or_default();
1603 Ok(orders)
1604 }
1605
1606 pub(crate) fn put_sort_facet_values_by(
1607 &self,
1608 txn: &mut RwTxn<'_>,
1609 val: &OrderByMap,
1610 ) -> heed::Result<()> {
1611 self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
1612 }
1613
1614 pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1615 self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
1616 }
1617
1618 pub fn pagination_max_total_hits(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
1619 self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
1620 }
1621
1622 pub(crate) fn put_pagination_max_total_hits(
1623 &self,
1624 txn: &mut RwTxn<'_>,
1625 val: u64,
1626 ) -> heed::Result<()> {
1627 self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
1628 }
1629
1630 pub(crate) fn delete_pagination_max_total_hits(
1631 &self,
1632 txn: &mut RwTxn<'_>,
1633 ) -> heed::Result<bool> {
1634 self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
1635 }
1636
1637 pub fn proximity_precision(&self, txn: &RoTxn<'_>) -> heed::Result<Option<ProximityPrecision>> {
1638 self.main
1639 .remap_types::<Str, SerdeBincode<ProximityPrecision>>()
1640 .get(txn, main_key::PROXIMITY_PRECISION)
1641 }
1642
1643 pub(crate) fn put_proximity_precision(
1644 &self,
1645 txn: &mut RwTxn<'_>,
1646 val: ProximityPrecision,
1647 ) -> heed::Result<()> {
1648 self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
1649 txn,
1650 main_key::PROXIMITY_PRECISION,
1651 &val,
1652 )
1653 }
1654
1655 pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1656 self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
1657 }
1658
1659 pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result<Option<PrefixSearch>> {
1660 self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().get(txn, main_key::PREFIX_SEARCH)
1661 }
1662
1663 pub(crate) fn put_prefix_search(
1664 &self,
1665 txn: &mut RwTxn<'_>,
1666 val: PrefixSearch,
1667 ) -> heed::Result<()> {
1668 self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().put(
1669 txn,
1670 main_key::PREFIX_SEARCH,
1671 &val,
1672 )
1673 }
1674
1675 pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1676 self.main.remap_key_type::<Str>().delete(txn, main_key::PREFIX_SEARCH)
1677 }
1678
1679 pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
1680 self.main
1681 .remap_types::<Str, SerdeBincode<bool>>()
1682 .get(txn, main_key::FACET_SEARCH)
1683 .map(|v| v.unwrap_or(true))
1684 }
1685
1686 pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> {
1687 self.main.remap_types::<Str, SerdeBincode<bool>>().put(txn, main_key::FACET_SEARCH, &val)
1688 }
1689
1690 pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
1691 self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
1692 }
1693
1694 pub fn localized_attributes_rules(
1695 &self,
1696 rtxn: &RoTxn<'_>,
1697 ) -> heed::Result<Option<Vec<LocalizedAttributesRule>>> {
1698 self.main
1699 .remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>()
1700 .get(rtxn, main_key::LOCALIZED_ATTRIBUTES_RULES)
1701 }
1702
1703 pub(crate) fn put_localized_attributes_rules(
1704 &self,
1705 txn: &mut RwTxn<'_>,
1706 val: Vec<LocalizedAttributesRule>,
1707 ) -> heed::Result<()> {
1708 self.main.remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>().put(
1709 txn,
1710 main_key::LOCALIZED_ATTRIBUTES_RULES,
1711 &val,
1712 )
1713 }
1714
1715 pub(crate) fn delete_localized_attributes_rules(
1716 &self,
1717 txn: &mut RwTxn<'_>,
1718 ) -> heed::Result<bool> {
1719 self.main.remap_key_type::<Str>().delete(txn, main_key::LOCALIZED_ATTRIBUTES_RULES)
1720 }
1721
1722 pub(crate) fn put_embedding_configs(
1727 &self,
1728 wtxn: &mut RwTxn<'_>,
1729 configs: Vec<IndexEmbeddingConfig>,
1730 ) -> heed::Result<()> {
1731 self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put(
1732 wtxn,
1733 main_key::EMBEDDING_CONFIGS,
1734 &configs,
1735 )
1736 }
1737
1738 pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1739 self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
1740 }
1741
1742 pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> {
1743 Ok(self
1744 .main
1745 .remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>()
1746 .get(rtxn, main_key::EMBEDDING_CONFIGS)?
1747 .unwrap_or_default())
1748 }
1749
1750 pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
1751 self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
1752 }
1753
1754 pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> {
1755 Ok(self.main.remap_types::<Str, BEU64>().get(rtxn, main_key::SEARCH_CUTOFF)?)
1756 }
1757
1758 pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
1759 self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
1760 }
1761
1762 pub fn embeddings(
1763 &self,
1764 rtxn: &RoTxn<'_>,
1765 docid: DocumentId,
1766 ) -> Result<BTreeMap<String, Vec<Embedding>>> {
1767 let mut res = BTreeMap::new();
1768 let embedding_configs = self.embedding_configs(rtxn)?;
1769 for config in embedding_configs {
1770 let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
1771 let reader =
1772 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
1773 let embeddings = reader.item_vectors(rtxn, docid)?;
1774 res.insert(config.name.to_owned(), embeddings);
1775 }
1776 Ok(res)
1777 }
1778
1779 pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
1780 let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
1781 Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
1782 }
1783
1784 pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
1785 let mut stats = ArroyStats::default();
1786 let embedding_configs = self.embedding_configs(rtxn)?;
1787 for config in embedding_configs {
1788 let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
1789 let reader =
1790 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
1791 reader.aggregate_stats(rtxn, &mut stats)?;
1792 }
1793 Ok(stats)
1794 }
1795
1796 pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
1805 Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
1806 || self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
1807 }
1808
1809 pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
1811 let Self {
1812 env: _,
1813 main,
1814 external_documents_ids,
1815 word_docids,
1816 exact_word_docids,
1817 word_prefix_docids,
1818 exact_word_prefix_docids,
1819 word_pair_proximity_docids,
1820 word_position_docids,
1821 word_fid_docids,
1822 word_prefix_position_docids,
1823 word_prefix_fid_docids,
1824 field_id_word_count_docids,
1825 facet_id_f64_docids,
1826 facet_id_string_docids,
1827 facet_id_normalized_string_strings,
1828 facet_id_string_fst,
1829 facet_id_exists_docids,
1830 facet_id_is_null_docids,
1831 facet_id_is_empty_docids,
1832 field_id_docid_facet_f64s,
1833 field_id_docid_facet_strings,
1834 vector_arroy,
1835 embedder_category_id,
1836 documents,
1837 } = self;
1838
1839 fn compute_size(stats: DatabaseStat) -> usize {
1840 let DatabaseStat {
1841 page_size,
1842 depth: _,
1843 branch_pages,
1844 leaf_pages,
1845 overflow_pages,
1846 entries: _,
1847 } = stats;
1848
1849 (branch_pages + leaf_pages + overflow_pages) * page_size as usize
1850 }
1851
1852 let mut sizes = IndexMap::new();
1853 sizes.insert("main", main.stat(rtxn).map(compute_size)?);
1854 sizes
1855 .insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
1856 sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
1857 sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
1858 sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
1859 sizes.insert(
1860 "exact_word_prefix_docids",
1861 exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
1862 );
1863 sizes.insert(
1864 "word_pair_proximity_docids",
1865 word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
1866 );
1867 sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
1868 sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
1869 sizes.insert(
1870 "word_prefix_position_docids",
1871 word_prefix_position_docids.stat(rtxn).map(compute_size)?,
1872 );
1873 sizes
1874 .insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
1875 sizes.insert(
1876 "field_id_word_count_docids",
1877 field_id_word_count_docids.stat(rtxn).map(compute_size)?,
1878 );
1879 sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
1880 sizes
1881 .insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
1882 sizes.insert(
1883 "facet_id_normalized_string_strings",
1884 facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
1885 );
1886 sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
1887 sizes
1888 .insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
1889 sizes.insert(
1890 "facet_id_is_null_docids",
1891 facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
1892 );
1893 sizes.insert(
1894 "facet_id_is_empty_docids",
1895 facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
1896 );
1897 sizes.insert(
1898 "field_id_docid_facet_f64s",
1899 field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
1900 );
1901 sizes.insert(
1902 "field_id_docid_facet_strings",
1903 field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
1904 );
1905 sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
1906 sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
1907 sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
1908
1909 Ok(sizes)
1910 }
1911}
1912
1913#[derive(Debug, Deserialize, Serialize)]
1914pub struct IndexEmbeddingConfig {
1915 pub name: String,
1916 pub config: EmbeddingConfig,
1917 pub user_provided: RoaringBitmap,
1918}
1919
1920#[derive(Debug, Deserialize, Serialize)]
1921pub struct PrefixSettings {
1922 pub prefix_count_threshold: usize,
1923 pub max_prefix_length: usize,
1924 pub compute_prefixes: PrefixSearch,
1925}
1926
1927#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
1928#[serde(rename_all = "camelCase")]
1929pub enum PrefixSearch {
1930 #[default]
1931 IndexingTime,
1932 Disabled,
1933}
1934
/// Result of a rollback attempt.
#[derive(Debug)]
pub enum RollbackOutcome {
    /// The requested version does not match the version that can be rolled back to.
    VersionMismatch {
        /// Version that was asked for, as `(major, minor, patch)`.
        requested_version: (u32, u32, u32),
        /// Version a rollback could reach, when known.
        rollback_version: Option<(u32, u32, u32)>,
    },
    /// A rollback was performed.
    Rollback,
    /// No rollback was needed.
    NoRollback,
}

impl RollbackOutcome {
    /// Returns `true` for [`RollbackOutcome::Rollback`] and
    /// [`RollbackOutcome::NoRollback`], `false` for a version mismatch.
    pub fn succeeded(&self) -> bool {
        match self {
            RollbackOutcome::Rollback | RollbackOutcome::NoRollback => true,
            RollbackOutcome::VersionMismatch { .. } => false,
        }
    }
}

impl std::fmt::Display for RollbackOutcome {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::VersionMismatch {
                requested_version: (req_major, req_minor, req_patch),
                rollback_version: Some((major, minor, patch)),
            } => write!(
                f,
                "cannot rollback to the requested version\n - note: requested version is v{}.{}.{}\n - note: only possible to rollback to v{}.{}.{}",
                req_major, req_minor, req_patch, major, minor, patch
            ),
            Self::VersionMismatch {
                requested_version: (req_major, req_minor, req_patch),
                rollback_version: None,
            } => write!(
                f,
                "cannot rollback to the requested version\n - note: requested version is v{}.{}.{}\n - note: only possible to rollback to an unknown version",
                req_major, req_minor, req_patch
            ),
            Self::Rollback => f.write_str("rollback complete"),
            Self::NoRollback => f.write_str("no rollback necessary"),
        }
    }
}
1963
1964#[derive(Serialize, Deserialize)]
1965#[serde(transparent)]
1966struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);
1967
1968#[cfg(test)]
1969#[path = "test_index.rs"]
1970pub(crate) mod tests;