1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::convert::TryInto;
3use std::num::NonZeroUsize;
4use std::result::Result as StdResult;
5use std::sync::Arc;
6
7use charabia::{Normalize, Tokenizer, TokenizerBuilder};
8use deserr::{DeserializeError, Deserr};
9use itertools::{merge_join_by, EitherOrBoth, Itertools};
10use roaring::RoaringBitmap;
11use serde::{Deserialize, Deserializer, Serialize, Serializer};
12use time::OffsetDateTime;
13
14use super::del_add::{DelAdd, DelAddOperation};
15use super::index_documents::{IndexDocumentsConfig, Transform};
16use super::IndexerConfig;
17use crate::attribute_patterns::PatternMatch;
18use crate::constants::RESERVED_GEO_FIELD_NAME;
19use crate::criterion::Criterion;
20use crate::disabled_typos_terms::DisabledTyposTerms;
21use crate::error::UserError;
22use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
23use crate::filterable_attributes_rules::match_faceted_field;
24use crate::index::{
25 IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
26 DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
27};
28use crate::order_by_map::OrderByMap;
29use crate::prompt::default_max_bytes;
30use crate::proximity::ProximityPrecision;
31use crate::update::index_documents::IndexDocumentsMethod;
32use crate::update::{IndexDocuments, UpdateIndexingStep};
33use crate::vector::settings::{
34 EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction,
35 SubEmbeddingSettings, WriteBackToDocuments,
36};
37use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
38use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
39
/// Tri-state wrapper describing what should happen to one setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Setting<T> {
    /// Replace the stored value with the wrapped one.
    Set(T),
    /// Put the setting back to its default.
    Reset,
    /// Leave the setting untouched.
    NotSet,
}
46
47impl<T, E> Deserr<E> for Setting<T>
48where
49 T: Deserr<E>,
50 E: DeserializeError,
51{
52 fn deserialize_from_value<V: deserr::IntoValue>(
53 value: deserr::Value<V>,
54 location: deserr::ValuePointerRef<'_>,
55 ) -> std::result::Result<Self, E> {
56 match value {
57 deserr::Value::Null => Ok(Setting::Reset),
58 _ => T::deserialize_from_value(value, location).map(Setting::Set),
59 }
60 }
61}
62
63impl<T> Default for Setting<T> {
64 fn default() -> Self {
65 Self::NotSet
66 }
67}
68
69impl<T> Setting<T> {
70 pub fn set(self) -> Option<T> {
71 match self {
72 Self::Set(value) => Some(value),
73 _ => None,
74 }
75 }
76
77 pub fn some_or_not_set(option: Option<T>) -> Self {
78 match option {
79 Some(value) => Setting::Set(value),
80 None => Setting::NotSet,
81 }
82 }
83
84 pub const fn as_ref(&self) -> Setting<&T> {
85 match *self {
86 Self::Set(ref value) => Setting::Set(value),
87 Self::Reset => Setting::Reset,
88 Self::NotSet => Setting::NotSet,
89 }
90 }
91
92 pub const fn is_not_set(&self) -> bool {
93 matches!(self, Self::NotSet)
94 }
95
96 pub fn or_reset(self, val: T) -> Self {
98 match self {
99 Self::Reset => Self::Set(val),
100 otherwise => otherwise,
101 }
102 }
103
104 pub fn or(self, other: Self) -> Self {
106 match self {
107 Setting::Set(_) | Setting::Reset => self,
108 Setting::NotSet => other,
109 }
110 }
111
112 pub fn apply(&mut self, new: Self) -> bool
114 where
115 T: PartialEq + Eq,
116 {
117 if let Setting::NotSet = new {
118 return false;
119 }
120 if self == &new {
121 return false;
122 }
123 *self = new;
124 true
125 }
126}
127
128impl<T: Serialize> Serialize for Setting<T> {
129 fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
130 where
131 S: Serializer,
132 {
133 match self {
134 Self::Set(value) => Some(value),
135 Self::NotSet | Self::Reset => None,
137 }
138 .serialize(serializer)
139 }
140}
141
142impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
143 fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
144 where
145 D: Deserializer<'de>,
146 {
147 Deserialize::deserialize(deserializer).map(|x| match x {
148 Some(x) => Self::Set(x),
149 None => Self::Reset, })
151 }
152}
153
154pub struct Settings<'a, 't, 'i> {
155 wtxn: &'t mut heed::RwTxn<'i>,
156 index: &'i Index,
157
158 indexer_config: &'a IndexerConfig,
159
160 searchable_fields: Setting<Vec<String>>,
161 displayed_fields: Setting<Vec<String>>,
162 filterable_fields: Setting<Vec<FilterableAttributesRule>>,
163 sortable_fields: Setting<HashSet<String>>,
164 criteria: Setting<Vec<Criterion>>,
165 stop_words: Setting<BTreeSet<String>>,
166 non_separator_tokens: Setting<BTreeSet<String>>,
167 separator_tokens: Setting<BTreeSet<String>>,
168 dictionary: Setting<BTreeSet<String>>,
169 distinct_field: Setting<String>,
170 synonyms: Setting<BTreeMap<String, Vec<String>>>,
171 primary_key: Setting<String>,
172 authorize_typos: Setting<bool>,
173 disable_on_numbers: Setting<bool>,
174 min_word_len_two_typos: Setting<u8>,
175 min_word_len_one_typo: Setting<u8>,
176 exact_words: Setting<BTreeSet<String>>,
177 exact_attributes: Setting<HashSet<String>>,
179 max_values_per_facet: Setting<usize>,
180 sort_facet_values_by: Setting<OrderByMap>,
181 pagination_max_total_hits: Setting<usize>,
182 proximity_precision: Setting<ProximityPrecision>,
183 embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
184 search_cutoff: Setting<u64>,
185 localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
186 prefix_search: Setting<PrefixSearch>,
187 facet_search: Setting<bool>,
188}
189
190impl<'a, 't, 'i> Settings<'a, 't, 'i> {
191 pub fn new(
192 wtxn: &'t mut heed::RwTxn<'i>,
193 index: &'i Index,
194 indexer_config: &'a IndexerConfig,
195 ) -> Settings<'a, 't, 'i> {
196 Settings {
197 wtxn,
198 index,
199 searchable_fields: Setting::NotSet,
200 displayed_fields: Setting::NotSet,
201 filterable_fields: Setting::NotSet,
202 sortable_fields: Setting::NotSet,
203 criteria: Setting::NotSet,
204 stop_words: Setting::NotSet,
205 non_separator_tokens: Setting::NotSet,
206 separator_tokens: Setting::NotSet,
207 dictionary: Setting::NotSet,
208 distinct_field: Setting::NotSet,
209 synonyms: Setting::NotSet,
210 primary_key: Setting::NotSet,
211 authorize_typos: Setting::NotSet,
212 disable_on_numbers: Setting::NotSet,
213 exact_words: Setting::NotSet,
214 min_word_len_two_typos: Setting::NotSet,
215 min_word_len_one_typo: Setting::NotSet,
216 exact_attributes: Setting::NotSet,
217 max_values_per_facet: Setting::NotSet,
218 sort_facet_values_by: Setting::NotSet,
219 pagination_max_total_hits: Setting::NotSet,
220 proximity_precision: Setting::NotSet,
221 embedder_settings: Setting::NotSet,
222 search_cutoff: Setting::NotSet,
223 localized_attributes_rules: Setting::NotSet,
224 prefix_search: Setting::NotSet,
225 facet_search: Setting::NotSet,
226 indexer_config,
227 }
228 }
229
230 pub fn reset_searchable_fields(&mut self) {
231 self.searchable_fields = Setting::Reset;
232 }
233
234 pub fn set_searchable_fields(&mut self, names: Vec<String>) {
235 self.searchable_fields = Setting::Set(names);
236 }
237
238 pub fn reset_displayed_fields(&mut self) {
239 self.displayed_fields = Setting::Reset;
240 }
241
242 pub fn set_displayed_fields(&mut self, names: Vec<String>) {
243 self.displayed_fields = Setting::Set(names);
244 }
245
246 pub fn reset_filterable_fields(&mut self) {
247 self.filterable_fields = Setting::Reset;
248 }
249
250 pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
251 self.filterable_fields = Setting::Set(rules);
252 }
253
254 pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
255 self.sortable_fields = Setting::Set(names);
256 }
257
258 pub fn reset_sortable_fields(&mut self) {
259 self.sortable_fields = Setting::Reset;
260 }
261
262 pub fn reset_criteria(&mut self) {
263 self.criteria = Setting::Reset;
264 }
265
266 pub fn set_criteria(&mut self, criteria: Vec<Criterion>) {
267 self.criteria = Setting::Set(criteria);
268 }
269
270 pub fn reset_stop_words(&mut self) {
271 self.stop_words = Setting::Reset;
272 }
273
274 pub fn set_stop_words(&mut self, stop_words: BTreeSet<String>) {
275 self.stop_words =
276 if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
277 }
278
279 pub fn reset_non_separator_tokens(&mut self) {
280 self.non_separator_tokens = Setting::Reset;
281 }
282
283 pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
284 self.non_separator_tokens = if non_separator_tokens.is_empty() {
285 Setting::Reset
286 } else {
287 Setting::Set(non_separator_tokens)
288 }
289 }
290
291 pub fn reset_separator_tokens(&mut self) {
292 self.separator_tokens = Setting::Reset;
293 }
294
295 pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
296 self.separator_tokens = if separator_tokens.is_empty() {
297 Setting::Reset
298 } else {
299 Setting::Set(separator_tokens)
300 }
301 }
302
303 pub fn reset_dictionary(&mut self) {
304 self.dictionary = Setting::Reset;
305 }
306
307 pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
308 self.dictionary =
309 if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
310 }
311
312 pub fn reset_distinct_field(&mut self) {
313 self.distinct_field = Setting::Reset;
314 }
315
316 pub fn set_distinct_field(&mut self, distinct_field: String) {
317 self.distinct_field = Setting::Set(distinct_field);
318 }
319
320 pub fn reset_synonyms(&mut self) {
321 self.synonyms = Setting::Reset;
322 }
323
324 pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
325 self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
326 }
327
328 pub fn reset_primary_key(&mut self) {
329 self.primary_key = Setting::Reset;
330 }
331
332 pub fn set_primary_key(&mut self, primary_key: String) {
333 self.primary_key = Setting::Set(primary_key);
334 }
335
336 pub fn set_autorize_typos(&mut self, val: bool) {
337 self.authorize_typos = Setting::Set(val);
338 }
339
340 pub fn reset_authorize_typos(&mut self) {
341 self.authorize_typos = Setting::Reset;
342 }
343
344 pub fn set_min_word_len_two_typos(&mut self, val: u8) {
345 self.min_word_len_two_typos = Setting::Set(val);
346 }
347
348 pub fn reset_min_word_len_two_typos(&mut self) {
349 self.min_word_len_two_typos = Setting::Reset;
350 }
351
352 pub fn set_min_word_len_one_typo(&mut self, val: u8) {
353 self.min_word_len_one_typo = Setting::Set(val);
354 }
355
356 pub fn reset_min_word_len_one_typo(&mut self) {
357 self.min_word_len_one_typo = Setting::Reset;
358 }
359
360 pub fn set_disable_on_numbers(&mut self, disable_on_numbers: bool) {
361 self.disable_on_numbers = Setting::Set(disable_on_numbers);
362 }
363
364 pub fn reset_disable_on_numbers(&mut self) {
365 self.disable_on_numbers = Setting::Reset;
366 }
367
368 pub fn set_exact_words(&mut self, words: BTreeSet<String>) {
369 self.exact_words = Setting::Set(words);
370 }
371
372 pub fn reset_exact_words(&mut self) {
373 self.exact_words = Setting::Reset;
374 }
375
376 pub fn set_exact_attributes(&mut self, attrs: HashSet<String>) {
377 self.exact_attributes = Setting::Set(attrs);
378 }
379
380 pub fn reset_exact_attributes(&mut self) {
381 self.exact_attributes = Setting::Reset;
382 }
383
384 pub fn set_max_values_per_facet(&mut self, value: usize) {
385 self.max_values_per_facet = Setting::Set(value);
386 }
387
388 pub fn reset_max_values_per_facet(&mut self) {
389 self.max_values_per_facet = Setting::Reset;
390 }
391
392 pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) {
393 self.sort_facet_values_by = Setting::Set(value);
394 }
395
396 pub fn reset_sort_facet_values_by(&mut self) {
397 self.sort_facet_values_by = Setting::Reset;
398 }
399
400 pub fn set_pagination_max_total_hits(&mut self, value: usize) {
401 self.pagination_max_total_hits = Setting::Set(value);
402 }
403
404 pub fn reset_pagination_max_total_hits(&mut self) {
405 self.pagination_max_total_hits = Setting::Reset;
406 }
407
408 pub fn set_proximity_precision(&mut self, value: ProximityPrecision) {
409 self.proximity_precision = Setting::Set(value);
410 }
411
412 pub fn reset_proximity_precision(&mut self) {
413 self.proximity_precision = Setting::Reset;
414 }
415
416 pub fn set_embedder_settings(&mut self, value: BTreeMap<String, Setting<EmbeddingSettings>>) {
417 self.embedder_settings = Setting::Set(value);
418 }
419
420 pub fn reset_embedder_settings(&mut self) {
421 self.embedder_settings = Setting::Reset;
422 }
423
424 pub fn set_search_cutoff(&mut self, value: u64) {
425 self.search_cutoff = Setting::Set(value);
426 }
427
428 pub fn reset_search_cutoff(&mut self) {
429 self.search_cutoff = Setting::Reset;
430 }
431
432 pub fn set_localized_attributes_rules(&mut self, value: Vec<LocalizedAttributesRule>) {
433 self.localized_attributes_rules = Setting::Set(value);
434 }
435
436 pub fn reset_localized_attributes_rules(&mut self) {
437 self.localized_attributes_rules = Setting::Reset;
438 }
439
440 pub fn set_prefix_search(&mut self, value: PrefixSearch) {
441 self.prefix_search = Setting::Set(value);
442 }
443
444 pub fn reset_prefix_search(&mut self) {
445 self.prefix_search = Setting::Reset;
446 }
447
448 pub fn set_facet_search(&mut self, value: bool) {
449 self.facet_search = Setting::Set(value);
450 }
451
452 pub fn reset_facet_search(&mut self) {
453 self.facet_search = Setting::Reset;
454 }
455
456 #[tracing::instrument(
457 level = "trace"
458 skip(self, progress_callback, should_abort, settings_diff),
459 target = "indexing::documents"
460 )]
461 fn reindex<FP, FA>(
462 &mut self,
463 progress_callback: &FP,
464 should_abort: &FA,
465 settings_diff: InnerIndexSettingsDiff,
466 ) -> Result<()>
467 where
468 FP: Fn(UpdateIndexingStep) + Sync,
469 FA: Fn() -> bool + Sync,
470 {
471 if self.index.number_of_documents(self.wtxn)? == 0 {
474 return Ok(());
475 }
476
477 let transform = Transform::new(
478 self.wtxn,
479 self.index,
480 self.indexer_config,
481 IndexDocumentsMethod::ReplaceDocuments,
482 false,
483 )?;
484
485 let output = transform.prepare_for_documents_reindexing(self.wtxn, settings_diff)?;
487
488 let indexing_builder = IndexDocuments::new(
491 self.wtxn,
492 self.index,
493 self.indexer_config,
494 IndexDocumentsConfig::default(),
495 &progress_callback,
496 &should_abort,
497 )?;
498
499 indexing_builder.execute_raw(output)?;
500
501 Ok(())
502 }
503
504 fn update_displayed(&mut self) -> Result<bool> {
505 match self.displayed_fields {
506 Setting::Set(ref fields) => {
507 let names: Vec<_> = fields.iter().unique().map(String::as_str).collect();
509 self.index.put_displayed_fields(self.wtxn, &names)?;
510 }
511 Setting::Reset => {
512 self.index.delete_displayed_fields(self.wtxn)?;
513 }
514 Setting::NotSet => return Ok(false),
515 }
516 Ok(true)
517 }
518
519 fn update_distinct_field(&mut self) -> Result<bool> {
520 match self.distinct_field {
521 Setting::Set(ref attr) => {
522 self.index.put_distinct_field(self.wtxn, attr)?;
523 }
524 Setting::Reset => {
525 self.index.delete_distinct_field(self.wtxn)?;
526 }
527 Setting::NotSet => return Ok(false),
528 }
529 Ok(true)
530 }
531
532 fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
534 match self.searchable_fields {
535 Setting::Set(ref fields) => {
536 let old_fields = self.index.searchable_fields(self.wtxn)?;
538 let did_change = {
539 let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
540 new_fields != old_fields
541 };
542 if !did_change {
543 return Ok(false);
544 }
545
546 let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
548
549 self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
550 Ok(true)
551 }
552 Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
553 Setting::NotSet => Ok(false),
554 }
555 }
556
557 fn update_stop_words(&mut self) -> Result<bool> {
558 match self.stop_words {
559 Setting::Set(ref stop_words) => {
560 let current = self.index.stop_words(self.wtxn)?;
561
562 let stop_words: BTreeSet<String> = stop_words
564 .iter()
565 .map(|w| w.as_str().normalize(&Default::default()).into_owned())
566 .collect();
567
568 let fst = fst::Set::from_iter(stop_words.into_iter())?;
571
572 if current
574 .is_none_or(|current| current.as_fst().as_bytes() != fst.as_fst().as_bytes())
575 {
576 self.index.put_stop_words(self.wtxn, &fst)?;
578 Ok(true)
579 } else {
580 Ok(false)
581 }
582 }
583 Setting::Reset => Ok(self.index.delete_stop_words(self.wtxn)?),
584 Setting::NotSet => Ok(false),
585 }
586 }
587
588 fn update_non_separator_tokens(&mut self) -> Result<bool> {
589 let changes = match self.non_separator_tokens {
590 Setting::Set(ref non_separator_tokens) => {
591 let current = self.index.non_separator_tokens(self.wtxn)?;
592
593 if current.is_none_or(|current| ¤t != non_separator_tokens) {
595 self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
596 true
597 } else {
598 false
599 }
600 }
601 Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
602 Setting::NotSet => false,
603 };
604
605 if changes && self.synonyms == Setting::NotSet {
607 self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
608 }
609
610 Ok(changes)
611 }
612
613 fn update_separator_tokens(&mut self) -> Result<bool> {
614 let changes = match self.separator_tokens {
615 Setting::Set(ref separator_tokens) => {
616 let current = self.index.separator_tokens(self.wtxn)?;
617
618 if current.is_none_or(|current| ¤t != separator_tokens) {
620 self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
621 true
622 } else {
623 false
624 }
625 }
626 Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
627 Setting::NotSet => false,
628 };
629
630 if changes && self.synonyms == Setting::NotSet {
632 self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
633 }
634
635 Ok(changes)
636 }
637
638 fn update_dictionary(&mut self) -> Result<bool> {
639 let changes = match self.dictionary {
640 Setting::Set(ref dictionary) => {
641 let current = self.index.dictionary(self.wtxn)?;
642
643 if current.is_none_or(|current| ¤t != dictionary) {
645 self.index.put_dictionary(self.wtxn, dictionary)?;
646 true
647 } else {
648 false
649 }
650 }
651 Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
652 Setting::NotSet => false,
653 };
654
655 if changes && self.synonyms == Setting::NotSet {
657 self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
658 }
659
660 Ok(changes)
661 }
662
663 fn update_synonyms(&mut self) -> Result<bool> {
664 match self.synonyms {
665 Setting::Set(ref user_synonyms) => {
666 fn normalize(tokenizer: &Tokenizer<'_>, text: &str) -> Vec<String> {
667 tokenizer
668 .tokenize(text)
669 .filter_map(|token| {
670 if token.is_word() && !token.lemma().is_empty() {
671 Some(token.lemma().to_string())
672 } else {
673 None
674 }
675 })
676 .collect::<Vec<_>>()
677 }
678
679 let mut builder = TokenizerBuilder::new();
680 let stop_words = self.index.stop_words(self.wtxn)?;
681 if let Some(ref stop_words) = stop_words {
682 builder.stop_words(stop_words);
683 }
684
685 let separators = self.index.allowed_separators(self.wtxn)?;
686 let separators: Option<Vec<_>> =
687 separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
688 if let Some(ref separators) = separators {
689 builder.separators(separators);
690 }
691
692 let dictionary = self.index.dictionary(self.wtxn)?;
693 let dictionary: Option<Vec<_>> =
694 dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
695 if let Some(ref dictionary) = dictionary {
696 builder.words_dict(dictionary);
697 }
698
699 let tokenizer = builder.build();
700
701 let mut new_synonyms = HashMap::new();
702 for (word, synonyms) in user_synonyms {
703 let normalized_word = normalize(&tokenizer, word);
705 let normalized_synonyms: Vec<_> = synonyms
706 .iter()
707 .map(|synonym| normalize(&tokenizer, synonym))
708 .filter(|synonym| !synonym.is_empty())
709 .collect();
710
711 if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
714 let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
715 entry.extend(normalized_synonyms.into_iter());
716 }
717 }
718
719 new_synonyms.iter_mut().for_each(|(_, synonyms)| {
721 synonyms.sort_unstable();
722 synonyms.dedup();
723 });
724
725 let old_synonyms = self.index.synonyms(self.wtxn)?;
726
727 if new_synonyms != old_synonyms {
728 self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
729 Ok(true)
730 } else {
731 Ok(false)
732 }
733 }
734 Setting::Reset => Ok(self.index.delete_synonyms(self.wtxn)?),
735 Setting::NotSet => Ok(false),
736 }
737 }
738
739 fn update_exact_attributes(&mut self) -> Result<bool> {
740 match self.exact_attributes {
741 Setting::Set(ref attrs) => {
742 let old_attrs = self.index.exact_attributes(self.wtxn)?;
743 let old_attrs = old_attrs.into_iter().map(String::from).collect::<HashSet<_>>();
744
745 if attrs != &old_attrs {
746 let attrs = attrs.iter().map(String::as_str).collect::<Vec<_>>();
747 self.index.put_exact_attributes(self.wtxn, &attrs)?;
748 Ok(true)
749 } else {
750 Ok(false)
751 }
752 }
753 Setting::Reset => Ok(self.index.delete_exact_attributes(self.wtxn)?),
754 Setting::NotSet => Ok(false),
755 }
756 }
757
758 fn update_filterable(&mut self) -> Result<()> {
759 match self.filterable_fields {
760 Setting::Set(ref fields) => {
761 self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
762 }
763 Setting::Reset => {
764 self.index.delete_filterable_attributes_rules(self.wtxn)?;
765 }
766 Setting::NotSet => (),
767 }
768 Ok(())
769 }
770
771 fn update_sortable(&mut self) -> Result<()> {
772 match self.sortable_fields {
773 Setting::Set(ref fields) => {
774 let mut new_fields = HashSet::new();
775 for name in fields {
776 new_fields.insert(name.clone());
777 }
778 self.index.put_sortable_fields(self.wtxn, &new_fields)?;
779 }
780 Setting::Reset => {
781 self.index.delete_sortable_fields(self.wtxn)?;
782 }
783 Setting::NotSet => (),
784 }
785 Ok(())
786 }
787
788 fn update_criteria(&mut self) -> Result<()> {
789 match &self.criteria {
790 Setting::Set(criteria) => {
791 self.index.put_criteria(self.wtxn, criteria)?;
792 }
793 Setting::Reset => {
794 self.index.delete_criteria(self.wtxn)?;
795 }
796 Setting::NotSet => (),
797 }
798 Ok(())
799 }
800
801 fn update_primary_key(&mut self) -> Result<()> {
802 match self.primary_key {
803 Setting::Set(ref primary_key) => {
804 if self.index.number_of_documents(self.wtxn)? == 0 {
805 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
806 fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
807 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
808 self.index.put_primary_key(self.wtxn, primary_key)?;
809 Ok(())
810 } else {
811 let curr_primary_key = self.index.primary_key(self.wtxn)?.unwrap().to_string();
812 if primary_key == &curr_primary_key {
813 Ok(())
814 } else {
815 Err(UserError::PrimaryKeyCannotBeChanged(curr_primary_key).into())
816 }
817 }
818 }
819 Setting::Reset => {
820 if self.index.number_of_documents(self.wtxn)? == 0 {
821 self.index.delete_primary_key(self.wtxn)?;
822 Ok(())
823 } else {
824 let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
825 Err(UserError::PrimaryKeyCannotBeChanged(primary_key.to_string()).into())
826 }
827 }
828 Setting::NotSet => Ok(()),
829 }
830 }
831
832 fn update_authorize_typos(&mut self) -> Result<()> {
833 match self.authorize_typos {
834 Setting::Set(flag) => {
835 self.index.put_authorize_typos(self.wtxn, flag)?;
836 Ok(())
837 }
838 Setting::Reset => {
839 self.index.put_authorize_typos(self.wtxn, true)?;
840 Ok(())
841 }
842 Setting::NotSet => Ok(()),
843 }
844 }
845
846 fn update_min_typo_word_len(&mut self) -> Result<()> {
847 let one = self.min_word_len_one_typo.or_reset(DEFAULT_MIN_WORD_LEN_ONE_TYPO);
848 let two = self.min_word_len_two_typos.or_reset(DEFAULT_MIN_WORD_LEN_TWO_TYPOS);
849 match (one, two) {
850 (Setting::Set(one), Setting::Set(two)) => {
851 if one > two {
852 return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
853 } else {
854 self.index.put_min_word_len_one_typo(self.wtxn, one)?;
855 self.index.put_min_word_len_two_typos(self.wtxn, two)?;
856 }
857 }
858 (Setting::Set(one), _) => {
859 let two = self.index.min_word_len_two_typos(self.wtxn)?;
860 if one > two {
861 return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
862 } else {
863 self.index.put_min_word_len_one_typo(self.wtxn, one)?;
864 }
865 }
866 (_, Setting::Set(two)) => {
867 let one = self.index.min_word_len_one_typo(self.wtxn)?;
868 if one > two {
869 return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
870 } else {
871 self.index.put_min_word_len_two_typos(self.wtxn, two)?;
872 }
873 }
874 _ => (),
875 }
876
877 Ok(())
878 }
879
880 fn update_disabled_typos_terms(&mut self) -> Result<()> {
881 let mut disabled_typos_terms = self.index.disabled_typos_terms(self.wtxn)?;
882 match self.disable_on_numbers {
883 Setting::Set(disable_on_numbers) => {
884 disabled_typos_terms.disable_on_numbers = disable_on_numbers;
885 }
886 Setting::Reset => {
887 self.index.delete_disabled_typos_terms(self.wtxn)?;
888 disabled_typos_terms.disable_on_numbers =
889 DisabledTyposTerms::default().disable_on_numbers;
890 }
891 Setting::NotSet => (),
892 }
893
894 self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?;
895 Ok(())
896 }
897
898 fn update_exact_words(&mut self) -> Result<()> {
899 match self.exact_words {
900 Setting::Set(ref mut words) => {
901 fn normalize(tokenizer: &Tokenizer<'_>, text: &str) -> String {
902 tokenizer.tokenize(text).map(|token| token.lemma().to_string()).collect()
903 }
904
905 let mut builder = TokenizerBuilder::new();
906 let stop_words = self.index.stop_words(self.wtxn)?;
907 if let Some(ref stop_words) = stop_words {
908 builder.stop_words(stop_words);
909 }
910 let tokenizer = builder.build();
911
912 let mut words: Vec<_> =
913 words.iter().map(|word| normalize(&tokenizer, word)).collect();
914
915 words.sort_unstable();
917
918 let words = fst::Set::from_iter(words.iter())?;
919 self.index.put_exact_words(self.wtxn, &words)?;
920 }
921 Setting::Reset => {
922 self.index.put_exact_words(self.wtxn, &fst::Set::default())?;
923 }
924 Setting::NotSet => (),
925 }
926
927 Ok(())
928 }
929
930 fn update_max_values_per_facet(&mut self) -> Result<()> {
931 match self.max_values_per_facet {
932 Setting::Set(max) => {
933 self.index.put_max_values_per_facet(self.wtxn, max as u64)?;
934 }
935 Setting::Reset => {
936 self.index.delete_max_values_per_facet(self.wtxn)?;
937 }
938 Setting::NotSet => (),
939 }
940
941 Ok(())
942 }
943
944 fn update_sort_facet_values_by(&mut self) -> Result<()> {
945 match self.sort_facet_values_by.as_ref() {
946 Setting::Set(value) => {
947 self.index.put_sort_facet_values_by(self.wtxn, value)?;
948 }
949 Setting::Reset => {
950 self.index.delete_sort_facet_values_by(self.wtxn)?;
951 }
952 Setting::NotSet => (),
953 }
954
955 Ok(())
956 }
957
958 fn update_pagination_max_total_hits(&mut self) -> Result<()> {
959 match self.pagination_max_total_hits {
960 Setting::Set(max) => {
961 self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?;
962 }
963 Setting::Reset => {
964 self.index.delete_pagination_max_total_hits(self.wtxn)?;
965 }
966 Setting::NotSet => (),
967 }
968
969 Ok(())
970 }
971
972 fn update_proximity_precision(&mut self) -> Result<bool> {
973 let changed = match self.proximity_precision {
974 Setting::Set(new) => {
975 let old = self.index.proximity_precision(self.wtxn)?;
976 if old == Some(new) {
977 false
978 } else {
979 self.index.put_proximity_precision(self.wtxn, new)?;
980 old.is_some() || new != ProximityPrecision::default()
981 }
982 }
983 Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
984 Setting::NotSet => false,
985 };
986
987 Ok(changed)
988 }
989
990 fn update_prefix_search(&mut self) -> Result<bool> {
991 let changed = match self.prefix_search {
992 Setting::Set(new) => {
993 let old = self.index.prefix_search(self.wtxn)?;
994 if old == Some(new) {
995 false
996 } else {
997 self.index.put_prefix_search(self.wtxn, new)?;
998 old.is_some() || new != PrefixSearch::default()
999 }
1000 }
1001 Setting::Reset => self.index.delete_prefix_search(self.wtxn)?,
1002 Setting::NotSet => false,
1003 };
1004
1005 Ok(changed)
1006 }
1007
1008 fn update_facet_search(&mut self) -> Result<bool> {
1009 let changed = match self.facet_search {
1010 Setting::Set(new) => {
1011 let old = self.index.facet_search(self.wtxn)?;
1012 if old == new {
1013 false
1014 } else {
1015 self.index.put_facet_search(self.wtxn, new)?;
1016 true
1017 }
1018 }
1019 Setting::Reset => self.index.delete_facet_search(self.wtxn)?,
1020 Setting::NotSet => false,
1021 };
1022
1023 Ok(changed)
1024 }
1025
1026 fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
1027 match std::mem::take(&mut self.embedder_settings) {
1028 Setting::Set(configs) => self.update_embedding_configs_set(configs),
1029 Setting::Reset => {
1030 let old_configs = self.index.embedding_configs(self.wtxn)?;
1032 let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
1033 .into_iter()
1034 .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
1035 let embedder_id =
1036 self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
1037 crate::InternalError::DatabaseMissingEntry {
1038 db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID,
1039 key: None,
1040 },
1041 )?;
1042 Ok((
1043 name,
1044 EmbedderAction::with_write_back(
1045 WriteBackToDocuments { embedder_id, user_provided },
1046 config.quantized(),
1047 ),
1048 ))
1049 })
1050 .collect();
1051
1052 let remove_all = remove_all?;
1053
1054 self.index.embedder_category_id.clear(self.wtxn)?;
1055 self.index.delete_embedding_configs(self.wtxn)?;
1056 Ok(remove_all)
1057 }
1058 Setting::NotSet => Ok(Default::default()),
1059 }
1060 }
1061
1062 fn update_embedding_configs_set(
1063 &mut self,
1064 configs: BTreeMap<String, Setting<EmbeddingSettings>>,
1065 ) -> Result<BTreeMap<String, EmbedderAction>> {
1066 use crate::vector::settings::SettingsDiff;
1067
1068 let old_configs = self.index.embedding_configs(self.wtxn)?;
1069 let old_configs: BTreeMap<String, (EmbeddingSettings, RoaringBitmap)> = old_configs
1070 .into_iter()
1071 .map(|IndexEmbeddingConfig { name, config, user_provided }| {
1072 (name, (config.into(), user_provided))
1073 })
1074 .collect();
1075 let mut updated_configs = BTreeMap::new();
1076 let mut embedder_actions = BTreeMap::new();
1077 for joined in old_configs
1078 .into_iter()
1079 .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
1080 {
1081 match joined {
1082 EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
1084 let was_quantized = old.binary_quantized.set().unwrap_or_default();
1085 let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
1086 match settings_diff {
1087 SettingsDiff::Remove => {
1088 tracing::debug!(
1089 embedder = name,
1090 user_provided = user_provided.len(),
1091 "removing embedder"
1092 );
1093 let embedder_id =
1094 self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
1095 crate::InternalError::DatabaseMissingEntry {
1096 db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID,
1097 key: None,
1098 },
1099 )?;
1100 self.index.embedder_category_id.delete(self.wtxn, &name)?;
1102 embedder_actions.insert(
1103 name,
1104 EmbedderAction::with_write_back(
1105 WriteBackToDocuments { embedder_id, user_provided },
1106 was_quantized,
1107 ),
1108 );
1109 }
1110 SettingsDiff::Reindex { action, updated_settings, quantize } => {
1111 tracing::debug!(
1112 embedder = name,
1113 user_provided = user_provided.len(),
1114 ?action,
1115 "reindex embedder"
1116 );
1117 embedder_actions.insert(
1118 name.clone(),
1119 EmbedderAction::with_reindex(action, was_quantized)
1120 .with_is_being_quantized(quantize),
1121 );
1122 let new =
1123 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
1124 updated_configs.insert(name, (new, user_provided));
1125 }
1126 SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
1127 tracing::debug!(
1128 embedder = name,
1129 user_provided = user_provided.len(),
1130 "update without reindex embedder"
1131 );
1132 let new =
1133 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
1134 if quantize {
1135 embedder_actions.insert(
1136 name.clone(),
1137 EmbedderAction::default().with_is_being_quantized(true),
1138 );
1139 }
1140 updated_configs.insert(name, (new, user_provided));
1141 }
1142 }
1143 }
1144 EitherOrBoth::Left((name, (setting, user_provided))) => {
1146 tracing::debug!(embedder = name, "unchanged embedder");
1147 updated_configs.insert(name, (Setting::Set(setting), user_provided));
1148 }
1149 EitherOrBoth::Right((name, mut setting)) => {
1151 tracing::debug!(embedder = name, "new embedder");
1152 crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting);
1154 crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
1155 &mut setting,
1156 );
1157 let setting = validate_embedding_settings(setting, &name)?;
1158 embedder_actions.insert(
1159 name.clone(),
1160 EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
1161 );
1162 updated_configs.insert(name, (setting, RoaringBitmap::new()));
1163 }
1164 }
1165 }
1166 let mut free_indices: [bool; u8::MAX as usize] = [true; u8::MAX as usize];
1167 for res in self.index.embedder_category_id.iter(self.wtxn)? {
1168 let (_name, id) = res?;
1169 free_indices[id as usize] = false;
1170 }
1171 let mut free_indices = free_indices.iter_mut().enumerate();
1172 let mut find_free_index =
1173 move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
1174 for (name, action) in embedder_actions.iter() {
1175 if matches!(action.reindex(), Some(ReindexAction::FullReindex))
1177 && self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
1178 {
1179 let id =
1180 find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
1181 tracing::debug!(embedder = name, id, "assigning free id to new embedder");
1182 self.index.embedder_category_id.put(self.wtxn, name, &id)?;
1183 }
1184 }
1185 let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
1186 .into_iter()
1187 .filter_map(|(name, (config, user_provided))| match config {
1188 Setting::Set(config) => {
1189 Some(IndexEmbeddingConfig { name, config: config.into(), user_provided })
1190 }
1191 Setting::Reset => None,
1192 Setting::NotSet => Some(IndexEmbeddingConfig {
1193 name,
1194 config: EmbeddingSettings::default().into(),
1195 user_provided,
1196 }),
1197 })
1198 .collect();
1199 if updated_configs.is_empty() {
1200 self.index.delete_embedding_configs(self.wtxn)?;
1201 } else {
1202 self.index.put_embedding_configs(self.wtxn, updated_configs)?;
1203 }
1204 Ok(embedder_actions)
1205 }
1206
1207 fn update_search_cutoff(&mut self) -> Result<bool> {
1208 let changed = match self.search_cutoff {
1209 Setting::Set(new) => {
1210 let old = self.index.search_cutoff(self.wtxn)?;
1211 if old == Some(new) {
1212 false
1213 } else {
1214 self.index.put_search_cutoff(self.wtxn, new)?;
1215 true
1216 }
1217 }
1218 Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?,
1219 Setting::NotSet => false,
1220 };
1221
1222 Ok(changed)
1223 }
1224
1225 fn update_localized_attributes_rules(&mut self) -> Result<()> {
1226 match &self.localized_attributes_rules {
1227 Setting::Set(new) => {
1228 let old = self.index.localized_attributes_rules(self.wtxn)?;
1229 if old.as_ref() != Some(new) {
1230 self.index.put_localized_attributes_rules(self.wtxn, new.clone())?;
1231 }
1232 }
1233 Setting::Reset => {
1234 self.index.delete_localized_attributes_rules(self.wtxn)?;
1235 }
1236 Setting::NotSet => (),
1237 }
1238
1239 Ok(())
1240 }
1241
1242 pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
1243 where
1244 FP: Fn(UpdateIndexingStep) + Sync,
1245 FA: Fn() -> bool + Sync,
1246 {
1247 self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
1248
1249 let old_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
1250
1251 self.update_displayed()?;
1253 self.update_distinct_field()?;
1254 self.update_criteria()?;
1255 self.update_primary_key()?;
1256 self.update_authorize_typos()?;
1257 self.update_min_typo_word_len()?;
1258 self.update_exact_words()?;
1259 self.update_max_values_per_facet()?;
1260 self.update_sort_facet_values_by()?;
1261 self.update_pagination_max_total_hits()?;
1262 self.update_search_cutoff()?;
1263
1264 self.update_filterable()?;
1266 self.update_sortable()?;
1267 self.update_stop_words()?;
1268 self.update_non_separator_tokens()?;
1269 self.update_separator_tokens()?;
1270 self.update_dictionary()?;
1271 self.update_synonyms()?;
1272 self.update_user_defined_searchable_attributes()?;
1273 self.update_exact_attributes()?;
1274 self.update_proximity_precision()?;
1275 self.update_prefix_search()?;
1276 self.update_facet_search()?;
1277 self.update_localized_attributes_rules()?;
1278 self.update_disabled_typos_terms()?;
1279
1280 let embedding_config_updates = self.update_embedding_configs()?;
1281
1282 let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
1283 new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
1284
1285 let primary_key_id = self
1286 .index
1287 .primary_key(self.wtxn)?
1288 .and_then(|name| new_inner_settings.fields_ids_map.id(name));
1289 let settings_update_only = true;
1290 let inner_settings_diff = InnerIndexSettingsDiff::new(
1291 old_inner_settings,
1292 new_inner_settings,
1293 primary_key_id,
1294 embedding_config_updates,
1295 settings_update_only,
1296 );
1297
1298 if inner_settings_diff.any_reindexing_needed() {
1299 self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
1300 }
1301
1302 Ok(())
1303 }
1304}
1305
/// Pairs two [`InnerIndexSettings`] snapshots (before and after an update)
/// with flags computed once in [`InnerIndexSettingsDiff::new`] that tell which
/// parts of the index have to be rebuilt.
pub struct InnerIndexSettingsDiff {
    /// Settings snapshot taken before the update.
    pub(crate) old: InnerIndexSettings,
    /// Settings snapshot taken after the update.
    pub(crate) new: InnerIndexSettings,
    /// Field id of the primary key, when one is known.
    pub(crate) primary_key_id: Option<FieldId>,
    /// Action planned for each embedder, keyed by embedder name.
    pub(crate) embedding_config_updates: BTreeMap<String, EmbedderAction>,
    /// Flag supplied by the caller; `Settings::execute` passes `true`.
    pub(crate) settings_update_only: bool,
    /// User-defined searchable attributes present in `new` but not in `old`.
    /// `None` when either list is not user-defined, when an attribute was
    /// removed, or when nothing was added.
    pub(crate) only_additional_fields: Option<HashSet<String>>,

    /// Cached: stop words, allowed separators, dictionary, proximity precision,
    /// prefix search, localized attributes rules or disabled typos terms differ
    /// between `old` and `new`.
    pub(crate) cache_reindex_searchable_without_user_defined: bool,
    /// Cached: the set of user-defined searchable attributes differs between
    /// `old` and `new` (order is ignored).
    pub(crate) cache_user_defined_searchables: bool,
    /// Cached: the exact attributes differ between `old` and `new`.
    pub(crate) cache_exact_attributes: bool,
}
1324
1325impl InnerIndexSettingsDiff {
1326 #[tracing::instrument(level = "trace", skip_all, target = "indexing::settings")]
1327 pub(crate) fn new(
1328 old_settings: InnerIndexSettings,
1329 new_settings: InnerIndexSettings,
1330 primary_key_id: Option<FieldId>,
1331 mut embedding_config_updates: BTreeMap<String, EmbedderAction>,
1332 settings_update_only: bool,
1333 ) -> Self {
1334 let only_additional_fields = match (
1335 &old_settings.user_defined_searchable_attributes,
1336 &new_settings.user_defined_searchable_attributes,
1337 ) {
1338 (None, None) | (Some(_), None) | (None, Some(_)) => None, (Some(old), Some(new)) => {
1340 let old: HashSet<_> = old.iter().cloned().collect();
1341 let new: HashSet<_> = new.iter().cloned().collect();
1342 if old.difference(&new).next().is_none() {
1343 Some(&new - &old).filter(|x| !x.is_empty())
1345 } else {
1346 None
1347 }
1348 }
1349 };
1350
1351 let cache_reindex_searchable_without_user_defined = {
1352 old_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
1353 != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
1354 || old_settings.allowed_separators != new_settings.allowed_separators
1355 || old_settings.dictionary != new_settings.dictionary
1356 || old_settings.proximity_precision != new_settings.proximity_precision
1357 || old_settings.prefix_search != new_settings.prefix_search
1358 || old_settings.localized_attributes_rules
1359 != new_settings.localized_attributes_rules
1360 || old_settings.disabled_typos_terms != new_settings.disabled_typos_terms
1361 };
1362
1363 let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
1364
1365 let cache_user_defined_searchables = match (
1368 &old_settings.user_defined_searchable_attributes,
1369 &new_settings.user_defined_searchable_attributes,
1370 ) {
1371 (Some(old), Some(new)) => {
1372 let old: BTreeSet<_> = old.iter().collect();
1373 let new: BTreeSet<_> = new.iter().collect();
1374
1375 old != new
1376 }
1377 (None, None) => false,
1378 _otherwise => true,
1379 };
1380
1381 if cache_user_defined_searchables {
1383 for (embedder_name, (config, _, _quantized)) in
1384 new_settings.embedding_configs.inner_as_ref()
1385 {
1386 let was_quantized =
1387 old_settings.embedding_configs.get(embedder_name).is_some_and(|conf| conf.2);
1388 if !config.uses_document_template() {
1390 continue;
1391 }
1392
1393 match embedding_config_updates.entry(embedder_name.clone()) {
1396 std::collections::btree_map::Entry::Vacant(entry) => {
1397 entry.insert(EmbedderAction::with_reindex(
1398 ReindexAction::RegeneratePrompts,
1399 was_quantized,
1400 ));
1401 }
1402 std::collections::btree_map::Entry::Occupied(entry) => {
1403 let EmbedderAction {
1404 was_quantized: _,
1405 is_being_quantized: _,
1406 write_back: _, reindex: _, } = entry.get();
1409 }
1410 };
1411 }
1412 }
1413
1414 InnerIndexSettingsDiff {
1415 old: old_settings,
1416 new: new_settings,
1417 primary_key_id,
1418 embedding_config_updates,
1419 settings_update_only,
1420 only_additional_fields,
1421 cache_reindex_searchable_without_user_defined,
1422 cache_user_defined_searchables,
1423 cache_exact_attributes,
1424 }
1425 }
1426
1427 pub fn any_reindexing_needed(&self) -> bool {
1428 self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
1429 }
1430
1431 pub fn reindex_searchable(&self) -> bool {
1432 self.cache_reindex_searchable_without_user_defined
1433 || self.cache_exact_attributes
1434 || self.cache_user_defined_searchables
1435 }
1436
1437 pub fn reindex_proximities(&self) -> bool {
1438 (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables)
1440 && (self.old.proximity_precision == ProximityPrecision::ByAttribute
1442 || self.new.proximity_precision == ProximityPrecision::ByAttribute)
1443 }
1444
1445 pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> {
1446 if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes {
1447 Some(DelAddOperation::DeletionAndAddition)
1448 } else if let Some(only_additional_fields) = &self.only_additional_fields {
1449 let additional_field = self.new.fields_ids_map.name(id).unwrap();
1450 if only_additional_fields.contains(additional_field) {
1451 Some(DelAddOperation::Addition)
1452 } else {
1453 None
1454 }
1455 } else if self.cache_user_defined_searchables {
1456 Some(DelAddOperation::DeletionAndAddition)
1457 } else {
1458 None
1459 }
1460 }
1461
1462 pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
1466 let settings = match del_add {
1467 DelAdd::Deletion => &self.old,
1468 DelAdd::Addition => &self.new,
1469 };
1470
1471 settings
1472 .fields_ids_map
1473 .iter_id_metadata()
1474 .filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
1475 .map(|(id, _)| id)
1476 .collect()
1477 }
1478
1479 pub fn facet_fids_changed(&self) -> bool {
1480 for eob in merge_join_by(
1481 self.old.fields_ids_map.iter().filter(|(_, _, metadata)| {
1482 metadata.is_faceted(&self.old.filterable_attributes_rules)
1483 }),
1484 self.new.fields_ids_map.iter().filter(|(_, _, metadata)| {
1485 metadata.is_faceted(&self.new.filterable_attributes_rules)
1486 }),
1487 |(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid),
1488 ) {
1489 match eob {
1490 EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true,
1492 EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => {
1494 let old_filterable_features = old_metadata
1497 .filterable_attributes_features(&self.old.filterable_attributes_rules);
1498 let new_filterable_features = new_metadata
1499 .filterable_attributes_features(&self.new.filterable_attributes_rules);
1500 let is_old_facet_searchable =
1501 old_filterable_features.is_facet_searchable() && self.old.facet_search;
1502 let is_new_facet_searchable =
1503 new_filterable_features.is_facet_searchable() && self.new.facet_search;
1504 if is_old_facet_searchable != is_new_facet_searchable {
1505 return true;
1506 }
1507
1508 let old_facet_level_database = old_metadata
1511 .require_facet_level_database(&self.old.filterable_attributes_rules);
1512 let new_facet_level_database = new_metadata
1513 .require_facet_level_database(&self.new.filterable_attributes_rules);
1514 if old_facet_level_database != new_facet_level_database {
1515 return true;
1516 }
1517 }
1518 }
1519 }
1520
1521 false
1522 }
1523
1524 pub fn global_facet_settings_changed(&self) -> bool {
1525 self.old.localized_attributes_rules != self.new.localized_attributes_rules
1526 || self.old.facet_search != self.new.facet_search
1527 }
1528
1529 pub fn reindex_facets(&self) -> bool {
1530 self.facet_fids_changed() || self.global_facet_settings_changed()
1531 }
1532
1533 pub fn reindex_vectors(&self) -> bool {
1534 !self.embedding_config_updates.is_empty()
1535 }
1536
1537 pub fn settings_update_only(&self) -> bool {
1538 self.settings_update_only
1539 }
1540
1541 pub fn run_geo_indexing(&self) -> bool {
1542 self.old.geo_fields_ids != self.new.geo_fields_ids
1543 || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
1544 }
1545}
1546
/// In-memory snapshot of the index settings, as loaded by
/// [`InnerIndexSettings::from_index`].
#[derive(Clone)]
pub(crate) struct InnerIndexSettings {
    /// Stop words of the index as an owned FST set, if any are stored.
    pub stop_words: Option<fst::Set<Vec<u8>>>,
    /// Value of `index.allowed_separators`.
    pub allowed_separators: Option<BTreeSet<String>>,
    /// Value of `index.dictionary`.
    pub dictionary: Option<BTreeSet<String>>,
    /// Fields-ids map of the index, wrapped with metadata built from the index.
    pub fields_ids_map: FieldIdMapWithMetadata,
    /// Localized attributes rules; empty when none are stored.
    pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
    /// Value of `index.filterable_attributes_rules`.
    pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
    /// Value of `index.asc_desc_fields`.
    pub asc_desc_fields: HashSet<String>,
    /// Name of the distinct field, if one is set.
    pub distinct_field: Option<String>,
    /// Searchable attributes explicitly defined by the user, if any.
    pub user_defined_searchable_attributes: Option<Vec<String>>,
    /// Value of `index.sortable_fields`.
    pub sortable_fields: HashSet<String>,
    /// Value of `index.exact_attributes_ids`.
    pub exact_attributes: HashSet<FieldId>,
    /// Value of `index.disabled_typos_terms`.
    pub disabled_typos_terms: DisabledTyposTerms,
    /// Proximity precision; the type's default when none is stored.
    pub proximity_precision: ProximityPrecision,
    /// Embedders, either supplied by the caller or built from the stored configs.
    pub embedding_configs: EmbeddingConfigs,
    /// Ids of `_geo.lat` and `_geo.lng`; only set when the reserved geo field
    /// is in the fields-ids map and `index.is_geo_enabled` returns true.
    pub geo_fields_ids: Option<(FieldId, FieldId)>,
    /// Prefix search setting; the type's default when none is stored.
    pub prefix_search: PrefixSearch,
    /// Value of `index.facet_search`.
    pub facet_search: bool,
}
1567
1568impl InnerIndexSettings {
1569 pub fn from_index(
1570 index: &Index,
1571 rtxn: &heed::RoTxn<'_>,
1572 embedding_configs: Option<EmbeddingConfigs>,
1573 ) -> Result<Self> {
1574 let stop_words = index.stop_words(rtxn)?;
1575 let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
1576 let allowed_separators = index.allowed_separators(rtxn)?;
1577 let dictionary = index.dictionary(rtxn)?;
1578 let mut fields_ids_map = index.fields_ids_map(rtxn)?;
1579 let exact_attributes = index.exact_attributes_ids(rtxn)?;
1580 let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
1581 let embedding_configs = match embedding_configs {
1582 Some(embedding_configs) => embedding_configs,
1583 None => embedders(index.embedding_configs(rtxn)?)?,
1584 };
1585 let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
1586 let facet_search = index.facet_search(rtxn)?;
1587 let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
1588 Some(_) if index.is_geo_enabled(rtxn)? => {
1589 let field_ids = fields_ids_map
1591 .insert("_geo.lat")
1592 .zip(fields_ids_map.insert("_geo.lng"))
1593 .ok_or(UserError::AttributeLimitReached)?;
1594 Some(field_ids)
1595 }
1596 _ => None,
1597 };
1598 let localized_attributes_rules =
1599 index.localized_attributes_rules(rtxn)?.unwrap_or_default();
1600 let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
1601 let sortable_fields = index.sortable_fields(rtxn)?;
1602 let asc_desc_fields = index.asc_desc_fields(rtxn)?;
1603 let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
1604 let user_defined_searchable_attributes = index
1605 .user_defined_searchable_fields(rtxn)?
1606 .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
1607 let builder = MetadataBuilder::from_index(index, rtxn)?;
1608 let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
1609 let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
1610 Ok(Self {
1611 stop_words,
1612 allowed_separators,
1613 dictionary,
1614 fields_ids_map,
1615 localized_attributes_rules,
1616 filterable_attributes_rules,
1617 asc_desc_fields,
1618 distinct_field,
1619 user_defined_searchable_attributes,
1620 sortable_fields,
1621 exact_attributes,
1622 proximity_precision,
1623 embedding_configs,
1624 geo_fields_ids,
1625 prefix_search,
1626 facet_search,
1627 disabled_typos_terms,
1628 })
1629 }
1630
1631 pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
1632 match_faceted_field(
1633 field,
1634 &self.filterable_attributes_rules,
1635 &self.sortable_fields,
1636 &self.asc_desc_fields,
1637 &self.distinct_field,
1638 )
1639 }
1640
1641 pub fn recompute_searchables(
1643 &mut self,
1644 wtxn: &mut heed::RwTxn<'_>,
1645 index: &Index,
1646 ) -> Result<()> {
1647 let searchable_fields = self
1648 .user_defined_searchable_attributes
1649 .as_ref()
1650 .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
1651
1652 if let Some(searchable_fields) = searchable_fields {
1654 index.put_all_searchable_fields_from_fields_ids_map(
1655 wtxn,
1656 &searchable_fields,
1657 &self.fields_ids_map,
1658 )?;
1659 }
1660
1661 Ok(())
1662 }
1663}
1664
1665fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingConfigs> {
1666 let res: Result<_> = embedding_configs
1667 .into_iter()
1668 .map(
1669 |IndexEmbeddingConfig {
1670 name,
1671 config: EmbeddingConfig { embedder_options, prompt, quantized },
1672 ..
1673 }| {
1674 let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
1675
1676 let embedder = Arc::new(
1677 Embedder::new(embedder_options.clone(), 0)
1679 .map_err(crate::vector::Error::from)
1680 .map_err(crate::Error::from)?,
1681 );
1682 Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
1683 },
1684 )
1685 .collect();
1686 res.map(EmbeddingConfigs::new)
1687}
1688
1689fn validate_prompt(
1690 name: &str,
1691 new_prompt: Setting<String>,
1692 max_bytes: Setting<usize>,
1693) -> Result<Setting<String>> {
1694 match new_prompt {
1695 Setting::Set(template) => {
1696 let max_bytes = match max_bytes.set() {
1697 Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
1698 crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes {
1699 embedder_name: name.to_owned(),
1700 }
1701 })?,
1702 None => default_max_bytes(),
1703 };
1704
1705 let template = crate::prompt::Prompt::new(
1707 template,
1708 Some(max_bytes),
1710 )
1711 .map(|prompt| crate::prompt::PromptData::from(prompt).template)
1712 .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
1713
1714 Ok(Setting::Set(template))
1715 }
1716 new => Ok(new),
1717 }
1718}
1719
1720pub fn validate_embedding_settings(
1721 settings: Setting<EmbeddingSettings>,
1722 name: &str,
1723) -> Result<Setting<EmbeddingSettings>> {
1724 let Setting::Set(settings) = settings else { return Ok(settings) };
1725 let EmbeddingSettings {
1726 source,
1727 model,
1728 revision,
1729 pooling,
1730 api_key,
1731 dimensions,
1732 document_template,
1733 document_template_max_bytes,
1734 url,
1735 request,
1736 response,
1737 search_embedder,
1738 mut indexing_embedder,
1739 distribution,
1740 headers,
1741 binary_quantized: binary_quantize,
1742 } = settings;
1743
1744 let document_template = validate_prompt(name, document_template, document_template_max_bytes)?;
1745
1746 if let Some(0) = dimensions.set() {
1747 return Err(crate::error::UserError::InvalidSettingsDimensions {
1748 embedder_name: name.to_owned(),
1749 }
1750 .into());
1751 }
1752
1753 if let Some(url) = url.as_ref().set() {
1754 url::Url::parse(url).map_err(|error| crate::error::UserError::InvalidUrl {
1755 embedder_name: name.to_owned(),
1756 inner_error: error,
1757 url: url.to_owned(),
1758 })?;
1759 }
1760
1761 if let Some(request) = request.as_ref().set() {
1762 let request = crate::vector::rest::Request::new(request.to_owned())
1763 .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
1764 if let Some(response) = response.as_ref().set() {
1765 crate::vector::rest::Response::new(response.to_owned(), &request)
1766 .map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
1767 }
1768 }
1769
1770 let Some(inferred_source) = source.set() else {
1771 return Ok(Setting::Set(EmbeddingSettings {
1773 source,
1774 model,
1775 revision,
1776 pooling,
1777 api_key,
1778 dimensions,
1779 document_template,
1780 document_template_max_bytes,
1781 url,
1782 request,
1783 response,
1784 search_embedder,
1785 indexing_embedder,
1786 distribution,
1787 headers,
1788 binary_quantized: binary_quantize,
1789 }));
1790 };
1791 EmbeddingSettings::check_settings(
1792 name,
1793 inferred_source,
1794 NestingContext::NotNested,
1795 &model,
1796 &revision,
1797 &pooling,
1798 &dimensions,
1799 &api_key,
1800 &url,
1801 &request,
1802 &response,
1803 &document_template,
1804 &document_template_max_bytes,
1805 &headers,
1806 &search_embedder,
1807 &indexing_embedder,
1808 &binary_quantize,
1809 &distribution,
1810 )?;
1811 match inferred_source {
1812 EmbedderSource::OpenAi => {
1813 if let Setting::Set(model) = &model {
1814 let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
1815 .ok_or(crate::error::UserError::InvalidOpenAiModel {
1816 embedder_name: name.to_owned(),
1817 model: model.clone(),
1818 })?;
1819 if let Setting::Set(dimensions) = dimensions {
1820 if !model.supports_overriding_dimensions()
1821 && dimensions != model.default_dimensions()
1822 {
1823 return Err(crate::error::UserError::InvalidOpenAiModelDimensions {
1824 embedder_name: name.to_owned(),
1825 model: model.name(),
1826 dimensions,
1827 expected_dimensions: model.default_dimensions(),
1828 }
1829 .into());
1830 }
1831 if dimensions > model.default_dimensions() {
1832 return Err(crate::error::UserError::InvalidOpenAiModelDimensionsMax {
1833 embedder_name: name.to_owned(),
1834 model: model.name(),
1835 dimensions,
1836 max_dimensions: model.default_dimensions(),
1837 }
1838 .into());
1839 }
1840 }
1841 }
1842 }
1843 EmbedderSource::Ollama
1844 | EmbedderSource::HuggingFace
1845 | EmbedderSource::UserProvided
1846 | EmbedderSource::Rest => {}
1847 EmbedderSource::Composite => {
1848 if let Setting::Set(embedder) = &search_embedder {
1849 if let Some(source) = embedder.source.set() {
1850 let search_embedder = match embedder.search_embedder.clone() {
1851 Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder(
1852 search_embedder,
1853 name,
1854 NestingContext::Search,
1855 )?),
1856 Setting::Reset => Setting::Reset,
1857 Setting::NotSet => Setting::NotSet,
1858 };
1859 let indexing_embedder = match embedder.indexing_embedder.clone() {
1860 Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder(
1861 indexing_embedder,
1862 name,
1863 NestingContext::Search,
1864 )?),
1865 Setting::Reset => Setting::Reset,
1866 Setting::NotSet => Setting::NotSet,
1867 };
1868 EmbeddingSettings::check_nested_source(name, source, NestingContext::Search)?;
1869 EmbeddingSettings::check_settings(
1870 name,
1871 source,
1872 NestingContext::Search,
1873 &embedder.model,
1874 &embedder.revision,
1875 &embedder.pooling,
1876 &embedder.dimensions,
1877 &embedder.api_key,
1878 &embedder.url,
1879 &embedder.request,
1880 &embedder.response,
1881 &embedder.document_template,
1882 &embedder.document_template_max_bytes,
1883 &embedder.headers,
1884 &search_embedder,
1885 &indexing_embedder,
1886 &embedder.binary_quantized,
1887 &embedder.distribution,
1888 )?;
1889 } else {
1890 return Err(UserError::MissingSourceForNested {
1891 embedder_name: NestingContext::Search.embedder_name_with_context(name),
1892 }
1893 .into());
1894 }
1895 }
1896
1897 indexing_embedder = if let Setting::Set(mut embedder) = indexing_embedder {
1898 embedder.document_template = validate_prompt(
1899 name,
1900 embedder.document_template,
1901 embedder.document_template_max_bytes,
1902 )?;
1903
1904 if let Some(source) = embedder.source.set() {
1905 let search_embedder = match embedder.search_embedder.clone() {
1906 Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder(
1907 search_embedder,
1908 name,
1909 NestingContext::Indexing,
1910 )?),
1911 Setting::Reset => Setting::Reset,
1912 Setting::NotSet => Setting::NotSet,
1913 };
1914 let indexing_embedder = match embedder.indexing_embedder.clone() {
1915 Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder(
1916 indexing_embedder,
1917 name,
1918 NestingContext::Indexing,
1919 )?),
1920 Setting::Reset => Setting::Reset,
1921 Setting::NotSet => Setting::NotSet,
1922 };
1923 EmbeddingSettings::check_nested_source(name, source, NestingContext::Indexing)?;
1924 EmbeddingSettings::check_settings(
1925 name,
1926 source,
1927 NestingContext::Indexing,
1928 &embedder.model,
1929 &embedder.revision,
1930 &embedder.pooling,
1931 &embedder.dimensions,
1932 &embedder.api_key,
1933 &embedder.url,
1934 &embedder.request,
1935 &embedder.response,
1936 &embedder.document_template,
1937 &embedder.document_template_max_bytes,
1938 &embedder.headers,
1939 &search_embedder,
1940 &indexing_embedder,
1941 &embedder.binary_quantized,
1942 &embedder.distribution,
1943 )?;
1944 } else {
1945 return Err(UserError::MissingSourceForNested {
1946 embedder_name: NestingContext::Indexing.embedder_name_with_context(name),
1947 }
1948 .into());
1949 }
1950 Setting::Set(embedder)
1951 } else {
1952 indexing_embedder
1953 };
1954 }
1955 }
1956 Ok(Setting::Set(EmbeddingSettings {
1957 source,
1958 model,
1959 revision,
1960 pooling,
1961 api_key,
1962 dimensions,
1963 document_template,
1964 document_template_max_bytes,
1965 url,
1966 request,
1967 response,
1968 search_embedder,
1969 indexing_embedder,
1970 distribution,
1971 headers,
1972 binary_quantized: binary_quantize,
1973 }))
1974}
1975
1976fn deserialize_sub_embedder(
1977 sub_embedder: serde_json::Value,
1978 embedder_name: &str,
1979 context: NestingContext,
1980) -> std::result::Result<SubEmbeddingSettings, UserError> {
1981 match deserr::deserialize::<_, _, deserr::errors::JsonError>(sub_embedder) {
1982 Ok(sub_embedder) => Ok(sub_embedder),
1983 Err(error) => {
1984 let message = format!("{error}{}", context.nesting_embedders());
1985 Err(UserError::InvalidSettingsEmbedder {
1986 embedder_name: context.embedder_name_with_context(embedder_name),
1987 message,
1988 })
1989 }
1990 }
1991}
1992
// Test module: only compiled for test builds, with its body loaded from
// `test_settings.rs`.
#[cfg(test)]
#[path = "test_settings.rs"]
mod tests;