use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
index
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();
index
.add_documents(documents!([
{
"id": 0,
"text": "the quick brown fox jumps over the lazy dog"
},
{
"id": 1,
"text": "the quick brown foxes jump over the lazy dog"
},
{
"id": 2,
"text": "the quick brown fax sends a letter to the dog"
},
{
"id": 3,
"text": "the quickest brownest fox jumps over the laziest dog"
},
{
"id": 4,
"text": "a fox doesn't quack, that crown goes to the duck."
},
{
"id": 5,
"text": "the quicker browner fox jumped over the lazier dog"
},
{
"id": 6,
"text": "the extravagant fox skyrocketed over the languorous dog" },
{
"id": 7,
"text": "the quick brown fox jumps over the lazy"
},
{
"id": 8,
"text": "the quick brown fox jumps over the"
},
{
"id": 9,
"text": "the quick brown fox jumps over"
},
{
"id": 10,
"text": "the quick brown fox jumps"
},
{
"id": 11,
"text": "the quick brown fox"
},
{
"id": 12,
"text": "the quick brown"
},
{
"id": 13,
"text": "the quick"
},
{
"id": 14,
"text": "netwolk interconections sunflawar"
},
{
"id": 15,
"text": "network interconnections sunflawer"
},
{
"id": 16,
"text": "network interconnection sunflower"
},
{
"id": 17,
"text": "network interconnection sun flower"
},
{
"id": 18,
"text": "network interconnection sunflowering"
},
{
"id": 19,
"text": "network interconnection sun flowering"
},
{
"id": 20,
"text": "network interconnection sunflowar"
},
{
"id": 21,
"text": "the fast brownish fox jumps over the lackadaisical dog"
},
{
"id": 22,
"text": "the quick brown fox jumps over the lackadaisical dog"
},
{
"id": 23,
"text": "the quivk brown fox jumps over the lazy dog"
},
{
"id": 24,
"tolerant_text": "the quick brown fox jumps over the lazy dog",
},
{
"id": 25,
"tolerant_text": "the quivk brown fox jumps over the lazy dog",
},
]))
.unwrap();
index
}
#[test]
fn test_no_typo() {
let index = create_index();
index
.update_settings(|s| {
s.set_autorize_typos(false);
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
]
"###);
}
#[test]
fn test_default_typo() {
let index = create_index();
let txn = index.read_txn().unwrap();
let ot = index.min_word_len_one_typo(&txn).unwrap();
let tt = index.min_word_len_two_typos(&txn).unwrap();
insta::assert_debug_snapshot!(ot, @"5");
insta::assert_debug_snapshot!(tt, @"9");
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]");
insta::assert_snapshot!(format!("{document_scores:#?}"), @r###"
[
[],
[],
]
"###);
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quivk brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quickest brownest fox jumps over the laziest dog\"",
]
"###);
}
#[test]
fn test_phrase_no_typo_allowed() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the \"quick brewn\" fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @"[]");
}
#[test]
fn test_typo_exact_word() {
let index = create_index();
index
.update_settings(|s| {
s.set_exact_words(
["quick", "quack", "sunflower"].iter().map(ToString::to_string).collect(),
)
})
.unwrap();
let txn = index.read_txn().unwrap();
let ot = index.min_word_len_one_typo(&txn).unwrap();
let tt = index.min_word_len_two_typos(&txn).unwrap();
insta::assert_debug_snapshot!(ot, @"5");
insta::assert_debug_snapshot!(tt, @"9");
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quickest brownest fox jumps over the laziest dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 17, 18]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"network interconnection sunflower\"",
"\"network interconnection sun flower\"",
"\"network interconnection sunflowering\"",
]
"###);
}
#[test]
fn test_typo_exact_attribute() {
let index = create_index();
index
.update_settings(|s| {
s.set_exact_attributes(["text"].iter().map(ToString::to_string).collect());
s.set_searchable_fields(
["text", "tolerant_text"].iter().map(ToString::to_string).collect(),
);
s.set_exact_words(["quivk"].iter().map(ToString::to_string).collect())
})
.unwrap();
let txn = index.read_txn().unwrap();
let ot = index.min_word_len_one_typo(&txn).unwrap();
let tt = index.min_word_len_two_typos(&txn).unwrap();
insta::assert_debug_snapshot!(ot, @"5");
insta::assert_debug_snapshot!(tt, @"9");
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 24, 25]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
"__does_not_exist__",
"__does_not_exist__",
]
"###);
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"__does_not_exist__",
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quivk brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quidk brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[24, 25]");
insta::assert_snapshot!(format!("{document_scores:#?}"), @r###"
[
[],
[],
]
"###);
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quivk brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quivk brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 25]");
insta::assert_snapshot!(format!("{document_scores:#?}"), @r###"
[
[],
[],
]
"###);
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quivk brown fox jumps over the lazy dog\"",
"__does_not_exist__",
]
"###);
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"__does_not_exist__",
"\"the quivk brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
}
#[test]
fn test_ngram_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the extra lagant fox skyrocketed over the languorous dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the extravagant fox skyrocketed over the languorous dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
insta::assert_snapshot!(format!("{document_scores:#?}"), @"[]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @"[]");
}
#[test]
fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids: ids_1, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &ids_1);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quivk brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
"\"the quick brown fox jumps over\"",
"\"the quick brown fox jumps over the lackadaisical dog\"",
"\"the quick brown fox jumps\"",
"\"the quick brown fox\"",
"\"the quick brown foxes jump over the lazy dog\"",
"\"the quick brown fax sends a letter to the dog\"",
"\"the quick brown\"",
"\"the quick\"",
"\"a fox doesn't quack, that crown goes to the duck.\"",
"\"the quickest brownest fox jumps over the laziest dog\"",
"\"the quicker browner fox jumped over the lazier dog\"",
"\"the extravagant fox skyrocketed over the languorous dog\"",
"\"the fast brownish fox jumps over the lackadaisical dog\"",
]
"###);
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids: ids_2, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
assert_eq!(ids_1, ids_2);
}
#[test]
fn test_typo_bucketing() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"netwolk interconections sunflawar\"",
"\"network interconnections sunflawer\"",
"\"network interconnection sunflower\"",
"\"network interconnection sun flower\"",
"\"network interconnection sunflowering\"",
"\"network interconnection sunflowar\"",
]
"###);
drop(txn);
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"network interconnection sunflower\"",
"\"network interconnection sunflowering\"",
"\"network interconnection sun flower\"",
"\"network interconnection sunflowar\"",
"\"network interconnections sunflawer\"",
"\"netwolk interconections sunflawar\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sun flower");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"network interconnection sun flower\"",
"\"network interconnection sun flowering\"",
"\"network interconnection sunflower\"",
"\"network interconnection sunflowering\"",
"\"network interconnection sunflowar\"",
"\"network interconnections sunflawer\"",
]
"###);
}
#[test]
fn test_typo_synonyms() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
let mut synonyms = BTreeMap::new();
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);
s.set_synonyms(synonyms);
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lackadaisical dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lackadaisical dog\"",
"\"the quivk brown fox jumps over the lazy dog\"",
]
"###);
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the fast brownish fox jumps over the lackadaisical dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the fast brownish fox jumps over the lackadaisical dog\"",
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lackadaisical dog\"",
]
"###);
}