1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
use crate::simple::internal::StrsimTopScores;
use crate::simple::search_index::SearchIndex;
use kstring::KString;
use std::{cmp::Ord, collections::BTreeSet, hash::Hash};
use strsim::jaro;
// -----------------------------------------------------------------------------
impl<K: Hash + Ord> SearchIndex<K> {
// -------------------------------------------------------------------------
//
/// Scans the entire search index for the closest matching _n_ keywords
/// using the Jaro string similarity metric from Danny Guo's
/// [strsim](https://crates.io/crates/strsim) crate.
///
/// When the user's last (partial) keyword that is meant to be autocompleted
/// returns no matches, these `strsim_autocomplete_*` methods can be used to
/// find the best match for substitution.
///
/// * `index_range` limits which keywords to compare the user's keyword
/// against. For example, if the `index_range` is "super" and the user's
/// keyword is "supersonic": only search index keywords beginning with
/// "super" will be compared against the user's keyword: "supersonic"
/// against "superalloy", "supersonic" against "supergiant" and so on...
///
/// * `key_set` limits which keywords to compare the user's keyword
/// against. For a search index keyword to be considered as a fuzzy match,
/// it must contain at least one key that is in this key set. This is how
/// fuzzy matching is made contextual.
//
// Note: these `strsim_autocomplete_*` methods are very similar and may seem
// repetitive with a lot of boiler plate. These were intentionally made more
// "concrete" and less modular in order to be more efficient.
pub(crate) fn strsim_autocomplete_context_jaro(
&self,
index_range: &str,
key_set: &BTreeSet<&K>,
user_keyword: &str,
) -> impl Iterator<Item = (&KString, &BTreeSet<K>)> {
// This structure will track the top scoring keywords:
let mut top_scores: StrsimTopScores<K, f64> =
StrsimTopScores::with_capacity(self.maximum_autocomplete_options);
// Scan the search index for the highest scoring keywords:
self.b_tree_map
// Get matching keywords starting with (partial) keyword string:
.range(KString::from_ref(index_range)..)
// We did not specify an end bound for our `range` function (see
// above.) `range` will return _every_ keyword greater than the
// supplied keyword. The below `take_while` will effectively break
// iteration when we reach a keyword that does not start with our
// supplied (partial) keyword.
.take_while(|(index_keyword, _keys)| index_keyword.starts_with(index_range))
// Only examine search index keywords that intersect with the caller
// provided key-set. This ensures contextual fuzzy matching. This
// will filter out search index keywords that don't contain any keys
// from the caller provided key set:
.filter(|(_index_keyword, index_keys)|
key_set.is_empty() ||
index_keys.iter().any(|index_key| key_set.contains(index_key))
) // filter
// For each keyword in the search index:
.for_each(|(index_keyword, index_keys)| {
// Using this keyword from the search index, calculate its
// similarity to the user's keyword:
let score = jaro(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
// Return the top scoring keywords that could be used as autocomplete
// options, and their keys, to the caller:
top_scores.results()
} // fn
} // impl