1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#![allow(clippy::inline_always)]
use kstring::KString;
// -----------------------------------------------------------------------------
impl<K: std::hash::Hash + Ord> crate::simple::search_index::SearchIndex<K> {
/// Scans the entire search index for the closest matching _n_ keywords
/// using the provided keyword and configured string similarity metric. This
/// feature relies on the [strsim](https://crates.io/crates/strsim)
/// crate.
///
/// When the user's last (partial) keyword that is meant to be autocompleted
/// returns no matches, this can be used to find the best matches for
/// substitution.
///
/// Only keywords associated with the provided `preceding_results` key-set
/// can potentially be returned. This effectively makes any fuzzy
/// autocompletion contextual.
///
/// # Input
///
/// * `preceding_keywords` · If the search string was `He who knows nothing,
/// loves nothing` the “preceding” keywords would be `["he", "who",
/// "knows", "nothing", "loves"]`.
///
/// This collection of keywords is used to prevent previously typed
/// keywords from being suggested. In this case, the system would _not_
/// suggest `nothing` as an autocomplete keyword since it's already
/// present in the search string.
///
/// * `preceding_results` · A collection of keys for the preceding keywords.
/// Using the above example, these keys are the product of searching for
/// `he who knows nothing loves`.
///
/// These keys represent records in the search index. They're used to
/// constrain the keywords that are returned from this method to the
/// caller, and ensure that all returned keywords relate to these keys.
/// This is how contextual fuzzy matching is acheived.
///
/// * `last_keyword` · If the search string was `He who knows nothing,
/// loves nothing` the “last” keyword would be `nothing`.
///
/// This keyword is used to search the search index. For example, this
/// could potentially return `nothingness`, and `nothings` as
/// autocompletion options if those words were present in the index.
///
/// # Output
///
/// This method returns an iterator over the top _n_ autocompletion options.
///
/// Each item the returned iterator is comprised of a keyword, and the
/// records associated with each keyword.
///
/// The number of autocompletion options are defined by the
/// `maximum_autocomplete_options` option in the search index.
///
/// If no keywords or reasonable matches are found, this method will return
/// an empty iterator.
///
/// # Notes
///
/// * This method differs from `strsim_context` in that this is a generic
/// method. This method will be monomorphized for each `strsim` string
/// similarity metric (`DamerauLevenshtein`, `Jaro`, `Osa`, etc.)
///
/// `strsim_context` will call these monomorphized methods using
/// dynamic-dispatch, based on the search index's string similarity metric
/// settings.
#[inline(always)]
pub(crate) fn strsim_context_metric<'s, M>(
&'s self,
preceding_keywords: &[KString],
preceding_results: &std::collections::BTreeSet<&'s K>,
last_keyword: &str,
index_range: &str,
top_scores: &mut crate::simple::internal::fuzzers::FuzzyTopScores::<'s, K, f64>,
)
where M: crate::simple::internal::fuzzers::strsim::Metric {
// This moves a call to `is_empty` from a `filter` function, and outside
// of a hot loop:
let preceding_results_is_empty: bool = preceding_results.is_empty();
// Scan the search index for the highest scoring keywords:
self.b_tree_map
// Get matching keywords starting with (partial) keyword string:
.range(KString::from_ref(index_range)..)
// We did not specify an end bound for our `range` function (see
// above.) `range` will return _every_ keyword greater than the
// supplied keyword. The below `take_while` will effectively break
// iteration when we reach a keyword that does not start with our
// supplied (partial) keyword.
.take_while(|(index_keyword, _keys)|
index_keyword.starts_with::<&str>(index_range.as_ref())
)
// Filter out repetitious keywords. If the keyword has already been
// used in the preceding keywords, or if it matches what the user
// has currently typed in (as the last partial keyword we're
// autocompleting), don't return it as an option:
.filter(|(index_keyword, _index_keys)|
// For example: if the user typed "gold", and the
// autocompletion options were "gold" and "golden", this filter
// would remove "gold" as an autocompletion option because it's
// currently what's already typed.
//
// This is commented-out because returning "gold" as an option
// provides feedback to the user that their keyword exists in
// the index.
// index_keyword.as_str() != last_keyword &&
// If the keyword is already present in the user's search
// string, then don't suggest it as an autocompletion option:
!preceding_keywords.contains(index_keyword)
) // filter
// Only examine search index keywords that intersect with the caller
// provided key-set. This ensures contextual fuzzy matching. This
// will filter out search index keywords that aren't related to the
// keys from the caller-provided `preceding_results` key set:
.filter(|(_index_keyword, index_keys)| {
preceding_results_is_empty || index_keys
.iter()
.any(|index_key| preceding_results.contains(index_key))
}) // filter
// For each keyword in the search index:
.for_each(|(index_keyword, index_keys)| {
// Using this keyword from the search index, calculate its
// similarity to the user's keyword:
let score = M::similarity(index_keyword, last_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score);
} // if
}); // for_each
} // fn
} // impl