1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
use crate::simple::internal::{string_keywords::SplitContext, SearchTopScores};
use kstring::KString;
// Conditionally select hash map type based on feature flags:
#[cfg(feature = "gxhash")]
type HashMap<K, V> = std::collections::HashMap<K, V, gxhash::GxBuildHasher>;
#[cfg(feature = "ahash")]
use ahash::HashMap;
#[cfg(feature = "rustc-hash")]
use rustc_hash::FxHashMap as HashMap;
#[cfg(all(
not(feature = "ahash"),
not(feature = "gxhash"),
not(feature = "rustc-hash")
))]
use std::collections::HashMap;
// Static dependencies:
use std::hash::Hash;
// -----------------------------------------------------------------------------
impl<'a, K: 'a + Hash + Ord> crate::simple::SearchIndex<K> {
/// This search function will return keys as the search results. Each
/// resulting key can then be used to retrieve the full record from its
/// collection. _This search method accepts multiple keywords in the search
/// string._ Search keywords must be an exact match.
///
/// With this search type, the logical conjuction for multiple keywords is
/// `Or`. For example, a search of `this that` will return records
/// containing keywords `this` or `that`. In other words, _any_ keyword can
/// be present in a record for it to be returned as a result.
///
/// For this search, the results are returned in order of descending
/// relevance. Records containing both keywords `this` and `that` will be
/// the top results. This conjuction uses more CPU resources than `And`
/// because the keyword hits must be tallied and sorted.
///
/// If your collection contains less than 10,000 records, `Or` might be a
/// good place to start. To me, `Or` effectively feels like "using these
/// keywords, find a record I might want" which works well if there aren't
/// too many records. It's also worth noting that this conjuction uses more
/// CPU resources because the results must be tallied and sorted in order of
/// relevance.
///
/// Search only supports exact keyword matches and does not use fuzzy
/// matching. Consider providing the `autocomplete` feature to your users as
/// an ergonomic alternative to fuzzy matching.
///
/// Basic usage:
///
/// ```ignore
/// # use indicium::simple::{AutocompleteType, Indexable, SearchIndex, SearchType};
/// # use pretty_assertions::assert_eq;
/// #
/// # struct MyStruct {
/// # title: String,
/// # year: u16,
/// # body: String,
/// # }
/// #
/// # impl Indexable for MyStruct {
/// # fn strings(&self) -> Vec<String> {
/// # vec![
/// # self.title.clone(),
/// # self.year.to_string(),
/// # self.body.clone(),
/// # ]
/// # }
/// # }
/// #
/// # let my_vec = vec![
/// # MyStruct {
/// # title: "Harold Godwinson".to_string(),
/// # year: 1066,
/// # body: "Last crowned Anglo-Saxon king of England.".to_string(),
/// # },
/// # MyStruct {
/// # title: "Edgar Ætheling".to_string(),
/// # year: 1066,
/// # body: "Last male member of the royal house of Cerdic of Wessex.".to_string(),
/// # },
/// # MyStruct {
/// # title: "William the Conqueror".to_string(),
/// # year: 1066,
/// # body: "First Norman monarch of England.".to_string(),
/// # },
/// # MyStruct {
/// # title: "William Rufus".to_string(),
/// # year: 1087,
/// # body: "Third son of William the Conqueror.".to_string(),
/// # },
/// # MyStruct {
/// # title: "Henry Beauclerc".to_string(),
/// # year: 1100,
/// # body: "Fourth son of William the Conqueror.".to_string(),
/// # },
/// # ];
/// #
/// # let mut search_index: SearchIndex<usize> = SearchIndex::default();
/// #
/// # my_vec
/// # .iter()
/// # .enumerate()
/// # .for_each(|(index, element)|
/// # search_index.insert(&index, element)
/// # );
/// #
/// let hit_counts = search_index.search_or(&20, "last England");
/// assert_eq!(hit_counts, vec![&0, &1, &2]);
/// ```
#[tracing::instrument(level = "trace", name = "or search", skip(self))]
pub(crate) fn search_or(
&'a self,
maximum_search_results: usize,
string: &'a str,
) -> Vec<&'a K> {
// Split search `String` into keywords (according to the `SearchIndex`
// settings). `string_keywords` will allow "use entire string as a
// keyword" if enabled in user settings:
let keywords: Vec<KString> =
self.string_keywords(string, &SplitContext::Searching);
// For debug builds:
#[cfg(debug_assertions)]
tracing::debug!("searching: {:?}", keywords);
// This `HashMap` is used to count the number of hits for each
// resulting key. This is so we can return search results in order of
// relevance:
let mut hit_counts: HashMap<&K, usize> = HashMap::default();
// Get each keyword from our search index, record the resulting keys in
// a our `HashMap`, and track the hit-count for each key:
for keyword in keywords {
// Search for keyword in our `HashMap`:
self.internal_keyword_search(&keyword)
// For each resulting key from the keyword search:
.for_each(|key| match hit_counts.get_mut(key) {
// Add "hit" to counter for an already existing key:
Some(result_entry) => *result_entry += 1,
// No record for this key, initialize to one hit:
None => {
hit_counts.insert(key, 1);
}
}); // for_each
} // for_each
// At this point, we have a list of resulting keys in a `HashMap`. The
// hash map value holds the number of times each key has been returned
// in the above keywords search.
// This structure will track the top scoring keys:
let mut top_scores: SearchTopScores<K> =
SearchTopScores::with_capacity(maximum_search_results);
// Populate the top scores by iterating over each key's tally-count:
#[allow(clippy::needless_for_each, reason = "better performance")]
hit_counts
// Iterate over keys in the hash map:
.into_iter()
// Collect the tuple elements into a `Vec`:
.for_each(|(key, hits)| top_scores.insert(key, hits));
// Return the search results to the user:
top_scores
// Get the top scoring results from the `SearchTopScores` struct:
.results()
// Remove the hit-count from the tuple, returning only the key:
.map(|(key, _hits)| key)
// Collect the keys into a `Vec`:
.collect()
} // fn
} // impl