1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
use crate::simple::search_index::SearchIndex;
use kstring::KString;
// -----------------------------------------------------------------------------
impl<K: Clone + Ord> SearchIndex<K> {
    // -------------------------------------------------------------------------
    //
    /// Returns up to `count` of the most repeated keywords in the search
    /// index, each paired with the number of keys attached to it.
    ///
    /// **This method is only available in debug builds.**
    ///
    /// Extremely high repetitions of single keywords can reduce the performance
    /// of the search index. `profile` allows you to see the most repeated
    /// keywords in your search index. Using the output from this method, you
    /// can manually check for keywords that have high repetitions and that add
    /// little value (such as conjunctions, articles, and prepositions) and put
    /// them into your keyword exclusion list.
    ///
    /// The results are ordered by the number of attached keys, in descending
    /// order. Keywords with the same number of attached keys are ordered by
    /// the keyword itself, in ascending order, so that the output (including
    /// which keywords make the `count` cut-off) is deterministic.
    ///
    /// Every call collects and sorts all of the keywords in the index before
    /// truncating to `count`. If the index has fewer than `count` keywords,
    /// all of them are returned.
    ///
    /// See also: the [`exclude_keywords`] method for the builder pattern.
    ///
    /// [`exclude_keywords`]: struct.SearchIndexBuilder.html#method.exclude_keywords
    #[tracing::instrument(level = "trace", name = "search index profile", skip(self))]
    pub fn profile(&self, count: usize) -> impl Iterator<Item = (&str, usize)> {
        // Get a list of all keywords and the number of attached keys for each
        // keyword. For example: keyword "supercalifragilisticexpialidocious"
        // has 28 keys (or records) attached to it:
        let mut keywords: Vec<(&KString, usize)> = self
            .b_tree_map
            // Iterate over every entry (representing a keyword) in the search
            // index:
            .iter()
            // Map each `(keyword, keys)` entry to `(&KString, usize)` by
            // taking the length of the attached key collection:
            .map(|(keyword, keys)| (keyword, keys.len()))
            // Collect the keyword and key count into a `Vec`:
            .collect();

        // Sort keywords by number of attached keys (i.e. associated records),
        // in descending order. An unstable sort makes no promise about the
        // relative order of equal elements, so ties are broken explicitly by
        // keyword text to keep the output reproducible:
        keywords.sort_unstable_by(|a, b| {
            b.1.cmp(&a.1)
                .then_with(|| a.0.as_str().cmp(b.0.as_str()))
        });

        // Return only `count` number of records to the caller:
        keywords
            .into_iter()
            .take(count)
            .map(|(keyword, entries)| (keyword.as_str(), entries))
    } // fn
} // impl