indicium 0.6.9

Simple in-memory search for collections and key-value stores.
Documentation
use crate::simple::internal::{string_keywords::SplitContext, SearchTopScores};
use kstring::KString;

// Conditionally select hash map type based on feature flags:
#[cfg(feature = "gxhash")]
type HashMap<K, V> = std::collections::HashMap<K, V, gxhash::GxBuildHasher>;

#[cfg(feature = "ahash")]
use ahash::HashMap;

#[cfg(feature = "rustc-hash")]
use rustc_hash::FxHashMap as HashMap;

#[cfg(all(
    not(feature = "ahash"),
    not(feature = "gxhash"),
    not(feature = "rustc-hash")
))]
use std::collections::HashMap;

// Static dependencies:
use std::hash::Hash;

// -----------------------------------------------------------------------------

impl<'a, K: 'a + Hash + Ord> crate::simple::SearchIndex<K> {
    /// This search function will return keys as the search results. Each
    /// resulting key can then be used to retrieve the full record from its
    /// collection. _This search method accepts multiple keywords in the search
    /// string._ Search keywords must be an exact match.
    ///
    /// With this search type, the logical conjuction for multiple keywords is
    /// `Or`. For example, a search of `this that` will return records
    /// containing keywords `this` or `that`. In other words, _any_ keyword can
    /// be present in a record for it to be returned as a result.
    ///
    /// For this search, the results are returned in order of descending
    /// relevance. Records containing both keywords `this` and `that` will be
    /// the top results. This conjuction uses more CPU resources than `And`
    /// because the keyword hits must be tallied and sorted.
    ///
    /// If your collection contains less than 10,000 records, `Or` might be a
    /// good place to start. To me, `Or` effectively feels like "using these
    /// keywords, find a record I might want" which works well if there aren't
    /// too many records. It's also worth noting that this conjuction uses more
    /// CPU resources because the results must be tallied and sorted in order of
    /// relevance.
    ///
    /// Search only supports exact keyword matches and does not use fuzzy
    /// matching. Consider providing the `autocomplete` feature to your users as
    /// an ergonomic alternative to fuzzy matching.
    ///
    /// Basic usage:
    ///
    /// ```ignore
    /// # use indicium::simple::{AutocompleteType, Indexable, SearchIndex, SearchType};
    /// # use pretty_assertions::assert_eq;
    /// #
    /// # struct MyStruct {
    /// #   title: String,
    /// #   year: u16,
    /// #   body: String,
    /// # }
    /// #
    /// # impl Indexable for MyStruct {
    /// #   fn strings(&self) -> Vec<String> {
    /// #       vec![
    /// #           self.title.clone(),
    /// #           self.year.to_string(),
    /// #           self.body.clone(),
    /// #       ]
    /// #   }
    /// # }
    /// #
    /// # let my_vec = vec![
    /// #   MyStruct {
    /// #       title: "Harold Godwinson".to_string(),
    /// #       year: 1066,
    /// #       body: "Last crowned Anglo-Saxon king of England.".to_string(),
    /// #   },
    /// #   MyStruct {
    /// #       title: "Edgar Ætheling".to_string(),
    /// #       year: 1066,
    /// #       body: "Last male member of the royal house of Cerdic of Wessex.".to_string(),
    /// #   },
    /// #   MyStruct {
    /// #       title: "William the Conqueror".to_string(),
    /// #       year: 1066,
    /// #       body: "First Norman monarch of England.".to_string(),
    /// #   },
    /// #   MyStruct {
    /// #       title: "William Rufus".to_string(),
    /// #       year: 1087,
    /// #       body: "Third son of William the Conqueror.".to_string(),
    /// #   },
    /// #   MyStruct {
    /// #       title: "Henry Beauclerc".to_string(),
    /// #       year: 1100,
    /// #       body: "Fourth son of William the Conqueror.".to_string(),
    /// #   },
    /// # ];
    /// #
    /// # let mut search_index: SearchIndex<usize> = SearchIndex::default();
    /// #
    /// # my_vec
    /// #   .iter()
    /// #   .enumerate()
    /// #   .for_each(|(index, element)|
    /// #       search_index.insert(&index, element)
    /// #   );
    /// #
    /// let hit_counts = search_index.search_or(&20, "last England");
    /// assert_eq!(hit_counts, vec![&0, &1, &2]);
    /// ```
    #[tracing::instrument(level = "trace", name = "or search", skip(self))]
    pub(crate) fn search_or(
        &'a self,
        maximum_search_results: usize,
        string: &'a str,
    ) -> Vec<&'a K> {
        // Split search `String` into keywords (according to the `SearchIndex`
        // settings). `string_keywords` will allow "use entire string as a
        // keyword" if enabled in user settings:
        let keywords: Vec<KString> =
            self.string_keywords(string, &SplitContext::Searching);

        // For debug builds:
        #[cfg(debug_assertions)]
        tracing::debug!("searching: {:?}", keywords);

        // This `HashMap` is used to count the number of hits for each
        // resulting key. This is so we can return search results in order of
        // relevance:
        let mut hit_counts: HashMap<&K, usize> = HashMap::default();

        // Get each keyword from our search index, record the resulting keys in
        // a our `HashMap`, and track the hit-count for each key:
        for keyword in keywords {
            // Search for keyword in our `HashMap`:
            self.internal_keyword_search(&keyword)
                // For each resulting key from the keyword search:
                .for_each(|key| match hit_counts.get_mut(key) {
                    // Add "hit" to counter for an already existing key:
                    Some(result_entry) => *result_entry += 1,
                    // No record for this key, initialize to one hit:
                    None => {
                        hit_counts.insert(key, 1);
                    }
                }); // for_each
        } // for_each

        // At this point, we have a list of resulting keys in a `HashMap`. The
        // hash map value holds the number of times each key has been returned
        // in the above keywords search.

        // This structure will track the top scoring keys:

        let mut top_scores: SearchTopScores<K> =
            SearchTopScores::with_capacity(maximum_search_results);

        // Populate the top scores by iterating over each key's tally-count:

        #[allow(clippy::needless_for_each, reason = "better performance")]
        hit_counts
            // Iterate over keys in the hash map:
            .into_iter()
            // Collect the tuple elements into a `Vec`:
            .for_each(|(key, hits)| top_scores.insert(key, hits));

        // Return the search results to the user:
        top_scores
            // Get the top scoring results from the `SearchTopScores` struct:
            .results()
            // Remove the hit-count from the tuple, returning only the key:
            .map(|(key, _hits)| key)
            // Collect the keys into a `Vec`:
            .collect()
    } // fn
} // impl