1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
//! # fuzzy_match
//!
//! `fuzzy_match` provides functionality for finding the best match from a set of strings, optionally with items
//! assosciated with each candidate string. This crate is based in large part on the awesome
//! [`fuzzy_match` Ruby gem](https://github.com/seamusabshere/fuzzy_match), but this crate only implements the basic
//! functionality and skips the more advanced functionality for now.
#![cfg_attr(all(feature = "nightly", test), feature(test))]

extern crate sliding_windows;
#[cfg(all(feature = "nightly", test))] extern crate test;

pub mod algorithms;
pub(crate) mod util;

/// Fuzzy finds a set of string-item pairs using a Sorensen Dice coefficient and Levenshtein for breaking ties. May
/// return None if no match is similar. This consumes the input vector. See
/// [`fuzzy_match_with_algorithms`](fuzzy_match::fuzzy_match_with_algorithms) for additional details.
///
/// # Examples
/// ```rust
/// use fuzzy_match::fuzzy_match;
///
/// let haystack = vec![("rust", 0), ("java", 1), ("lisp", 2)];
/// assert_eq!(Some(0), fuzzy_match("bust", haystack));
/// ```
/// 
/// # Panics
/// This function will panic if the haystack is empty (length 0).
pub fn fuzzy_match<'a, T, It>(needle: &str, haystack: It) -> Option<T> 
    where It: IntoIterator<Item = (&'a str, T)>
{
    fuzzy_match_with_algorithms::<T, algorithms::SorensenDice, algorithms::Levenshtein, It>(needle, haystack)
}

/// Version of [`fuzzy_match`](fuzzy_match::fuzzy_match) which allows overriding the first and second choice algorithms,
/// instead of using Sorensen-Dice and Levenshtein respectively. This consumes the input vector.
/// 
/// # Examples
/// ```rust
/// use fuzzy_match::fuzzy_match_with_algorithms;
/// use fuzzy_match::algorithms::{SorensenDice, Levenshtein};
///
/// let haystack = vec![("rust", 0), ("java", 1), ("lisp", 2)];
/// // Search with Levenshtein first, then Sorensen-Dice.
/// assert_eq!(Some(0), fuzzy_match_with_algorithms::<_, Levenshtein, SorensenDice, _>("bust", haystack));
/// ```
/// 
/// # Panics
/// This function will panic if the haystack is empty (length 0).
pub fn fuzzy_match_with_algorithms<'a, T, FST, SND, It>(
    needle: &str,
    haystack: It) -> Option<T> 
    where FST: algorithms::SimilarityAlgorithm,
          SND: algorithms::SimilarityAlgorithm,
          It: IntoIterator<Item = (&'a str, T)>
{

    let mut iter = haystack.into_iter().peekable();

    if iter.peek().is_none() {
        panic!("No haystack provided!")
    }

    let mut highest_set: Vec<(&str, T)> = Vec::new();
    let mut highest_weight = 0f32;
    let mut first_algo = FST::new();    

    // Try with first-case algorithm.
    for (name, item) in iter {
        let weight = first_algo.get_similarity(needle, name);
        if weight == highest_weight {
            highest_set.push((name, item))
        } else if weight > highest_weight {
            highest_weight = weight;
            highest_set.clear();
            highest_set.push((name, item));
        }
    }

    if highest_set.is_empty() {
        return None;
    } else if highest_set.len() == 1 {
        let (_, item) = highest_set.remove(0);
        return Some(item);
    }

    // Break ties with second-case algorithm
    let mut snd_highest_set: Vec<(&str, T)> = Vec::new();
    let mut snd_highest_weight = 0f32;
    let mut second_algo = SND::new();

    for (name, item) in highest_set.drain(..) {
        let weight = second_algo.get_similarity(needle, name);
        if weight == snd_highest_weight {
            snd_highest_set.push((name, item))
        } else if weight > highest_weight {
            snd_highest_weight = weight;
            snd_highest_set.clear();
            snd_highest_set.push((name, item));
        }
    }

    if snd_highest_set.is_empty() || snd_highest_set.len() > 1 {
        None
    } else {
        let (_, item) = snd_highest_set.remove(0);
        Some(item)
    }
}