// cofe 0.1.1
//
// tiny string similarity crate
// Documentation
// cofe, a tiny string similarity crate
// Copyright (c) 2023 fawn
//
// SPDX-License-Identifier: Apache-2.0

use std::cmp::Ordering;

pub mod algo;

/// Suggests the entries of `possible_values` that closely resemble `input`,
/// ignoring ASCII letter case.
///
/// This forwards to `suggest_with_case` with case-insensitive matching
/// enabled, so the result is `Some` list of matching candidates ordered from
/// the highest score downwards, or `None` when no candidate scores above the
/// confidence threshold used there.
pub fn suggest(
    input: impl AsRef<str>,
    possible_values: impl IntoIterator<Item = impl AsRef<str>>,
) -> Option<Vec<String>> {
    let ignore_ascii_case = true;
    suggest_with_case(input, possible_values, ignore_ascii_case)
}

/// Suggests the entries of `possible_values` that closely resemble `input`,
/// comparing the strings exactly as given (case-sensitive).
///
/// This forwards to `suggest_with_case` with case-insensitive matching
/// disabled, so the result is `Some` list of matching candidates ordered from
/// the highest score downwards, or `None` when no candidate scores above the
/// confidence threshold used there.
pub fn suggest_sensitive(
    input: impl AsRef<str>,
    possible_values: impl IntoIterator<Item = impl AsRef<str>>,
) -> Option<Vec<String>> {
    let ignore_ascii_case = false;
    suggest_with_case(input, possible_values, ignore_ascii_case)
}

/// Shared implementation behind `suggest` and `suggest_sensitive`.
///
/// Every candidate in `possible_values` is scored against `input` with
/// `calculate_with_case`; only candidates whose score is strictly greater
/// than `CONFIDENCE_THRESHOLD` are kept. The survivors are returned as owned
/// strings ordered by descending score (equal scores are ordered by
/// descending string). Returns `None` when nothing passes the threshold.
fn suggest_with_case(
    input: impl AsRef<str>,
    possible_values: impl IntoIterator<Item = impl AsRef<str>>,
    case_insensitive: bool,
) -> Option<Vec<String>> {
    const CONFIDENCE_THRESHOLD: f64 = 0.8;

    let needle = input.as_ref();
    let mut scored: Vec<(f64, String)> = Vec::new();

    for candidate in possible_values {
        let text = candidate.as_ref();
        let score = calculate_with_case(needle, text, case_insensitive);
        // Strict comparison: a score exactly at the threshold is rejected,
        // and so is a NaN score (any comparison with NaN is false).
        if score > CONFIDENCE_THRESHOLD {
            scored.push((score, text.to_owned()));
        }
    }

    if scored.is_empty() {
        return None;
    }

    // Operands are swapped to sort in descending order. The sort is stable,
    // and incomparable pairs are treated as equal.
    scored.sort_by(|lhs, rhs| rhs.partial_cmp(lhs).unwrap_or(Ordering::Equal));

    Some(scored.into_iter().map(|(_, text)| text).collect())
}

/// Scores how similar `a` and `b` are, ignoring ASCII letter case.
///
/// Forwards to `calculate_with_case` with case-insensitive matching enabled;
/// the returned value is whatever `algo::jaro_winkler` produces for the
/// ASCII-lowercased inputs.
pub fn calculate(a: &str, b: &str) -> f64 {
    let ignore_ascii_case = true;
    calculate_with_case(a, b, ignore_ascii_case)
}

/// Scores how similar `a` and `b` are, comparing them exactly as given
/// (case-sensitive).
///
/// Forwards to `calculate_with_case` with case-insensitive matching disabled;
/// the returned value is whatever `algo::jaro_winkler` produces for the
/// unmodified inputs.
pub fn calculate_sensitive(a: &str, b: &str) -> f64 {
    let ignore_ascii_case = false;
    calculate_with_case(a, b, ignore_ascii_case)
}

/// Shared implementation behind `calculate` and `calculate_sensitive`.
///
/// When `case_insensitive` is `true`, both strings are ASCII-lowercased
/// before being handed to `algo::jaro_winkler`; non-ASCII characters are left
/// untouched by that conversion. Otherwise the strings are compared as-is.
fn calculate_with_case(a: &str, b: &str, case_insensitive: bool) -> f64 {
    if !case_insensitive {
        return algo::jaro_winkler(a, b);
    }

    let lowered_a = a.to_ascii_lowercase();
    let lowered_b = b.to_ascii_lowercase();
    algo::jaro_winkler(&lowered_a, &lowered_b)
}