1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
use crate::Value;
use std::cmp;

/// Prepares a list of "sounds like" matches (using edit distance) for the string you're trying to find
pub fn did_you_mean(obj_source: &Value, field_tried: String) -> Option<Vec<String>> {
    let possibilities = obj_source.data_descriptors();

    let mut possible_matches: Vec<_> = possibilities
        .into_iter()
        .map(|word| {
            let edit_distance = levenshtein_distance(&word, &field_tried);
            (edit_distance, word)
        })
        .collect();

    if !possible_matches.is_empty() {
        possible_matches.sort();
        let words_matched: Vec<String> = possible_matches.into_iter().map(|m| m.1).collect();
        Some(words_matched)
    } else {
        None
    }
}

/// Borrowed from https://crates.io/crates/natural
fn levenshtein_distance(str1: &str, str2: &str) -> usize {
    let n = str1.len();
    let m = str2.len();

    let mut current: Vec<usize> = (0..n + 1).collect();
    let a_vec: Vec<char> = str1.chars().collect();
    let b_vec: Vec<char> = str2.chars().collect();

    for i in 1..m + 1 {
        let previous = current;
        current = vec![0; n + 1];
        current[0] = i;
        for j in 1..n + 1 {
            let add = previous[j] + 1;
            let delete = current[j - 1] + 1;
            let mut change = previous[j - 1];
            if a_vec[j - 1] != b_vec[i - 1] {
                change += 1
            }
            current[j] = min3(add, delete, change);
        }
    }
    current[n]
}

fn min3<T: Ord>(a: T, b: T, c: T) -> T {
    cmp::min(a, cmp::min(b, c))
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::UntaggedValue;
    use indexmap::indexmap;
    use nu_source::Tag;

    #[test]
    fn did_you_mean_returns_possible_column_matches() {
        let value = UntaggedValue::row(indexmap! {
           "dog".to_string() => UntaggedValue::int(1).into(),
           "cat".to_string() => UntaggedValue::int(1).into(),
           "alt".to_string() => UntaggedValue::int(1).into(),
        });

        let source = Value {
            tag: Tag::unknown(),
            value,
        };

        assert_eq!(
            Some(vec![
                "cat".to_string(),
                "alt".to_string(),
                "dog".to_string()
            ]),
            did_you_mean(&source, "hat".to_string())
        )
    }

    #[test]
    fn did_you_mean_returns_no_matches_when_empty() {
        let empty_source = Value {
            tag: Tag::unknown(),
            value: UntaggedValue::row(indexmap! {}),
        };

        assert_eq!(None, did_you_mean(&empty_source, "hat".to_string()))
    }
}