gecliht 0.2.0 - Docs.rs

//! Algorithms used primarily for matching names based on phonetic similarity.
//!
//! Each phonetic algorithm converts a word into a representation of its sound
//! using letters and/or numbers to represent different sound groups.

/// The earliest soundex-style algorithm. Separates letters into numbered groups,
/// and returns a code consisting of the initial letter followed by 
/// three numbers.
///
/// Only defined for words consisting of letters in English a-z alphabet.
///
/// # Examples
///
/// ```
/// assert_eq!(gecliht::russell_soundex("Gauss"), "G200");
/// assert_eq!(gecliht::russell_soundex("Ghosh"), "G200");
/// ```
///
pub fn russell_soundex (word: &str) -> String {
    
    fn encode(key: &str) -> &str {
        match key {
            "A" | "E" | "H" | "I" | "O" | "U" | "W" | "Y" => "0",
            "B" | "F" | "P" | "V" => "1",
            "C" | "S" | "K" | "G" | "J" | "Q" | "X" | "Z" => "2",
            "D" | "T" => "3",
            "L" => "4",
            "M" | "N" => "5",
            "R" => "6",
            _ => key,
        }
    }

    // 1. convert the word to uppercase, and remove non-letters
    let mut chars : Vec<String> = word.chars()
        .filter(|c| c.is_alphanumeric())
        .map(|c| c.to_uppercase().to_string())
        .collect ();
    if chars.is_empty() { // return empty string if no word to process
        return "".to_string();
    }

    // 2. replace letters, except first, with codes
    for i in 1..chars.len() {
        chars[i] = encode(&chars[i]).to_string ();
    }

    // 3. delete duplicates, leaving just one
    for i in (2..(chars.len())).rev() {
        if chars[i] == chars[i-1] {
            chars.remove(i);
        }
    }
    if encode(&chars[0]) == chars[1] { // first letter is not encoded, so check it separately
        chars.remove(1);
    }

    // 4. omit vowels or similar letters (code "0"), except first letter
    for i in (1..chars.len()).rev() {
        if chars[i] == "0" {
            chars.remove(i);
        }
    }

    // 5. make sure we have at least 4 characters
    while chars.len() < 4 {
        chars.push("0".to_string());
    }
    
    // 6. but only keep 4
    let mut result = "".to_string();
    for i in 0..4 {
        result.push_str(&chars[i]);
    }

    result
}

/// An improved version of the Russell-Soundex algorithm, with a greater 
/// breakdown of the consonants into groups, and allowing a longer 
/// final string.
///
/// Only defined for words consisting of letters in English a-z alphabet.
///
/// # Example
///
/// ```
/// assert_eq!(gecliht::refined_soundex("Lambert"), "L7081096");
/// ```
///
pub fn refined_soundex (word: &str) -> String {
    
    fn encode(key: &str) -> &str {
        match key {
            "A" | "E" | "H" | "I" | "O" | "U" | "W" | "Y" => "0",
            "B" | "P" => "1",
            "F" | "V" => "2",
            "C" | "S" | "K" => "3",
            "G" | "J" => "4",
            "Q" | "X" | "Z" => "5",
            "D" | "T" => "6",
            "L" => "7",
            "M" | "N" => "8",
            "R" => "9",
            _ => key,
        }
    }

    // 1. convert the word to uppercase, and remove non-letters
    let mut chars : Vec<String> = word.chars()
        .filter(|c| c.is_alphabetic())
        .map(|c| c.to_uppercase().to_string())
        .collect ();
    if chars.is_empty() { // return empty string if no word to process
        return "".to_string();
    }

    // 2. replace letters, except first, with codes
    // keep first letter
    chars.insert(0, chars[0].clone());
    for i in 1..chars.len() {
        chars[i] = encode(&chars[i]).to_string ();
    }

    // 3. delete duplicates, leaving just one
    for i in (2..(chars.len())).rev() {
        if chars[i] == chars[i-1] {
            chars.remove(i);
        }
    }

    // no size limit in refined soundex
    let mut result = "".to_string();
    for c in chars.iter () {
        result.push_str(&c);
    }

    result
}

/// Created to improve soundex algorithm for Yiddish and Slavic names.
/// Output is a six-digit string of numbers.
///
/// This implementation follows the definition at:
/// <https://www.jewishgen.org/InfoFiles/soundex.html>
///
/// # Example
///
/// ```
/// assert_eq!(gecliht::daitch_mokotoff("Lewinsky"), "876450");
/// ```
///
/// (Note that it is possible for a name to have more than one encoding. 
/// This function computes multiple encodings, but only returns one.)
///
pub fn daitch_mokotoff (word: &str) -> String {
    const CODES: &[(&str, &[&str], &[&str], &[&str])] = &[ 
        // (letter(s), at-start, before-vowel, elsewhere)
        // Note: hand-sorted in descending order of length of first string
        ("SCHTSCH", &["2"], &["4"], &["4"]),
        ("SCHTSH", &["2"], &["4"], &["4"]),
        ("SCHTCH", &["2"], &["4"], &["4"]),
        ("SHTCH", &["2"], &["4"], &["4"]),
        ("SHTSH", &["2"], &["4"], &["4"]),
        ("STSCH", &["2"], &["4"], &["4"]),
        ("TTSCH", &["4"], &["4"], &["4"]),
        ("ZHDZH", &["2"], &["4"], &["4"]),
        ("SHCH", &["2"], &["4"], &["4"]),
        ("STCH", &["2"], &["4"], &["4"]),
        ("STRZ", &["2"], &["4"], &["4"]),
        ("STRS", &["2"], &["4"], &["4"]),
        ("STSH", &["2"], &["4"], &["4"]),
        ("SZCZ", &["2"], &["4"], &["4"]),
        ("SZCS", &["2"], &["4"], &["4"]),
        ("TTCH", &["4"], &["4"], &["4"]),
        ("TTSZ", &["4"], &["4"], &["4"]),
        ("ZDZH", &["2"], &["4"], &["4"]),
        ("ZSCH", &["4"], &["4"], &["4"]),
        ("CHS", &["5"], &["54"], &["54"]),
        ("CSZ", &["4"], &["4"], &["4"]),
        ("CZS", &["4"], &["4"], &["4"]),
        ("DRZ", &["4"], &["4"], &["4"]),
        ("DRS", &["4"], &["4"], &["4"]),
        ("DSH", &["4"], &["4"], &["4"]),
        ("DSZ", &["4"], &["4"], &["4"]),
        ("DZH", &["4"], &["4"], &["4"]),
        ("DZS", &["4"], &["4"], &["4"]),
        ("SCH", &["4"], &["4"], &["4"]),
        ("SHT", &["2"], &["43"], &["43"]),
        ("SZT", &["2"], &["43"], &["43"]),
        ("SHD", &["2"], &["43"], &["43"]),
        ("SZD", &["2"], &["43"], &["43"]),
        ("TCH", &["4"], &["4"], &["4"]),
        ("TRZ", &["4"], &["4"], &["4"]),
        ("TRS", &["4"], &["4"], &["4"]),
        ("TSCH", &["4"], &["4"], &["4"]),
        ("TSH", &["4"], &["4"], &["4"]),
        ("TTS", &["4"], &["4"], &["4"]),
        ("TTZ", &["4"], &["4"], &["4"]),
        ("TZS", &["4"], &["4"], &["4"]),
        ("TSZ", &["4"], &["4"], &["4"]),
        ("ZDZ", &["2"], &["4"], &["4"]),
        ("ZHD", &["2"], &["43"], &["43"]),
        ("ZSH", &["4"], &["4"], &["4"]),
        ("AI", &["0"], &["1"], &["-"]), 
        ("AJ", &["0"], &["1"], &["-"]),
        ("AY", &["0"], &["1"], &["-"]),
        ("AU", &["0"], &["7"], &["-"]),
        ("CH", &["5", "4"], &["5", "4"], &["5", "4"]), 
        ("CK", &["5", "45"], &["5", "45"], &["5", "45"]),
        ("CK", &["4"], &["4"], &["4"]),
        ("CS", &["4"], &["4"], &["4"]),
        ("CZ", &["4"], &["4"], &["4"]),
        ("DS", &["4"], &["4"], &["4"]),
        ("DT", &["3"], &["3"], &["3"]),
        ("DZ", &["4"], &["4"], &["4"]),
        ("EI", &["0"], &["1"], &["-"]), 
        ("EJ", &["0"], &["1"], &["-"]),
        ("EY", &["0"], &["1"], &["-"]),
        ("EU", &["1"], &["1"], &["-"]),
        ("FB", &["7"], &["7"], &["7"]),
        ("IA", &["1"], &["-"], &["-"]),
        ("IE", &["1"], &["-"], &["-"]),
        ("IO", &["1"], &["-"], &["-"]),
        ("IU", &["1"], &["-"], &["-"]),
        ("KS", &["5"], &["54"], &["54"]),
        ("KH", &["5"], &["5"], &["5"]),
        ("MN", &["-"], &["66"], &["66"]),
        ("NM", &["-"], &["66"], &["66"]),
        ("OI", &["0"], &["1"], &["-"]),
        ("OJ", &["0"], &["1"], &["-"]),
        ("OY", &["0"], &["1"], &["-"]),
        ("PF", &["7"], &["7"], &["7"]),
        ("PH", &["7"], &["7"], &["7"]),
        ("RS", &["4", "94"], &["4", "94"], &["4", "94"]),
        ("RZ", &["4", "94"], &["4", "94"], &["4", "94"]), 
        ("SC", &["2"], &["4"], &["4"]),
        ("SD", &["2"], &["43"], &["43"]),
        ("SH", &["4"], &["4"], &["4"]),
        ("ST", &["2"], &["43"], &["43"]),
        ("SZ", &["4"], &["4"], &["4"]),
        ("TC", &["4"], &["4"], &["4"]),
        ("TH", &["3"], &["3"], &["3"]),
        ("TS", &["4"], &["4"], &["4"]),
        ("TZ", &["4"], &["4"], &["4"]),
        ("UI", &["0"], &["1"], &["-"]),
        ("UJ", &["0"], &["1"], &["-"]),
        ("UY", &["0"], &["1"], &["-"]),
        ("UE", &["0"], &["-"], &["-"]),
        ("ZD", &["2"], &["43"], &["43"]),
        ("ZH", &["4"], &["4"], &["4"]),
        ("ZS", &["4"], &["4"], &["4"]),
        ("A", &["0"], &["-"], &["-"]),
        ("Ą", &["-"], &["-"], &["6", "-"]), // Polish a-ogonek
        ("B", &["7"], &["7"], &["7"]),
        ("C", &["4", "5"], &["4", "5"], &["4", "5"]), 
        ("D", &["3"], &["3"], &["3"]),
        ("Ę", &["-"], &["-"], &["6", "-"]), // Polish e-ogonek
        ("E", &["0"], &["-"], &["-"]),
        ("F", &["7"], &["7"], &["7"]),
        ("G", &["5"], &["5"], &["5"]),
        ("H", &["5"], &["5"], &["-"]),
        ("I", &["0"], &["-"], &["-"]),
        ("J", &["1", "4"], &["-", "4"], &["-", "4"]),
        ("K", &["5"], &["5"], &["5"]),
        ("L", &["8"], &["8"], &["8"]),
        ("M", &["6"], &["6"], &["6"]),
        ("N", &["6"], &["6"], &["6"]),
        ("O", &["0"], &["-"], &["-"]),
        ("P", &["7"], &["7"], &["7"]),
        ("Q", &["5"], &["5"], &["5"]),
        ("R", &["9"], &["9"], &["9"]),
        ("S", &["4"], &["4"], &["4"]),
        ("Ţ", &["3", "4"], &["3", "4"], &["3", "4"]), // Romanian t-cedilla 
        ("T", &["3"], &["3"], &["3"]),
        ("U", &["0"], &["-"], &["-"]),
        ("V", &["7"], &["7"], &["7"]),
        ("W", &["7"], &["7"], &["7"]),
        ("X", &["5"], &["54"], &["54"]),
        ("Y", &["1"], &["-"], &["-"]),
        ("Z", &["4"], &["4"], &["4"]),
        ];

    // Returns the longest match from above list of CODES.
    // Assumes that the list is in sorted order.
    fn longest_match (word: &str) -> &(&str, &[&str], &[&str], &[&str]) {
        for code in CODES.iter() {
            if word.starts_with(code.0) {
                return code;
            }
        }
        &("", &[], &[], &[]) // this should never be reached
    }

    // This function creates the final code. 
    //
    // Although each item in codes is a list, only some of these will have
    // multiple entries.
    // It is possible to expand these to produce multiple codes for a given 
    // word, but here, for simplicity, only one term is taken from each 
    // item in codes, to produce a single code.
    fn generate_result(codes: &Vec<&[&str]>) -> String {
        let mut result = "".to_string();

        for item in codes.iter() {
            if result.len() == 6 { break; }
            if item.len() > 0 {
                result.push_str(item[0]);
            }
        }
        while result.len() < 6 {
            result.push_str("0");
        }

        result
    }

    // determines which part of the code we use: 
    // are we at the start of the word, before a vowel, or elsewhere?
    fn match_result<'a>(word: &'a str, 
                        posn: usize, 
                        next_posn: usize, 
                        code: &(&'a str, &'a[&'a str], &'a[&'a str], &'a[&'a str])
                        ) -> &'a[&'a str] {
        if posn == 0 {
            code.1
        } else {
            match word.get(next_posn..next_posn+1) {
                Some(c) => {
                    if "AEIOU".contains(c) {
                        code.2
                    } else {
                        code.3
                    }
                },
                None => code.3,
            }
        }
    }

    fn is_valid(word: &str, posn: usize, next_posn: usize, code: &(&str, &[&str], &[&str], &[&str])) -> bool {
        let mr = match_result(word, posn, next_posn, code);
        mr.len() > 0 && mr[0] != "-"
    }
    
    let word = word.to_uppercase();
    let mut posn = 0;
    let mut code = longest_match(&word[0..]);
    let mut result: Vec<&[&str]> = vec![];

    while posn < word.len() {
        if code.0 == "" {
            posn += 1;
            code = longest_match(&word[posn..]);
        } else {
            let next_posn = posn + code.0.len();
            if is_valid(&word, posn, next_posn, code) {
                result.push(match_result(&word, posn, next_posn, code));
            }
            posn += code.0.len();
            code = longest_match(&word[posn..]);
        }
    }

    generate_result(&result)
}

/// Transforms words using a set of English pronunciation rules.
///
/// This implementation follows the definition at:
/// http://aspell.net/metaphone/metaphone-kuhn.txt
///
/// # Example
///
/// ```
/// assert_eq!(gecliht::metaphone("catherine"), "K0RN");
/// ```
///
pub fn metaphone (word: &str) -> String {
    const DUPLICATES: &[char] = &['B', 'F', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'R', 'S', 'T', 'W', 'X', 'Y'];
    const VOWELS: &[char] = &['A', 'E', 'I', 'O', 'U'];

    fn last_was(chars: &[char], i: usize, c: char) -> bool {
        i > 0 && chars[i-1] == c
    }

    fn next_is(chars: &[char], i: usize, c: char) -> bool {
        i+1 < chars.len() && chars[i+1] == c
    }

    fn next_is_eiy(chars: &[char], i: usize) -> bool {
        i+1 < chars.len() && ['E', 'I', 'Y'].contains(&chars[i+1])
    }

    fn last_was_vowel(chars: &[char], i: usize) -> bool {
        i > 0 && VOWELS.contains(&chars[i-1])
    }

    fn next_is_vowel(chars: &[char], i: usize) -> bool {
        i+1 < chars.len() && VOWELS.contains(&chars[i+1])
    }

    fn next_two(chars: &[char], i: usize, c: char, d: char) -> bool {
        i+2 < chars.len() && chars[i+1] == c && chars[i+2] == d
    }

    let mut chars: Vec<char> = word // keep only alphabetic chars, in lowercase
        .to_uppercase()
        .chars()
        .filter(|c| c.is_alphabetic())
        .collect();
    // some preprocessing
    // -- remove certain duplicates 
    for i in (1..chars.len()).rev() {
        if chars[i] == chars[i-1] && DUPLICATES.contains(&chars[i]) {
            chars.remove(i);
        }
    }
    // -- adapt inital letters
    // "ae-", "gn", "kn-", "pn-", "wr-" drop first letter
    // "x" -> "s"
    // "wh" -> "w"
    if chars.len() > 0 && chars[0] == 'X' {
        chars[0] = 'S';
    }
    if chars.len() > 1 {
        if chars[0] == 'W' && chars[1] == 'H' {
            chars.remove(1);
        } else if (chars[0] == 'A' && chars[1] == 'E') ||
            (chars[0] == 'G' && chars[1] == 'N') ||
                (chars[0] == 'K' && chars[1] == 'N') ||
                (chars[0] == 'P' && chars[1] == 'N') ||
                (chars[0] == 'W' && chars[1] == 'R') {
                    chars.remove(0);
                }
    }
    // -- remove b in words ending in mb
    if chars[chars.len()-2] == 'M' && chars[chars.len()-1] == 'B' {
        chars.pop();
    }

    let mut result = String::from("");

    let mut i = 0;
    while i < chars.len() {
        let c = &chars[i];
        match c {
            'A' | 'E' | 'I' | 'O' | 'U' => {
                if i == 0 { // only keep initial vowel
                    result.push(*c);
                }
            },
            'B' | 'F' | 'J' | 'L' | 'M' | 'N' | 'R' => {
                result.push(*c);
            },
            'C' => {
                if next_is(&chars, i, 'H') {
                    if last_was(&chars, i, 'S') {
                        result.push('K');
                    } else {
                        result.push('X');
                    }
                } else if next_two(&chars, i, 'I', 'A') {
                    result.push('X');
                } else if next_is_eiy(&chars, i) {
                    if last_was(&chars, i, 'S') {
                        // silent if -sci- -sce- -scy-
                    } else {
                        result.push('S');
                    }
                } else {
                    result.push('K');
                }
            },
            'D' => {
                if next_is(&chars, i, 'G') && next_is_eiy(&chars, i+1) {
                    result.push('J');
                } else {
                    result.push('T');
                }
            },
            'G' => {
                if next_is(&chars, i, 'G') {
                    // ignore duplicate 'g'
                } else if next_is(&chars, i, 'H') && 
                    i < chars.len()-1 && // not at end
                        !next_is_vowel(&chars, i) {
                            // ignore gh + consonant
                        } else if next_is(&chars, i, 'N') {
                            // ignore gn
                        } else if last_was(&chars, i, 'D') && next_is_eiy(&chars, i) {
                            // ignore DG[EIY]
                        } else if !last_was(&chars, i, 'G') && next_is_eiy(&chars, i) {
                            result.push('J');
                        } else {
                            result.push('K');
                        }
            },
            'H' => {
                if last_was_vowel(&chars, i) && !next_is_vowel(&chars, i) {
                    // ignore h if eh[consonant]
                } else if i > 0 && ['C', 'S', 'P', 'T', 'G'].contains(&chars[i-1]) {
                    // ignore h if ch, sh etc
                } else {
                    result.push('H');
                }
            },
            'K' => {
                if !last_was(&chars, i, 'C') {
                    // silent after c
                    result.push('K');
                }
            },
            'P' => {
                if next_is(&chars, i, 'H') {
                    result.push('F');
                } else {
                    result.push('P');
                }
            },
            'Q' => {
                result.push('K');
            },
            'S' => {
                // looks for h, io or ia following this letter
                if next_is(&chars, i, 'H') || next_two(&chars, i, 'I', 'O') || next_two(&chars, i, 'I', 'A') {
                    result.push('X');
                } else {
                    result.push('S');
                }
            },
            'T' => {
                if next_two(&chars, i, 'C', 'H') {
                    // drop t if followed by ch
                } else if next_is(&chars, i, 'H') {
                    result.push('0');
                    i += 1; 
                } else if next_two(&chars, i, 'I', 'A') || next_two(&chars, i, 'I', 'O') {
                    result.push ('X'); // replace tia/tio with x
                } else {
                    result.push('T');
                }
            },
            'V' => {
                result.push('F');
            },
            'W' | 'Y' => {
                if next_is_vowel(&chars, i) {
                    result.push(*c);
                }
            },
            'X' => {
                result.push('S');
                result.push('K');
            },
            'Z' => {
                result.push('S');
            },
            _ => { // ignore anything else
            },
        }
        i += 1;
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_russell_soundex () {
        let tests = [
            ("Ackermann", "A265"),
            ("Azuron", "A265"),
            ("Euler", "E460"),
            ("Ellery", "E460"),
            ("Gauss", "G200"), 
            ("Ghosh", "G200"),
            ("Hilbert", "H416"),
            ("Heilbronn", "H416"),
            ("Knuth", "K530"),
            ("Kant", "K530"),
            ("Lloyd", "L300"),
            ("Ladd", "L300"),
            ("Lukasiewicz", "L222"),
            ("Lissajous", "L222"),
            ("SanFrancisco", "S516"),
            ("\"SanFrancisco\"", "S516"),
            ("", "") 
        ];
        for (wrd, res) in tests.iter() {
            assert_eq!(res.to_string(), russell_soundex(wrd));
        }
    }

    // Table and test cases from: http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
    #[test]
    fn test_refined_soundex () {
        let tests = [
            ("Braz", "B1905"),
            ("Broz", "B1905"),
            ("Caren", "C30908"),
            ("Caron", "C30908"),
            ("Carren", "C30908"),
            ("Charon", "C30908"),
            ("Corain", "C30908"),
            ("Coram", "C30908"),
            ("Corran", "C30908"),
            ("Corrin", "C30908"),
            ("Corwin", "C30908"),
            ("Curran", "C30908"),
            ("Curreen", "C30908"),
            ("Currin", "C30908"),
            ("Currom", "C30908"),
            ("Currum", "C30908"),
            ("Curwen", "C30908"),
            ("Hairs", "H093"), 
            ("Hark", "H093"), 
            ("Hars", "H093"), 
            ("Hayers", "H093"), 
            ("Heers", "H093"), 
            ("Hiers", "H093"), 
            ("Lambard", "L7081096"),
            ("Lambart", "L7081096"),
            ("Lambert", "L7081096"),
            ("Lambird", "L7081096"),
            ("Lampaert", "L7081096"),
            ("Lampard", "L7081096"),
            ("Lampart", "L7081096"),
            ("Lamperd", "L7081096"),
            ("Lampert", "L7081096"),
            ("Lamport", "L7081096"),
            ("Limbert", "L7081096"),
            ("Lombard", "L7081096"),
            ("Nolton", "N807608"),
            ("Noulton", "N807608"),
            ("", "") 
                ];
        for (wrd, res) in tests.iter() {
            assert_eq!(res.to_string(), refined_soundex(wrd));
        }
    }

    #[test]
    fn test_daitch_mokotoff () {
        let tests = [
            ("MANHEIM", "665600"),
            ("MINTZ", "664000"),
            ("TOPF", "370000"),
            ("AUERBACH", "097500"),
            ("OHRBACH", "097500"),
            ("LIPSHITZ", "874400"),
            ("LIPPSZYC", "877440"),
            ("LEWINSKY", "876450"),
            ("lewinsky", "876450"),
            ("LEVINSKI", "876450"),
            ("SZLAMAWICZ", "486740"),
            ("SHLAMOVITZ", "486740")
        ];
        for (wrd, res) in tests.iter() {
            assert_eq!(res.to_string(), daitch_mokotoff(wrd));
        }
    }

    // Tests for Metaphone
    // -- using list from https://github.com/threedaymonk/text/blob/master/test/data/metaphone.yml
    // but modifying result as implementation here a little different
    #[test]
    fn test_metaphone () {
        let tests = [ 
            ("ANASTHA", "ANS0"),
            ("DAVIS-CARTER", "TFSKRTR"),
            ("ESCARMANT", "ESKRMNT"),
            ("MCCALL", "MKKL"),     // double c not removed
            ("MCCROREY", "MKKRR"),
            ("MERSEAL", "MRSL"),
            ("PIEURISSAINT", "PRSNT"),
            ("ROTMAN", "RTMN"),
            ("SCHEVEL", "SKFL"),    // c in sch transformed to k
            ("SCHROM", "SKRM"),
            ("SEAL", "SL"),
            ("SPARR", "SPR"),
            ("STARLEPER", "STRLPR"),
            ("THRASH", "0RX"),
            ("LOGGING", "LKNK"),
            ("LOGIC", "LJK"),
            ("JUDGES", "JJS"),
            ("SHOOS", "XS"),
            ("SHOES", "XS"),
            ("CHUTE", "XT"),
            ("SCHUSS", "SKS"),      // c in sch transformed to k
            ("OTTO", "OT"),
            ("ERIC", "ERK"),
            ("BUCK", "BK"),
            ("COCK", "KK"),
            ("DAVE", "TF"),
            ("CATHERINE", "K0RN"),
            ("KATHERINE", "K0RN"),
            ("AUBREY", "ABR"),
            ("BRYAN", "BRYN"),
            ("BRYCE", "BRS"),
            ("STEVEN", "STFN"),
            ("RICHARD", "RXRT"),
            ("HEIDI", "HT"),
            ("AUTO", "AT"),
            ("MAURICE", "MRS"),
            ("RANDY", "RNT"),
            ("CAMBRILLO", "KMBRL"),
            ("BRIAN", "BRN"),
            ("RAY", "R"),
            ("GEOFF", "JF"),
            ("BOB", "BB"),
            ("AHA", "AH"),
            ("AAH", "A"),
            ("PAUL", "PL"),
            ("BATTLEY", "BTL"),
            ("WROTE" , "RT"),
            ("THIS", "0S")
                ];
        for (wrd, res) in tests.iter() {
            assert_eq!(res.to_string(), metaphone(wrd));
        }
    }

}