use serde::{Deserialize, Serialize};
use crate::helper::is_vowel;
use crate::{Encoder, SoundexUtils};
/// Phonex phonetic encoder.
///
/// The only configuration is the length of the codes it produces;
/// `Phonex::default()` uses a length of 4.
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct Phonex {
/// Length at which `encode` stops adding characters and up to which it
/// pads shorter codes with `'0'`.
max_code_length: usize,
}
impl Phonex {
pub fn new(max_code_length: usize) -> Self {
Self { max_code_length }
}
fn preprocess(&self, value: &str) -> String {
let mut input = Self::soundex_clean(value);
while input.ends_with('S') {
input.pop();
}
let first_two = input.chars().take(2).collect::<String>();
match first_two.as_str() {
"KN" => input.replace_range(..1, "N"),
"PH" => input.replace_range(..1, "F"),
"WR" => input.replace_range(..1, "R"),
_ => (),
};
let first = input.chars().next();
if first == Some('H') {
input.remove(0);
}
let first = input.chars().next();
match first {
Some('E') | Some('I') | Some('O') | Some('U') | Some('Y') => {
input.replace_range(..1, "A")
}
Some('P') => input.replace_range(..1, "B"),
Some('V') => input.replace_range(..1, "F"),
Some('K') | Some('Q') => input.replace_range(..1, "C"),
Some('J') => input.replace_range(..1, "G"),
Some('Z') => input.replace_range(..1, "S"),
_ => (),
};
input
}
fn is_vowel(c: Option<char>) -> bool {
match c {
Some(c) => is_vowel(Some(c.to_ascii_lowercase()), true),
_ => false,
}
}
fn transcode(
&self,
curr: char,
next: Option<char>,
is_last_char: bool,
) -> (Option<char>, bool) {
let mut skip_next_char = false;
let code: Option<char> = match curr {
'B' | 'P' | 'F' | 'V' => Some('1'),
'C' | 'S' | 'K' | 'G' | 'J' | 'Q' | 'X' | 'Z' => Some('2'),
'D' | 'T' => match next {
Some('C') => None,
_ => Some('3'),
},
'L' => {
if Phonex::is_vowel(next) || is_last_char {
Some('4')
} else {
None
}
}
'M' | 'N' => {
skip_next_char = matches!(next, Some('D') | Some('G'));
Some('5')
}
'R' => {
if Phonex::is_vowel(next) || is_last_char {
Some('6')
} else {
None
}
}
_ => Some('0'),
};
(code, skip_next_char)
}
}
impl Default for Phonex {
    /// Returns an encoder that produces codes of length 4.
    fn default() -> Self {
        Self::new(4)
    }
}
// Empty impl: nothing is overridden, so `Phonex` uses whatever `SoundexUtils`
// provides as-is (presumably including the `soundex_clean` that `preprocess`
// calls - confirm against the trait definition).
impl SoundexUtils for Phonex {}
impl Encoder for Phonex {
    /// Encodes `value` into a Phonex code of `max_code_length` characters.
    ///
    /// The input is first normalised by `preprocess`. The first remaining
    /// letter is copied verbatim into the code; every following letter is
    /// mapped to a digit by `transcode`, and a digit is appended only when it
    /// is not `'0'` and differs from the previous one. Codes shorter than
    /// `max_code_length` are padded with `'0'`, so an input with no usable
    /// letters yields only zeros.
    ///
    /// The length limit is counted in characters, not bytes, and is enforced
    /// before every push. This keeps the code exactly `max_code_length`
    /// characters long even when the first letter is multi-byte (assuming
    /// `soundex_clean` lets such letters through - confirm) or when the first
    /// iteration wants to push both the letter and a digit.
    fn encode(&self, value: &str) -> String {
        let input = self.preprocess(value);
        let mut chars = input.chars().enumerate().peekable();
        let mut result = String::with_capacity(self.max_code_length);
        // Characters pushed so far; `result.len()` would count bytes instead.
        let mut code_len = 0;
        let mut code = '0';
        let mut last = '0';
        let mut last_push = '0';

        while let Some((mut i, curr)) = chars.next() {
            if code_len >= self.max_code_length {
                break;
            }

            // The first letter is kept as-is rather than encoded.
            if i == 0 {
                result.push(curr);
                code_len += 1;
                last_push = curr;
            }

            let next = chars.peek().map(|&(_, ch)| ch);
            let (new_code, skip_next_char) = self.transcode(curr, next, next.is_none());
            // A `None` from `transcode` leaves the previous digit in place.
            if let Some(c) = new_code {
                code = c;
            }

            if skip_next_char {
                // Consume the swallowed letter and keep `i` in step with it.
                chars.next();
                i += 1;
            }

            if last != code && code != '0' && i != 0 && code_len < self.max_code_length {
                result.push(code);
                code_len += 1;
                last_push = code;
            }

            // Later digits are compared with the last pushed character,
            // except right after the first letter, where its own digit is
            // used so an immediate repeat of that digit is suppressed.
            last = last_push;
            if i == 0 {
                last = code;
            }
        }

        while code_len < self.max_code_length {
            result.push('0');
            code_len += 1;
        }
        result
    }
}
#[cfg(test)]
mod tests {
    use crate::{Encoder, Phonex};

    /// Checks that each input is preprocessed to its paired expected string,
    /// using the default encoder.
    fn preprocess(values: Vec<(&str, String)>) {
        let phonex = Phonex::default();
        for (input, expected) in values {
            let actual = phonex.preprocess(input);
            assert_eq!(
                actual, expected,
                "expected input {input} to be preprocessed to {expected}, but instead got {actual}"
            );
        }
    }

    /// Checks `transcode` against tuples of
    /// `(curr, next, is_last_char, expected_code, expected_skip_next_char)`.
    // The tuple type is verbose but keeps the case tables below flat.
    #[allow(clippy::type_complexity)]
    fn transcode(values: Vec<(char, Option<char>, bool, Option<char>, bool)>) {
        let phonex = Phonex::default();
        // Renders an optional code for failure messages.
        let show = |c: Option<char>| c.map_or_else(|| String::from("None"), |c| c.to_string());
        for (curr, next, is_last_char, e_code, e_skip_next_char) in values {
            let (code, skip_next_char) = phonex.transcode(curr, next, is_last_char);
            // `e_code` is the expectation and `code` the actual result; the
            // message labels must follow that, not the other way round.
            assert_eq!(
                code,
                e_code,
                "expected code to be {} but got {}",
                show(e_code),
                show(code)
            );
            assert_eq!(
                skip_next_char, e_skip_next_char,
                "expected skip_next_char to be {e_skip_next_char} but got {skip_next_char}"
            );
        }
    }

    /// Checks that each value encodes to its paired expected code, using the
    /// default encoder.
    fn encode(values: Vec<(&str, &str)>) {
        let phonex = Phonex::default();
        for (value, expected) in values {
            assert_eq!(
                phonex.encode(value),
                expected,
                "Encoding {value} should output {expected}"
            );
        }
    }

    #[test]
    fn test_preprocess() {
        preprocess(vec![
            ("TESTSSS", String::from("TEST")),
            ("SSS", String::from("")),
            ("KNUTH", String::from("NNUTH")),
            ("PHONETIC", String::from("FHONETIC")),
            ("WRIGHT", String::from("RRIGHT")),
            ("HARRINGTON", String::from("ARRINGTON")),
            ("EIGER", String::from("AIGER")),
            ("PERCIVAL", String::from("BERCIVAL")),
            ("VERTIGAN", String::from("FERTIGAN")),
            ("KELVIN", String::from("CELVIN")),
            ("JONES", String::from("GONE")),
            ("ZEPHYR", String::from("SEPHYR")),
        ])
    }

    #[test]
    fn test_transcode() {
        transcode(vec![
            ('B', None, false, Some('1'), false),
            ('P', None, false, Some('1'), false),
            ('F', None, false, Some('1'), false),
            ('V', None, false, Some('1'), false),
            ('C', None, false, Some('2'), false),
            ('S', None, false, Some('2'), false),
            ('K', None, false, Some('2'), false),
            ('G', None, false, Some('2'), false),
            ('J', None, false, Some('2'), false),
            ('Q', None, false, Some('2'), false),
            ('X', None, false, Some('2'), false),
            ('Z', None, false, Some('2'), false),
            ('D', None, false, Some('3'), false),
            ('T', None, false, Some('3'), false),
            ('D', Some('C'), false, None, false),
            ('T', Some('C'), false, None, false),
            ('L', Some('A'), false, Some('4'), false),
            ('L', Some('B'), true, Some('4'), false),
            ('L', Some('B'), false, None, false),
            ('M', None, false, Some('5'), false),
            ('N', None, false, Some('5'), false),
            ('M', Some('D'), false, Some('5'), true),
            ('M', Some('G'), false, Some('5'), true),
            ('R', Some('A'), false, Some('6'), false),
            ('R', None, true, Some('6'), false),
        ]);
    }

    #[test]
    fn test_encode() {
        encode(vec![
            ("123 testsss", "T230"),
            ("24/7 test", "T230"),
            ("A", "A000"),
            ("Ashcraft", "A261"),
            ("Lee", "L000"),
            ("Kuhne", "C500"),
            ("Meyer-Lansky", "M452"),
            ("Oepping", "A150"),
            ("Daley", "D400"),
            ("Dalitz", "D432"),
            ("Duhlitz", "D432"),
            ("Dull", "D400"),
            ("De Ledes", "D430"),
            ("Sandemann", "S500"),
            ("Schmidt", "S530"),
            ("Sinatra", "S536"),
            ("Heinrich", "A562"),
            ("Hammerschlag", "A524"),
            ("Williams", "W450"),
            ("Wilms", "W500"),
            ("Wilson", "W250"),
            ("Worms", "W500"),
            ("Zedlitz", "S343"),
            ("Zotteldecke", "S320"),
            ("ZYX test", "S232"),
            ("Scherman", "S500"),
            ("Schurman", "S500"),
            ("Sherman", "S500"),
            ("Shermansss", "S500"),
            ("Shireman", "S650"),
            ("Shurman", "S500"),
            ("Euler", "A460"),
            ("Ellery", "A460"),
            ("Hilbert", "A130"),
            ("Heilbronn", "A165"),
            ("Gauss", "G000"),
            ("Ghosh", "G200"),
            ("Knuth", "N300"),
            ("Kant", "C530"),
            ("Lloyd", "L430"),
            ("Ladd", "L300"),
            ("Lukasiewicz", "L200"),
            ("Lissajous", "L200"),
            ("Philip", "F410"),
            ("Fripp", "F610"),
            ("Czarkowska", "C200"),
            ("Hornblower", "A514"),
            ("Looser", "L260"),
            ("Wright", "R623"),
            ("Phonic", "F520"),
            ("Quickening", "C250"),
            ("Kuickening", "C250"),
            ("Joben", "G150"),
            ("Zelda", "S300"),
        ]);
    }

    #[test]
    fn test_encode_number() {
        let encoder = Phonex::default();
        assert_eq!(encoder.encode("123456789"), "0000");
    }

    #[test]
    fn test_encode_empty_string() {
        let encoder = Phonex::default();
        assert_eq!(encoder.encode(""), "0000");
    }
}