use alloc::vec::Vec;
use svara::phoneme::Phoneme;
use tracing::trace;
#[must_use]
pub fn english_rules(word: &str) -> Vec<Phoneme> {
let chars: Vec<char> = word.to_lowercase().chars().collect();
if chars.is_empty() {
return Vec::new();
}
let chars = preprocess_silent(&chars);
trace!(word, "silent letter preprocessing complete");
let (prefix_phonemes, stem) = strip_prefix(&chars);
if prefix_phonemes.is_some() {
trace!(word, "prefix detected and stripped");
}
let (stem, suffix) = detect_suffix(stem);
let mut phonemes = Vec::new();
if let Some(pp) = prefix_phonemes {
phonemes.extend_from_slice(&pp);
}
let mut i = 0;
while i < stem.len() {
let remaining = &stem[i..];
let result = match_pattern(remaining);
i += result.consumed();
phonemes.extend(result);
}
apply_suffix(&mut phonemes, suffix);
phonemes
}
/// Returns a copy of `chars` with conventionally silent letters removed.
///
/// Expects an already-lowercased word. Two positions are examined:
///
/// * **Word start** – the first letter of `kn`, `gn`, `wr`, `ps`, `pn` and
///   `mn` is dropped ("knight" → "night").
/// * **Word end** – the last letter of `mb` and `mn` is dropped
///   ("lamb" → "lam"), the `b` of a final `bt` is dropped ("debt" → "det"),
///   and the `g` of a final `gn` is dropped ("sign" → "sin").
///
/// Words shorter than two characters are returned unchanged.
fn preprocess_silent(chars: &[char]) -> Vec<char> {
    let mut result = chars.to_vec();

    let silent_initial = matches!(
        result.as_slice(),
        ['k' | 'g' | 'p' | 'm', 'n', ..] | ['w', 'r', ..] | ['p', 's', ..]
    );
    if silent_initial {
        result.remove(0);
    }

    match result.as_slice() {
        // The second letter of the pair is the silent one.
        [.., 'm', 'b' | 'n'] => {
            result.pop();
        }
        // In a final "bt" it is the `b` that is silent, so the `t` must stay.
        [.., 'b', 't'] => {
            let silent_b = result.len() - 2;
            result.remove(silent_b);
        }
        _ => {}
    }

    // Checked after the trailing-letter pass so a word left ending in "gn"
    // by that pass is handled as well.
    if matches!(result.as_slice(), [.., 'g', 'n']) {
        let silent_g = result.len() - 2;
        result.remove(silent_g);
    }

    result
}
/// Splits a recognised prefix off the front of a word.
///
/// The prefixes `un`, `dis`, `mis`, `pre` and `re` are tried in that order. A
/// prefix is only accepted when at least four characters remain after it and
/// the first remaining character is not a vowel letter (per `is_vowel_char`).
///
/// Returns the prefix phonemes together with the remaining characters, or
/// `(None, chars)` when no prefix is accepted.
fn strip_prefix(chars: &[char]) -> (Option<Vec<Phoneme>>, &[char]) {
    const PREFIXES: &[(&str, &[Phoneme])] = &[
        ("un", &[Phoneme::VowelCupV, Phoneme::NasalN]),
        (
            "dis",
            &[Phoneme::PlosiveD, Phoneme::VowelNearI, Phoneme::FricativeS],
        ),
        (
            "mis",
            &[Phoneme::NasalM, Phoneme::VowelNearI, Phoneme::FricativeS],
        ),
        (
            "pre",
            &[Phoneme::PlosiveP, Phoneme::ApproximantR, Phoneme::VowelE],
        ),
        ("re", &[Phoneme::ApproximantR, Phoneme::VowelE]),
    ];

    let word: alloc::string::String = chars.iter().collect();

    let accepted = PREFIXES.iter().find_map(|&(prefix, sounds)| {
        if !word.starts_with(prefix) {
            return None;
        }
        // Every prefix is ASCII, so its byte length equals its char count.
        let rest = &chars[prefix.len()..];
        let long_enough = rest.len() >= 4;
        if long_enough && !is_vowel_char(rest[0]) {
            Some((sounds.to_vec(), rest))
        } else {
            None
        }
    });

    match accepted {
        Some((sounds, rest)) => (Some(sounds), rest),
        None => (None, chars),
    }
}
/// Suffix recognised at the end of an English stem.
#[derive(Debug, Clone, Copy)]
enum Suffix {
    /// No suffix was split off.
    None,
    /// A trailing "-ed" was split off.
    Ed,
}

/// Splits a trailing "-ed" from `chars`.
///
/// Returns the stem without "ed" and [`Suffix::Ed`] when all of the following
/// hold, otherwise `(chars, Suffix::None)`:
///
/// * the stem keeps at least two characters ("bed" and "red" are left alone);
/// * the stem contains a vowel letter (`a e i o u y`), so "shed" and "fled"
///   are not treated as a vowel-less stem plus "-ed";
/// * the stem does not end in `e`, so "need" and "seed" are kept whole. Real
///   past tenses such as "agreed" are kept whole too and are spelled out by
///   the ordinary "ee" + "d" letter rules.
fn detect_suffix(chars: &[char]) -> (&[char], Suffix) {
    if let [stem @ .., 'e', 'd'] = chars {
        let has_vowel = stem
            .iter()
            .any(|c| matches!(c, 'a' | 'e' | 'i' | 'o' | 'u' | 'y'));
        let ends_in_e = stem.last() == Some(&'e');
        if stem.len() >= 2 && has_vowel && !ends_in_e {
            return (stem, Suffix::Ed);
        }
    }
    (chars, Suffix::None)
}
/// Appends the phonemes for `suffix` to `phonemes`.
///
/// For [`Suffix::Ed`] the choice depends on the phoneme currently at the end
/// of `phonemes`:
///
/// * `PlosiveT` / `PlosiveD` → `VowelNearI`, `PlosiveD`
/// * `PlosiveP`, `PlosiveK`, `FricativeF`, `FricativeS`, `FricativeSh`,
///   `AffricateCh`, `FricativeTh` → `PlosiveT`
/// * anything else, including an empty vector → `PlosiveD`
///
/// [`Suffix::None`] leaves `phonemes` untouched.
fn apply_suffix(phonemes: &mut Vec<Phoneme>, suffix: Suffix) {
    match suffix {
        Suffix::None => {}
        Suffix::Ed => {
            let ending: &[Phoneme] = match phonemes.last() {
                Some(Phoneme::PlosiveT | Phoneme::PlosiveD) => {
                    &[Phoneme::VowelNearI, Phoneme::PlosiveD]
                }
                Some(
                    Phoneme::PlosiveP
                    | Phoneme::PlosiveK
                    | Phoneme::FricativeF
                    | Phoneme::FricativeS
                    | Phoneme::FricativeSh
                    | Phoneme::AffricateCh
                    | Phoneme::FricativeTh,
                ) => &[Phoneme::PlosiveT],
                _ => &[Phoneme::PlosiveD],
            };
            phonemes.extend_from_slice(ending);
        }
    }
}
// Phoneme tables for the English spelling patterns recognised by
// `match_pattern`. Each table is named after the letter sequence it stands for.

// Four-letter patterns.
static P_EIGH: &[Phoneme] = &[Phoneme::DiphthongEI];
static P_AUGH: &[Phoneme] = &[Phoneme::VowelOpenO];
static P_OUGH: &[Phoneme] = &[Phoneme::DiphthongOU];
static P_TION: &[Phoneme] = &[Phoneme::FricativeSh, Phoneme::VowelSchwa, Phoneme::NasalN];
static P_SION: &[Phoneme] = &[Phoneme::FricativeZh, Phoneme::VowelSchwa, Phoneme::NasalN];

// Three-letter patterns.
static P_AIR: &[Phoneme] = &[Phoneme::VowelOpenE, Phoneme::ApproximantR];
static P_EAR: &[Phoneme] = &[Phoneme::VowelNearI, Phoneme::ApproximantR];
static P_OUR: &[Phoneme] = &[Phoneme::DiphthongAU, Phoneme::ApproximantR];
static P_IGH: &[Phoneme] = &[Phoneme::DiphthongAI];
static P_THR: &[Phoneme] = &[Phoneme::FricativeTh, Phoneme::ApproximantR];
static P_THE: &[Phoneme] = &[Phoneme::FricativeDh, Phoneme::VowelSchwa];
static P_ING: &[Phoneme] = &[Phoneme::VowelNearI, Phoneme::NasalNg];

// Consonant digraphs.
static P_SH: &[Phoneme] = &[Phoneme::FricativeSh];
static P_CH: &[Phoneme] = &[Phoneme::AffricateCh];
static P_TH: &[Phoneme] = &[Phoneme::FricativeTh];
static P_WH: &[Phoneme] = &[Phoneme::ApproximantW];
static P_PH: &[Phoneme] = &[Phoneme::FricativeF];
static P_CK: &[Phoneme] = &[Phoneme::PlosiveK];
static P_QU: &[Phoneme] = &[Phoneme::PlosiveK, Phoneme::ApproximantW];

// Vowel + "r".
static P_AR: &[Phoneme] = &[Phoneme::VowelOpenA, Phoneme::ApproximantR];
static P_ER: &[Phoneme] = &[Phoneme::VowelBird, Phoneme::ApproximantR];
static P_OR: &[Phoneme] = &[Phoneme::VowelOpenO, Phoneme::ApproximantR];

static P_NG: &[Phoneme] = &[Phoneme::NasalNg];

// Vowel digraphs.
static P_OU: &[Phoneme] = &[Phoneme::DiphthongAU];
static P_OW: &[Phoneme] = &[Phoneme::DiphthongOU];
// The spelling "ai" ("rain", "wait", "paid") uses the same diphthong as
// "eigh" and magic-e "a", not the diphthong of "igh".
static P_AI: &[Phoneme] = &[Phoneme::DiphthongEI];
static P_EE: &[Phoneme] = &[Phoneme::VowelE];
static P_OO: &[Phoneme] = &[Phoneme::VowelU];
static P_EA: &[Phoneme] = &[Phoneme::VowelE];

// The single letter "x".
static P_XS: &[Phoneme] = &[Phoneme::PlosiveK, Phoneme::FricativeS];
/// Outcome of matching one spelling pattern at the start of a character slice.
///
/// In both variants the first field is the number of input characters the
/// pattern consumed and the second field holds the phonemes it produced.
enum PatternResult {
/// Phonemes borrowed from a `'static` slice.
Static(usize, &'static [Phoneme]),
/// Phonemes built at match time and owned by the result.
Dynamic(usize, Vec<Phoneme>),
}
impl PatternResult {
fn consumed(&self) -> usize {
match self {
Self::Static(n, _) | Self::Dynamic(n, _) => *n,
}
}
}
impl IntoIterator for PatternResult {
type Item = Phoneme;
type IntoIter = PatternIter;
fn into_iter(self) -> Self::IntoIter {
match self {
Self::Static(_, slice) => PatternIter::Static(slice.iter()),
Self::Dynamic(_, vec) => PatternIter::Dynamic(vec.into_iter()),
}
}
}
/// Iterator over the phonemes of a `PatternResult`, one variant per
/// `PatternResult` storage kind.
enum PatternIter {
/// Walks a borrowed `'static` slice.
Static(core::slice::Iter<'static, Phoneme>),
/// Drains an owned vector.
Dynamic(alloc::vec::IntoIter<Phoneme>),
}
impl Iterator for PatternIter {
type Item = Phoneme;
fn next(&mut self) -> Option<Phoneme> {
match self {
Self::Static(it) => it.next().copied(),
Self::Dynamic(it) => it.next(),
}
}
}
/// Matches the longest known English spelling pattern at the start of `chars`.
///
/// Patterns are tried from longest to shortest: four-letter groups,
/// three-letter groups (including the vowel–consonant–"e" ending), two-letter
/// digraphs, and finally single letters. The result carries the phonemes for
/// the pattern and the number of characters it consumed, which is always at
/// least one so callers make progress.
///
/// `chars` is expected to be lowercase. An empty slice yields no phonemes.
fn match_pattern(chars: &[char]) -> PatternResult {
    if chars.is_empty() {
        return PatternResult::Static(1, &[]);
    }

    if chars.len() >= 4 {
        match (chars[0], chars[1], chars[2], chars[3]) {
            ('e', 'i', 'g', 'h') => return PatternResult::Static(4, P_EIGH),
            ('a', 'u', 'g', 'h') => return PatternResult::Static(4, P_AUGH),
            ('o', 'u', 'g', 'h') => return PatternResult::Static(4, P_OUGH),
            ('t', 'i', 'o', 'n') => return PatternResult::Static(4, P_TION),
            ('s', 'i', 'o', 'n') => return PatternResult::Static(4, P_SION),
            ('c', 'i', 'a', 'n') => return PatternResult::Static(4, P_TION),
            _ => {}
        }
    }

    if chars.len() >= 3 {
        match (chars[0], chars[1], chars[2]) {
            // Vowel + consonant + final "e": the "e" is silent and changes the
            // vowel. Only applies when these are the last three letters.
            (v, c, 'e') if chars.len() == 3 && is_vowel_char(v) && !is_vowel_char(c) => {
                let mut ph = magic_e_vowel(v);
                match c {
                    // "c" and "g" are followed by "e" here, so they take the
                    // same soft values as in the single-letter rules below
                    // ("ice", "age").
                    'c' => ph.push(Phoneme::FricativeS),
                    'g' => ph.push(Phoneme::AffricateJ),
                    _ => ph.extend_from_slice(&single_consonant(c)),
                }
                return PatternResult::Dynamic(3, ph);
            }
            ('a', 'i', 'r') => return PatternResult::Static(3, P_AIR),
            ('e', 'a', 'r') => return PatternResult::Static(3, P_EAR),
            ('o', 'u', 'r') => return PatternResult::Static(3, P_OUR),
            ('i', 'g', 'h') => return PatternResult::Static(3, P_IGH),
            ('t', 'h', 'r') => return PatternResult::Static(3, P_THR),
            ('t', 'h', 'e') => return PatternResult::Static(3, P_THE),
            // Digraphs followed by at least one more letter: consume only the
            // digraph and leave the third letter for the next call.
            ('s', 'h', _) => return PatternResult::Static(2, P_SH),
            ('c', 'h', _) => return PatternResult::Static(2, P_CH),
            ('t', 'h', _) => return PatternResult::Static(2, P_TH),
            ('n', 'g', _) if !chars[2].is_alphabetic() || chars.len() == 3 => {
                return PatternResult::Static(2, P_NG);
            }
            ('i', 'n', 'g') if chars.len() == 3 => return PatternResult::Static(3, P_ING),
            ('o', 'u', _) => return PatternResult::Static(2, P_OU),
            ('o', 'w', _) => return PatternResult::Static(2, P_OW),
            ('a', 'i', _) => return PatternResult::Static(2, P_AI),
            ('e', 'e', _) => return PatternResult::Static(2, P_EE),
            ('o', 'o', _) => return PatternResult::Static(2, P_OO),
            ('e', 'a', _) => return PatternResult::Static(2, P_EA),
            _ => {}
        }
    }

    if chars.len() >= 2 {
        match (chars[0], chars[1]) {
            ('s', 'h') => return PatternResult::Static(2, P_SH),
            ('c', 'h') => return PatternResult::Static(2, P_CH),
            ('t', 'h') => return PatternResult::Static(2, P_TH),
            ('w', 'h') => return PatternResult::Static(2, P_WH),
            ('p', 'h') => return PatternResult::Static(2, P_PH),
            ('c', 'k') => return PatternResult::Static(2, P_CK),
            ('q', 'u') => return PatternResult::Static(2, P_QU),
            ('a', 'r') => return PatternResult::Static(2, P_AR),
            ('e', 'r') | ('i', 'r') | ('u', 'r') => return PatternResult::Static(2, P_ER),
            ('o', 'r') => return PatternResult::Static(2, P_OR),
            // Word-final "ng" ("song", "long"). The three-letter rule above
            // only sees "ng" when something follows it.
            ('n', 'g') if chars.len() == 2 => return PatternResult::Static(2, P_NG),
            _ => {}
        }
    }

    match chars[0] {
        'a' => PatternResult::Static(1, &[Phoneme::VowelAsh]),
        'b' => PatternResult::Static(1, &[Phoneme::PlosiveB]),
        'c' => {
            // Soft before e / i / y.
            if chars.len() > 1 && matches!(chars[1], 'e' | 'i' | 'y') {
                PatternResult::Static(1, &[Phoneme::FricativeS])
            } else {
                PatternResult::Static(1, &[Phoneme::PlosiveK])
            }
        }
        'd' => PatternResult::Static(1, &[Phoneme::PlosiveD]),
        'e' => {
            // A lone "e" as the last letter is silent.
            if chars.len() == 1 {
                PatternResult::Static(1, &[])
            } else {
                PatternResult::Static(1, &[Phoneme::VowelOpenE])
            }
        }
        'f' => PatternResult::Static(1, &[Phoneme::FricativeF]),
        'g' => {
            // Soft before e / i / y.
            if chars.len() > 1 && matches!(chars[1], 'e' | 'i' | 'y') {
                PatternResult::Static(1, &[Phoneme::AffricateJ])
            } else {
                PatternResult::Static(1, &[Phoneme::PlosiveG])
            }
        }
        'h' => PatternResult::Static(1, &[Phoneme::FricativeH]),
        'i' => PatternResult::Static(1, &[Phoneme::VowelNearI]),
        'j' => PatternResult::Static(1, &[Phoneme::AffricateJ]),
        'k' => PatternResult::Static(1, &[Phoneme::PlosiveK]),
        'l' => PatternResult::Static(1, &[Phoneme::LateralL]),
        'm' => PatternResult::Static(1, &[Phoneme::NasalM]),
        'n' => PatternResult::Static(1, &[Phoneme::NasalN]),
        'o' => PatternResult::Static(1, &[Phoneme::VowelO]),
        'p' => PatternResult::Static(1, &[Phoneme::PlosiveP]),
        'r' => PatternResult::Static(1, &[Phoneme::ApproximantR]),
        's' => PatternResult::Static(1, &[Phoneme::FricativeS]),
        't' => PatternResult::Static(1, &[Phoneme::PlosiveT]),
        'u' => PatternResult::Static(1, &[Phoneme::VowelCupV]),
        'v' => PatternResult::Static(1, &[Phoneme::FricativeV]),
        'w' => PatternResult::Static(1, &[Phoneme::ApproximantW]),
        'x' => PatternResult::Static(1, P_XS),
        'y' => {
            // Consonant when more letters follow, vowel as the last letter.
            if chars.len() > 1 {
                PatternResult::Static(1, &[Phoneme::ApproximantJ])
            } else {
                PatternResult::Static(1, &[Phoneme::VowelE])
            }
        }
        'z' => PatternResult::Static(1, &[Phoneme::FricativeZ]),
        '\'' => PatternResult::Static(1, &[]),
        '-' => PatternResult::Static(1, &[Phoneme::Silence]),
        // Anything unrecognised is skipped without producing a phoneme.
        _ => PatternResult::Static(1, &[]),
    }
}
/// Phoneme for a vowel letter that is followed by a consonant and a final "e".
///
/// Letters other than `a`, `e`, `i`, `o`, `u` yield `VowelSchwa`. The result
/// is always a one-element vector.
fn magic_e_vowel(v: char) -> Vec<Phoneme> {
    let sound = match v {
        'a' => Phoneme::DiphthongEI,
        'i' => Phoneme::DiphthongAI,
        'o' => Phoneme::DiphthongOU,
        'u' => Phoneme::VowelU,
        'e' => Phoneme::VowelE,
        _ => Phoneme::VowelSchwa,
    };
    alloc::vec![sound]
}
/// Phoneme for a single consonant letter, ignoring its context.
///
/// Returns a one-element vector for the letters in the table and an empty
/// vector for every other character.
fn single_consonant(c: char) -> Vec<Phoneme> {
    let sound = match c {
        'b' => Phoneme::PlosiveB,
        'c' | 'k' => Phoneme::PlosiveK,
        'd' => Phoneme::PlosiveD,
        'f' => Phoneme::FricativeF,
        'g' => Phoneme::PlosiveG,
        'l' => Phoneme::LateralL,
        'm' => Phoneme::NasalM,
        'n' => Phoneme::NasalN,
        'p' => Phoneme::PlosiveP,
        'r' => Phoneme::ApproximantR,
        's' => Phoneme::FricativeS,
        't' => Phoneme::PlosiveT,
        'v' => Phoneme::FricativeV,
        'z' => Phoneme::FricativeZ,
        _ => return Vec::new(),
    };
    alloc::vec![sound]
}
/// Returns `true` for the lowercase ASCII vowel letters `a`, `e`, `i`, `o`, `u`.
///
/// Uppercase letters, `y`, and accented vowels are not counted.
fn is_vowel_char(c: char) -> bool {
    "aeiou".contains(c)
}
/// Converts a Spanish word to phonemes.
///
/// The word is lowercased and then consumed left to right, one
/// `spanish_match` pattern at a time. Returns an empty vector for an empty
/// word.
#[must_use]
pub fn spanish_rules(word: &str) -> Vec<Phoneme> {
    let letters: Vec<char> = word.to_lowercase().chars().collect();
    let mut phonemes = Vec::new();

    let mut rest = letters.as_slice();
    while !rest.is_empty() {
        let matched = spanish_match(rest);
        let used = matched.consumed();
        phonemes.extend(matched);
        // Stop cleanly if a match ever reports more characters than remain.
        rest = rest.get(used..).unwrap_or(&[]);
    }

    phonemes
}
// Phoneme tables for the Spanish multi-letter patterns used by `spanish_match`.
/// Digraph "ch".
static SP_CH: &[Phoneme] = &[Phoneme::AffricateCh];
/// Digraph "ll".
static SP_LL: &[Phoneme] = &[Phoneme::ApproximantJ];
/// Digraph "rr" (a single "r" is mapped separately in `spanish_match`).
static SP_RR: &[Phoneme] = &[Phoneme::ApproximantR];
/// The single letter "x", rendered as two phonemes.
static SP_XS_ES: &[Phoneme] = &[Phoneme::PlosiveK, Phoneme::FricativeS];
/// Matches one Spanish spelling pattern at the start of `chars`.
///
/// Two-letter patterns (`ch`, `ll`, `rr`, `qu`, and `gu` before a front vowel)
/// are tried first, then single letters. The result carries the phonemes and
/// the number of characters consumed, which is always at least one.
///
/// The front vowels that soften `c` / `g` and silence the `u` of `gu` are `e`
/// and `i`, with or without an acute accent ("francés", "guía").
///
/// `chars` is expected to be lowercase. An empty slice yields no phonemes.
fn spanish_match(chars: &[char]) -> PatternResult {
    /// `e` / `i`, plain or accented.
    fn is_front_vowel(c: char) -> bool {
        matches!(c, 'e' | 'i' | 'é' | 'í')
    }

    if chars.is_empty() {
        return PatternResult::Static(1, &[]);
    }

    if chars.len() >= 2 {
        match (chars[0], chars[1]) {
            ('c', 'h') => return PatternResult::Static(2, SP_CH),
            ('l', 'l') => return PatternResult::Static(2, SP_LL),
            ('r', 'r') => return PatternResult::Static(2, SP_RR),
            ('q', 'u') => return PatternResult::Static(2, &[Phoneme::PlosiveK]),
            // The "u" is silent in "gue" / "gui"; the vowel itself is left for
            // the next call.
            ('g', 'u') if chars.get(2).copied().map_or(false, is_front_vowel) => {
                return PatternResult::Static(2, &[Phoneme::PlosiveG]);
            }
            _ => {}
        }
    }

    let before_front_vowel = chars.get(1).copied().map_or(false, is_front_vowel);

    match chars[0] {
        'a' | 'á' => PatternResult::Static(1, &[Phoneme::VowelOpenA]),
        'e' | 'é' => PatternResult::Static(1, &[Phoneme::VowelOpenE]),
        'i' | 'í' => PatternResult::Static(1, &[Phoneme::VowelNearI]),
        'o' | 'ó' => PatternResult::Static(1, &[Phoneme::VowelO]),
        'u' | 'ú' | 'ü' => PatternResult::Static(1, &[Phoneme::VowelCupV]),
        'b' | 'v' => PatternResult::Static(1, &[Phoneme::PlosiveB]),
        'c' => {
            if before_front_vowel {
                PatternResult::Static(1, &[Phoneme::FricativeTh])
            } else {
                PatternResult::Static(1, &[Phoneme::PlosiveK])
            }
        }
        'd' => PatternResult::Static(1, &[Phoneme::PlosiveD]),
        'f' => PatternResult::Static(1, &[Phoneme::FricativeF]),
        'g' => {
            if before_front_vowel {
                PatternResult::Static(1, &[Phoneme::FricativeH])
            } else {
                PatternResult::Static(1, &[Phoneme::PlosiveG])
            }
        }
        // "h" is silent.
        'h' => PatternResult::Static(1, &[]),
        'j' => PatternResult::Static(1, &[Phoneme::FricativeH]),
        'k' => PatternResult::Static(1, &[Phoneme::PlosiveK]),
        'l' => PatternResult::Static(1, &[Phoneme::LateralL]),
        'm' => PatternResult::Static(1, &[Phoneme::NasalM]),
        'n' => PatternResult::Static(1, &[Phoneme::NasalN]),
        'ñ' => PatternResult::Static(1, &[Phoneme::NasalNg]),
        'p' => PatternResult::Static(1, &[Phoneme::PlosiveP]),
        'r' => PatternResult::Static(1, &[Phoneme::TapFlap]),
        's' => PatternResult::Static(1, &[Phoneme::FricativeS]),
        't' => PatternResult::Static(1, &[Phoneme::PlosiveT]),
        'w' => PatternResult::Static(1, &[Phoneme::ApproximantW]),
        'x' => PatternResult::Static(1, SP_XS_ES),
        'y' => {
            // Vowel when it is the last letter, consonant otherwise.
            if chars.len() == 1 {
                PatternResult::Static(1, &[Phoneme::VowelNearI])
            } else {
                PatternResult::Static(1, &[Phoneme::ApproximantJ])
            }
        }
        'z' => PatternResult::Static(1, &[Phoneme::FricativeTh]),
        '\'' | '-' => PatternResult::Static(1, &[]),
        // Anything unrecognised is skipped without producing a phoneme.
        _ => PatternResult::Static(1, &[]),
    }
}
/// Converts a German word to phonemes.
///
/// The word is lowercased and consumed left to right, one `german_match`
/// pattern at a time. Afterwards the last phoneme is devoiced: `PlosiveB`,
/// `PlosiveD`, `PlosiveG`, `FricativeV` and `FricativeZ` become `PlosiveP`,
/// `PlosiveT`, `PlosiveK`, `FricativeF` and `FricativeS` respectively.
///
/// Returns an empty vector for an empty word.
#[must_use]
pub fn german_rules(word: &str) -> Vec<Phoneme> {
    let letters: Vec<char> = word.to_lowercase().chars().collect();
    let mut phonemes = Vec::new();

    let mut rest = letters.as_slice();
    while !rest.is_empty() {
        let matched = german_match(rest);
        let used = matched.consumed();
        phonemes.extend(matched);
        // Stop cleanly if a match ever reports more characters than remain.
        rest = rest.get(used..).unwrap_or(&[]);
    }

    // Only the very last phoneme of the word is affected.
    if let Some(last) = phonemes.last_mut() {
        match *last {
            Phoneme::PlosiveB => *last = Phoneme::PlosiveP,
            Phoneme::PlosiveD => *last = Phoneme::PlosiveT,
            Phoneme::PlosiveG => *last = Phoneme::PlosiveK,
            Phoneme::FricativeV => *last = Phoneme::FricativeF,
            Phoneme::FricativeZ => *last = Phoneme::FricativeS,
            _ => {}
        }
    }

    phonemes
}
// Phoneme tables for the German multi-letter patterns used by `german_match`.
/// Trigraph "sch".
static DE_SCH: &[Phoneme] = &[Phoneme::FricativeSh];
/// Digraph "ei".
static DE_EI: &[Phoneme] = &[Phoneme::DiphthongAI];
// `DE_IE`: digraph "ie". `DE_EU`: digraphs "eu" and "äu".
static DE_IE: &[Phoneme] = &[Phoneme::VowelE]; static DE_EU: &[Phoneme] = &[Phoneme::DiphthongOI];
// `DE_CH`: digraph "ch", mapped to the same phoneme as "sch". `DE_CK`: digraph "ck".
static DE_CH: &[Phoneme] = &[Phoneme::FricativeSh]; static DE_CK: &[Phoneme] = &[Phoneme::PlosiveK];
/// Digraph "pf".
static DE_PF: &[Phoneme] = &[Phoneme::PlosiveP, Phoneme::FricativeF];
/// The letter "z", rendered as two phonemes.
static DE_TS: &[Phoneme] = &[Phoneme::PlosiveT, Phoneme::FricativeS];
fn german_match(chars: &[char]) -> PatternResult {
if chars.is_empty() {
return PatternResult::Static(1, &[]);
}
if chars.len() >= 3 && chars[0] == 's' && chars[1] == 'c' && chars[2] == 'h' {
return PatternResult::Static(3, DE_SCH);
}
if chars.len() >= 2 {
match (chars[0], chars[1]) {
('c', 'h') => {
return PatternResult::Static(2, DE_CH);
}
('c', 'k') => return PatternResult::Static(2, DE_CK),
('e', 'i') => return PatternResult::Static(2, DE_EI),
('i', 'e') => return PatternResult::Static(2, DE_IE),
('e', 'u') => return PatternResult::Static(2, DE_EU),
('ä', 'u') => return PatternResult::Static(2, DE_EU),
('p', 'f') => return PatternResult::Static(2, DE_PF),
('t', 'z') | ('z', _) if chars[0] == 'z' => {}
('s', 'p') | ('s', 't') => {
return PatternResult::Dynamic(1, alloc::vec![Phoneme::FricativeSh]);
}
('t', 'h') => return PatternResult::Static(2, &[Phoneme::PlosiveT]),
('p', 'h') => return PatternResult::Static(2, &[Phoneme::FricativeF]),
('q', 'u') => {
return PatternResult::Static(2, &[Phoneme::PlosiveK, Phoneme::FricativeV]);
}
(a, b) if a == b && !is_vowel_char(a) => {
return PatternResult::Static(
2,
match a {
'b' => &[Phoneme::PlosiveB],
'c' => &[Phoneme::PlosiveK],
'd' => &[Phoneme::PlosiveD],
'f' => &[Phoneme::FricativeF],
'g' => &[Phoneme::PlosiveG],
'l' => &[Phoneme::LateralL],
'm' => &[Phoneme::NasalM],
'n' => &[Phoneme::NasalN],
'p' => &[Phoneme::PlosiveP],
'r' => &[Phoneme::ApproximantR],
's' => &[Phoneme::FricativeS],
't' => &[Phoneme::PlosiveT],
_ => &[],
},
);
}
_ => {}
}
}
match chars[0] {
'a' | 'á' => PatternResult::Static(1, &[Phoneme::VowelOpenA]),
'ä' => PatternResult::Static(1, &[Phoneme::VowelOpenE]),
'e' => PatternResult::Static(1, &[Phoneme::VowelOpenE]),
'i' => PatternResult::Static(1, &[Phoneme::VowelNearI]),
'o' => PatternResult::Static(1, &[Phoneme::VowelO]),
'ö' => PatternResult::Static(1, &[Phoneme::VowelOpenE]), 'u' => PatternResult::Static(1, &[Phoneme::VowelU]),
'ü' => PatternResult::Static(1, &[Phoneme::VowelE]), 'b' => PatternResult::Static(1, &[Phoneme::PlosiveB]),
'c' => PatternResult::Static(1, &[Phoneme::PlosiveK]),
'd' => PatternResult::Static(1, &[Phoneme::PlosiveD]),
'f' => PatternResult::Static(1, &[Phoneme::FricativeF]),
'g' => PatternResult::Static(1, &[Phoneme::PlosiveG]),
'h' => PatternResult::Static(1, &[Phoneme::FricativeH]),
'j' => PatternResult::Static(1, &[Phoneme::ApproximantJ]),
'k' => PatternResult::Static(1, &[Phoneme::PlosiveK]),
'l' => PatternResult::Static(1, &[Phoneme::LateralL]),
'm' => PatternResult::Static(1, &[Phoneme::NasalM]),
'n' => PatternResult::Static(1, &[Phoneme::NasalN]),
'p' => PatternResult::Static(1, &[Phoneme::PlosiveP]),
'r' => PatternResult::Static(1, &[Phoneme::ApproximantR]),
's' => {
if chars.len() > 1 && is_vowel_char(chars[1]) {
PatternResult::Static(1, &[Phoneme::FricativeZ])
} else {
PatternResult::Static(1, &[Phoneme::FricativeS])
}
}
'ß' => PatternResult::Static(1, &[Phoneme::FricativeS]),
't' => PatternResult::Static(1, &[Phoneme::PlosiveT]),
'v' => PatternResult::Static(1, &[Phoneme::FricativeF]), 'w' => PatternResult::Static(1, &[Phoneme::FricativeV]), 'x' => PatternResult::Static(1, &[Phoneme::PlosiveK]),
'y' => PatternResult::Static(1, &[Phoneme::VowelE]), 'z' => PatternResult::Static(1, DE_TS),
'\'' | '-' => PatternResult::Static(1, &[]),
_ => PatternResult::Static(1, &[]),
}
}
/// Converts a Hindi word in Devanagari to phonemes.
///
/// Each character is mapped on its own:
///
/// * independent vowels and dependent vowel signs emit their vowel;
/// * consonants go through `push_consonant`, which decides from the following
///   character whether the inherent schwa is emitted;
/// * the virama and the nukta emit nothing;
/// * anusvara and candrabindu emit `NasalN`, visarga emits `FricativeH`;
/// * ASCII letters are mapped through a small romanisation table;
/// * every other character is ignored.
///
/// The "following character" handed to `push_consonant` skips any nukta, so
/// a consonant + nukta + vowel sign does not receive an inherent schwa in
/// front of the vowel sign.
///
/// Finally, a word-final `VowelSchwa` is dropped unless it is the only
/// phoneme. Returns an empty vector for an empty word.
#[must_use]
pub fn hindi_rules(word: &str) -> Vec<Phoneme> {
    /// Devanagari sign nukta: a dot attached to the preceding consonant.
    const NUKTA: char = '\u{093C}';

    let chars: Vec<char> = word.chars().collect();
    let mut phonemes = Vec::new();

    for (i, &ch) in chars.iter().enumerate() {
        // First character after `ch` that is not a nukta.
        let next = chars[i + 1..].iter().copied().find(|&c| c != NUKTA);

        match ch {
            // Vowels: independent letter | dependent sign.
            'अ' => phonemes.push(Phoneme::VowelSchwa),
            'आ' | 'ा' => phonemes.push(Phoneme::VowelOpenA),
            'इ' | 'ि' => phonemes.push(Phoneme::VowelNearI),
            'ई' | 'ी' => phonemes.push(Phoneme::VowelE),
            'उ' | 'ु' => phonemes.push(Phoneme::VowelCupV),
            'ऊ' | 'ू' => phonemes.push(Phoneme::VowelU),
            'ए' | 'े' => phonemes.push(Phoneme::VowelOpenE),
            'ऐ' | 'ै' => phonemes.push(Phoneme::VowelOpenA),
            'ओ' | 'ो' => phonemes.push(Phoneme::VowelO),
            'औ' | 'ौ' => phonemes.push(Phoneme::VowelOpenO),
            'ऋ' | 'ृ' => {
                phonemes.push(Phoneme::TapFlap);
                phonemes.push(Phoneme::VowelNearI);
            }

            // Consonants. Several letters share one phoneme.
            'क' | 'ख' => push_consonant(&mut phonemes, Phoneme::PlosiveK, next),
            'ग' | 'घ' => push_consonant(&mut phonemes, Phoneme::PlosiveG, next),
            'ङ' => push_consonant(&mut phonemes, Phoneme::NasalNg, next),
            'च' | 'छ' => push_consonant(&mut phonemes, Phoneme::AffricateCh, next),
            'ज' | 'झ' => push_consonant(&mut phonemes, Phoneme::AffricateJ, next),
            'ञ' | 'ण' | 'न' => push_consonant(&mut phonemes, Phoneme::NasalN, next),
            'ट' | 'ठ' | 'त' | 'थ' => push_consonant(&mut phonemes, Phoneme::PlosiveT, next),
            'ड' | 'ढ' | 'द' | 'ध' => push_consonant(&mut phonemes, Phoneme::PlosiveD, next),
            'प' | 'फ' => push_consonant(&mut phonemes, Phoneme::PlosiveP, next),
            'ब' | 'भ' => push_consonant(&mut phonemes, Phoneme::PlosiveB, next),
            'म' => push_consonant(&mut phonemes, Phoneme::NasalM, next),
            'य' => push_consonant(&mut phonemes, Phoneme::ApproximantJ, next),
            'र' => push_consonant(&mut phonemes, Phoneme::TapFlap, next),
            'ल' => push_consonant(&mut phonemes, Phoneme::LateralL, next),
            'व' => push_consonant(&mut phonemes, Phoneme::FricativeV, next),
            'श' | 'ष' => push_consonant(&mut phonemes, Phoneme::FricativeSh, next),
            'स' => push_consonant(&mut phonemes, Phoneme::FricativeS, next),
            'ह' => push_consonant(&mut phonemes, Phoneme::FricativeH, next),

            // Nukta and virama produce no sound of their own.
            NUKTA | '्' => {}

            // Anusvara / candrabindu, then visarga.
            'ं' | 'ँ' => phonemes.push(Phoneme::NasalN),
            'ः' => phonemes.push(Phoneme::FricativeH),

            // Romanised fallback for ASCII letters.
            c if c.is_ascii_alphabetic() => match c.to_ascii_lowercase() {
                'a' => phonemes.push(Phoneme::VowelSchwa),
                'e' => phonemes.push(Phoneme::VowelOpenE),
                'i' => phonemes.push(Phoneme::VowelNearI),
                'o' => phonemes.push(Phoneme::VowelO),
                'u' => phonemes.push(Phoneme::VowelCupV),
                'k' => phonemes.push(Phoneme::PlosiveK),
                'g' => phonemes.push(Phoneme::PlosiveG),
                'c' => phonemes.push(Phoneme::AffricateCh),
                'j' => phonemes.push(Phoneme::AffricateJ),
                't' => phonemes.push(Phoneme::PlosiveT),
                'd' => phonemes.push(Phoneme::PlosiveD),
                'n' => phonemes.push(Phoneme::NasalN),
                'p' => phonemes.push(Phoneme::PlosiveP),
                'b' => phonemes.push(Phoneme::PlosiveB),
                'm' => phonemes.push(Phoneme::NasalM),
                'y' => phonemes.push(Phoneme::ApproximantJ),
                'r' => phonemes.push(Phoneme::TapFlap),
                'l' => phonemes.push(Phoneme::LateralL),
                'v' | 'w' => phonemes.push(Phoneme::FricativeV),
                's' => phonemes.push(Phoneme::FricativeS),
                'h' => phonemes.push(Phoneme::FricativeH),
                'f' => phonemes.push(Phoneme::FricativeF),
                'z' => phonemes.push(Phoneme::FricativeZ),
                _ => {}
            },

            _ => {}
        }
    }

    // Drop a word-final schwa unless it is all there is.
    if phonemes.len() > 1 && phonemes.last() == Some(&Phoneme::VowelSchwa) {
        phonemes.pop();
    }

    phonemes
}
/// Appends `consonant` and, unless the next character overrides it, the
/// inherent `VowelSchwa` that follows a bare Devanagari consonant.
///
/// The schwa is omitted when `next` is a dependent vowel sign (which supplies
/// its own vowel) or the virama (which cancels the vowel). It is kept in every
/// other case, including end of word (`None`).
///
/// Anusvara and candrabindu do **not** suppress the schwa: they nasalise the
/// syllable's vowel rather than replace it, so "हंस" keeps the vowel between
/// its first consonant and the nasal.
fn push_consonant(phonemes: &mut Vec<Phoneme>, consonant: Phoneme, next: Option<char>) {
    phonemes.push(consonant);

    let vowel_overridden = matches!(
        next,
        Some(
            'ा' | 'ि'
                | 'ी'
                | 'ु'
                | 'ू'
                | 'े'
                | 'ै'
                | 'ो'
                | 'ौ'
                | 'ृ'
                | '्'
        )
    );
    if !vowel_overridden {
        phonemes.push(Phoneme::VowelSchwa);
    }
}
#[must_use]
pub fn arabic_rules(word: &str) -> Vec<Phoneme> {
let chars: Vec<char> = word.chars().collect();
if chars.is_empty() {
return Vec::new();
}
let mut phonemes = Vec::new();
for &ch in &chars {
match ch {
'ب' => phonemes.push(Phoneme::PlosiveB),
'ت' => phonemes.push(Phoneme::PlosiveT),
'ث' => phonemes.push(Phoneme::FricativeTh),
'ج' => phonemes.push(Phoneme::AffricateJ),
'ح' => phonemes.push(Phoneme::FricativeH), 'خ' => phonemes.push(Phoneme::FricativeH), 'د' => phonemes.push(Phoneme::PlosiveD),
'ذ' => phonemes.push(Phoneme::FricativeDh),
'ر' => phonemes.push(Phoneme::TapFlap),
'ز' => phonemes.push(Phoneme::FricativeZ),
'س' => phonemes.push(Phoneme::FricativeS),
'ش' => phonemes.push(Phoneme::FricativeSh),
'ص' => phonemes.push(Phoneme::FricativeS), 'ض' => phonemes.push(Phoneme::PlosiveD), 'ط' => phonemes.push(Phoneme::PlosiveT), 'ظ' => phonemes.push(Phoneme::FricativeDh), 'ع' => phonemes.push(Phoneme::GlottalStop), 'غ' => phonemes.push(Phoneme::PlosiveG), 'ف' => phonemes.push(Phoneme::FricativeF),
'ق' => phonemes.push(Phoneme::PlosiveK), 'ك' => phonemes.push(Phoneme::PlosiveK),
'ل' => phonemes.push(Phoneme::LateralL),
'م' => phonemes.push(Phoneme::NasalM),
'ن' => phonemes.push(Phoneme::NasalN),
'ه' => phonemes.push(Phoneme::FricativeH),
'و' => phonemes.push(Phoneme::ApproximantW), 'ي' => phonemes.push(Phoneme::ApproximantJ),
'ء' | 'أ' | 'إ' | 'ؤ' | 'ئ' => phonemes.push(Phoneme::GlottalStop),
'ا' => phonemes.push(Phoneme::VowelOpenA),
'آ' => {
phonemes.push(Phoneme::GlottalStop);
phonemes.push(Phoneme::VowelOpenA);
}
'ة' => phonemes.push(Phoneme::VowelOpenA),
'\u{064E}' => phonemes.push(Phoneme::VowelOpenA), '\u{0650}' => phonemes.push(Phoneme::VowelNearI), '\u{064F}' => phonemes.push(Phoneme::VowelCupV),
'\u{064B}' => {
phonemes.push(Phoneme::VowelOpenA);
phonemes.push(Phoneme::NasalN);
}
'\u{064D}' => {
phonemes.push(Phoneme::VowelNearI);
phonemes.push(Phoneme::NasalN);
}
'\u{064C}' => {
phonemes.push(Phoneme::VowelCupV);
phonemes.push(Phoneme::NasalN);
}
'\u{0652}' => {}
'\u{0651}' => {
if let Some(&prev) = phonemes.last() {
phonemes.push(prev);
}
}
'ى' => phonemes.push(Phoneme::VowelOpenA),
c if c.is_ascii_alphabetic() => {
let lower = c.to_lowercase().next().unwrap_or(c);
match lower {
'a' => phonemes.push(Phoneme::VowelOpenA),
'i' => phonemes.push(Phoneme::VowelNearI),
'u' => phonemes.push(Phoneme::VowelCupV),
'b' => phonemes.push(Phoneme::PlosiveB),
't' => phonemes.push(Phoneme::PlosiveT),
'd' => phonemes.push(Phoneme::PlosiveD),
'k' => phonemes.push(Phoneme::PlosiveK),
'f' => phonemes.push(Phoneme::FricativeF),
's' => phonemes.push(Phoneme::FricativeS),
'z' => phonemes.push(Phoneme::FricativeZ),
'h' => phonemes.push(Phoneme::FricativeH),
'l' => phonemes.push(Phoneme::LateralL),
'm' => phonemes.push(Phoneme::NasalM),
'n' => phonemes.push(Phoneme::NasalN),
'r' => phonemes.push(Phoneme::TapFlap),
'w' => phonemes.push(Phoneme::ApproximantW),
'y' => phonemes.push(Phoneme::ApproximantJ),
'j' => phonemes.push(Phoneme::AffricateJ),
'q' => phonemes.push(Phoneme::PlosiveK),
_ => {}
}
}
_ => {} }
}
phonemes
}
/// Converts a Sanskrit word in Devanagari to phonemes.
///
/// Each character is mapped on its own:
///
/// * independent vowels and dependent vowel signs emit their vowel (some
///   emit two phonemes);
/// * consonants go through `push_sanskrit_consonant`, which decides from the
///   following character whether the inherent schwa is emitted;
/// * the virama and the nukta emit nothing;
/// * anusvara emits `NasalM`, visarga emits `FricativeH`;
/// * ASCII letters are mapped through a small romanisation table;
/// * every other character is ignored.
///
/// The "following character" handed to `push_sanskrit_consonant` skips any
/// nukta, so a consonant + nukta + vowel sign does not receive an inherent
/// schwa in front of the vowel sign.
///
/// Unlike `hindi_rules`, a word-final schwa is kept. Returns an empty vector
/// for an empty word.
#[must_use]
pub fn sanskrit_rules(word: &str) -> Vec<Phoneme> {
    /// Devanagari sign nukta: a dot attached to the preceding consonant.
    const NUKTA: char = '\u{093C}';

    let chars: Vec<char> = word.chars().collect();
    let mut phonemes = Vec::new();

    for (i, &ch) in chars.iter().enumerate() {
        // First character after `ch` that is not a nukta.
        let next = chars[i + 1..].iter().copied().find(|&c| c != NUKTA);

        match ch {
            // Vowels: independent letter | dependent sign.
            'अ' => phonemes.push(Phoneme::VowelSchwa),
            'आ' | 'ा' => phonemes.push(Phoneme::VowelOpenA),
            'इ' | 'ि' => phonemes.push(Phoneme::VowelNearI),
            'ई' | 'ी' => phonemes.push(Phoneme::VowelE),
            'उ' | 'ु' => phonemes.push(Phoneme::VowelCupV),
            'ऊ' | 'ू' => phonemes.push(Phoneme::VowelU),
            'ऋ' | 'ृ' => {
                phonemes.push(Phoneme::TapFlap);
                phonemes.push(Phoneme::VowelNearI);
            }
            'ॠ' | 'ॄ' => {
                phonemes.push(Phoneme::TapFlap);
                phonemes.push(Phoneme::VowelE);
            }
            'ऌ' | 'ॢ' => {
                phonemes.push(Phoneme::LateralL);
                phonemes.push(Phoneme::VowelNearI);
            }
            'ए' | 'े' => phonemes.push(Phoneme::VowelOpenE),
            'ऐ' | 'ै' => {
                phonemes.push(Phoneme::VowelOpenA);
                phonemes.push(Phoneme::VowelNearI);
            }
            'ओ' | 'ो' => phonemes.push(Phoneme::VowelO),
            'औ' | 'ौ' => {
                phonemes.push(Phoneme::VowelOpenA);
                phonemes.push(Phoneme::VowelCupV);
            }

            // Consonants. Several letters share one phoneme.
            'क' | 'ख' => push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveK, next),
            'ग' | 'घ' => push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveG, next),
            'ङ' => push_sanskrit_consonant(&mut phonemes, Phoneme::NasalNg, next),
            'च' | 'छ' => push_sanskrit_consonant(&mut phonemes, Phoneme::AffricateCh, next),
            'ज' | 'झ' => push_sanskrit_consonant(&mut phonemes, Phoneme::AffricateJ, next),
            'ञ' | 'ण' | 'न' => push_sanskrit_consonant(&mut phonemes, Phoneme::NasalN, next),
            'ट' | 'ठ' | 'त' | 'थ' => {
                push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveT, next);
            }
            'ड' | 'ढ' | 'द' | 'ध' => {
                push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveD, next);
            }
            'प' | 'फ' => push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveP, next),
            'ब' | 'भ' => push_sanskrit_consonant(&mut phonemes, Phoneme::PlosiveB, next),
            'म' => push_sanskrit_consonant(&mut phonemes, Phoneme::NasalM, next),
            'य' => push_sanskrit_consonant(&mut phonemes, Phoneme::ApproximantJ, next),
            'र' => push_sanskrit_consonant(&mut phonemes, Phoneme::TapFlap, next),
            'ल' => push_sanskrit_consonant(&mut phonemes, Phoneme::LateralL, next),
            'व' => push_sanskrit_consonant(&mut phonemes, Phoneme::FricativeV, next),
            'श' | 'ष' => push_sanskrit_consonant(&mut phonemes, Phoneme::FricativeSh, next),
            'स' => push_sanskrit_consonant(&mut phonemes, Phoneme::FricativeS, next),
            'ह' => push_sanskrit_consonant(&mut phonemes, Phoneme::FricativeH, next),

            // Virama and nukta produce no sound of their own.
            '्' | NUKTA => {}

            // Anusvara, then visarga.
            'ं' => phonemes.push(Phoneme::NasalM),
            'ः' => phonemes.push(Phoneme::FricativeH),

            // Romanised fallback for ASCII letters.
            c if c.is_ascii_alphabetic() => match c.to_ascii_lowercase() {
                'a' => phonemes.push(Phoneme::VowelSchwa),
                'e' => phonemes.push(Phoneme::VowelOpenE),
                'i' => phonemes.push(Phoneme::VowelNearI),
                'o' => phonemes.push(Phoneme::VowelO),
                'u' => phonemes.push(Phoneme::VowelCupV),
                'k' => phonemes.push(Phoneme::PlosiveK),
                'g' => phonemes.push(Phoneme::PlosiveG),
                'c' => phonemes.push(Phoneme::AffricateCh),
                'j' => phonemes.push(Phoneme::AffricateJ),
                't' => phonemes.push(Phoneme::PlosiveT),
                'd' => phonemes.push(Phoneme::PlosiveD),
                'n' => phonemes.push(Phoneme::NasalN),
                'p' => phonemes.push(Phoneme::PlosiveP),
                'b' => phonemes.push(Phoneme::PlosiveB),
                'm' => phonemes.push(Phoneme::NasalM),
                'y' => phonemes.push(Phoneme::ApproximantJ),
                'r' => phonemes.push(Phoneme::TapFlap),
                'l' => phonemes.push(Phoneme::LateralL),
                'v' | 'w' => phonemes.push(Phoneme::FricativeV),
                's' => phonemes.push(Phoneme::FricativeS),
                'h' => phonemes.push(Phoneme::FricativeH),
                _ => {}
            },

            _ => {}
        }
    }

    phonemes
}
/// Appends `consonant` and, unless the next character overrides it, the
/// inherent `VowelSchwa` that follows a bare Devanagari consonant.
///
/// The schwa is omitted when `next` is a dependent vowel sign (which supplies
/// its own vowel) or the virama (which cancels the vowel). It is kept in every
/// other case, including end of word (`None`).
///
/// Anusvara and candrabindu do **not** suppress the schwa: they nasalise the
/// syllable's vowel rather than replace it, so the first syllable of
/// "संस्कृत" keeps its vowel in front of the nasal.
fn push_sanskrit_consonant(phonemes: &mut Vec<Phoneme>, consonant: Phoneme, next: Option<char>) {
    phonemes.push(consonant);

    let vowel_overridden = matches!(
        next,
        Some(
            'ा' | 'ि'
                | 'ी'
                | 'ु'
                | 'ू'
                | 'े'
                | 'ै'
                | 'ो'
                | 'ौ'
                | 'ृ'
                | 'ॄ'
                | 'ॢ'
                | '्'
        )
    );
    if !vowel_overridden {
        phonemes.push(Phoneme::VowelSchwa);
    }
}
// Unit tests for the per-language rule functions. Each test feeds a single
// word to one rule function and checks the leading phoneme(s), the trailing
// phoneme(s), or the presence/absence of a particular phoneme in the output.
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // English: basic words and digraphs
    // ------------------------------------------------------------------

    #[test]
    fn test_simple_word() {
        let phonemes = english_rules("cat");
        assert!(!phonemes.is_empty());
        assert_eq!(phonemes[0], Phoneme::PlosiveK);
    }

    #[test]
    fn test_digraph_sh() {
        let phonemes = english_rules("she");
        assert_eq!(phonemes[0], Phoneme::FricativeSh);
    }

    #[test]
    fn test_digraph_th() {
        let phonemes = english_rules("the");
        assert_eq!(phonemes[0], Phoneme::FricativeDh);
    }

    #[test]
    fn test_digraph_ch() {
        let phonemes = english_rules("chat");
        assert_eq!(phonemes[0], Phoneme::AffricateCh);
    }

    #[test]
    fn test_empty() {
        let phonemes = english_rules("");
        assert!(phonemes.is_empty());
    }

    // ------------------------------------------------------------------
    // English: silent letters
    // ------------------------------------------------------------------

    #[test]
    fn test_silent_kn() {
        let phonemes = english_rules("knight");
        assert_ne!(phonemes[0], Phoneme::PlosiveK);
        assert_eq!(phonemes[0], Phoneme::NasalN);
    }

    #[test]
    fn test_silent_gn_initial() {
        let phonemes = english_rules("gnome");
        assert_eq!(phonemes[0], Phoneme::NasalN);
    }

    #[test]
    fn test_silent_wr() {
        let phonemes = english_rules("write");
        assert_eq!(phonemes[0], Phoneme::ApproximantR);
    }

    #[test]
    fn test_silent_ps() {
        let phonemes = english_rules("psalm");
        assert_eq!(phonemes[0], Phoneme::FricativeS);
    }

    #[test]
    fn test_silent_mb_final() {
        let phonemes = english_rules("lamb");
        assert!(!phonemes.contains(&Phoneme::PlosiveB));
    }

    // NOTE(review): the name mentions "ghost" but the input is "go"; the test
    // only checks that a plain initial `g` is kept. Confirm which was intended.
    #[test]
    fn test_ghost_not_silent() {
        let phonemes = english_rules("go");
        assert_eq!(phonemes[0], Phoneme::PlosiveG);
    }

    // ------------------------------------------------------------------
    // English: vowel patterns
    // ------------------------------------------------------------------

    #[test]
    fn test_igh_pattern() {
        let phonemes = english_rules("high");
        assert!(phonemes.contains(&Phoneme::DiphthongAI));
    }

    #[test]
    fn test_magic_e_make() {
        let phonemes = english_rules("make");
        assert!(phonemes.contains(&Phoneme::DiphthongEI));
    }

    #[test]
    fn test_magic_e_time() {
        let phonemes = english_rules("time");
        assert!(phonemes.contains(&Phoneme::DiphthongAI));
    }

    #[test]
    fn test_magic_e_home() {
        let phonemes = english_rules("home");
        assert!(phonemes.contains(&Phoneme::DiphthongOU));
    }

    #[test]
    fn test_r_colored_car() {
        let phonemes = english_rules("car");
        assert!(phonemes.contains(&Phoneme::VowelOpenA));
        assert!(phonemes.contains(&Phoneme::ApproximantR));
    }

    #[test]
    fn test_r_colored_bird() {
        let phonemes = english_rules("bird");
        assert!(phonemes.contains(&Phoneme::VowelBird));
    }

    // ------------------------------------------------------------------
    // English: suffixes and prefixes
    // ------------------------------------------------------------------

    #[test]
    fn test_tion_suffix() {
        let phonemes = english_rules("nation");
        assert!(phonemes.contains(&Phoneme::FricativeSh));
    }

    #[test]
    fn test_ed_after_voiceless() {
        let phonemes = english_rules("walked");
        let last = phonemes.last().copied();
        assert_eq!(last, Some(Phoneme::PlosiveT));
    }

    #[test]
    fn test_ed_after_t() {
        let phonemes = english_rules("wanted");
        let len = phonemes.len();
        // Guard the two tail indices below.
        assert!(len >= 2);
        assert_eq!(phonemes[len - 2], Phoneme::VowelNearI);
        assert_eq!(phonemes[len - 1], Phoneme::PlosiveD);
    }

    #[test]
    fn test_ed_after_voiced() {
        let phonemes = english_rules("played");
        let last = phonemes.last().copied();
        assert_eq!(last, Some(Phoneme::PlosiveD));
    }

    #[test]
    fn test_prefix_un() {
        let phonemes = english_rules("unhappy");
        assert_eq!(phonemes[0], Phoneme::VowelCupV);
        assert_eq!(phonemes[1], Phoneme::NasalN);
    }

    // ------------------------------------------------------------------
    // Spanish
    // ------------------------------------------------------------------

    // The first expected phoneme is the vowel, i.e. the leading `h` yields
    // no phoneme of its own.
    #[test]
    fn test_spanish_hola() {
        let phonemes = spanish_rules("hola");
        assert_eq!(phonemes[0], Phoneme::VowelO);
        assert_eq!(phonemes[1], Phoneme::LateralL);
        assert_eq!(phonemes[2], Phoneme::VowelOpenA);
    }

    #[test]
    fn test_spanish_ch() {
        let phonemes = spanish_rules("chico");
        assert_eq!(phonemes[0], Phoneme::AffricateCh);
    }

    #[test]
    fn test_spanish_ll() {
        let phonemes = spanish_rules("llamar");
        assert_eq!(phonemes[0], Phoneme::ApproximantJ);
    }

    #[test]
    fn test_spanish_rr() {
        let phonemes = spanish_rules("perro");
        assert!(phonemes.contains(&Phoneme::ApproximantR));
    }

    // The vowel is expected directly after the plosive, i.e. the `u` of
    // `qu` yields no phoneme.
    #[test]
    fn test_spanish_que() {
        let phonemes = spanish_rules("que");
        assert_eq!(phonemes[0], Phoneme::PlosiveK);
        assert_eq!(phonemes[1], Phoneme::VowelOpenE);
    }

    #[test]
    fn test_spanish_gui() {
        let phonemes = spanish_rules("guitarra");
        assert_eq!(phonemes[0], Phoneme::PlosiveG);
    }

    #[test]
    fn test_spanish_empty() {
        let phonemes = spanish_rules("");
        assert!(phonemes.is_empty());
    }

    // NOTE(review): `ñ` is expected to surface as `NasalNg`; presumably the
    // closest phoneme available for the palatal nasal -- confirm.
    #[test]
    fn test_spanish_nino() {
        let phonemes = spanish_rules("niño");
        assert!(phonemes.contains(&Phoneme::NasalNg));
    }

    // ------------------------------------------------------------------
    // German
    // ------------------------------------------------------------------

    #[test]
    fn test_german_hallo() {
        let phonemes = german_rules("hallo");
        // Check non-emptiness before indexing so that an empty result is
        // reported by the assertion rather than by an index panic.
        assert!(!phonemes.is_empty());
        assert_eq!(phonemes[0], Phoneme::FricativeH);
    }

    #[test]
    fn test_german_sch() {
        let phonemes = german_rules("schule");
        assert_eq!(phonemes[0], Phoneme::FricativeSh);
    }

    // NOTE(review): `ch` after `i` is expected as `FricativeSh`; presumably an
    // approximation of the ich-Laut -- confirm.
    #[test]
    fn test_german_ch() {
        let phonemes = german_rules("ich");
        assert!(phonemes.contains(&Phoneme::FricativeSh));
    }

    #[test]
    fn test_german_ei() {
        let phonemes = german_rules("ein");
        assert!(phonemes.contains(&Phoneme::DiphthongAI));
    }

    #[test]
    fn test_german_ie() {
        let phonemes = german_rules("die");
        assert!(phonemes.contains(&Phoneme::VowelE));
    }

    // Word-final `d` is expected to come out as the voiceless plosive.
    #[test]
    fn test_german_final_devoicing() {
        let phonemes = german_rules("hund");
        assert_eq!(phonemes.last().copied(), Some(Phoneme::PlosiveT));
    }

    // NOTE(review): `ü` is expected as `VowelE`; presumably an approximation
    // of the rounded front vowel -- confirm.
    #[test]
    fn test_german_umlaut() {
        let phonemes = german_rules("über");
        assert_eq!(phonemes[0], Phoneme::VowelE);
    }

    // `z` is expected as the two-phoneme sequence T + S.
    #[test]
    fn test_german_z() {
        let phonemes = german_rules("zeit");
        assert_eq!(phonemes[0], Phoneme::PlosiveT);
        assert_eq!(phonemes[1], Phoneme::FricativeS);
    }

    #[test]
    fn test_german_empty() {
        assert!(german_rules("").is_empty());
    }

    // ------------------------------------------------------------------
    // Hindi (Devanagari and romanized input)
    // ------------------------------------------------------------------

    #[test]
    fn test_hindi_namaste() {
        let phonemes = hindi_rules("नमस्ते");
        assert!(!phonemes.is_empty());
        assert!(phonemes.contains(&Phoneme::NasalN));
        assert!(phonemes.contains(&Phoneme::NasalM));
    }

    #[test]
    fn test_hindi_simple_ka() {
        let phonemes = hindi_rules("क");
        assert_eq!(phonemes[0], Phoneme::PlosiveK);
    }

    #[test]
    fn test_hindi_vowels() {
        let phonemes = hindi_rules("आ");
        assert_eq!(phonemes[0], Phoneme::VowelOpenA);
    }

    #[test]
    fn test_hindi_romanized() {
        let phonemes = hindi_rules("namaste");
        assert!(!phonemes.is_empty());
        assert!(phonemes.contains(&Phoneme::NasalN));
    }

    #[test]
    fn test_hindi_empty() {
        assert!(hindi_rules("").is_empty());
    }

    // ------------------------------------------------------------------
    // Arabic (Arabic script and romanized input)
    // ------------------------------------------------------------------

    #[test]
    fn test_arabic_bismillah() {
        let phonemes = arabic_rules("بسم");
        assert!(!phonemes.is_empty());
        assert_eq!(phonemes[0], Phoneme::PlosiveB);
    }

    #[test]
    fn test_arabic_kitab() {
        let phonemes = arabic_rules("كتاب");
        assert_eq!(phonemes[0], Phoneme::PlosiveK);
        assert!(phonemes.contains(&Phoneme::VowelOpenA));
    }

    #[test]
    fn test_arabic_shin() {
        let phonemes = arabic_rules("شمس");
        assert_eq!(phonemes[0], Phoneme::FricativeSh);
    }

    #[test]
    fn test_arabic_romanized() {
        let phonemes = arabic_rules("salam");
        assert!(!phonemes.is_empty());
        assert_eq!(phonemes[0], Phoneme::FricativeS);
    }

    #[test]
    fn test_arabic_empty() {
        assert!(arabic_rules("").is_empty());
    }

    // ------------------------------------------------------------------
    // Sanskrit (Devanagari and romanized input)
    // ------------------------------------------------------------------

    #[test]
    fn test_sanskrit_om() {
        let phonemes = sanskrit_rules("ओम्");
        assert!(!phonemes.is_empty());
        assert_eq!(phonemes[0], Phoneme::VowelO);
    }

    #[test]
    fn test_sanskrit_dharma() {
        let phonemes = sanskrit_rules("धर्म");
        assert!(!phonemes.is_empty());
        assert!(phonemes.contains(&Phoneme::TapFlap));
        assert!(phonemes.contains(&Phoneme::NasalM));
    }

    // Two bare consonants are expected to give four phonemes, the last one
    // being the schwa after the final consonant.
    #[test]
    fn test_sanskrit_preserves_schwa() {
        let phonemes = sanskrit_rules("कर");
        assert_eq!(phonemes.len(), 4);
        assert_eq!(phonemes[3], Phoneme::VowelSchwa);
    }

    #[test]
    fn test_sanskrit_romanized() {
        let phonemes = sanskrit_rules("dharma");
        assert!(!phonemes.is_empty());
    }

    #[test]
    fn test_sanskrit_empty() {
        assert!(sanskrit_rules("").is_empty());
    }
}