use alloc::vec::Vec;
use serde::{Deserialize, Serialize};
use svara::phoneme::{Phoneme, PhonemeClass};
/// A single syllable, split into its three structural parts.
///
/// `syllabify` in this file produces these values, but all fields are public
/// so a syllable can also be assembled by hand.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Syllable {
/// Phonemes placed before the nucleus; may be empty.
pub onset: Vec<Phoneme>,
/// The syllable's central phoneme. `syllabify` only ever stores a phoneme
/// whose class is `Vowel` or `Diphthong` here.
pub nucleus: Phoneme,
/// Phonemes placed after the nucleus; may be empty.
pub coda: Vec<Phoneme>,
}
impl Syllable {
    /// Reports whether this syllable is heavy.
    ///
    /// A syllable is heavy when its coda holds at least one phoneme, or when
    /// its nucleus is classified as a diphthong. Anything else is light.
    #[must_use]
    pub fn is_heavy(&self) -> bool {
        let has_coda = !self.coda.is_empty();
        has_coda || self.nucleus.class() == PhonemeClass::Diphthong
    }

    /// Returns every phoneme of the syllable in order: onset, nucleus, coda.
    ///
    /// The result is a freshly allocated vector; the syllable is not modified.
    #[must_use]
    pub fn phonemes(&self) -> Vec<Phoneme> {
        // Exact final length is known up front: both clusters plus the nucleus.
        let mut all = Vec::with_capacity(self.onset.len() + 1 + self.coda.len());
        all.extend_from_slice(&self.onset);
        all.push(self.nucleus);
        all.extend_from_slice(&self.coda);
        all
    }
}
/// Splits a phoneme sequence into syllables.
///
/// Every phoneme accepted by `is_nucleus` becomes the nucleus of its own
/// syllable. The consonants between two neighbouring nuclei are divided by
/// `max_legal_onset`: the longest suffix it accepts becomes the onset of the
/// following syllable, and whatever precedes that suffix stays behind as the
/// coda of the preceding syllable.
///
/// Phonemes before the first nucleus all go to the first onset, and phonemes
/// after the last nucleus all go to the last coda.
///
/// Returns an empty vector when `phonemes` is empty or contains no nucleus.
#[must_use]
pub fn syllabify(phonemes: &[Phoneme]) -> Vec<Syllable> {
    // Index of every nucleus; each one anchors exactly one syllable.
    let nuclei: Vec<usize> = (0..phonemes.len())
        .filter(|&idx| is_nucleus(&phonemes[idx]))
        .collect();

    // With no nuclei the loop below never runs and the result stays empty.
    let mut syllables = Vec::with_capacity(nuclei.len());

    // Where the current syllable's onset begins. The first syllable absorbs
    // everything from the start of the input; later ones start exactly where
    // the previous syllable's coda ended.
    let mut onset_start = 0;

    for (pos, &nucleus_idx) in nuclei.iter().enumerate() {
        let after_nucleus = nucleus_idx + 1;

        let coda_end = match nuclei.get(pos + 1) {
            // Another syllable follows: give it the longest legal onset and
            // keep the remainder of the cluster as this syllable's coda.
            Some(&next_nucleus) => {
                let between = &phonemes[after_nucleus..next_nucleus];
                next_nucleus - max_legal_onset(between)
            }
            // Final syllable: every trailing phoneme belongs to its coda.
            None => phonemes.len(),
        };

        syllables.push(Syllable {
            onset: phonemes[onset_start..nucleus_idx].to_vec(),
            nucleus: phonemes[nucleus_idx],
            coda: phonemes[after_nucleus..coda_end].to_vec(),
        });

        onset_start = coda_end;
    }

    syllables
}
/// Returns `true` when `ph` can head a syllable, i.e. its class is either
/// `Vowel` or `Diphthong`.
fn is_nucleus(ph: &Phoneme) -> bool {
    let class = ph.class();
    class == PhonemeClass::Vowel || class == PhonemeClass::Diphthong
}
/// Ranks a phoneme on the sonority scale used for onset legality checks.
///
/// Higher values are more sonorous:
///
/// | rank | phonemes                                   |
/// |------|--------------------------------------------|
/// | 6    | anything classified `Vowel` or `Diphthong` |
/// | 5    | approximants R, W, J                       |
/// | 4    | lateral L                                  |
/// | 3    | nasals M, N, Ng                            |
/// | 2    | the listed fricatives                      |
/// | 1    | every other phoneme                        |
///
/// NOTE(review): rank 1 is a catch-all; presumably it covers plosives and
/// affricates, but the full `Phoneme` enum is not visible from this file.
fn sonority(ph: &Phoneme) -> u8 {
    // The class check comes first so it takes priority over the per-phoneme
    // table below.
    if matches!(ph.class(), PhonemeClass::Vowel | PhonemeClass::Diphthong) {
        return 6;
    }

    match *ph {
        Phoneme::ApproximantR | Phoneme::ApproximantW | Phoneme::ApproximantJ => 5,
        Phoneme::LateralL => 4,
        Phoneme::NasalM | Phoneme::NasalN | Phoneme::NasalNg => 3,
        Phoneme::FricativeF
        | Phoneme::FricativeV
        | Phoneme::FricativeS
        | Phoneme::FricativeZ
        | Phoneme::FricativeSh
        | Phoneme::FricativeZh
        | Phoneme::FricativeTh
        | Phoneme::FricativeDh
        | Phoneme::FricativeH => 2,
        _ => 1,
    }
}
/// Returns the length of the longest suffix of `cluster` that
/// `is_legal_onset` accepts.
///
/// Suffixes are tried from longest (the whole cluster) to shortest (its last
/// phoneme), and the first accepted one wins. An empty cluster yields `0`.
/// If no suffix is accepted the function falls back to `1`.
fn max_legal_onset(cluster: &[Phoneme]) -> usize {
    if cluster.is_empty() {
        return 0;
    }

    (0..cluster.len())
        .find(|&start| is_legal_onset(&cluster[start..]))
        .map_or(1, |start| cluster.len() - start)
}
/// Decides whether `cluster` may serve as a syllable onset.
///
/// The rule is sonority sequencing: each phoneme must be strictly more
/// sonorous (per `sonority`) than the one before it, so the cluster rises
/// towards the nucleus.
///
/// One exception is allowed: a cluster-initial `FricativeS` may be followed
/// by a phoneme of sonority 2 or lower (as in /st/, /sp/, /sk/, /sf/) even
/// though sonority does not rise there. The exception covers only that first
/// transition. Every later pair must rise, so /s t r/ is accepted while
/// /s t k/ or /s p t/ are rejected.
///
/// Empty and single-phoneme clusters are always legal.
fn is_legal_onset(cluster: &[Phoneme]) -> bool {
    // `windows(2)` yields nothing for clusters shorter than two phonemes, so
    // `all` is vacuously true for the empty and single-phoneme cases.
    cluster.windows(2).enumerate().all(|(idx, pair)| {
        let (prev, next) = (&pair[0], &pair[1]);

        if sonority(next) > sonority(prev) {
            return true;
        }

        // Sonority failed to rise. Excuse it only for the transition out of a
        // leading /s/; applying the exception further in would admit
        // clusters that merely happen to start with /s/.
        idx == 0 && *prev == Phoneme::FricativeS && sonority(next) <= 2
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A consonant-vowel-consonant input forms one syllable with a
    /// single-phoneme onset and coda.
    #[test]
    fn test_syllabify_monosyllable() {
        let phonemes = alloc::vec![Phoneme::PlosiveK, Phoneme::VowelAsh, Phoneme::PlosiveT];
        let syls = syllabify(&phonemes);
        assert_eq!(syls.len(), 1);
        assert_eq!(syls[0].onset, alloc::vec![Phoneme::PlosiveK]);
        assert_eq!(syls[0].nucleus, Phoneme::VowelAsh);
        assert_eq!(syls[0].coda, alloc::vec![Phoneme::PlosiveT]);
        assert!(syls[0].is_heavy());
    }

    /// A lone consonant between two nuclei is assigned to the onset of the
    /// second syllable, leaving the first syllable without a coda.
    #[test]
    fn test_syllabify_two_syllables() {
        let phonemes = alloc::vec![
            Phoneme::FricativeH,
            Phoneme::VowelOpenE,
            Phoneme::LateralL,
            Phoneme::DiphthongOU,
        ];
        let syls = syllabify(&phonemes);
        assert_eq!(syls.len(), 2);

        assert_eq!(syls[0].onset, alloc::vec![Phoneme::FricativeH]);
        assert_eq!(syls[0].nucleus, Phoneme::VowelOpenE);
        assert!(syls[0].coda.is_empty());

        assert_eq!(syls[1].onset, alloc::vec![Phoneme::LateralL]);
        assert_eq!(syls[1].nucleus, Phoneme::DiphthongOU);
        assert!(syls[1].coda.is_empty());
    }

    /// In a k-s-t-r cluster the /k/ stays in the first coda and the rest,
    /// headed by /s/, forms the onset of the second syllable.
    #[test]
    fn test_syllabify_s_cluster_onset() {
        let phonemes = alloc::vec![
            Phoneme::VowelAsh,
            Phoneme::PlosiveK,
            Phoneme::FricativeS,
            Phoneme::PlosiveT,
            Phoneme::ApproximantR,
            Phoneme::VowelAsh,
        ];
        let syls = syllabify(&phonemes);
        assert_eq!(syls.len(), 2);

        assert!(syls[0].onset.is_empty());
        assert_eq!(syls[0].coda, alloc::vec![Phoneme::PlosiveK]);

        assert_eq!(
            syls[1].onset,
            alloc::vec![Phoneme::FricativeS, Phoneme::PlosiveT, Phoneme::ApproximantR]
        );
        assert!(syls[1].coda.is_empty());
    }

    /// Concatenating the syllables' phonemes gives back the original input.
    #[test]
    fn test_syllabify_preserves_phoneme_order() {
        let phonemes = alloc::vec![
            Phoneme::FricativeH,
            Phoneme::VowelOpenE,
            Phoneme::LateralL,
            Phoneme::DiphthongOU,
        ];
        let mut rebuilt: Vec<Phoneme> = alloc::vec![];
        for syl in &syllabify(&phonemes) {
            rebuilt.extend_from_slice(&syl.phonemes());
        }
        assert_eq!(rebuilt, phonemes);
    }

    /// Empty input produces no syllables.
    #[test]
    fn test_syllabify_empty() {
        assert!(syllabify(&[]).is_empty());
    }

    /// Input without any nucleus produces no syllables.
    #[test]
    fn test_syllabify_no_vowels() {
        let phonemes = alloc::vec![Phoneme::PlosiveK, Phoneme::PlosiveT];
        assert!(syllabify(&phonemes).is_empty());
    }

    /// A non-empty coda makes a syllable heavy.
    #[test]
    fn test_heavy_syllable_with_coda() {
        let syl = Syllable {
            onset: alloc::vec![Phoneme::PlosiveK],
            nucleus: Phoneme::VowelAsh,
            coda: alloc::vec![Phoneme::PlosiveT],
        };
        assert!(syl.is_heavy());
    }

    /// No coda and a non-diphthong nucleus makes a syllable light.
    #[test]
    fn test_light_syllable() {
        let syl = Syllable {
            onset: alloc::vec![Phoneme::PlosiveK],
            nucleus: Phoneme::VowelAsh,
            coda: alloc::vec![],
        };
        assert!(!syl.is_heavy());
    }

    /// A diphthong nucleus makes a syllable heavy even without a coda.
    #[test]
    fn test_heavy_with_diphthong() {
        let syl = Syllable {
            onset: alloc::vec![],
            nucleus: Phoneme::DiphthongAI,
            coda: alloc::vec![],
        };
        assert!(syl.is_heavy());
    }

    /// A syllable survives a JSON serialize/deserialize round trip unchanged.
    #[test]
    fn test_serde_roundtrip() {
        let syl = Syllable {
            onset: alloc::vec![Phoneme::PlosiveK],
            nucleus: Phoneme::VowelAsh,
            coda: alloc::vec![Phoneme::PlosiveT],
        };
        let json = serde_json::to_string(&syl).unwrap();
        let syl2: Syllable = serde_json::from_str(&json).unwrap();
        assert_eq!(syl, syl2);
    }

    /// `phonemes` returns onset, nucleus and coda in that order.
    #[test]
    fn test_syllable_phonemes() {
        let syl = Syllable {
            onset: alloc::vec![Phoneme::PlosiveK],
            nucleus: Phoneme::VowelAsh,
            coda: alloc::vec![Phoneme::PlosiveT],
        };
        assert_eq!(
            syl.phonemes(),
            alloc::vec![Phoneme::PlosiveK, Phoneme::VowelAsh, Phoneme::PlosiveT]
        );
    }
}