pub mod allophony;
pub mod constraint;
pub mod morphology;
pub mod phoneme;
pub mod romanization;
pub mod stress;
pub mod template;
pub mod tone;
pub use allophony::{AllophonyRule, PatternAtom};
pub use constraint::PhonotacticConstraint;
pub use phoneme::{Phoneme, PhonemeKind};
pub use romanization::RomanizationScheme;
pub use stress::StressRule;
pub use template::{SyllableTemplate, TemplateRole};
pub use tone::ToneSystem;
use std::collections::BTreeMap;
use serde::Deserialize;
/// Phonological description of a language, deserialized from HJSON.
///
/// Every field is optional in the serialized form: a missing key falls back
/// to an empty collection / `None`, except `max_word_syllables`, which falls
/// back to [`default_max_syllables`]. [`Default::default`] produces exactly
/// the same value as deserializing an empty object.
#[derive(Debug, Clone, Deserialize)]
pub struct Phonology {
    /// Phoneme inventory; looked up by IPA string in [`Phonology::phoneme`].
    #[serde(default)]
    pub phonemes: Vec<Phoneme>,
    /// Named classes mapped to their member strings
    /// (presumably phoneme symbols — confirm against the data files).
    #[serde(default)]
    pub classes: BTreeMap<String, Vec<String>>,
    /// Syllable templates keyed by the string form of a [`TemplateRole`]
    /// (see [`Phonology::templates_for`]).
    #[serde(default)]
    pub templates: BTreeMap<String, Vec<SyllableTemplate>>,
    /// Phonotactic constraints declared for the language.
    #[serde(default)]
    pub constraints: Vec<PhonotacticConstraint>,
    /// Allophony rules declared for the language.
    #[serde(default)]
    pub allophony: Vec<AllophonyRule>,
    /// Optional stress rule.
    #[serde(default)]
    pub stress: Option<StressRule>,
    /// Available romanization schemes, in declaration order.
    #[serde(default)]
    pub romanizations: Vec<RomanizationScheme>,
    /// Name of the scheme [`Phonology::scheme`] picks when the caller does
    /// not request one; compared ASCII-case-insensitively.
    #[serde(default)]
    pub default_romanization: Option<String>,
    /// Optional tone system.
    #[serde(default)]
    pub tone: Option<ToneSystem>,
    /// Syllable limit per word (going by the name; nothing in this file reads
    /// it). Defaults to [`default_max_syllables`] when omitted.
    // The field is kept in the schema even though no code here consumes it,
    // hence the lint suppression.
    #[serde(default = "default_max_syllables")]
    #[allow(dead_code)]
    pub max_word_syllables: usize,
}

/// Fallback for [`Phonology::max_word_syllables`], shared by serde and by
/// the `Default` implementation so both construction paths agree.
fn default_max_syllables() -> usize {
    4
}

impl Default for Phonology {
    /// Builds an empty phonology.
    ///
    /// Written by hand rather than derived: a derived impl would set
    /// `max_word_syllables` to 0, disagreeing with the value serde fills in
    /// for a document that omits the field.
    fn default() -> Self {
        Self {
            phonemes: Vec::new(),
            classes: BTreeMap::new(),
            templates: BTreeMap::new(),
            constraints: Vec::new(),
            allophony: Vec::new(),
            stress: None,
            romanizations: Vec::new(),
            default_romanization: None,
            tone: None,
            max_word_syllables: default_max_syllables(),
        }
    }
}
impl Phonology {
    /// Parses a phonology from the HJSON carried in `body`.
    ///
    /// Returns `Ok(None)` when `body` is empty or whitespace-only. Otherwise
    /// the text handed to the parser is whatever
    /// `crate::language_entry::extract_hjson_block` yields, or `body` itself
    /// when that helper yields nothing.
    ///
    /// # Errors
    ///
    /// Returns a message prefixed with `phonology HJSON parse failed:` when
    /// deserialization fails.
    pub fn from_hjson(body: &str) -> Result<Option<Self>, String> {
        if body.trim().is_empty() {
            return Ok(None);
        }
        let source = crate::language_entry::extract_hjson_block(body).unwrap_or(body);
        match serde_hjson::from_str::<Self>(source) {
            Ok(parsed) => Ok(Some(parsed)),
            Err(e) => Err(format!("phonology HJSON parse failed: {e}")),
        }
    }

    /// Returns the first phoneme in the inventory whose IPA string equals `ipa`.
    pub fn phoneme(&self, ipa: &str) -> Option<&Phoneme> {
        self.phonemes
            .iter()
            .find(|candidate| candidate.ipa.as_str() == ipa)
    }

    /// Returns the kind of the phoneme written `ipa`, if it is in the inventory.
    pub fn kind_of(&self, ipa: &str) -> Option<PhonemeKind> {
        let found = self.phoneme(ipa)?;
        Some(found.kind)
    }

    /// Returns the members of class `name`, or an empty slice for an unknown class.
    pub fn class_members(&self, name: &str) -> &[String] {
        match self.classes.get(name) {
            Some(members) => members.as_slice(),
            None => &[],
        }
    }

    /// Returns the templates stored under `role`'s string key, or an empty
    /// slice when none are stored.
    pub fn templates_for(&self, role: TemplateRole) -> &[SyllableTemplate] {
        match self.templates.get(role.as_str()) {
            Some(list) => list.as_slice(),
            None => &[],
        }
    }

    /// Selects a romanization scheme.
    ///
    /// * `Some(name)` — the first scheme whose name matches `name`
    ///   ASCII-case-insensitively, or `None` if there is no such scheme.
    /// * `None` — the scheme named by `default_romanization` when it is set
    ///   and matches; otherwise the first declared scheme, if any.
    pub fn scheme(&self, name: Option<&str>) -> Option<&RomanizationScheme> {
        // One lookup shared by the explicit and the default path.
        let by_name = |wanted: &str| {
            self.romanizations
                .iter()
                .find(|s| s.name.eq_ignore_ascii_case(wanted))
        };

        if let Some(requested) = name {
            // An explicit request never falls back to another scheme.
            return by_name(requested);
        }

        self.default_romanization
            .as_deref()
            .and_then(by_name)
            .or_else(|| self.romanizations.first())
    }

    /// Splits `word` into IPA symbols using the graphemes of the inventory.
    ///
    /// At each position the longest grapheme (measured in `char`s) that
    /// prefixes the remaining text wins; among graphemes of equal length the
    /// one declared first in `phonemes` wins. A character that no grapheme
    /// matches is passed through unchanged as its own element. Phonemes with
    /// an empty grapheme are ignored.
    pub fn segment(&self, word: &str) -> Vec<String> {
        // (grapheme, ipa) pairs, skipping empty graphemes so every match
        // consumes at least one byte.
        let mut table: Vec<(&str, &str)> = Vec::new();
        for p in &self.phonemes {
            let grapheme = p.grapheme();
            if !grapheme.is_empty() {
                table.push((grapheme, p.ipa.as_str()));
            }
        }
        // Stable sort, longest first: ties keep inventory order.
        table.sort_by_key(|&(grapheme, _)| std::cmp::Reverse(grapheme.chars().count()));

        let mut pieces: Vec<String> = Vec::new();
        let mut remaining = word;
        while let Some(first) = remaining.chars().next() {
            let hit = table
                .iter()
                .find_map(|&(grapheme, ipa)| remaining.strip_prefix(grapheme).map(|tail| (ipa, tail)));
            match hit {
                Some((ipa, tail)) => {
                    pieces.push(ipa.to_string());
                    remaining = tail;
                }
                None => {
                    // Unknown character: emit it verbatim and move past it.
                    pieces.push(first.to_string());
                    remaining = &remaining[first.len_utf8()..];
                }
            }
        }
        pieces
    }
}