use std::borrow::Cow;
use serde::{Deserialize, Serialize};
/// Describes the overall shape of a language's syllables.
///
/// The onset and coda limits drive the `allows_*` predicates implemented on
/// this type. `complex_nucleus` and `pattern` are carried as plain data; no
/// code visible in this file interprets them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SyllableTemplate {
/// Largest onset size. A value above 1 means onset clusters are allowed.
pub max_onset: u8,
/// Largest coda size. 0 means closed syllables are not allowed; a value
/// above 1 means coda clusters are allowed.
pub max_coda: u8,
/// Flag describing the nucleus. Presumably marks whether the nucleus may be
/// more than a single short vowel (TODO confirm); nothing in this file reads it.
pub complex_nucleus: bool,
/// Template string such as `"(C)V(N)"`. Stored verbatim and never parsed by
/// the code in this file.
pub pattern: Cow<'static, str>,
}
impl SyllableTemplate {
    /// Reports whether the onset limit leaves room for a cluster, i.e. whether
    /// `max_onset` is at least 2.
    #[must_use]
    #[inline]
    pub fn allows_onset_clusters(&self) -> bool {
        self.max_onset >= 2
    }

    /// Reports whether the coda limit leaves room for a cluster, i.e. whether
    /// `max_coda` is at least 2.
    #[must_use]
    #[inline]
    pub fn allows_coda_clusters(&self) -> bool {
        self.max_coda >= 2
    }

    /// Reports whether a syllable may carry any coda at all, i.e. whether
    /// `max_coda` is non-zero.
    #[must_use]
    #[inline]
    pub fn allows_closed_syllables(&self) -> bool {
        self.max_coda != 0
    }
}
/// One rule that marks a list of sequences as permitted or forbidden at a
/// given syllable position.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PhonotacticConstraint {
/// Whether the listed sequences are allowed or disallowed.
pub kind: ConstraintKind,
/// Syllable position this rule applies to.
pub position: SyllablePosition,
/// Sequences covered by the rule. `Phonotactics::is_permitted` compares
/// these against its query by exact string equality.
pub sequences: Vec<Cow<'static, str>>,
/// Free-text explanation of the rule; not interpreted by code in this file.
pub description: Cow<'static, str>,
}
/// Polarity of a [`PhonotacticConstraint`].
///
/// Marked `#[non_exhaustive]`, so code in other crates must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ConstraintKind {
/// The listed sequences are allowed; a match makes
/// `Phonotactics::is_permitted` return `Some(true)`.
Permitted,
/// The listed sequences are disallowed; a match makes
/// `Phonotactics::is_permitted` return `Some(false)`.
Forbidden,
}
/// Location within (or between) syllables that a constraint applies to.
///
/// Marked `#[non_exhaustive]`, so code in other crates must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum SyllablePosition {
/// The syllable onset (material before the nucleus).
Onset,
/// The syllable coda (material after the nucleus).
Coda,
/// The syllable nucleus. No built-in data set in this file uses it.
Nucleus,
/// Presumably a sequence spanning a syllable boundary (TODO confirm); no
/// built-in data set in this file uses it.
AcrossBoundary,
}
/// Phonotactic description of one language: its syllable template plus a list
/// of sequence constraints.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Phonotactics {
/// Short language identifier. The built-in data sets use `"en"`, `"sa"` and
/// `"ja"`, which look like ISO 639-1 codes (TODO confirm the intended scheme).
pub language_code: Cow<'static, str>,
/// Syllable shape limits for the language.
pub syllable: SyllableTemplate,
/// Constraint rules. Order matters: `is_permitted` answers from the first
/// rule at the queried position that lists the sequence.
pub constraints: Vec<PhonotacticConstraint>,
}
impl Phonotactics {
    /// Gathers the constraints attached to one syllable position.
    ///
    /// A trace-level `tracing` event recording the language code and the
    /// requested position is emitted before the scan. The returned references
    /// borrow from `self`, keep the stored order, and the vector is empty when
    /// no constraint targets `position`.
    #[must_use]
    pub fn constraints_at(&self, position: SyllablePosition) -> Vec<&PhonotacticConstraint> {
        tracing::trace!(
            language = %self.language_code,
            position = ?position,
            "phonotactic constraint lookup"
        );

        let mut selected = Vec::new();
        for constraint in &self.constraints {
            if constraint.position == position {
                selected.push(constraint);
            }
        }
        selected
    }

    /// Looks up the status of `sequence` at `position`.
    ///
    /// Constraints are scanned in stored order. The first one that targets
    /// `position` and lists `sequence` (exact string equality, no
    /// normalisation) decides the answer:
    ///
    /// * `Some(true)` if that constraint is [`ConstraintKind::Permitted`],
    /// * `Some(false)` if it is any other kind,
    /// * `None` if no constraint at `position` lists the sequence.
    #[must_use]
    pub fn is_permitted(&self, sequence: &str, position: SyllablePosition) -> Option<bool> {
        self.constraints
            .iter()
            .filter(|rule| rule.position == position)
            .find(|rule| {
                rule.sequences
                    .iter()
                    .any(|listed| listed.as_ref() == sequence)
            })
            .map(|rule| rule.kind == ConstraintKind::Permitted)
    }
}
/// Builds the built-in phonotactic data set for English (language code `"en"`).
///
/// The template allows up to three onset and four coda slots. Two onset rules
/// are included, in this order: a permitted list of /s/-initial clusters and a
/// forbidden list of onset sequences.
#[must_use]
pub fn english_phonotactics() -> Phonotactics {
    // Onset clusters explicitly marked as permitted.
    const S_CLUSTER_ONSETS: &[&str] = &[
        "st", "sp", "sk", "str", "spr", "skr", "spl", "skw", "stj", "spj", "skj",
    ];
    // Sequences explicitly marked as forbidden in onset position.
    const BANNED_ONSETS: &[&str] = &["sr", "tl", "dl", "ŋ"];

    let syllable = SyllableTemplate {
        max_onset: 3,
        max_coda: 4,
        complex_nucleus: true,
        pattern: Cow::Borrowed("(C)(C)(C)V(C)(C)(C)(C)"),
    };

    let permitted_onsets = PhonotacticConstraint {
        kind: ConstraintKind::Permitted,
        position: SyllablePosition::Onset,
        sequences: S_CLUSTER_ONSETS
            .iter()
            .copied()
            .map(Cow::Borrowed)
            .collect(),
        description: Cow::Borrowed("English /s/+stop onset clusters"),
    };

    let forbidden_onsets = PhonotacticConstraint {
        kind: ConstraintKind::Forbidden,
        position: SyllablePosition::Onset,
        sequences: BANNED_ONSETS.iter().copied().map(Cow::Borrowed).collect(),
        description: Cow::Borrowed("Forbidden English onset sequences"),
    };

    Phonotactics {
        language_code: Cow::Borrowed("en"),
        syllable,
        // The permitted rule stays ahead of the forbidden one, matching lookup order.
        constraints: vec![permitted_onsets, forbidden_onsets],
    }
}
/// Builds the built-in phonotactic data set for Sanskrit (language code `"sa"`).
///
/// The template allows up to two onset and two coda slots and carries a single
/// rule: a permitted list of consonant+liquid onset clusters.
#[must_use]
pub fn sanskrit_phonotactics() -> Phonotactics {
    // NOTE(review): "sr" begins with a sibilant rather than a stop, so the
    // description string below is slightly narrower than the list; confirm intent.
    const LIQUID_CLUSTER_ONSETS: &[&str] = &["pr", "kr", "tr", "sr", "pl", "kl"];

    let syllable = SyllableTemplate {
        max_onset: 2,
        max_coda: 2,
        complex_nucleus: true,
        pattern: Cow::Borrowed("(C)(C)V(C)(C)"),
    };

    let onset_clusters = PhonotacticConstraint {
        kind: ConstraintKind::Permitted,
        position: SyllablePosition::Onset,
        sequences: LIQUID_CLUSTER_ONSETS
            .iter()
            .copied()
            .map(Cow::Borrowed)
            .collect(),
        description: Cow::Borrowed("Sanskrit stop+liquid onset clusters"),
    };

    Phonotactics {
        language_code: Cow::Borrowed("sa"),
        syllable,
        constraints: vec![onset_clusters],
    }
}
/// Builds the built-in phonotactic data set for Japanese (language code `"ja"`).
///
/// The template allows a single onset slot and a single coda slot and carries
/// one rule: the coda segments listed as permitted are `"n"` and `"ɴ"`.
#[must_use]
pub fn japanese_phonotactics() -> Phonotactics {
    // Segments explicitly marked as permitted in coda position.
    const CODA_NASALS: &[&str] = &["n", "ɴ"];

    let syllable = SyllableTemplate {
        max_onset: 1,
        max_coda: 1,
        complex_nucleus: true,
        pattern: Cow::Borrowed("(C)V(N)"),
    };

    let nasal_coda = PhonotacticConstraint {
        kind: ConstraintKind::Permitted,
        position: SyllablePosition::Coda,
        sequences: CODA_NASALS.iter().copied().map(Cow::Borrowed).collect(),
        description: Cow::Borrowed("Only moraic nasal permitted in Japanese coda"),
    };

    Phonotactics {
        language_code: Cow::Borrowed("ja"),
        syllable,
        constraints: vec![nasal_coda],
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Serialises a data set to JSON and parses it straight back.
    fn json_roundtrip(original: &Phonotactics) -> Phonotactics {
        let encoded = serde_json::to_string(original).unwrap();
        serde_json::from_str(&encoded).unwrap()
    }

    // English: three onset slots, four coda slots, clusters on both edges.
    #[test]
    fn test_english_syllable_template() {
        let template = english_phonotactics().syllable;
        assert_eq!(template.max_onset, 3);
        assert_eq!(template.max_coda, 4);
        assert!(template.allows_onset_clusters());
        assert!(template.allows_coda_clusters());
        assert!(template.allows_closed_syllables());
    }

    // Japanese: single onset and coda slots, so no clusters but closed syllables.
    #[test]
    fn test_japanese_syllable_template() {
        let template = japanese_phonotactics().syllable;
        assert_eq!(template.max_onset, 1);
        assert_eq!(template.max_coda, 1);
        assert!(!template.allows_onset_clusters());
        assert!(!template.allows_coda_clusters());
        assert!(template.allows_closed_syllables());
    }

    // Listed /s/-clusters are reported as permitted.
    #[test]
    fn test_english_onset_permitted() {
        let english = english_phonotactics();
        let verdict_str = english.is_permitted("str", SyllablePosition::Onset);
        let verdict_sp = english.is_permitted("sp", SyllablePosition::Onset);
        assert_eq!(verdict_str, Some(true));
        assert_eq!(verdict_sp, Some(true));
    }

    // Sequences from the forbidden list are reported as not permitted.
    #[test]
    fn test_english_onset_forbidden() {
        let english = english_phonotactics();
        let verdict_sr = english.is_permitted("sr", SyllablePosition::Onset);
        let verdict_tl = english.is_permitted("tl", SyllablePosition::Onset);
        assert_eq!(verdict_sr, Some(false));
        assert_eq!(verdict_tl, Some(false));
    }

    // A sequence that no rule mentions yields no verdict.
    #[test]
    fn test_english_onset_unknown() {
        let english = english_phonotactics();
        let verdict = english.is_permitted("br", SyllablePosition::Onset);
        assert_eq!(verdict, None);
    }

    // English ships two onset rules and no coda rules.
    #[test]
    fn test_constraints_at() {
        let english = english_phonotactics();

        let onset_rules = english.constraints_at(SyllablePosition::Onset);
        assert_eq!(onset_rules.len(), 2);

        let coda_rules = english.constraints_at(SyllablePosition::Coda);
        assert!(coda_rules.is_empty());
    }

    // Sanskrit permits "kr" as an onset and has two onset slots.
    #[test]
    fn test_sanskrit_onset() {
        let sanskrit = sanskrit_phonotactics();
        let verdict = sanskrit.is_permitted("kr", SyllablePosition::Onset);
        assert_eq!(verdict, Some(true));
        assert_eq!(sanskrit.syllable.max_onset, 2);
    }

    // Japanese permits "n" in coda position.
    #[test]
    fn test_japanese_coda() {
        let japanese = japanese_phonotactics();
        let verdict = japanese.is_permitted("n", SyllablePosition::Coda);
        assert_eq!(verdict, Some(true));
    }

    // The English data set survives a JSON roundtrip unchanged.
    #[test]
    fn test_phonotactics_serde_roundtrip() {
        let english = english_phonotactics();
        let restored = json_roundtrip(&english);
        assert_eq!(english, restored);
    }

    // The Sanskrit data set survives a JSON roundtrip unchanged.
    #[test]
    fn test_sanskrit_phonotactics_serde_roundtrip() {
        let sanskrit = sanskrit_phonotactics();
        let restored = json_roundtrip(&sanskrit);
        assert_eq!(sanskrit, restored);
    }
}