use crate::conlang::phonology::validator;
use crate::conlang::types::{Phonology, SyllableTemplate, TemplateRole};
/// Maximum number of attempts `generate_word` makes, each with a differently
/// perturbed seed, before it gives up and returns `None`.
const RETRY_BUDGET: u64 = 64;
/// Generates a single word for `role`, fully determined by `phon`, `role` and `seed`.
///
/// Up to `RETRY_BUDGET` attempts are made. Attempt `step` seeds a fresh generator with
/// `seed + step * 0x1000_0001` (wrapping arithmetic), realizes a phoneme sequence from
/// one of the role's templates, and keeps it only if `validator::is_legal` accepts it.
/// The first accepted sequence is passed through `allophony_eval::surface_form` and
/// rendered to a string.
///
/// Returns `None` when the role has no templates, or when no attempt produces a
/// sequence that passes validation.
pub fn generate_word(phon: &Phonology, role: TemplateRole, seed: u64) -> Option<String> {
    let templates = phon.templates_for(role);
    if templates.is_empty() {
        return None;
    }

    (0..RETRY_BUDGET).find_map(|step| {
        // Every attempt gets its own generator so retries do not share state.
        let attempt_seed = seed.wrapping_add(step.wrapping_mul(0x1000_0001));
        let mut rng = SplitMix::new(attempt_seed);

        let seq = realize(phon, templates, &mut rng)?;
        if !validator::is_legal(phon, &seq) {
            return None;
        }

        let surface = crate::conlang::phonology::allophony_eval::surface_form(phon, &seq);
        Some(render(phon, &surface))
    })
}
/// Generates words for `role` using the seeds `0..count`, in seed order.
///
/// Seeds for which `generate_word` returns `None` are skipped, so the result may
/// contain fewer than `count` entries.
pub fn generate_words(phon: &Phonology, role: TemplateRole, count: usize) -> Vec<String> {
    let seed_limit = count as u64;
    let mut words = Vec::new();
    for seed in 0..seed_limit {
        if let Some(word) = generate_word(phon, role, seed) {
            words.push(word);
        }
    }
    words
}
/// Picks a template by weight and fills each of its atoms with a random class member.
///
/// For every atom of the chosen template, in order:
/// - an optional atom is skipped when the generator's coin flip comes up `false`;
/// - an atom whose class has no members is skipped if optional, and otherwise aborts
///   the whole realization with `None`;
/// - otherwise one member of the class is drawn uniformly by index and appended.
///
/// Returns `None` if no template could be picked, if a required class is empty, or if
/// the resulting sequence is empty.
fn realize(phon: &Phonology, templates: &[SyllableTemplate], rng: &mut SplitMix) -> Option<Vec<String>> {
    let tmpl = weighted_pick(templates, rng)?;
    let mut out = Vec::with_capacity(tmpl.pattern.len());
    for atom in &tmpl.pattern {
        // The coin is only flipped for optional atoms, so required atoms consume no
        // extra random draws.
        if atom.is_optional() && !rng.next_bool() {
            continue;
        }
        let members = phon.class_members(atom.class_name());
        if members.is_empty() {
            if atom.is_optional() {
                continue;
            }
            return None;
        }
        // Reduce in u64 *before* narrowing to usize. Casting the raw 64-bit draw to
        // usize first would drop the high bits on 32-bit targets and make the same
        // seed produce different words there than on 64-bit targets.
        let idx = (rng.next_u64() % (members.len() as u64)) as usize;
        out.push(members[idx].clone());
    }
    if out.is_empty() {
        None
    } else {
        Some(out)
    }
}
/// Chooses one template with probability proportional to its weight.
///
/// Negative weights count as zero. When the clamped weights sum to zero or less, the
/// first template is returned (or `None` for an empty slice) without consuming any
/// randomness. Otherwise a point in `[0, total)` is drawn and the templates are walked
/// in order until the point falls inside one's weight span; if the walk finishes
/// without a hit, the last template is returned.
fn weighted_pick<'a>(ts: &'a [SyllableTemplate], rng: &mut SplitMix) -> Option<&'a SyllableTemplate> {
    let total: f32 = ts.iter().map(|t| t.weight.max(0.0)).sum();
    if total <= 0.0 {
        return ts.first();
    }

    // Distance still to travel along the cumulative weight line.
    let mut remaining = rng.next_f32() * total;
    ts.iter()
        .find(|t| {
            let span = t.weight.max(0.0);
            if remaining < span {
                true
            } else {
                remaining -= span;
                false
            }
        })
        .or_else(|| ts.last())
}
/// Concatenates the written form of every symbol in `seq`.
///
/// A symbol that `phon.phoneme` resolves contributes that phoneme's `grapheme()`;
/// an unresolved symbol is emitted unchanged.
fn render(phon: &Phonology, seq: &[String]) -> String {
    let mut word = String::new();
    for ipa in seq {
        let glyph = match phon.phoneme(ipa) {
            Some(p) => p.grapheme(),
            None => ipa.as_str(),
        };
        word.push_str(glyph);
    }
    word
}
/// Small deterministic pseudo-random generator backing word generation.
///
/// `next_u64` advances the state by a fixed odd increment and scrambles it with the
/// shift/multiply constants of the SplitMix64 mixer. `new` additionally XORs the seed
/// with the increment before the first step.
struct SplitMix {
    state: u64,
}

impl SplitMix {
    /// Increment added to the state on every step; also XORed into the seed by `new`.
    const GAMMA: u64 = 0x9E37_79B9_7F4A_7C15;

    /// Creates a generator whose initial state is `seed ^ GAMMA`.
    fn new(seed: u64) -> Self {
        SplitMix {
            state: seed ^ Self::GAMMA,
        }
    }

    /// Advances the state and returns the next 64-bit output.
    fn next_u64(&mut self) -> u64 {
        let advanced = self.state.wrapping_add(Self::GAMMA);
        self.state = advanced;

        let mixed = (advanced ^ (advanced >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        let mixed = (mixed ^ (mixed >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        mixed ^ (mixed >> 31)
    }

    /// Returns a value in `[0, 1)` built from the top 24 bits of the next output.
    fn next_f32(&mut self) -> f32 {
        // 24 bits fit an f32 mantissa exactly, so the quotient is always below 1.0.
        let top_bits = self.next_u64() >> 40;
        top_bits as f32 / 16_777_216.0
    }

    /// Returns `true` when the lowest bit of the next output is set.
    fn next_bool(&mut self) -> bool {
        self.next_u64() % 2 != 0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::conlang::types::template::TemplateAtom;
    use crate::conlang::types::{Phoneme, PhonemeKind, PhonotacticConstraint};

    /// Parses the small fixture language shared by most tests: six consonants, three
    /// vowels, two `root` templates and two constraints.
    fn lang() -> Phonology {
        let body = r#"{
phonemes: [
{ ipa: "p", kind: "consonant" }, { ipa: "t", kind: "consonant" },
{ ipa: "k", kind: "consonant" }, { ipa: "s", kind: "consonant" },
{ ipa: "r", kind: "consonant" }, { ipa: "l", kind: "consonant" },
{ ipa: "a", kind: "vowel" }, { ipa: "i", kind: "vowel" }, { ipa: "u", kind: "vowel" }
],
classes: { C: ["p","t","k","s","r","l"], V: ["a","i","u"] },
templates: { root: [ { pattern: "C V (C)", weight: 1.0 }, { pattern: "C V", weight: 2.0 } ] },
constraints: [ { kind: "max_cluster_size", value: 1 }, { kind: "no_geminate" } ]
}"#;
        let parsed = Phonology::from_hjson(body).unwrap();
        parsed.unwrap()
    }

    #[test]
    fn parses_the_phonology_block() {
        let phon = lang();
        assert_eq!(phon.phonemes.len(), 9);
        assert_eq!(phon.class_members("C").len(), 6);
        assert_eq!(phon.templates_for(TemplateRole::Root).len(), 2);
        assert_eq!(phon.constraints.len(), 2);
    }

    #[test]
    fn generation_is_deterministic_per_seed() {
        let phon = lang();
        let first = generate_word(&phon, TemplateRole::Root, 7);
        let second = generate_word(&phon, TemplateRole::Root, 7);
        assert_eq!(first, second);
        assert!(first.is_some());
    }

    #[test]
    fn every_generated_word_satisfies_the_constraints() {
        let p = lang();
        let words = generate_words(&p, TemplateRole::Root, 200);
        assert!(words.len() > 150, "most seeds should yield a word, got {}", words.len());
        // The fixture's graphemes are single characters, so splitting the rendered
        // word per `char` recovers the phoneme sequence for re-validation.
        for w in words.iter() {
            let seq: Vec<String> = w.chars().map(String::from).collect();
            assert!(
                validator::is_legal(&p, &seq),
                "generated `{w}` violates its own phonotactics",
            );
            assert!(!w.is_empty() && w.chars().count() <= 3, "unexpected shape: {w}");
        }
    }

    #[test]
    fn unknown_role_yields_nothing() {
        let phon = lang();
        assert!(generate_word(&phon, TemplateRole::Suffix, 0).is_none());
        assert!(generate_words(&phon, TemplateRole::Suffix, 10).is_empty());
    }

    #[test]
    fn romanization_is_used_when_present() {
        // One consonant with an explicit romanization and one vowel without.
        let esh = Phoneme {
            ipa: "ʃ".into(),
            romanize: Some("sh".into()),
            kind: PhonemeKind::Consonant,
            sonority: None,
        };
        let open_vowel = Phoneme {
            ipa: "a".into(),
            romanize: None,
            kind: PhonemeKind::Vowel,
            sonority: None,
        };
        // A single mandatory C V template, so every seed yields the same word.
        let cv = SyllableTemplate {
            pattern: vec![
                TemplateAtom::Class("C".into()),
                TemplateAtom::Class("V".into()),
            ],
            weight: 1.0,
        };

        let p = Phonology {
            phonemes: vec![esh, open_vowel],
            classes: [
                ("C".to_owned(), vec!["ʃ".to_owned()]),
                ("V".to_owned(), vec!["a".to_owned()]),
            ]
            .into_iter()
            .collect(),
            templates: [("root".to_owned(), vec![cv])].into_iter().collect(),
            constraints: vec![PhonotacticConstraint::NoGeminate],
            allophony: Vec::new(),
            stress: None,
            romanizations: Vec::new(),
            default_romanization: None,
            tone: None,
            max_word_syllables: 4,
        };

        let word = generate_word(&p, TemplateRole::Root, 1).unwrap();
        assert_eq!(word, "sha");
    }
}