use serde::{Deserialize, Serialize};
use svara::phoneme::Phoneme;
/// A context rule for choosing between pronunciation variants of one word.
///
/// A rule names a `word`, a variant index to use by default, and an
/// alternative variant index that is chosen when one of the `triggers`
/// appears among the words immediately preceding it (see `select_variant`).
///
/// All string data is `'static`, so rules can live in a compile-time table.
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build it
/// with a struct literal, which leaves room to add fields later.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct HeteronymRule {
/// Spelling of the word this rule applies to. `lookup` compares it to the
/// query with exact, case-sensitive equality.
pub word: &'static str,
/// Variant index returned when no trigger word is found in the context.
pub default_variant: usize,
/// Variant index returned when a trigger word is found in the context.
pub context_variant: usize,
/// Words that, when they precede `word`, select `context_variant`.
/// `select_variant` lowercases each preceding word before comparing, so
/// entries here need to be lowercase to ever match.
pub triggers: &'static [&'static str],
}
/// Owned counterpart of `HeteronymRule`.
///
/// Holds the same four fields, but with heap-allocated `String`/`Vec`
/// storage instead of `'static` borrows, and derives `Serialize`,
/// `Deserialize` and `PartialEq` so it can be serialized, deserialized and
/// compared. Build one from a borrowed rule with
/// `HeteronymRuleOwned::from(&rule)`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[non_exhaustive]
pub struct HeteronymRuleOwned {
/// Spelling of the word this rule applies to.
pub word: alloc::string::String,
/// Variant index used when no trigger word is found in the context.
pub default_variant: usize,
/// Variant index used when a trigger word is found in the context.
pub context_variant: usize,
/// Words that select `context_variant` when they precede `word`.
pub triggers: alloc::vec::Vec<alloc::string::String>,
}
impl From<&HeteronymRule> for HeteronymRuleOwned {
    /// Copies a borrowed, `'static`-backed rule into an owned one.
    ///
    /// The word and every trigger are duplicated into freshly allocated
    /// strings (trigger order is preserved); the two variant indices are
    /// copied as-is.
    fn from(rule: &HeteronymRule) -> Self {
        let triggers: alloc::vec::Vec<alloc::string::String> = rule
            .triggers
            .iter()
            .copied()
            .map(alloc::string::String::from)
            .collect();
        let word = alloc::string::String::from(rule.word);

        Self {
            word,
            default_variant: rule.default_variant,
            context_variant: rule.context_variant,
            triggers,
        }
    }
}
/// Finds the built-in heteronym rule for `word`, if there is one.
///
/// The built-in table is scanned front to back and the first entry whose
/// `word` equals the argument is returned. The comparison is exact and
/// case-sensitive, so callers that want `"Read"` to match must normalise
/// the word themselves before calling.
///
/// Returns `None` when no entry matches.
#[must_use]
pub fn lookup(word: &str) -> Option<&'static HeteronymRule> {
    for candidate in HETERONYMS {
        if candidate.word == word {
            return Some(candidate);
        }
    }
    None
}
/// Chooses which variant index of `rule` applies in the given context.
///
/// `preceding_words` is the text before the heteronym, in reading order.
/// Only the last three entries are inspected, nearest first. Each is
/// lowercased and compared against `rule.triggers`; as soon as one matches,
/// `rule.context_variant` is returned. If none match (or the slice is
/// empty), `rule.default_variant` is returned.
#[must_use]
pub fn select_variant(rule: &HeteronymRule, preceding_words: &[&str]) -> usize {
    // How many words before the heteronym are allowed to influence it.
    const CONTEXT_WINDOW: usize = 3;

    // The chain is lazy, so lowercasing stops at the first matching word.
    let triggered = preceding_words
        .iter()
        .rev()
        .take(CONTEXT_WINDOW)
        .map(|word| word.to_lowercase())
        .any(|lowered| rule.triggers.contains(&lowered.as_str()));

    if triggered {
        rule.context_variant
    } else {
        rule.default_variant
    }
}
/// Picks the phoneme sequence for a heteronym given its preceding context.
///
/// The variant index is computed with [`select_variant`] and used to index
/// into `pronunciations`:
///
/// * if the index is in range, that pronunciation's phonemes are returned;
/// * if it is out of range, the first pronunciation is used as a fallback;
/// * if `pronunciations` is empty, an empty slice is returned instead of
///   panicking, so callers can treat "no pronunciation available" as data.
///
/// The returned slice borrows from `pronunciations`.
#[must_use]
pub fn select_phonemes<'a>(
    rule: &HeteronymRule,
    preceding_words: &[&str],
    pronunciations: &'a [shabdakosh::Pronunciation],
) -> &'a [Phoneme] {
    let idx = select_variant(rule, preceding_words);
    // Checked access: `get` covers the out-of-range index, `first` covers the
    // fallback, and the `None` arm covers an empty pronunciation list.
    match pronunciations
        .get(idx)
        .or_else(|| pronunciations.first())
    {
        Some(pronunciation) => pronunciation.phonemes(),
        None => &[],
    }
}
// Built-in table of heteronym rules, searched linearly by `lookup`.
//
// Every entry currently uses variant 0 as the default and variant 1 as the
// context-triggered alternative. Trigger words are written in lowercase
// because `select_variant` lowercases each preceding word before comparing.
//
// NOTE(review): the variant indices are presumably positions in the
// dictionary's pronunciation list for the word (that is how
// `select_phonemes` uses them) — confirm the ordering against the
// dictionary data when adding or changing entries.
static HETERONYMS: &[HeteronymRule] = &[
HeteronymRule {
word: "read",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "may", "might", "must", "please",
"let's", "shall",
],
},
HeteronymRule {
word: "lead",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "may", "might", "must", "shall",
],
},
HeteronymRule {
word: "live",
default_variant: 0,
context_variant: 1,
triggers: &["a", "the", "is", "was", "go", "went", "broadcast", "on"],
},
HeteronymRule {
word: "wind",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "can", "could", "should", "would", "must"],
},
HeteronymRule {
word: "tear",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "can", "could", "don't", "didn't"],
},
HeteronymRule {
word: "bow",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "take", "took", "a"],
},
HeteronymRule {
word: "close",
default_variant: 0,
context_variant: 1,
triggers: &["a", "the", "too", "very", "so", "how", "is", "was", "get"],
},
HeteronymRule {
word: "record",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "must", "please",
],
},
HeteronymRule {
word: "present",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "must", "shall",
],
},
HeteronymRule {
word: "refuse",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "can", "could", "should", "would", "must", "i"],
},
HeteronymRule {
word: "produce",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "can", "could", "should", "would", "must"],
},
HeteronymRule {
word: "object",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "must", "i", "we", "they",
],
},
HeteronymRule {
word: "project",
default_variant: 0,
context_variant: 1,
triggers: &["to", "will", "can", "could", "should", "would", "must"],
},
HeteronymRule {
word: "permit",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "must", "not",
],
},
HeteronymRule {
word: "desert",
default_variant: 0,
context_variant: 1,
triggers: &[
"to", "will", "can", "could", "should", "would", "must", "don't",
],
},
HeteronymRule {
word: "minute",
default_variant: 0,
context_variant: 1,
triggers: &["a", "the", "very", "most", "so", "how", "extremely"],
},
HeteronymRule {
word: "bass",
default_variant: 0,
context_variant: 1,
triggers: &[
"a",
"the",
"caught",
"fishing",
"lake",
"sea",
"striped",
"largemouth",
],
},
HeteronymRule {
word: "wound",
default_variant: 0,
context_variant: 1,
triggers: &["he", "she", "they", "i", "we", "had", "was", "were"],
},
HeteronymRule {
word: "dove",
default_variant: 0,
context_variant: 1,
triggers: &["he", "she", "they", "i", "we", "then", "and"],
},
HeteronymRule {
word: "sow",
default_variant: 0,
context_variant: 1,
triggers: &["a", "the", "old", "fat"],
},
];
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetches the built-in rule for "read", which most tests below share.
    fn read_rule() -> &'static HeteronymRule {
        lookup("read").unwrap()
    }

    /// Words present in the table are found.
    #[test]
    fn test_lookup_known_heteronym() {
        for word in &["read", "lead", "live"] {
            assert!(lookup(word).is_some());
        }
    }

    /// Words absent from the table yield `None`.
    #[test]
    fn test_lookup_unknown_word() {
        for word in &["hello", "world"] {
            assert!(lookup(word).is_none());
        }
    }

    /// Without a trigger in the context, the default variant is chosen.
    #[test]
    fn test_select_variant_default() {
        let context = ["the", "book"];
        assert_eq!(select_variant(read_rule(), &context), 0);
    }

    /// A trigger directly before the word selects the context variant.
    #[test]
    fn test_select_variant_triggered() {
        let context = ["want", "to"];
        assert_eq!(select_variant(read_rule(), &context), 1);
    }

    /// A trigger anywhere inside the look-back window is enough.
    #[test]
    fn test_select_variant_trigger_in_window() {
        let context = ["i", "will"];
        assert_eq!(select_variant(read_rule(), &context), 1);
    }

    /// An owned rule survives a JSON encode/decode cycle unchanged.
    #[test]
    fn test_serde_roundtrip() {
        let owned: HeteronymRuleOwned = read_rule().into();
        let json = serde_json::to_string(&owned).unwrap();
        let roundtripped = serde_json::from_str::<HeteronymRuleOwned>(&json).unwrap();
        assert_eq!(owned, roundtripped);
    }

    /// Every table entry must list at least one trigger word.
    #[test]
    fn test_all_heteronyms_have_triggers() {
        HETERONYMS.iter().for_each(|rule| {
            assert!(
                !rule.triggers.is_empty(),
                "heteronym {:?} has no triggers",
                rule.word
            );
        });
    }
}