/// Symbols that [`stress_weight`] counts with weight 2 instead of 1.
///
/// Note that `ʤ` and `ʧ` are listed here as well as in [`CONSONANTS`].
// NOTE(review): the upper-case letters presumably stand for diphthongs in the
// phoneme alphabet used by this crate — confirm against the lexicon format.
pub const DIPHTHONGS: &str = "AIOQWYʤʧ";
/// Consonant phoneme symbols.
pub const CONSONANTS: &str = "bdfhjklmnpstvwzðŋɡɹɾʃʒʤʧθ";
// NOTE(review): the purpose of this set is not evident from this file; it is
// presumably specific to US-English processing — confirm against its callers.
pub const US_TAUS: &str = "AIOWYiuæɑəɛɪɹʊʌ";
// NOTE(review): presumably the complete symbol inventory for US-English
// output (includes both stress marks) — confirm.
pub const US_VOCAB: &str = "AIOWYbdfhijklmnpstuvwzæðŋɑɔəɛɜɡɪɹɾʃʊʌʒʤʧˈˌθᵊᵻʔ";
// NOTE(review): presumably the complete symbol inventory for British-English
// output (includes both stress marks) — confirm.
pub const GB_VOCAB: &str = "AIQWYabdfhijklmnpstuvwzðŋɑɒɔəɛɜɡɪɹʃʊʌʒʤʧˈˌːθᵊ";
/// Both stress marks ([`SECONDARY_STRESS`] and [`PRIMARY_STRESS`]); used for
/// membership tests when stress marks are repositioned.
pub const STRESSES: &str = "ˌˈ";
/// The primary stress mark.
pub const PRIMARY_STRESS: char = 'ˈ';
/// The secondary stress mark.
pub const SECONDARY_STRESS: char = 'ˌ';
/// Symbols treated as vowels when deciding whether and where a stress mark
/// can be placed.
pub const VOWELS: &str = "AIOQWYaiuæɑɒɔəɛɜɪʊʌᵻ";
// NOTE(review): the ASCII apostrophe occurs three times in this literal; the
// two adjacent ones may originally have been typographic quotes (U+2018 /
// U+2019) that were normalised at some point — confirm against the upstream
// data before relying on this set.
pub const SUBTOKEN_JUNKS: &str = "',-._''/";
/// Punctuation characters: everything in [`NON_QUOTE_PUNCTS`] followed by the
/// quotation marks U+201C, U+201D and U+201E.
pub const PUNCTS: &str = ";:,.!?—…\u{201C}\u{201D}\u{201E}";
/// The characters of [`PUNCTS`] without the quotation marks.
pub const NON_QUOTE_PUNCTS: &str = ";:,.!?—…";
/// Tags that denote punctuation tokens. A subset of them has a phoneme
/// replacement, see [`punct_tag_phoneme`].
// NOTE(review): these look like Penn-Treebank-style part-of-speech tags —
// confirm against the tagger actually used.
pub const PUNCT_TAGS: &[&str] = &[
".", ",", "-LRB-", "-RRB-", "``", "\"\"", "''", ":", "$", "#", "NFP",
];
/// Returns the character sequence that stands in for a punctuation tag.
///
/// Bracket tags map to the plain parentheses, the opening-quote tag maps to
/// U+201C and both closing-quote tags map to U+201D. Any other tag yields
/// `None`.
pub fn punct_tag_phoneme(tag: &str) -> Option<&'static str> {
    let glyph = match tag {
        "-LRB-" => "(",
        "-RRB-" => ")",
        "``" => "\u{201C}",
        // Both spellings of the closing quote share one replacement.
        "\"\"" | "''" => "\u{201D}",
        _ => return None,
    };
    Some(glyph)
}
/// Looks up the spoken names that belong to a currency symbol.
///
/// Returns a pair holding the name of the currency's main unit and the name
/// of its fractional unit, in that order, or `None` when the symbol is not
/// one of `$`, `£` or `€`.
pub fn currency_names(symbol: char) -> Option<(&'static str, &'static str)> {
    let (unit, fraction) = match symbol {
        '$' => ("dollar", "cent"),
        '£' => ("pound", "pence"),
        '€' => ("euro", "cent"),
        _ => return None,
    };
    Some((unit, fraction))
}
/// Ordinal suffixes, as in "1st", "2nd", "3rd" and "4th".
pub const ORDINALS: &[&str] = &["st", "nd", "rd", "th"];
/// Returns the spoken name of `.` ("dot") or `/` ("slash").
///
/// Every other character yields `None`.
pub fn add_symbol_name(c: char) -> Option<&'static str> {
    if c == '.' {
        Some("dot")
    } else if c == '/' {
        Some("slash")
    } else {
        None
    }
}
/// Returns the word spoken in place of `%`, `&`, `+` or `@`.
///
/// Every other character yields `None`.
pub fn symbol_name(c: char) -> Option<&'static str> {
    // Symbol / spoken-word pairs, searched linearly (the table is tiny).
    const SPOKEN: [(char, &str); 4] = [
        ('%', "percent"),
        ('&', "and"),
        ('+', "plus"),
        ('@', "at"),
    ];
    SPOKEN
        .iter()
        .find_map(|&(symbol, word)| if symbol == c { Some(word) } else { None })
}
/// Reports whether `c` is an ASCII letter, an apostrophe or a hyphen.
pub fn is_lexicon_ord(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '\'' || c == '-'
}
/// Computes the weight of a phoneme string.
///
/// Every character contributes 1, except characters listed in
/// [`DIPHTHONGS`], which contribute 2. The empty string has weight 0.
pub fn stress_weight(ps: &str) -> usize {
    let mut total = 0;
    for symbol in ps.chars() {
        total += if DIPHTHONGS.contains(symbol) { 2 } else { 1 };
    }
    total
}
/// Reports whether at least one character of `ps` is listed in [`VOWELS`].
fn contains_vowel(ps: &str) -> bool {
    ps.contains(|symbol: char| VOWELS.contains(symbol))
}
/// Moves every stress mark so that it sits directly before the next vowel.
///
/// For each character of `ps` that is listed in [`STRESSES`], the first
/// following character listed in [`VOWELS`] is located and the mark is moved
/// to the position immediately in front of it. A stress mark that is not
/// followed by any vowel stays where it is, as do all other characters.
/// When several marks end up in front of the same vowel they keep their
/// original relative order.
fn restress(ps: &str) -> String {
    let symbols: Vec<char> = ps.chars().collect();

    // Each symbol gets a sort key. Symbols that stay in place use the odd key
    // `2 * pos + 1`; a stress mark that moves in front of the vowel at index
    // `v` uses the even key `2 * v`, which sorts directly before that vowel's
    // own key and after everything that precedes the vowel.
    let mut keyed: Vec<(usize, char)> = Vec::with_capacity(symbols.len());
    for (pos, &symbol) in symbols.iter().enumerate() {
        let mut key = 2 * pos + 1;
        if STRESSES.contains(symbol) {
            let next_vowel = symbols[pos..]
                .iter()
                .position(|&candidate| VOWELS.contains(candidate));
            if let Some(offset) = next_vowel {
                key = 2 * (pos + offset);
            }
        }
        keyed.push((key, symbol));
    }

    // The sort is stable, so symbols with equal keys keep their input order.
    keyed.sort_by_key(|&(key, _)| key);
    keyed.into_iter().map(|(_, symbol)| symbol).collect()
}
/// Adjusts the stress marks of the phoneme string `ps` according to `stress`.
///
/// The rules are tried in this order; the first one that matches decides the
/// result:
///
/// 1. `None`: `ps` is returned unchanged.
/// 2. `stress < -1`: every primary and secondary stress mark is removed.
/// 3. `stress` is `-1`, or `stress` is `0` / `-0.5` and `ps` contains a primary
///    mark: existing secondary marks are removed and every primary mark is
///    demoted to a secondary mark.
/// 4. `stress` is `0`, `0.5` or `1` and `ps` has no stress mark: a secondary
///    mark is inserted directly before the first vowel.
/// 5. `stress >= 1` and `ps` has a secondary but no primary mark: every
///    secondary mark is promoted to a primary mark.
/// 6. `stress > 1` and `ps` has no stress mark: a primary mark is inserted
///    directly before the first vowel.
/// 7. Otherwise `ps` is returned unchanged.
///
/// Rules 4 and 6 return `ps` unchanged when it contains no vowel.
///
/// Wherever a rule says "`stress` is *x*", the comparison uses an absolute
/// tolerance of [`f32::EPSILON`]. The same tolerance is applied to every
/// level, so a value that counts as `0` for unstressed input also counts as
/// `0` for input that carries a primary mark.
pub fn apply_stress(ps: &str, stress: Option<f32>) -> String {
    /// Tolerant equality used for all level comparisons.
    fn is_level(value: f32, level: f32) -> bool {
        (value - level).abs() < f32::EPSILON
    }

    let stress = match stress {
        Some(level) => level,
        None => return ps.to_string(),
    };

    let has_primary = ps.contains(PRIMARY_STRESS);
    let has_secondary = ps.contains(SECONDARY_STRESS);
    let unstressed = !has_primary && !has_secondary;

    // Prefixes `marker` and lets `restress` move it in front of the first
    // vowel. Without a vowel there is nowhere to put the mark.
    let insert_marker = |marker: char| -> String {
        if !contains_vowel(ps) {
            return ps.to_string();
        }
        let mut marked = String::with_capacity(ps.len() + marker.len_utf8());
        marked.push(marker);
        marked.push_str(ps);
        restress(&marked)
    };

    if stress < -1.0 {
        ps.replace([PRIMARY_STRESS, SECONDARY_STRESS], "")
    } else if is_level(stress, -1.0)
        || (has_primary && (is_level(stress, 0.0) || is_level(stress, -0.5)))
    {
        // Drop the old secondary marks first so that only the demoted
        // primary marks remain as secondary marks.
        ps.chars()
            .filter(|&symbol| symbol != SECONDARY_STRESS)
            .map(|symbol| {
                if symbol == PRIMARY_STRESS {
                    SECONDARY_STRESS
                } else {
                    symbol
                }
            })
            .collect::<String>()
    } else if unstressed
        && [0.0_f32, 0.5, 1.0]
            .iter()
            .any(|&level| is_level(stress, level))
    {
        insert_marker(SECONDARY_STRESS)
    } else if stress >= 1.0 && !has_primary && has_secondary {
        ps.chars()
            .map(|symbol| {
                if symbol == SECONDARY_STRESS {
                    PRIMARY_STRESS
                } else {
                    symbol
                }
            })
            .collect::<String>()
    } else if stress > 1.0 && unstressed {
        insert_marker(PRIMARY_STRESS)
    } else {
        ps.to_string()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- stress_weight ----

    /// The empty string weighs nothing.
    #[test]
    fn stress_weight_empty() {
        let weight = stress_weight("");
        assert_eq!(weight, 0);
    }

    /// Two symbols outside `DIPHTHONGS` weigh one unit each.
    #[test]
    fn stress_weight_plain() {
        let weight = stress_weight("bk");
        assert_eq!(weight, 2);
    }

    /// `A` is listed in `DIPHTHONGS` (2), `b` is not (1).
    #[test]
    fn stress_weight_with_diphthongs() {
        let weight = stress_weight("Ab");
        assert_eq!(weight, 3);
    }

    // ---- apply_stress ----

    /// Without a stress level the input comes back untouched.
    #[test]
    fn stress_none_returns_unchanged() {
        let input = "hˈɛloʊ";
        assert_eq!(apply_stress(input, None), input);
    }

    /// A level below -1 removes both kinds of stress mark.
    #[test]
    fn stress_strip_all() {
        let stripped = apply_stress("hˈɛˌloʊ", Some(-2.0));
        assert_eq!(stripped, "hɛloʊ");
    }

    /// Level -1 turns the primary mark into a secondary mark.
    #[test]
    fn stress_demote_primary() {
        let demoted = apply_stress("hˈɛloʊ", Some(-1.0));
        let has_secondary = demoted.contains(SECONDARY_STRESS);
        let has_primary = demoted.contains(PRIMARY_STRESS);
        assert!(has_secondary, "should contain secondary stress");
        assert!(!has_primary, "should not contain primary stress");
        assert_eq!(demoted, "hˌɛloʊ");
    }

    /// Level 0 demotes as well when a primary mark is present.
    #[test]
    fn stress_zero_with_primary_demotes() {
        let demoted = apply_stress("hˈɛloʊ", Some(0.0));
        assert_eq!(demoted, "hˌɛloʊ");
    }

    /// Level 0 on unstressed input inserts a secondary mark somewhere before
    /// the first vowel.
    #[test]
    fn stress_zero_no_stress_adds_secondary() {
        let marked = apply_stress("hɛloʊ", Some(0.0));
        assert!(marked.contains(SECONDARY_STRESS));
        // Both positions are byte offsets into `marked`.
        let stress_pos = marked.find(SECONDARY_STRESS).unwrap();
        let vowel_pos = marked.find(|c: char| VOWELS.contains(c)).unwrap();
        assert!(
            stress_pos < vowel_pos,
            "secondary stress ({}) should precede first vowel ({})",
            stress_pos,
            vowel_pos
        );
    }

    /// Level 1 promotes a lone secondary mark to a primary mark.
    #[test]
    fn stress_one_promotes_secondary() {
        let promoted = apply_stress("hˌɛloʊ", Some(1.0));
        assert_eq!(promoted, "hˈɛloʊ");
    }

    /// A level above 1 on unstressed input inserts a primary mark only.
    #[test]
    fn stress_high_no_stress_adds_primary() {
        let marked = apply_stress("hɛloʊ", Some(2.0));
        assert!(marked.contains(PRIMARY_STRESS));
        assert!(!marked.contains(SECONDARY_STRESS));
    }

    /// Input without any vowel cannot receive a stress mark.
    #[test]
    fn stress_no_vowels_unchanged() {
        let consonants = "bkd";
        assert_eq!(apply_stress(consonants, Some(0.0)), "bkd");
        assert_eq!(apply_stress(consonants, Some(2.0)), "bkd");
    }

    /// A level that matches no rule leaves the input as it is.
    #[test]
    fn stress_fallthrough_unchanged() {
        let input = "hˈɛloʊ";
        let output = apply_stress(input, Some(1.5));
        assert_eq!(output, input);
    }

    // ---- restress ----

    /// A leading mark ends up directly in front of the first vowel.
    #[test]
    fn restress_basic() {
        let moved = restress("ˌhɛloʊ");
        // Both positions are character indices.
        let stress_idx = moved
            .chars()
            .position(|c| c == SECONDARY_STRESS)
            .unwrap();
        let vowel_idx = moved.chars().position(|c| VOWELS.contains(c)).unwrap();
        assert_eq!(
            stress_idx + 1,
            vowel_idx,
            "stress should be immediately before first vowel"
        );
    }
}