pub mod entry;
pub mod format;
use alloc::{collections::BTreeMap, string::String, vec::Vec};
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
use svara::phoneme::Phoneme;
use entry::{DictEntry, Pronunciation};
include!(concat!(env!("OUT_DIR"), "/generated_dict.rs"));
/// Word-to-pronunciation dictionary made of a base entry map plus a separate
/// map of user-supplied entries.
///
/// The `insert*` methods lowercase the word before storing it, and
/// `lookup_entry` lowercases the query and consults `user_entries` before
/// `entries`, so a user entry shadows a base entry for the same word.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PronunciationDict {
/// Base entries keyed by word.
///
/// Deserialized through `deserialize_entries_compat`, which accepts both the
/// current `DictEntry` value form and the older bare phoneme-list form.
#[serde(deserialize_with = "deserialize_entries_compat")]
entries: HashMap<String, DictEntry>,
/// User-supplied entries keyed by word; checked before `entries` on lookup.
///
/// Left out of the serialized output when empty, and defaults to an empty
/// map when the field is missing from the input. Deserialized through
/// `deserialize_user_entries_compat` (same compatibility handling as
/// `entries`).
#[serde(
default,
skip_serializing_if = "BTreeMap::is_empty",
deserialize_with = "deserialize_user_entries_compat"
)]
user_entries: BTreeMap<String, DictEntry>,
}
impl PronunciationDict {
#[must_use]
pub fn new() -> Self {
Self {
entries: HashMap::new(),
user_entries: BTreeMap::new(),
}
}
#[must_use]
pub fn english() -> Self {
Self {
entries: generated_english_entries(),
user_entries: BTreeMap::new(),
}
}
#[must_use]
pub fn english_minimal() -> Self {
let mut dict = Self::new();
dict.insert("the", &[Phoneme::FricativeDh, Phoneme::VowelSchwa]);
dict.insert("a", &[Phoneme::VowelSchwa]);
dict.insert("an", &[Phoneme::VowelSchwa, Phoneme::NasalN]);
dict.insert("i", &[Phoneme::DiphthongAI]);
dict.insert("is", &[Phoneme::VowelNearI, Phoneme::FricativeZ]);
dict.insert(
"was",
&[
Phoneme::ApproximantW,
Phoneme::VowelOpenO,
Phoneme::FricativeZ,
],
);
dict.insert("are", &[Phoneme::VowelOpenA, Phoneme::ApproximantR]);
dict.insert("to", &[Phoneme::PlosiveT, Phoneme::VowelU]);
dict.insert("of", &[Phoneme::VowelOpenO, Phoneme::FricativeV]);
dict.insert("in", &[Phoneme::VowelNearI, Phoneme::NasalN]);
dict.insert("it", &[Phoneme::VowelNearI, Phoneme::PlosiveT]);
dict.insert(
"and",
&[Phoneme::VowelAsh, Phoneme::NasalN, Phoneme::PlosiveD],
);
dict.insert(
"that",
&[Phoneme::FricativeDh, Phoneme::VowelAsh, Phoneme::PlosiveT],
);
dict.insert(
"for",
&[
Phoneme::FricativeF,
Phoneme::VowelOpenO,
Phoneme::ApproximantR,
],
);
dict.insert("you", &[Phoneme::ApproximantJ, Phoneme::VowelU]);
dict.insert("he", &[Phoneme::FricativeH, Phoneme::VowelE]);
dict.insert("she", &[Phoneme::FricativeSh, Phoneme::VowelE]);
dict.insert("we", &[Phoneme::ApproximantW, Phoneme::VowelE]);
dict.insert("they", &[Phoneme::FricativeDh, Phoneme::DiphthongEI]);
dict.insert(
"this",
&[
Phoneme::FricativeDh,
Phoneme::VowelNearI,
Phoneme::FricativeS,
],
);
dict.insert(
"with",
&[
Phoneme::ApproximantW,
Phoneme::VowelNearI,
Phoneme::FricativeTh,
],
);
dict.insert(
"not",
&[Phoneme::NasalN, Phoneme::VowelOpenO, Phoneme::PlosiveT],
);
dict.insert(
"but",
&[Phoneme::PlosiveB, Phoneme::VowelCupV, Phoneme::PlosiveT],
);
dict.insert(
"have",
&[Phoneme::FricativeH, Phoneme::VowelAsh, Phoneme::FricativeV],
);
dict.insert(
"one",
&[Phoneme::ApproximantW, Phoneme::VowelCupV, Phoneme::NasalN],
);
dict.insert(
"hello",
&[
Phoneme::FricativeH,
Phoneme::VowelOpenE,
Phoneme::LateralL,
Phoneme::DiphthongOU,
],
);
dict.insert(
"world",
&[
Phoneme::ApproximantW,
Phoneme::VowelBird,
Phoneme::LateralL,
Phoneme::PlosiveD,
],
);
dict.insert(
"yes",
&[
Phoneme::ApproximantJ,
Phoneme::VowelOpenE,
Phoneme::FricativeS,
],
);
dict.insert("no", &[Phoneme::NasalN, Phoneme::DiphthongOU]);
dict
}
#[must_use]
pub fn from_entries(entries: HashMap<String, DictEntry>) -> Self {
Self {
entries,
user_entries: BTreeMap::new(),
}
}
#[must_use]
pub fn from_simple_entries(entries: HashMap<String, Vec<Phoneme>>) -> Self {
let entries = entries
.into_iter()
.map(|(word, phonemes)| (word, DictEntry::from_phonemes(&phonemes)))
.collect();
Self {
entries,
user_entries: BTreeMap::new(),
}
}
pub fn insert(&mut self, word: &str, phonemes: &[Phoneme]) {
self.entries.insert(
alloc::string::ToString::to_string(&word.to_lowercase()),
DictEntry::from_phonemes(phonemes),
);
}
pub fn insert_entry(&mut self, word: &str, entry: DictEntry) {
self.entries.insert(
alloc::string::ToString::to_string(&word.to_lowercase()),
entry,
);
}
pub fn insert_user(&mut self, word: &str, phonemes: &[Phoneme]) {
self.user_entries.insert(
alloc::string::ToString::to_string(&word.to_lowercase()),
DictEntry::from_phonemes(phonemes),
);
}
pub fn insert_user_entry(&mut self, word: &str, entry: DictEntry) {
self.user_entries.insert(
alloc::string::ToString::to_string(&word.to_lowercase()),
entry,
);
}
pub fn remove_user(&mut self, word: &str) -> bool {
self.user_entries
.remove(&alloc::string::ToString::to_string(&word.to_lowercase()))
.is_some()
}
#[must_use]
pub fn user_entries(&self) -> &BTreeMap<String, DictEntry> {
&self.user_entries
}
#[must_use]
pub fn user_len(&self) -> usize {
self.user_entries.len()
}
#[must_use]
pub fn lookup(&self, word: &str) -> Option<&[Phoneme]> {
self.lookup_entry(word)
.map(|entry| entry.primary_phonemes())
}
#[must_use]
pub fn lookup_entry(&self, word: &str) -> Option<&DictEntry> {
let key = alloc::string::ToString::to_string(&word.to_lowercase());
self.user_entries
.get(&key)
.or_else(|| self.entries.get(&key))
}
#[must_use]
pub fn lookup_all(&self, word: &str) -> Option<&[Pronunciation]> {
self.lookup_entry(word).map(|entry| entry.all())
}
#[must_use]
pub fn len(&self) -> usize {
self.entries.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
#[must_use]
pub fn entries(&self) -> &HashMap<String, DictEntry> {
&self.entries
}
pub fn merge(&mut self, other: &PronunciationDict) {
for (word, entry) in other.entries() {
self.entries.insert(word.clone(), entry.clone());
}
for (word, entry) in other.user_entries() {
self.user_entries.insert(word.clone(), entry.clone());
}
}
pub fn merge_conservative(&mut self, other: &PronunciationDict) {
for (word, entry) in other.entries() {
if !self.entries.contains_key(word) {
self.entries.insert(word.clone(), entry.clone());
}
}
for (word, entry) in other.user_entries() {
if !self.user_entries.contains_key(word) {
self.user_entries.insert(word.clone(), entry.clone());
}
}
}
}
/// Word-level difference between two dictionaries, as produced by `diff`.
///
/// `diff` visits words in ascending order, so each list is sorted.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct DictDiff {
/// Words that have an entry in the right dictionary but none in the left.
pub added: Vec<String>,
/// Words that have an entry in the left dictionary but none in the right.
pub removed: Vec<String>,
/// Words present in both dictionaries whose entries compare unequal.
pub changed: Vec<String>,
}
impl DictDiff {
    /// Returns `true` when no word was added, removed, or changed.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        [&self.added, &self.removed, &self.changed]
            .iter()
            .all(|bucket| bucket.is_empty())
    }

    /// Returns the total number of words across the added, removed, and
    /// changed lists.
    #[must_use]
    pub fn len(&self) -> usize {
        [&self.added, &self.removed, &self.changed]
            .iter()
            .map(|bucket| bucket.len())
            .sum()
    }
}
#[must_use]
pub fn diff(left: &PronunciationDict, right: &PronunciationDict) -> DictDiff {
let mut all_words = alloc::collections::BTreeSet::new();
for word in left.entries().keys() {
all_words.insert(word.as_str());
}
for word in left.user_entries().keys() {
all_words.insert(word.as_str());
}
for word in right.entries().keys() {
all_words.insert(word.as_str());
}
for word in right.user_entries().keys() {
all_words.insert(word.as_str());
}
let mut result = DictDiff::default();
for word in all_words {
let l = left.lookup_entry(word);
let r = right.lookup_entry(word);
match (l, r) {
(None, Some(_)) => result.added.push(alloc::string::ToString::to_string(word)),
(Some(_), None) => result
.removed
.push(alloc::string::ToString::to_string(word)),
(Some(le), Some(re)) if le != re => {
result
.changed
.push(alloc::string::ToString::to_string(word));
}
_ => {}
}
}
result
}
impl Default for PronunciationDict {
fn default() -> Self {
Self::new()
}
}
/// Deserialization-only helper that accepts either stored form of a
/// dictionary value.
///
/// The enum is untagged, so the input carries no variant marker; serde's
/// untagged representation tries the variants in declaration order, meaning
/// `New` is attempted before `Old`.
#[derive(Deserialize)]
#[serde(untagged)]
enum EntryCompat {
/// Value already in `DictEntry` form.
New(DictEntry),
/// Older form: a bare list of phonemes, converted with
/// `DictEntry::from_phonemes` by `into_entry`.
Old(Vec<Phoneme>),
}
impl EntryCompat {
    /// Converts either stored form into a `DictEntry`.
    ///
    /// A `New` value is returned unchanged; an `Old` phoneme list is wrapped
    /// via `DictEntry::from_phonemes`.
    fn into_entry(self) -> DictEntry {
        let legacy_phonemes = match self {
            Self::New(entry) => return entry,
            Self::Old(phonemes) => phonemes,
        };
        DictEntry::from_phonemes(&legacy_phonemes)
    }
}
/// Serde `deserialize_with` helper for the base entry map.
///
/// Reads a map whose values may be in either form accepted by `EntryCompat`
/// and converts every value to a `DictEntry`, collecting into a `HashMap`.
///
/// # Errors
///
/// Returns the deserializer's error if the input cannot be read as a map of
/// string keys to `EntryCompat` values.
fn deserialize_entries_compat<'de, D>(
    deserializer: D,
) -> core::result::Result<HashMap<String, DictEntry>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    BTreeMap::<String, EntryCompat>::deserialize(deserializer).map(|raw| {
        raw.into_iter()
            .map(|(word, compat)| (word, compat.into_entry()))
            .collect()
    })
}
/// Serde `deserialize_with` helper for the user entry map.
///
/// Reads a map whose values may be in either form accepted by `EntryCompat`
/// and converts every value to a `DictEntry`, collecting into a `BTreeMap`.
///
/// # Errors
///
/// Returns the deserializer's error if the input cannot be read as a map of
/// string keys to `EntryCompat` values.
fn deserialize_user_entries_compat<'de, D>(
    deserializer: D,
) -> core::result::Result<BTreeMap<String, DictEntry>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    BTreeMap::<String, EntryCompat>::deserialize(deserializer).map(|raw| {
        raw.into_iter()
            .map(|(word, compat)| (word, compat.into_entry()))
            .collect()
    })
}