use alloc::{string::String, vec::Vec};
use serde::{Deserialize, Serialize};
use svara::phoneme::Phoneme;
use crate::ipa::phoneme_to_ipa;
/// A dictionary word paired with the phonemes that failed inventory validation.
///
/// When built by `check_entry`, `invalid_phonemes` is never empty and each
/// phoneme appears at most once, in order of first occurrence.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InvalidEntry {
/// The dictionary word whose entry contains rejected phonemes.
pub word: String,
/// The phonemes of this word that were rejected.
pub invalid_phonemes: Vec<Phoneme>,
}
/// Outcome of checking a pronunciation dictionary against a phoneme inventory.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ValidationReport {
/// Language identifier; `validate_inventory` fills this from the
/// inventory's `language_code`.
pub language: String,
/// Entries that contain at least one rejected phoneme;
/// `validate_inventory` returns them sorted by word.
pub invalid_entries: Vec<InvalidEntry>,
}
impl ValidationReport {
    /// Returns `true` when the report lists no invalid entries.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.invalid_count() == 0
    }

    /// Returns the number of entries recorded as invalid.
    #[must_use]
    pub fn invalid_count(&self) -> usize {
        self.invalid_entries.len()
    }
}
/// Checks every entry of `dict` against `inventory` and reports the offenders.
///
/// The entries yielded by `dict.entries()` are checked first, followed by
/// those yielded by `dict.user_entries()`. A word that is rejected in both
/// sources is reported once per source.
///
/// # Returns
///
/// A [`ValidationReport`] carrying the inventory's language code and the
/// rejected entries sorted by word. The sort is stable, so entries with the
/// same word keep the order in which they were found.
#[must_use]
pub fn validate_inventory(
    dict: &super::PronunciationDict,
    inventory: &varna::phoneme::PhonemeInventory,
) -> ValidationReport {
    let mut offenders = Vec::new();

    // `check_entry` yields `Some` only for rejected entries, so extending
    // with the `Option` appends zero or one item per word.
    for (word, entry) in dict.entries() {
        offenders.extend(check_entry(word, entry, inventory));
    }
    for (word, entry) in dict.user_entries() {
        offenders.extend(check_entry(word, entry, inventory));
    }

    offenders.sort_by(|lhs, rhs| lhs.word.cmp(&rhs.word));

    let language = alloc::string::ToString::to_string(&inventory.language_code);
    ValidationReport {
        language,
        invalid_entries: offenders,
    }
}
fn inventory_has_normalized(inventory: &varna::phoneme::PhonemeInventory, ipa: &str) -> bool {
if inventory.has(ipa) {
return true;
}
let with_long = alloc::format!("{ipa}ː");
if inventory.has(&with_long) {
return true;
}
if let Some(base) = ipa.strip_suffix('ː')
&& inventory.has(base)
{
return true;
}
false
}
/// Collects the phonemes of `entry` that `inventory` does not accept.
///
/// Every pronunciation returned by `entry.all()` is scanned. A phoneme is
/// rejected when `phoneme_to_ipa` yields an IPA string for it and
/// `inventory_has_normalized` does not find that string. Phonemes for which
/// `phoneme_to_ipa` returns `None` are skipped.
///
/// # Returns
///
/// `None` when nothing was rejected; otherwise an [`InvalidEntry`] for `word`
/// listing each rejected phoneme once, in order of first occurrence.
fn check_entry(
    word: &str,
    entry: &super::entry::DictEntry,
    inventory: &varna::phoneme::PhonemeInventory,
) -> Option<InvalidEntry> {
    let mut rejected = Vec::new();

    for pronunciation in entry.all() {
        for phoneme in pronunciation.phonemes() {
            // Phonemes without an IPA mapping are not checked at all.
            let Some(ipa) = phoneme_to_ipa(phoneme) else {
                continue;
            };
            if inventory_has_normalized(inventory, ipa) {
                continue;
            }
            // Report each offending phoneme only once per word.
            if !rejected.contains(phoneme) {
                rejected.push(*phoneme);
            }
        }
    }

    if rejected.is_empty() {
        return None;
    }
    Some(InvalidEntry {
        word: String::from(word),
        invalid_phonemes: rejected,
    })
}
// Unit tests for inventory validation: `validate_inventory`, the
// `PronunciationDict::validate` convenience method, and report serialization.
#[cfg(test)]
mod tests {
use super::*;
use crate::PronunciationDict;
use crate::dictionary::entry::DictEntry;
// A lone open-o vowel must be accepted by the English inventory.
// NOTE(review): presumably English lists this vowel only with a length mark,
// which is what exercises the length-insensitive lookup — confirm against
// the inventory data.
#[test]
fn test_length_normalization() {
let mut dict = PronunciationDict::new();
dict.insert("caught", &[Phoneme::VowelOpenO]);
let inventory = varna::phoneme::english();
let report = validate_inventory(&dict, &inventory);
assert!(report.is_valid(), "length normalization failed: {report:?}");
}
// Every consonant listed below is expected to be accepted by the English
// inventory, so the report must contain no invalid entries.
#[test]
fn test_consonants_validate_english() {
let mut dict = PronunciationDict::new();
dict.insert(
"test",
&[
Phoneme::PlosiveP,
Phoneme::PlosiveB,
Phoneme::PlosiveT,
Phoneme::PlosiveD,
Phoneme::PlosiveK,
Phoneme::PlosiveG,
Phoneme::FricativeF,
Phoneme::FricativeV,
Phoneme::FricativeS,
Phoneme::FricativeZ,
Phoneme::FricativeSh,
Phoneme::FricativeZh,
Phoneme::FricativeTh,
Phoneme::FricativeDh,
Phoneme::FricativeH,
Phoneme::NasalM,
Phoneme::NasalN,
Phoneme::NasalNg,
Phoneme::LateralL,
Phoneme::ApproximantW,
Phoneme::ApproximantJ,
],
);
let inventory = varna::phoneme::english();
let report = validate_inventory(&dict, &inventory);
assert!(report.is_valid(), "consonant validation failed: {report:?}");
}
// `NasalNg` checked against the Spanish inventory must be reported, and the
// report must name the word it came from.
#[test]
fn test_invalid_phoneme_detected() {
let mut dict = PronunciationDict::new();
dict.insert("sing", &[Phoneme::NasalNg]);
let inventory = varna::phoneme::inventories::spanish();
let report = validate_inventory(&dict, &inventory);
assert!(!report.is_valid());
assert_eq!(report.invalid_count(), 1);
assert_eq!(report.invalid_entries[0].word, "sing");
assert!(
report.invalid_entries[0]
.invalid_phonemes
.contains(&Phoneme::NasalNg)
);
}
// A dictionary with no entries yields a valid, empty report.
#[test]
fn test_empty_dict_is_valid() {
let dict = PronunciationDict::new();
let inventory = varna::phoneme::english();
let report = validate_inventory(&dict, &inventory);
assert!(report.is_valid());
assert_eq!(report.invalid_count(), 0);
}
// Entries added through `insert_user` are validated as well, not only the
// ones added through `insert`.
#[test]
fn test_user_overlay_validated() {
let mut dict = PronunciationDict::new();
dict.insert_user("test", &[Phoneme::NasalNg]);
let inventory = varna::phoneme::inventories::spanish();
let report = validate_inventory(&dict, &inventory);
assert!(!report.is_valid());
}
// A report serialized to JSON and parsed back must compare equal to the
// original.
#[test]
fn test_validation_report_serde_roundtrip() {
let report = ValidationReport {
language: alloc::string::ToString::to_string("es"),
invalid_entries: alloc::vec![InvalidEntry {
word: alloc::string::ToString::to_string("sing"),
invalid_phonemes: alloc::vec![Phoneme::NasalNg],
}],
};
let json = serde_json::to_string(&report).unwrap();
let roundtripped: ValidationReport = serde_json::from_str(&json).unwrap();
assert_eq!(report, roundtripped);
}
// With a language set via `with_language("en")`, `validate()` returns a
// report, and these three phonemes are expected to pass.
#[test]
fn test_validate_method_with_language() {
let mut dict = PronunciationDict::new().with_language("en");
dict.insert(
"pat",
&[Phoneme::PlosiveP, Phoneme::VowelAsh, Phoneme::PlosiveT],
);
let report = dict.validate().unwrap();
assert!(report.is_valid(), "validation failed: {report:?}");
}
// Without a language set on the dictionary, `validate()` returns `None`.
#[test]
fn test_validate_method_without_language() {
let dict = PronunciationDict::new();
assert!(dict.validate().is_none());
}
// An entry with two pronunciations is scanned in full: the word is reported
// once, with exactly one rejected phoneme.
#[test]
fn test_multiple_pronunciations_checked() {
use crate::dictionary::entry::Pronunciation;
let mut dict = PronunciationDict::new();
let entry = DictEntry::from_pronunciations(alloc::vec![
Pronunciation::new(alloc::vec![Phoneme::NasalNg]),
Pronunciation::new(alloc::vec![Phoneme::PlosiveP]),
])
.unwrap();
dict.insert_entry("test", entry);
let inventory = varna::phoneme::inventories::spanish();
let report = validate_inventory(&dict, &inventory);
assert_eq!(report.invalid_count(), 1);
assert_eq!(report.invalid_entries[0].invalid_phonemes.len(), 1);
}
}