use alloc::{collections::BTreeSet, string::String, vec::Vec};
use serde::{Deserialize, Serialize};
use super::PronunciationDict;
/// Result of measuring how much of a text a pronunciation dictionary covers.
///
/// Produced by the dictionary's `coverage` method. "Tokens" are
/// whitespace-separated pieces of the input after punctuation trimming and
/// lowercasing; pieces that normalize to the empty string are not counted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct CoverageReport {
/// Number of non-empty normalized tokens seen in the input text.
pub total_tokens: usize,
/// How many of those tokens had a dictionary entry.
pub covered_tokens: usize,
/// Distinct normalized words with no dictionary entry, in sorted order
/// (collected through a `BTreeSet`, hence sorted and de-duplicated).
pub uncovered_words: Vec<String>,
}
impl CoverageReport {
    /// Share of tokens found in the dictionary, as a percentage in
    /// `0.0..=100.0`.
    ///
    /// An empty report (zero counted tokens) yields `0.0` rather than
    /// dividing by zero.
    #[must_use]
    pub fn coverage_pct(&self) -> f32 {
        match self.total_tokens {
            0 => 0.0,
            total => (self.covered_tokens as f32 / total as f32) * 100.0,
        }
    }

    /// Number of distinct uncovered words recorded in this report.
    #[must_use]
    pub fn uncovered_count(&self) -> usize {
        self.uncovered_words.len()
    }

    /// `true` when every counted token had a dictionary entry.
    #[must_use]
    pub fn is_fully_covered(&self) -> bool {
        self.uncovered_count() == 0
    }
}
/// Normalize one whitespace-delimited token for dictionary lookup:
/// strip leading/trailing ASCII punctuation, then lowercase.
///
/// Interior punctuation (as in `don't`) is left in place; a token made
/// entirely of punctuation normalizes to the empty string.
fn normalize_token(token: &str) -> String {
    let is_ascii_punct = |c: char| c.is_ascii_punctuation();
    let core = token.trim_matches(is_ascii_punct);
    core.to_lowercase()
}
impl PronunciationDict {
    /// Measure how much of `text` this dictionary can pronounce.
    ///
    /// `text` is split on whitespace; each piece is normalized via
    /// `normalize_token`, and pieces that normalize to the empty string
    /// are skipped. Every remaining token counts toward the total, and
    /// toward the covered count when `lookup_entry` finds it. Words
    /// without an entry are reported once each, in sorted order.
    #[must_use]
    pub fn coverage(&self, text: &str) -> CoverageReport {
        let mut total = 0_usize;
        let mut covered = 0_usize;
        // BTreeSet de-duplicates repeated misses and keeps them sorted.
        let mut missing = BTreeSet::new();

        for raw in text.split_whitespace() {
            let word = normalize_token(raw);
            if word.is_empty() {
                continue;
            }
            total += 1;
            match self.lookup_entry(&word) {
                Some(_) => covered += 1,
                None => {
                    missing.insert(word);
                }
            }
        }

        CoverageReport {
            total_tokens: total,
            covered_tokens: covered,
            uncovered_words: missing.into_iter().collect(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Every word present in the dictionary -> full coverage, 100%.
    #[test]
    fn test_coverage_full() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("the hello world");
        assert_eq!((rep.total_tokens, rep.covered_tokens), (3, 3));
        assert!(rep.is_fully_covered());
        assert_eq!(rep.coverage_pct(), 100.0);
    }

    // One unknown word is counted and reported by name.
    #[test]
    fn test_coverage_partial() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("the hello xyzzy");
        assert_eq!((rep.total_tokens, rep.covered_tokens), (3, 2));
        assert!(!rep.is_fully_covered());
        assert_eq!(rep.uncovered_words, alloc::vec!["xyzzy".to_string()]);
    }

    // Empty input: zero tokens, 0% by convention, vacuously fully covered.
    #[test]
    fn test_coverage_empty() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("");
        assert_eq!(rep.total_tokens, 0);
        assert_eq!(rep.coverage_pct(), 0.0);
        assert!(rep.is_fully_covered());
    }

    // Surrounding punctuation must not hide known words.
    #[test]
    fn test_coverage_strips_punctuation() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("hello, world! the.");
        assert_eq!((rep.total_tokens, rep.covered_tokens), (3, 3));
    }

    // Lookup happens on the lowercased form regardless of input casing.
    #[test]
    fn test_coverage_case_insensitive() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("Hello THE World");
        assert_eq!(rep.covered_tokens, 3);
    }

    // Repeated misses are counted per occurrence but listed only once.
    #[test]
    fn test_coverage_unique_uncovered() {
        let dict = PronunciationDict::english_minimal();
        let rep = dict.coverage("xyzzy xyzzy xyzzy");
        assert_eq!((rep.total_tokens, rep.covered_tokens), (3, 0));
        assert_eq!(rep.uncovered_count(), 1);
    }

    // JSON encode/decode must reproduce the report exactly.
    #[test]
    fn test_coverage_serde_roundtrip() {
        let original = CoverageReport {
            total_tokens: 10,
            covered_tokens: 8,
            uncovered_words: alloc::vec!["foo".to_string(), "bar".to_string()],
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: CoverageReport = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    // Normalization: trims edge punctuation, lowercases, may yield "".
    #[test]
    fn test_normalize_token() {
        assert_eq!(normalize_token("Hello,"), "hello");
        assert_eq!(normalize_token("\"world\""), "world");
        assert_eq!(normalize_token("(test)"), "test");
        assert_eq!(normalize_token("..."), "");
    }
}