use crate::ngram::NgramModel;
use liblevenshtein::dictionary::MutableMappedDictionary;
/// Scores token sequences against a borrowed [`NgramModel`].
///
/// The scorer stores only a shared reference to the model, so a scorer can never
/// outlive the model it was created from (lifetime `'a`).
pub struct SentenceScorer<'a, D: MutableMappedDictionary<Value = crate::ngram::NgramEntry>> {
    /// The n-gram model that sentence log-probabilities are requested from.
    model: &'a NgramModel<D>,
}
impl<'a, D> SentenceScorer<'a, D>
where
    D: MutableMappedDictionary<Value = crate::ngram::NgramEntry>,
{
    /// Creates a scorer that borrows `model` for the lifetime `'a`.
    pub fn new(model: &'a NgramModel<D>) -> Self {
        Self { model }
    }

    /// Returns the log-probability of `tokens`, exactly as reported by the
    /// model's `sentence_log_prob`.
    pub fn log_prob(&self, tokens: &[&str]) -> f64 {
        self.model.sentence_log_prob(tokens)
    }

    /// Returns the per-token log-probability: [`log_prob`](Self::log_prob)
    /// divided by the number of tokens.
    ///
    /// An empty slice yields `0.0` rather than dividing by zero.
    pub fn normalized_log_prob(&self, tokens: &[&str]) -> f64 {
        if tokens.is_empty() {
            return 0.0;
        }
        self.log_prob(tokens) / tokens.len() as f64
    }

    /// Returns `exp(-normalized_log_prob(tokens))`.
    ///
    /// An empty slice yields `1.0`, because the normalized log-probability is
    /// defined as `0.0` in that case.
    ///
    /// NOTE(review): using `exp` gives a true perplexity only if the model
    /// reports natural-log probabilities — confirm against `NgramModel`.
    pub fn perplexity(&self, tokens: &[&str]) -> f64 {
        (-self.normalized_log_prob(tokens)).exp()
    }

    /// Scores every sentence with [`log_prob`](Self::log_prob) and returns the
    /// `(sentence, score)` pairs ordered from highest to lowest score.
    ///
    /// The sort is stable, so sentences with equal scores keep their input
    /// order. Should a score ever be NaN, that sentence is placed after all
    /// sentences with a real score.
    pub fn rank_sentences<'b>(&self, sentences: &[&'b [&'b str]]) -> Vec<(&'b [&'b str], f64)> {
        let mut scored: Vec<_> = sentences
            .iter()
            .map(|sentence| (*sentence, self.log_prob(sentence)))
            .collect();
        // Arguments are swapped to obtain descending order. The comparator is a
        // total order even with NaN, which `sort_by` requires: with an
        // inconsistent comparator it may panic or leave an unspecified order.
        scored.sort_by(|a, b| Self::cmp_scores(b.1, a.1));
        scored
    }

    /// Returns the sentence with the highest [`log_prob`](Self::log_prob)
    /// together with its score, or `None` when `sentences` is empty.
    ///
    /// When several sentences share the highest score, the last of them is
    /// returned (the behavior of `Iterator::max_by`). A NaN score never beats a
    /// real score; it can only be returned if every score is NaN.
    pub fn best_sentence<'b>(&self, sentences: &[&'b [&'b str]]) -> Option<(&'b [&'b str], f64)> {
        sentences
            .iter()
            .map(|sentence| (*sentence, self.log_prob(sentence)))
            .max_by(|a, b| Self::cmp_scores(a.1, b.1))
    }

    /// Ascending comparison of two scores in which NaN is ordered below every
    /// other value and two NaNs compare equal.
    ///
    /// For non-NaN inputs the result is identical to `f64::partial_cmp`
    /// (so `-0.0` and `0.0` still compare equal).
    fn cmp_scores(lhs: f64, rhs: f64) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        match (lhs.is_nan(), rhs.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Less,
            (false, true) => Ordering::Greater,
            // `partial_cmp` is only `None` when an operand is NaN, which the
            // arms above have already handled; the fallback is never taken.
            (false, false) => lhs.partial_cmp(&rhs).unwrap_or(Ordering::Equal),
        }
    }
}