use crate::rouge::{tokenize, TokenSeq};
/// Result of a METEOR-style comparison between a candidate and a reference.
#[derive(Debug, Clone)]
pub struct MeteorScore {
/// Final score; `meteor_tokens` clamps it to `[0.0, 1.0]`.
pub score: f32,
/// Number of aligned tokens divided by the candidate length.
pub precision: f32,
/// Number of aligned tokens divided by the reference length.
pub recall: f32,
/// Fragmentation term of the alignment.
///
/// NOTE(review): `meteor_tokens` stores the penalty
/// `gamma * (chunks / matches)^beta` here, not the raw `chunks / matches`
/// ratio — confirm this is what consumers expect.
pub fragmentation: f32,
}
/// Tunable parameters consumed by `meteor_tokens`.
#[derive(Debug, Clone)]
pub struct MeteorConfig {
/// Weight on precision in the F-mean denominator
/// `alpha * precision + (1 - alpha) * recall`.
pub alpha: f32,
/// Multiplier applied to the fragmentation penalty.
pub gamma: f32,
/// Exponent applied to the `chunks / matches` ratio inside the penalty.
pub beta: f32,
}
impl Default for MeteorConfig {
fn default() -> Self {
Self {
alpha: 0.9,
gamma: 0.5,
beta: 3.0,
}
}
}
/// Scores `candidate` against a single `reference` string.
///
/// Both strings are passed through `tokenize` (candidate first, then
/// reference) and the resulting token sequences are handed to
/// [`meteor_tokens`] together with `cfg`.
pub fn meteor(candidate: &str, reference: &str, cfg: &MeteorConfig) -> MeteorScore {
    meteor_tokens(&tokenize(candidate), &tokenize(reference), cfg)
}
/// Scores an already-tokenized `candidate` against a tokenized `reference`.
///
/// Special cases:
/// * both sequences empty: every field is `1.0` except `fragmentation`,
///   which is `0.0`;
/// * exactly one sequence empty, or no token could be aligned: all fields
///   are `0.0`.
///
/// Otherwise, with `m` aligned tokens:
/// * `precision = m / candidate.len()` and `recall = m / reference.len()`;
/// * `f_mean = precision * recall / (alpha * precision + (1 - alpha) * recall)`,
///   or `0.0` when that denominator is not positive;
/// * `penalty = gamma * (chunks / m)^beta`, where `chunks` comes from
///   `count_chunks` on the alignment;
/// * `score = (1 - penalty) * f_mean`, clamped to `[0.0, 1.0]`.
pub fn meteor_tokens(
    candidate: &TokenSeq,
    reference: &TokenSeq,
    cfg: &MeteorConfig,
) -> MeteorScore {
    // Shared all-zero result for every "nothing usable" exit below.
    let zero = || MeteorScore {
        score: 0.0,
        precision: 0.0,
        recall: 0.0,
        fragmentation: 0.0,
    };

    match (candidate.is_empty(), reference.is_empty()) {
        // Two empty sequences are treated as a perfect match.
        (true, true) => {
            return MeteorScore {
                score: 1.0,
                precision: 1.0,
                recall: 1.0,
                fragmentation: 0.0,
            }
        }
        (true, false) | (false, true) => return zero(),
        (false, false) => {}
    }

    let alignment = align_tokens(candidate, reference);
    let matched = alignment.len();
    if matched == 0 {
        return zero();
    }

    let matched_f = matched as f32;
    let precision = matched_f / candidate.len() as f32;
    let recall = matched_f / reference.len() as f32;

    // Weighted harmonic mean; guard against a non-positive denominator,
    // which can only arise from an unusual `alpha`.
    let weighted = cfg.alpha * precision + (1.0 - cfg.alpha) * recall;
    let f_mean = if weighted > 0.0 {
        (precision * recall) / weighted
    } else {
        0.0
    };

    let chunk_ratio = count_chunks(&alignment) as f32 / matched_f;
    let penalty = cfg.gamma * chunk_ratio.powf(cfg.beta);

    MeteorScore {
        score: ((1.0 - penalty) * f_mean).clamp(0.0, 1.0),
        precision,
        recall,
        // NOTE(review): this field receives the penalty, not the raw
        // chunks/matches ratio — confirm that is the intended meaning.
        fragmentation: penalty,
    }
}
/// Scores `candidate` against every entry of `references` and returns the
/// result with the highest `score`.
///
/// On ties the earliest reference wins, because a later result only replaces
/// the current best when its score is strictly greater. An empty
/// `references` slice yields an all-zero [`MeteorScore`].
pub fn meteor_multi(candidate: &str, references: &[&str], cfg: &MeteorConfig) -> MeteorScore {
    let fallback = MeteorScore {
        score: 0.0,
        precision: 0.0,
        recall: 0.0,
        fragmentation: 0.0,
    };

    references
        .iter()
        .map(|reference| meteor(candidate, reference, cfg))
        .fold(None::<MeteorScore>, |leader, contender| match leader {
            Some(current) if contender.score > current.score => Some(contender),
            Some(current) => Some(current),
            None => Some(contender),
        })
        .unwrap_or(fallback)
}
/// Greedily pairs candidate tokens with equal reference tokens.
///
/// Candidate tokens are visited in order; each one is paired with the first
/// reference token that compares equal and has not been paired already.
/// Candidate tokens without such a partner are skipped.
///
/// Returns `(candidate_index, reference_index)` pairs in ascending
/// candidate-index order; every index appears at most once on either side.
pub fn align_tokens(candidate: &TokenSeq, reference: &TokenSeq) -> Vec<(usize, usize)> {
    let mut taken = vec![false; reference.len()];
    let mut pairs: Vec<(usize, usize)> = Vec::new();

    for (cand_idx, cand_tok) in candidate.iter().enumerate() {
        // First still-free reference position holding an equal token, if any.
        let slot = reference.iter().enumerate().find_map(|(ref_idx, ref_tok)| {
            if !taken[ref_idx] && cand_tok == ref_tok {
                Some(ref_idx)
            } else {
                None
            }
        });

        if let Some(ref_idx) = slot {
            taken[ref_idx] = true;
            pairs.push((cand_idx, ref_idx));
        }
    }

    pairs
}
/// Counts the contiguous runs ("chunks") in an alignment.
///
/// The pairs are ordered by candidate index (stable sort). Two neighbouring
/// pairs belong to the same chunk only when both the candidate index and the
/// reference index advance by exactly one. Returns `0` for an empty
/// alignment, otherwise the number of breaks plus one.
fn count_chunks(alignment: &[(usize, usize)]) -> usize {
    if alignment.is_empty() {
        return 0;
    }

    let mut ordered: Vec<(usize, usize)> = alignment.iter().copied().collect();
    ordered.sort_by_key(|pair| pair.0);

    // Every adjacent pair that is not a +1/+1 step starts a new chunk.
    let breaks = ordered
        .windows(2)
        .filter(|step| {
            let (prev, next) = (step[0], step[1]);
            next.0 != prev.0 + 1 || next.1 != prev.1 + 1
        })
        .count();

    breaks + 1
}