use crate::{Entity, HierarchicalConfidence};
/// Uncertainty breakdown for a single entity span.
///
/// Every field is the complement (`1.0 - confidence`) of a confidence score,
/// so a higher value means the model was less sure.
///
/// The type is a plain bundle of three `f64`s, so it derives `Copy` and can be
/// freely duplicated, compared and debug-printed.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SpanUncertainty {
    /// Uncertainty derived from the entity's overall confidence.
    pub overall: f64,
    /// Uncertainty about where the span starts and ends.
    pub boundary: f64,
    /// Uncertainty about the entity type assigned to the span.
    pub type_score: f64,
}
/// Scores how much a text would benefit from human annotation.
///
/// Each entity contributes an uncertainty of `1.0 - confidence`; the score is
/// the largest of those values.
///
/// # Returns
///
/// * `0.0` when `entities` is empty.
/// * Never less than `0.0`, because the running maximum starts at zero.
pub fn annotation_priority(entities: &[Entity]) -> f64 {
    // Seeding with 0.0 covers the empty case and floors the result at zero.
    let mut highest = 0.0_f64;
    for entity in entities {
        let uncertainty = 1.0 - entity.confidence.value();
        // `f64::max` returns the other operand when one side is NaN, so a NaN
        // uncertainty can never become the result.
        highest = highest.max(uncertainty);
    }
    highest
}
/// Orders a batch of texts by how urgently each one needs annotation.
///
/// Every item in `texts` pairs a text with the entities found in it. Only the
/// entities are used for scoring; the text itself is never read.
///
/// # Returns
///
/// One `(index, priority)` pair per input item, where `index` is the item's
/// position in `texts` and `priority` comes from [`annotation_priority`].
/// Pairs are sorted from highest to lowest priority; the sort is stable, so
/// items with equal priority keep their input order.
pub fn rank_for_annotation<S: AsRef<str>>(texts: &[(S, Vec<Entity>)]) -> Vec<(usize, f64)> {
    let mut scored = Vec::with_capacity(texts.len());
    for (index, (_, entities)) in texts.iter().enumerate() {
        scored.push((index, annotation_priority(entities)));
    }

    // Comparing `rhs` against `lhs` yields descending order. Incomparable
    // values (NaN) are treated as equal.
    scored.sort_by(|lhs, rhs| match rhs.1.partial_cmp(&lhs.1) {
        Some(ordering) => ordering,
        None => std::cmp::Ordering::Equal,
    });
    scored
}
/// Pairs every entity with a breakdown of its uncertainty.
///
/// For each entity, in input order:
///
/// * `overall` is `1.0 - confidence`;
/// * `boundary` and `type_score` come from the entity's hierarchical
///   confidence when it has one, and both fall back to `0.5` when it does not.
///
/// The returned references borrow from `entities`.
pub fn span_uncertainties(entities: &[Entity]) -> Vec<(&Entity, SpanUncertainty)> {
    let mut report = Vec::with_capacity(entities.len());
    for entity in entities {
        let (boundary, type_score) =
            decompose_uncertainty(entity.hierarchical_confidence.as_ref());
        let uncertainty = SpanUncertainty {
            overall: 1.0 - entity.confidence.value(),
            boundary,
            type_score,
        };
        report.push((entity, uncertainty));
    }
    report
}
/// Splits a hierarchical confidence into `(boundary, type)` uncertainties.
///
/// Each component is `1.0 - confidence`. When `hc` is `None`, both components
/// default to `0.5`.
fn decompose_uncertainty(hc: Option<&HierarchicalConfidence>) -> (f64, f64) {
    // Value reported for each component when no hierarchical detail exists.
    const FALLBACK: f64 = 0.5;

    hc.map_or((FALLBACK, FALLBACK), |detail| {
        (
            1.0 - detail.boundary.value(),
            1.0 - detail.type_score.value(),
        )
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Confidence, EntityType, HierarchicalConfidence};

    /// With no entities the priority is zero.
    #[test]
    fn annotation_priority_empty() {
        let no_entities: [Entity; 0] = [];
        assert_eq!(annotation_priority(&no_entities), 0.0);
    }

    /// A single entity's priority is simply `1.0 - confidence`.
    #[test]
    fn annotation_priority_single_entity() {
        let entities = [Entity::new("Alice", EntityType::Person, 0, 5, 0.8)];
        let priority = annotation_priority(&entities);
        assert!((priority - 0.2).abs() < 1e-10, "priority = {priority}");
    }

    /// The least confident entity determines the priority.
    #[test]
    fn annotation_priority_returns_max_uncertainty() {
        let entities = [
            Entity::new("Alice", EntityType::Person, 0, 5, 0.9),
            Entity::new("IBM", EntityType::Organization, 10, 13, 0.6),
        ];
        let priority = annotation_priority(&entities);
        assert!((priority - 0.4).abs() < 1e-10, "priority = {priority}");
    }

    /// Entities with confidence 1.0 contribute no uncertainty at all.
    #[test]
    fn annotation_priority_all_certain() {
        let entities = [
            Entity::new("Alice", EntityType::Person, 0, 5, 1.0),
            Entity::new("Bob", EntityType::Person, 10, 13, 1.0),
        ];
        let priority = annotation_priority(&entities);
        assert_eq!(priority, 0.0);
    }

    /// Ranking puts the text with the least confident entity first.
    #[test]
    fn rank_for_annotation_orders_highest_uncertainty_first() {
        // Index 0: high confidence, index 1: low, index 2: medium.
        let batch = vec![
            (
                "high confidence text",
                vec![Entity::new("A", EntityType::Person, 0, 1, 0.95)],
            ),
            (
                "low confidence text",
                vec![Entity::new("B", EntityType::Person, 0, 1, 0.4)],
            ),
            (
                "medium confidence text",
                vec![Entity::new("C", EntityType::Person, 0, 1, 0.7)],
            ),
        ];

        let ranked = rank_for_annotation(&batch);
        let order: Vec<usize> = ranked.iter().map(|&(index, _)| index).collect();

        assert_eq!(order[0], 1, "low-confidence text should be first");
        assert_eq!(order[1], 2, "medium-confidence text should be second");
        assert_eq!(order[2], 0, "high-confidence text should be last");
    }

    /// An empty batch yields an empty ranking.
    #[test]
    fn rank_for_annotation_empty_batch() {
        let no_texts: Vec<(&str, Vec<Entity>)> = vec![];
        assert!(rank_for_annotation(&no_texts).is_empty());
    }

    /// Without hierarchical confidence, boundary and type fall back to 0.5.
    #[test]
    fn span_uncertainties_without_hierarchical() {
        let entities = [Entity::new("Alice", EntityType::Person, 0, 5, 0.7)];

        let spans = span_uncertainties(&entities);
        assert_eq!(spans.len(), 1);

        let uncertainty = &spans[0].1;
        assert!(
            (uncertainty.overall - 0.3).abs() < 1e-10,
            "overall = {}",
            uncertainty.overall
        );
        assert!((uncertainty.boundary - 0.5).abs() < 1e-10);
        assert!((uncertainty.type_score - 0.5).abs() < 1e-10);
    }

    /// With hierarchical confidence, each component is complemented separately.
    #[test]
    fn span_uncertainties_with_hierarchical() {
        let mut alice = Entity::new("Alice", EntityType::Person, 0, 5, 0.8);
        alice.set_hierarchical_confidence(HierarchicalConfidence::new(
            Confidence::new(0.9),
            Confidence::new(0.8),
            Confidence::new(0.6),
        ));
        let entities = [alice];

        let spans = span_uncertainties(&entities);
        let uncertainty = &spans[0].1;

        // NOTE(review): this expectation assumes that attaching a hierarchical
        // confidence makes the entity's overall confidence the geometric mean
        // of the three components. That logic lives in `Entity`, not in this
        // file; confirm there.
        let expected_overall = 1.0 - (0.9_f64 * 0.8 * 0.6).powf(1.0 / 3.0);
        assert!(
            (uncertainty.overall - expected_overall).abs() < 1e-9,
            "overall = {}",
            uncertainty.overall
        );
        assert!(
            (uncertainty.boundary - 0.4).abs() < 1e-9,
            "boundary = {}",
            uncertainty.boundary
        );
        assert!(
            (uncertainty.type_score - 0.2).abs() < 1e-9,
            "type_score = {}",
            uncertainty.type_score
        );
    }
}