anno_metrics/lib.rs
1//! Shared evaluation/analysis primitives for `anno`.
2//!
3//! This crate exists to avoid duplicating low-level analysis code across:
4//! - `anno` (library backends + analysis features)
5//! - `anno-eval` (evaluation harness, datasets, reporting)
6//!
7//! It depends only on `anno-core` (plus serde for serialization), so it can be used by both
8//! without creating dependency cycles.
9
10#![warn(missing_docs)]
11
12/// Coreference types (re-exported from `anno-core`).
13pub mod coref {
14 pub use anno_core::core::coref::*;
15}
16
17/// Small shared analysis structs.
18pub mod types {
19 use serde::{Deserialize, Serialize};
20
21 /// Chain-length stratified statistics for coreference evaluation.
22 #[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
23 pub struct CorefChainStats {
24 /// Number of long chains (>10 mentions).
25 pub long_chain_count: usize,
26 /// Number of short chains (2-10 mentions).
27 pub short_chain_count: usize,
28 /// Number of singletons (1 mention).
29 pub singleton_count: usize,
30 /// F1 score on long chains only.
31 pub long_chain_f1: f64,
32 /// F1 score on short chains only.
33 pub short_chain_f1: f64,
34 /// F1 score on singletons (if evaluated).
35 pub singleton_f1: f64,
36 }
37
38 impl CorefChainStats {
39 /// Total chain count.
40 #[must_use]
41 pub fn total_chains(&self) -> usize {
42 self.long_chain_count + self.short_chain_count + self.singleton_count
43 }
44
45 /// Weighted F1 (by chain count).
46 ///
47 /// Note: this is **not** CoNLL F1; it is a diagnostic aggregation over chain strata.
48 #[must_use]
49 pub fn weighted_f1(&self) -> f64 {
50 let total = self.total_chains();
51 if total == 0 {
52 return 0.0;
53 }
54
55 let weighted_sum = self.long_chain_f1 * self.long_chain_count as f64
56 + self.short_chain_f1 * self.short_chain_count as f64
57 + self.singleton_f1 * self.singleton_count as f64;
58 weighted_sum / total as f64
59 }
60 }
61}
62
63/// Coreference evaluation metrics.
64pub mod coref_metrics;
65
66/// Cluster encoding and merge scoring primitives for cross-context coreference.
67pub mod cluster_encoder;