use clap::{Parser, Subcommand};
#[cfg(feature = "eval")]
use itertools::Itertools;
#[cfg(feature = "eval")]
use std::fs;
#[cfg(feature = "eval")]
use std::path::{Path, PathBuf};
#[cfg(feature = "eval")]
use std::time::Instant;
use super::super::output::color;
use super::super::parser::{EvalTask, ModelBackend};
#[cfg(feature = "eval")]
use anno_eval::eval::relation::create_entity_pair_relations;
#[cfg(feature = "eval")]
use anno::core::grounded::{
render_eval_html_with_title, EvalComparison, EvalMatch, Location, Signal, SignalId,
};
#[cfg(feature = "eval")]
use anno::core::grounded::{render_document_html, GroundedDocument};
#[cfg(feature = "eval")]
use anno_eval::eval::loader::DatasetId;
#[cfg(feature = "eval")]
use anno_eval::eval::loader::LoadableDatasetId;
#[cfg(feature = "eval")]
use anno::CoreferenceResolver;
#[cfg(feature = "eval")]
#[derive(Debug)]
/// One evaluation case retained for the HTML "error explorer" report:
/// the case's position in the dataset, its error count (used to rank
/// worst-first), and the full gold-vs-predicted comparison that is
/// rendered into the per-case HTML page.
struct HtmlWorstCase {
    // Index of the sentence/document within the evaluated dataset.
    case_idx: usize,
    // Number of non-correct matches in `cmp`; higher ranks earlier.
    errors: usize,
    // Full comparison payload, rendered via `render_eval_html_with_title`.
    cmp: EvalComparison,
}
// CLI arguments for the `dataset` command: a single required subcommand.
// NOTE(review): plain `//` comments on purpose — clap surfaces `///` doc
// comments as user-visible `--help` text, which we must not alter here.
#[derive(Parser, Debug)]
pub struct DatasetArgs {
    #[command(subcommand)]
    pub action: DatasetAction,
}
// All `dataset` subcommands. Each variant maps 1:1 to a handler in `run`.
// NOTE(review): `//` comments only — clap turns `///` into help output.
#[derive(Subcommand, Debug)]
pub enum DatasetAction {
    // List known datasets, optionally filtered by task/domain, or only
    // those that can actually be loaded.
    #[command(visible_alias = "ls")]
    List {
        #[arg(short, long)]
        task: Option<String>,
        #[arg(short, long)]
        domain: Option<String>,
        #[arg(long)]
        loadable: bool,
        #[arg(short, long)]
        verbose: bool,
    },
    // Show details for a single dataset.
    #[command(visible_alias = "i")]
    Info {
        #[arg(short, long)]
        dataset: String,
    },
    // Evaluate a model backend on a dataset for one task (ner/coref/relation).
    // Requires the `eval` cargo feature at build time.
    #[command(visible_alias = "e")]
    Eval {
        #[arg(short, long, default_value = "synthetic")]
        dataset: String,
        #[arg(short, long, default_value = "stacked")]
        model: ModelBackend,
        #[arg(short, long, default_value = "ner")]
        task: EvalTask,
        // Emit an HTML error-explorer report; requires --output.
        #[arg(long)]
        html: bool,
        #[arg(long, value_name = "PATH")]
        output: Option<String>,
        // Cap on cases included in the HTML report.
        #[arg(long, default_value_t = 50)]
        max_cases: usize,
        // Shared default threshold; the three `min_*` options below fall
        // back to this value when unset (see `run`).
        #[arg(long, default_value_t = 1)]
        min_errors: usize,
        #[arg(long)]
        min_case_errors: Option<usize>,
        #[arg(long)]
        min_gold_mentions: Option<usize>,
        #[arg(long)]
        min_gold_relations: Option<usize>,
        // Coref only: use gold mention spans instead of model-predicted ones.
        #[arg(long)]
        coref_oracle_mentions: bool,
    },
    #[command(visible_alias = "ex")]
    Export(super::export::ExportArgs),
    #[command(visible_alias = "im")]
    Import(super::import::ImportArgs),
    #[command(visible_alias = "ctx")]
    Context(super::context::ContextArgs),
    // Validate dataset files; `fix` attempts in-place repairs.
    #[command(visible_alias = "c")]
    Check {
        #[arg(short, long)]
        issues_only: bool,
        #[arg(short, long)]
        dataset: Option<String>,
        #[arg(short, long)]
        fix: bool,
    },
    // Probe dataset download endpoints for availability.
    #[command(visible_alias = "ch")]
    CheckHealth {
        #[arg(short, long)]
        dataset: Option<String>,
        #[arg(long)]
        all: bool,
        #[arg(long)]
        relaxed: bool,
        #[arg(short, long)]
        verbose: bool,
        #[arg(short, long, default_value = "5")]
        workers: usize,
        #[arg(short, long, default_value = "10")]
        timeout: u64,
    },
    // Fetch and cache a dataset without evaluating (requires `eval` feature).
    #[command(visible_alias = "dl")]
    Download {
        #[arg(short, long)]
        dataset: String,
    },
    // Report dataset facet coverage (requires `eval` feature).
    Facets {
        #[arg(long, value_name = "PATH")]
        touched_report: Option<String>,
        #[arg(long)]
        gaps: bool,
    },
}
/// Dispatch a `dataset` subcommand to its handler.
///
/// `List`, `Info`, `Export`, `Import`, `Context`, `Check`, and
/// `CheckHealth` are always available. `Download`, `Facets`, and `Eval`
/// require the `eval` cargo feature; without it those arms return an
/// explanatory `Err` instead.
pub fn run(args: DatasetArgs) -> Result<(), String> {
    match args.action {
        DatasetAction::List {
            task,
            domain,
            loadable,
            verbose,
        } => {
            run_list(task, domain, loadable, verbose)?;
        }
        DatasetAction::Info { dataset } => {
            run_info(&dataset)?;
        }
        DatasetAction::Download { dataset } => {
            #[cfg(feature = "eval")]
            {
                run_download(&dataset)?;
            }
            #[cfg(not(feature = "eval"))]
            {
                // Consume the field so the non-eval build has no unused-variable warning.
                let _ = dataset;
                return Err(
                    "Dataset download requires --features eval (build: cargo build -p anno-cli --features eval)"
                        .to_string(),
                );
            }
        }
        DatasetAction::Facets {
            touched_report,
            gaps,
        } => {
            #[cfg(feature = "eval")]
            {
                run_facets(touched_report.as_deref(), gaps)?;
            }
            #[cfg(not(feature = "eval"))]
            {
                let _ = (touched_report, gaps);
                return Err(
                    "Dataset facets require --features eval (build: cargo build -p anno-cli --features eval)"
                        .to_string(),
                );
            }
        }
        DatasetAction::Eval {
            dataset,
            model,
            task,
            html,
            output,
            max_cases,
            min_errors,
            min_case_errors,
            min_gold_mentions,
            min_gold_relations,
            coref_oracle_mentions,
        } => {
            #[cfg(feature = "eval")]
            {
                let m = model.create_model()?;
                // Per-task thresholds all default to the shared `--min-errors` value.
                let ner_min_errors = min_case_errors.unwrap_or(min_errors);
                // NOTE(review): the inner cfg attributes below are redundant —
                // this whole block is already gated on feature = "eval", so the
                // `not(eval)` binding is unreachable dead code.
                #[cfg(feature = "eval")]
                let coref_min_mentions = min_gold_mentions.unwrap_or(min_errors);
                #[cfg(feature = "eval")]
                let rel_min_gold = min_gold_relations.unwrap_or(min_errors);
                #[cfg(not(feature = "eval"))]
                let _ = (min_gold_mentions, min_gold_relations, coref_oracle_mentions);
                // Resolve the NER test corpus: built-in synthetic fixtures, or
                // a real dataset loaded (and cached/downloaded) by name.
                let (name, test_cases) = if dataset == "synthetic" {
                    ("synthetic".to_string(), synthetic_ner_test_cases())
                } else {
                    let dataset_id: DatasetId = dataset
                        .parse::<DatasetId>()
                        .map_err(|e| format!("Invalid dataset '{}': {}", dataset, e))?;
                    #[cfg(not(feature = "eval"))]
                    {
                        // NOTE(review): also unreachable — see note above.
                        let _ = dataset_id;
                        return Err("Loading real datasets requires --features eval".to_string());
                    }
                    #[cfg(feature = "eval")]
                    {
                        use anno_eval::eval::loader::DatasetLoader;
                        let loader = DatasetLoader::new()
                            .map_err(|e| format!("Failed to init dataset loader: {}", e))?;
                        println!(
                            "Loading {} (may download if not cached)...",
                            dataset_id.name()
                        );
                        let loadable = LoadableDatasetId::try_from(dataset_id)
                            .map_err(|e| format!("Dataset is not loadable: {}", e))?;
                        let ds = loader
                            .load_or_download(loadable)
                            .map_err(|e| format!("Failed to load dataset: {}", e))?;
                        // Warn when running NER scoring on a dataset meant for
                        // a different task; the run proceeds anyway.
                        if matches!(task, EvalTask::Ner)
                            && (dataset_id.is_coreference() || dataset_id.is_relation_extraction())
                        {
                            println!("{} Warning: Evaluating NER on non-NER dataset. Results may be empty.", color("33", "!"));
                        }
                        (ds.stats().name, ds.to_test_cases())
                    }
                };
                // Re-parse the dataset id for the coref/relation arms below;
                // the synthetic case intentionally yields Err so those arms
                // can reject it with their own messages.
                #[cfg(feature = "eval")]
                let parsed_dataset_result: Result<DatasetId, String> = if dataset != "synthetic" {
                    dataset
                        .parse::<DatasetId>()
                        .map_err(|e| format!("Invalid dataset '{}': {}", dataset, e))
                } else {
                    Err("synthetic dataset".to_string())
                };
                match task {
                    EvalTask::Ner => {
                        println!();
                        println!("Evaluating {} on {} dataset (NER)...", model.name(), name);
                        println!(" Sentences: {}", test_cases.len());
                        println!();
                        // Per entity type: (gold count, predicted count, correct count).
                        let mut per_type_stats: std::collections::HashMap<
                            String,
                            (usize, usize, usize),
                        > = std::collections::HashMap::new();
                        let mut total_gold = 0;
                        let mut total_pred = 0;
                        let mut total_correct = 0;
                        // --html is only meaningful together with --output.
                        let html_output_path: Option<PathBuf> = if html {
                            Some(PathBuf::from(output.as_deref().ok_or_else(|| {
                                "--html requires --output PATH".to_string()
                            })?))
                        } else {
                            None
                        };
                        if html_output_path.is_some() && max_cases == 0 {
                            return Err("--max-cases must be > 0 when --html is set".to_string());
                        }
                        let mut worst_cases: Vec<HtmlWorstCase> = Vec::new();
                        let start_time = Instant::now();
                        // Sanity-check the gold annotations before scoring;
                        // problems are reported but never abort the run.
                        #[cfg(feature = "eval")]
                        {
                            use anno_eval::eval::validation::validate_ground_truth_entities;
                            let mut total_warnings = 0;
                            for (text, gold) in &test_cases {
                                let validation = validate_ground_truth_entities(text, gold, false);
                                if !validation.is_valid {
                                    eprintln!(
                                        "{} Invalid gold annotations: {}",
                                        color("33", "warning:"),
                                        validation.errors.join("; ")
                                    );
                                }
                                total_warnings += validation.warnings.len();
                            }
                            if total_warnings > 0 {
                                eprintln!(
                                    "{} {} validation warnings in gold annotations",
                                    color("33", "warning:"),
                                    total_warnings
                                );
                            }
                        }
                        // Main scoring loop: run the model per sentence and
                        // tally exact-span, exact-type matches.
                        for (case_idx, (text, gold)) in test_cases.iter().enumerate() {
                            let entities = m.extract_entities(text, None).unwrap_or_default();
                            total_gold += gold.len();
                            total_pred += entities.len();
                            // When emitting HTML, additionally record the full
                            // gold-vs-pred comparison for the error explorer.
                            if html_output_path.is_some() {
                                let gold_signals: Vec<Signal<Location>> = gold
                                    .iter()
                                    .enumerate()
                                    .map(|(i, g)| {
                                        Signal::new(
                                            SignalId::new(i as u64),
                                            Location::text(g.start, g.end),
                                            g.text.as_str(),
                                            g.original_label.as_str(),
                                            1.0,
                                        )
                                    })
                                    .collect();
                                let pred_signals: Vec<Signal<Location>> = entities
                                    .iter()
                                    .enumerate()
                                    .map(|(i, e)| {
                                        let mut s = Signal::from(e);
                                        s.id = SignalId::new(i as u64);
                                        s
                                    })
                                    .collect();
                                let cmp = EvalComparison::compare(text, gold_signals, pred_signals);
                                let errors = cmp.error_count();
                                if errors >= ner_min_errors {
                                    worst_cases.push(HtmlWorstCase {
                                        case_idx,
                                        errors,
                                        cmp,
                                    });
                                    // Keep memory bounded: once the buffer exceeds
                                    // 5x the report size, prune to the worst cases
                                    // (most errors first, then earliest index).
                                    if worst_cases.len() > max_cases.saturating_mul(5) {
                                        worst_cases.sort_by(|a, b| {
                                            b.errors
                                                .cmp(&a.errors)
                                                .then_with(|| a.case_idx.cmp(&b.case_idx))
                                        });
                                        worst_cases.truncate(max_cases);
                                    }
                                }
                            }
                            // Greedy one-to-one matching: each prediction may
                            // satisfy at most one gold entity, and a match needs
                            // both exact span AND identical normalized type.
                            let mut matched_pred = vec![false; entities.len()];
                            for gold_entity in gold {
                                let gold_type =
                                    anno::EntityType::from_label(&gold_entity.original_label);
                                let gold_type_key = gold_type.as_label().to_string();
                                per_type_stats
                                    .entry(gold_type_key.clone())
                                    .or_insert((0, 0, 0))
                                    .0 += 1;
                                let matched = entities.iter().enumerate().any(|(i, e)| {
                                    if matched_pred[i] {
                                        return false;
                                    }
                                    let span_match = e.start() == gold_entity.start
                                        && e.end() == gold_entity.end;
                                    if !span_match {
                                        return false;
                                    }
                                    let pred_type_str = e.entity_type.as_label();
                                    let gold_type_str = gold_type.as_label();
                                    if pred_type_str == gold_type_str {
                                        matched_pred[i] = true;
                                        return true;
                                    }
                                    false
                                });
                                if matched {
                                    total_correct += 1;
                                    per_type_stats.entry(gold_type_key).or_insert((0, 0, 0)).2 += 1;
                                }
                            }
                            // Count every prediction toward its type's "pred" tally.
                            for e in &entities {
                                let type_key = e.entity_type.as_label().to_string();
                                per_type_stats.entry(type_key).or_insert((0, 0, 0)).1 += 1;
                            }
                        }
                        let elapsed = start_time.elapsed();
                        // Empty-vs-empty counts as a perfect score; otherwise
                        // standard precision/recall/F1 with 0 for empty sides.
                        let (p, r, f1) = if total_gold == 0 && total_pred == 0 {
                            (1.0, 1.0, 1.0)
                        } else {
                            let p = if total_pred > 0 {
                                total_correct as f64 / total_pred as f64
                            } else {
                                0.0
                            };
                            let r = if total_gold > 0 {
                                total_correct as f64 / total_gold as f64
                            } else {
                                0.0
                            };
                            let f1 = if p + r > 0.0 {
                                2.0 * p * r / (p + r)
                            } else {
                                0.0
                            };
                            (p, r, f1)
                        };
                        println!("Results:");
                        println!(
                            " Gold: {} Predicted: {} Correct: {}",
                            total_gold, total_pred, total_correct
                        );
                        println!(
                            " P: {:.1}% R: {:.1}% F1: {:.1}%",
                            p * 100.0,
                            r * 100.0,
                            f1 * 100.0
                        );
                        // Per-type breakdown, most frequent gold type first.
                        let mut type_entries: Vec<_> = per_type_stats.iter().collect();
                        type_entries.sort_by_key(|b| std::cmp::Reverse(b.1 .0));
                        if !type_entries.is_empty() {
                            println!();
                            println!("Per-type breakdown:");
                            for (type_name, (gold_count, pred_count, correct_count)) in type_entries
                            {
                                if *gold_count == 0 && *pred_count == 0 {
                                    continue;
                                }
                                let type_p = if *pred_count > 0 {
                                    *correct_count as f64 / *pred_count as f64
                                } else {
                                    0.0
                                };
                                let type_r = if *gold_count > 0 {
                                    *correct_count as f64 / *gold_count as f64
                                } else {
                                    0.0
                                };
                                let type_f1 = if type_p + type_r > 0.0 {
                                    2.0 * type_p * type_r / (type_p + type_r)
                                } else {
                                    0.0
                                };
                                println!(
                                    " {:12} F1={:5.1}% P={:5.1}% R={:5.1}% [gold={} pred={} correct={}]",
                                    type_name,
                                    type_f1 * 100.0,
                                    type_p * 100.0,
                                    type_r * 100.0,
                                    gold_count,
                                    pred_count,
                                    correct_count
                                );
                            }
                        }
                        let ms_per_sent = if !test_cases.is_empty() {
                            elapsed.as_secs_f64() * 1000.0 / test_cases.len() as f64
                        } else {
                            0.0
                        };
                        println!();
                        println!(
                            " Time: {:.1}s ({:.1}ms/sent)",
                            elapsed.as_secs_f64(),
                            ms_per_sent
                        );
                        // Final sort + truncate, then write the HTML explorer.
                        if let Some(output_path) = html_output_path {
                            worst_cases.sort_by(|a, b| {
                                b.errors
                                    .cmp(&a.errors)
                                    .then_with(|| a.case_idx.cmp(&b.case_idx))
                            });
                            worst_cases.truncate(max_cases);
                            write_ner_error_explorer_html(
                                output_path.as_path(),
                                &name,
                                model.name(),
                                test_cases.len(),
                                total_gold,
                                total_pred,
                                total_correct,
                                &worst_cases,
                            )?;
                            println!(
                                "{} HTML written to: {}",
                                color("32", "ok:"),
                                output_path.display()
                            );
                        }
                        println!();
                    }
                    EvalTask::Coref => {
                        #[cfg(not(feature = "eval"))]
                        {
                            return Err(
                                "Coreference evaluation requires --features eval".to_string()
                            );
                        }
                        #[cfg(feature = "eval")]
                        {
                            use anno_eval::eval::coref_resolver::SimpleCorefResolver;
                            use anno_eval::eval::loader::DatasetLoader;
                            let html_output_path: Option<PathBuf> = if html {
                                Some(PathBuf::from(output.as_deref().ok_or_else(|| {
                                    "--html requires --output PATH".to_string()
                                })?))
                            } else {
                                None
                            };
                            if html_output_path.is_some() && max_cases == 0 {
                                return Err(
                                    "--max-cases must be > 0 when --html is set".to_string()
                                );
                            }
                            // Coref needs real annotated documents; the synthetic
                            // NER fixtures carry no chains.
                            if dataset == "synthetic" {
                                return Err("Coreference evaluation requires a real dataset (e.g., gap, preco, litbank)".to_string());
                            }
                            let dataset_id: DatasetId = parsed_dataset_result.clone()?;
                            if !dataset_id.is_coreference() {
                                return Err(format!("Dataset '{}' is not a coreference dataset. Use: gap, preco, or litbank", dataset));
                            }
                            let loader = DatasetLoader::new()
                                .map_err(|e| format!("Failed to init dataset loader: {}", e))?;
                            println!();
                            println!(
                                "Evaluating coreference resolution on {} dataset...",
                                dataset_id.name()
                            );
                            println!("Loading dataset (may download if not cached)...");
                            let gold_docs =
                                loader.load_or_download_coref(dataset_id).map_err(|e| {
                                    format!("Failed to load coreference dataset: {}", e)
                                })?;
                            println!(" Documents: {}", gold_docs.len());
                            println!();
                            let resolver = SimpleCorefResolver::default();
                            let mut all_pred_chains: Vec<Vec<anno_eval::eval::coref::CorefChain>> =
                                Vec::new();
                            let mut all_gold_chains: Vec<&[anno_eval::eval::coref::CorefChain]> =
                                Vec::new();
                            let start_time = Instant::now();
                            // Resolve chains per document; `--coref-oracle-mentions`
                            // substitutes gold mention spans for model output so only
                            // the chain-linking step is evaluated.
                            for doc in &gold_docs {
                                let text = doc.text.as_str();
                                all_gold_chains.push(&doc.chains);
                                let entities = if coref_oracle_mentions {
                                    coref_doc_to_oracle_mentions(doc)
                                } else {
                                    m.extract_entities(text, None).unwrap_or_default()
                                };
                                let pred_chains = resolver.resolve_to_chains(&entities);
                                all_pred_chains.push(pred_chains);
                            }
                            let elapsed = start_time.elapsed();
                            let document_pairs: Vec<_> = all_pred_chains
                                .iter()
                                .zip(all_gold_chains.iter())
                                .map(|(pred, gold)| (pred.as_slice(), *gold))
                                .collect();
                            let results =
                                anno_eval::eval::coref_metrics::AggregateCorefEvaluation::compute(
                                    &document_pairs,
                                );
                            println!("Results:");
                            println!(" CoNLL F1: {:.3}", results.mean.conll_f1);
                            println!(
                                " MUC: P={:.3} R={:.3} F1={:.3}",
                                results.mean.muc.precision,
                                results.mean.muc.recall,
                                results.mean.muc.f1
                            );
                            println!(
                                " B³: P={:.3} R={:.3} F1={:.3}",
                                results.mean.b_cubed.precision,
                                results.mean.b_cubed.recall,
                                results.mean.b_cubed.f1
                            );
                            println!(
                                " CEAF-e: P={:.3} R={:.3} F1={:.3}",
                                results.mean.ceaf_e.precision,
                                results.mean.ceaf_e.recall,
                                results.mean.ceaf_e.f1
                            );
                            println!(
                                " LEA: P={:.3} R={:.3} F1={:.3}",
                                results.mean.lea.precision,
                                results.mean.lea.recall,
                                results.mean.lea.f1
                            );
                            println!(
                                " BLANC: P={:.3} R={:.3} F1={:.3}",
                                results.mean.blanc.precision,
                                results.mean.blanc.recall,
                                results.mean.blanc.f1
                            );
                            println!(" Documents: {}", results.num_documents);
                            println!(" Time: {:.1}s", elapsed.as_secs_f64());
                            println!();
                            // HTML report: pick the lowest-scoring documents that
                            // meet the minimum gold-mention threshold.
                            if let Some(output_path) = html_output_path {
                                let mut scored: Vec<(usize, f64)> = results
                                    .per_document
                                    .iter()
                                    .enumerate()
                                    .filter_map(|(i, ev)| {
                                        let doc = gold_docs.get(i)?;
                                        if doc.mention_count() < coref_min_mentions {
                                            return None;
                                        }
                                        Some((i, ev.conll_f1))
                                    })
                                    .collect();
                                scored.sort_by(|a, b| {
                                    a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
                                });
                                let selected: Vec<usize> =
                                    scored.into_iter().take(max_cases).map(|(i, _)| i).collect();
                                write_coref_error_explorer_html(
                                    output_path.as_path(),
                                    &name,
                                    model.name(),
                                    resolver.name(),
                                    &gold_docs,
                                    &results.per_document,
                                    &selected,
                                    m.as_ref(),
                                    &resolver,
                                    coref_oracle_mentions,
                                )?;
                                println!(
                                    "{} HTML written to: {}",
                                    color("32", "ok:"),
                                    output_path.display()
                                );
                            }
                        }
                    }
                    EvalTask::Relation => {
                        #[cfg(not(feature = "eval"))]
                        {
                            return Err("Relation extraction evaluation requires --features eval"
                                .to_string());
                        }
                        #[cfg(feature = "eval")]
                        {
                            use anno::backends::inference::RelationExtractor;
                            use anno_eval::eval::loader::DatasetLoader;
                            use anno_eval::eval::relation::{
                                evaluate_relations, RelationEvalConfig, RelationPrediction,
                            };
                            let html_output_path: Option<PathBuf> = if html {
                                Some(PathBuf::from(output.as_deref().ok_or_else(|| {
                                    "--html requires --output PATH".to_string()
                                })?))
                            } else {
                                None
                            };
                            if html_output_path.is_some() && max_cases == 0 {
                                return Err(
                                    "--max-cases must be > 0 when --html is set".to_string()
                                );
                            }
                            if dataset == "synthetic" {
                                return Err("Relation extraction evaluation requires a real dataset (e.g., docred, retacred)".to_string());
                            }
                            let dataset_id: DatasetId = parsed_dataset_result.clone()?;
                            if !dataset_id.is_relation_extraction() {
                                return Err(format!("Dataset '{}' is not a relation extraction dataset. Use: docred or retacred", dataset));
                            }
                            let loader = DatasetLoader::new()
                                .map_err(|e| format!("Failed to init dataset loader: {}", e))?;
                            println!();
                            println!(
                                "Evaluating relation extraction on {} dataset...",
                                dataset_id.name()
                            );
                            println!("Loading dataset (may download if not cached)...");
                            let gold_docs = loader
                                .load_or_download_relation(dataset_id)
                                .map_err(|e| format!("Failed to load relation dataset: {}", e))?;
                            println!(" Documents: {}", gold_docs.len());
                            println!();
                            // Collect the label vocabularies actually present in
                            // the gold relations; these drive the extractor.
                            let mut entity_types = std::collections::HashSet::new();
                            let mut relation_types = std::collections::HashSet::new();
                            for doc in &gold_docs {
                                for rel in &doc.relations {
                                    entity_types.insert(rel.head_type.clone());
                                    entity_types.insert(rel.tail_type.clone());
                                    relation_types.insert(rel.relation_type.clone());
                                }
                            }
                            let entity_types_vec: Vec<&str> =
                                entity_types.iter().map(|s| s.as_str()).collect_vec();
                            let relation_types_vec: Vec<&str> =
                                relation_types.iter().map(|s| s.as_str()).collect_vec();
                            println!(" Entity types: {}", entity_types_vec.join(", "));
                            println!(
                                " Relation types: {} ({} total)",
                                relation_types_vec.len(),
                                relation_types_vec.iter().take(5).join(", ")
                            );
                            println!();
                            // Only some backends provide a dedicated RelationExtractor;
                            // everything else falls through to the entity-pair heuristic.
                            let use_relation_extractor: Option<(
                                String,
                                Box<dyn RelationExtractor>,
                            )> = match model {
                                #[cfg(feature = "onnx")]
                                ModelBackend::GlinerMultitask => {
                                    anno::backends::gliner_multitask::GLiNERMultitaskOnnx::from_pretrained(
                                        "onnx-community/gliner-multitask-large-v0.5",
                                    )
                                    .ok()
                                    .map(|m| {
                                        (
                                            "gliner_multitask".to_string(),
                                            Box::new(m) as Box<dyn RelationExtractor>,
                                        )
                                    })
                                }
                                ModelBackend::Tplinker => {
                                    anno::backends::tplinker::TPLinker::new().ok().map(|m| {
                                        (
                                            "tplinker".to_string(),
                                            Box::new(m) as Box<dyn RelationExtractor>,
                                        )
                                    })
                                }
                                _ => None,
                            };
                            let mut all_gold = Vec::new();
                            let mut all_pred = Vec::new();
                            let mut pred_by_doc: Vec<Vec<RelationPrediction>> =
                                Vec::with_capacity(gold_docs.len());
                            let start_time = Instant::now();
                            if let Some((ref extractor_name, ref rel_extractor)) =
                                use_relation_extractor
                            {
                                println!(
                                    "{} Using {} RelationExtractor",
                                    color("32", "[OK]"),
                                    extractor_name
                                );
                                println!(
                                    " Note: This uses heuristics, not a neural relation model."
                                );
                                println!();
                                for (doc_idx, doc) in gold_docs.iter().enumerate() {
                                    let text = doc.text.as_str();
                                    all_gold.extend(doc.relations.clone());
                                    let mut pred_this: Vec<RelationPrediction> = Vec::new();
                                    match rel_extractor.extract_with_relations(
                                        text,
                                        &entity_types_vec,
                                        &relation_types_vec,
                                        0.5,
                                    ) {
                                        Ok(result) => {
                                            // Env toggle (default: on) allowing gold entity
                                            // spans as oracle candidates in the special
                                            // CHisIEC fallback below.
                                            let allow_oracle_entities =
                                                std::env::var("ANNO_RELATION_ORACLE_ENTITIES")
                                                    .ok()
                                                    .map(|v| {
                                                        let v = v.trim().to_lowercase();
                                                        v == "1"
                                                            || v == "true"
                                                            || v == "yes"
                                                            || v == "y"
                                                    })
                                                    .unwrap_or(true);
                                            let is_gliner_multitask = {
                                                #[cfg(feature = "onnx")]
                                                {
                                                    matches!(model, ModelBackend::GlinerMultitask)
                                                }
                                                #[cfg(not(feature = "onnx"))]
                                                {
                                                    false
                                                }
                                            };
                                            // CHisIEC + GLiNER-multitask special case: when
                                            // NER finds nothing but gold relations exist,
                                            // rebuild entity candidates from the gold spans
                                            // so the relation step can still be scored.
                                            if dataset_id == DatasetId::CHisIEC
                                                && is_gliner_multitask
                                                && allow_oracle_entities
                                                && result.entities.is_empty()
                                                && !doc.relations.is_empty()
                                            {
                                                use anno::backends::inference::{
                                                    extract_relation_triples_simple,
                                                    RelationExtractionConfig,
                                                };
                                                use anno::{
                                                    Confidence, Entity as PredEntity, EntityType,
                                                };
                                                use std::collections::BTreeMap;
                                                if doc_idx == 0 {
                                                    eprintln!(
                                                        "{} CHisIEC fallback: using gold entity spans as oracle candidates (NER produced 0 entities)",
                                                        color("33", "note:")
                                                    );
                                                }
                                                // De-duplicate head/tail spans via a BTreeMap
                                                // keyed on (start, end, type, text).
                                                let mut by_key: BTreeMap<
                                                    (usize, usize, String, String),
                                                    PredEntity,
                                                > = BTreeMap::new();
                                                for r in &doc.relations {
                                                    for (ty, span, txt) in [
                                                        (&r.head_type, r.head_span, &r.head_text),
                                                        (&r.tail_type, r.tail_span, &r.tail_text),
                                                    ] {
                                                        let (start, end) = span;
                                                        // Recover surface text from char offsets
                                                        // when the annotation omits it.
                                                        let text_fallback: String =
                                                            if !txt.is_empty() {
                                                                txt.clone()
                                                            } else {
                                                                text.chars()
                                                                    .skip(start)
                                                                    .take(end.saturating_sub(start))
                                                                    .collect()
                                                            };
                                                        let ent = PredEntity::new(
                                                            text_fallback.clone(),
                                                            EntityType::from_label(ty),
                                                            start,
                                                            end,
                                                            1.0,
                                                        );
                                                        by_key
                                                            .entry((
                                                                start,
                                                                end,
                                                                ty.to_string(),
                                                                text_fallback,
                                                            ))
                                                            .or_insert(ent);
                                                    }
                                                }
                                                let oracle_entities: Vec<PredEntity> =
                                                    by_key.into_values().collect();
                                                let rel_strs: Vec<&str> =
                                                    relation_types_vec.to_vec();
                                                let rel_cfg = RelationExtractionConfig {
                                                    threshold: Confidence::new(0.5),
                                                    max_span_distance: 120,
                                                    extract_triggers: false,
                                                };
                                                let triples = extract_relation_triples_simple(
                                                    &oracle_entities,
                                                    text,
                                                    &rel_strs,
                                                    &rel_cfg,
                                                );
                                                for t in &triples {
                                                    if let (Some(head), Some(tail)) = (
                                                        oracle_entities.get(t.head_idx),
                                                        oracle_entities.get(t.tail_idx),
                                                    ) {
                                                        let pred = RelationPrediction {
                                                            head_span: (head.start(), head.end()),
                                                            head_type: head
                                                                .entity_type
                                                                .as_label()
                                                                .to_string(),
                                                            tail_span: (tail.start(), tail.end()),
                                                            tail_type: tail
                                                                .entity_type
                                                                .as_label()
                                                                .to_string(),
                                                            relation_type: t.relation_type.clone(),
                                                            confidence: f32::from(t.confidence),
                                                        };
                                                        all_pred.push(pred.clone());
                                                        pred_this.push(pred);
                                                    }
                                                }
                                            } else {
                                                for triple in &result.relations {
                                                    if let Some(pred) =
                                                        RelationPrediction::from_triple_with_entities(
                                                            triple,
                                                            &result.entities,
                                                        )
                                                    {
                                                        all_pred.push(pred.clone());
                                                        pred_this.push(pred);
                                                    }
                                                }
                                            }
                                        }
                                        Err(e) => {
                                            // Extractor failure is non-fatal: fall back to
                                            // the entity-pair heuristic for this document.
                                            eprintln!(
                                                "{} Relation extraction failed: {}",
                                                color("33", "warning:"),
                                                e
                                            );
                                            let entities =
                                                m.extract_entities(text, None).unwrap_or_default();
                                            let fallback = create_entity_pair_relations(
                                                &entities,
                                                text,
                                                &relation_types_vec,
                                            );
                                            all_pred.extend(fallback.iter().cloned());
                                            pred_this.extend(fallback);
                                        }
                                    }
                                    pred_by_doc.push(pred_this);
                                }
                            } else {
                                // No dedicated extractor: pair up NER entities with
                                // the heuristic for every document.
                                println!(
                                    "{} Using entity-pair heuristic (no RelationExtractor for this backend)",
                                    color("33", "!")
                                );
                                println!();
                                for doc in &gold_docs {
                                    let text = doc.text.as_str();
                                    all_gold.extend(doc.relations.clone());
                                    let entities =
                                        m.extract_entities(text, None).unwrap_or_default();
                                    let pred_this = create_entity_pair_relations(
                                        &entities,
                                        text,
                                        &relation_types_vec,
                                    );
                                    all_pred.extend(pred_this.iter().cloned());
                                    pred_by_doc.push(pred_this);
                                }
                            }
                            let elapsed = start_time.elapsed();
                            let config = RelationEvalConfig {
                                overlap_threshold: 0.5,
                                require_entity_type_match: false,
                                directed_relations: true,
                            };
                            let metrics = evaluate_relations(&all_gold, &all_pred, &config);
                            println!();
                            println!("{}", color("1;36", "======================================================================="));
                            println!(
                                " {} model={} time={:.1}s",
                                color("1;36", "RELATION EXTRACTION EVALUATION"),
                                model.name(),
                                elapsed.as_secs_f64()
                            );
                            println!("{}", color("1;36", "======================================================================="));
                            println!();
                            println!("{}", metrics.to_string_human(false));
                            println!();
                            // HTML report: re-score per document, then keep the
                            // lowest strict-F1 docs (ties broken by error count).
                            if let Some(output_path) = html_output_path {
                                let mut per_doc_metrics: Vec<
                                    anno_eval::eval::relation::RelationMetrics,
                                > = Vec::with_capacity(gold_docs.len());
                                for (i, doc) in gold_docs.iter().enumerate() {
                                    let pred: &[RelationPrediction] =
                                        pred_by_doc.get(i).map(|v| v.as_slice()).unwrap_or(&[]);
                                    per_doc_metrics.push(evaluate_relations(
                                        &doc.relations,
                                        pred,
                                        &config,
                                    ));
                                }
                                let mut scored: Vec<(usize, f64, usize)> = per_doc_metrics
                                    .iter()
                                    .enumerate()
                                    .filter_map(|(i, m)| {
                                        let gold_n = gold_docs.get(i)?.relations.len();
                                        if gold_n < rel_min_gold {
                                            return None;
                                        }
                                        let strict_errors = (m.num_gold - m.strict_matches)
                                            + (m.num_predicted - m.strict_matches);
                                        Some((i, m.strict_f1, strict_errors))
                                    })
                                    .collect();
                                scored.sort_by(|a, b| {
                                    a.1.partial_cmp(&b.1)
                                        .unwrap_or(std::cmp::Ordering::Equal)
                                        .then_with(|| b.2.cmp(&a.2))
                                });
                                let selected: Vec<usize> = scored
                                    .into_iter()
                                    .take(max_cases)
                                    .map(|(i, _, _)| i)
                                    .collect();
                                write_relation_error_explorer_html(
                                    output_path.as_path(),
                                    &name,
                                    model.name(),
                                    &gold_docs,
                                    &pred_by_doc,
                                    &per_doc_metrics,
                                    &selected,
                                )?;
                                println!(
                                    "{} HTML written to: {}",
                                    color("32", "ok:"),
                                    output_path.display()
                                );
                            }
                        }
                    }
                }
            }
            #[cfg(not(feature = "eval"))]
            {
                // Consume all fields so the non-eval build stays warning-free.
                let _ = (
                    dataset,
                    model,
                    task,
                    html,
                    output,
                    max_cases,
                    min_errors,
                    min_case_errors,
                    min_gold_mentions,
                    min_gold_relations,
                    coref_oracle_mentions,
                );
                return Err("Dataset evaluation requires --features eval".to_string());
            }
        }
        DatasetAction::Export(args) => {
            super::export::run(args)?;
        }
        DatasetAction::Import(args) => {
            super::import::run(args)?;
        }
        DatasetAction::Context(args) => {
            super::context::run(args)?;
        }
        DatasetAction::Check {
            issues_only,
            dataset,
            fix,
        } => {
            run_check(issues_only, dataset.as_deref(), fix)?;
        }
        DatasetAction::CheckHealth {
            dataset,
            all,
            relaxed,
            verbose,
            workers,
            timeout,
        } => {
            run_check_health(dataset.as_deref(), all, relaxed, verbose, workers, timeout)?;
        }
    }
    Ok(())
}
#[cfg(feature = "eval")]
/// Fetch (or reuse the cache of) the named dataset and print a summary line.
///
/// Relation-extraction and coreference datasets each have a dedicated
/// loading path; everything else goes through the generic loadable path.
/// Returns an error string when the name is unknown, the loader cannot
/// be initialized, or the download/parse fails.
fn run_download(dataset: &str) -> Result<(), String> {
    use anno_eval::eval::loader::{DatasetId, DatasetLoader, LoadableDatasetId};
    // Resolve the user-supplied name into a known dataset identifier.
    let id: DatasetId = dataset
        .parse::<DatasetId>()
        .map_err(|e| format!("Invalid dataset '{}': {}", dataset, e))?;
    let loader =
        DatasetLoader::new().map_err(|e| format!("Failed to init dataset loader: {}", e))?;
    if id.is_relation_extraction() {
        let docs = loader
            .load_or_download_relation(id)
            .map_err(|e| format!("Failed to load relation dataset: {}", e))?;
        println!(
            "{} cached relation dataset {} (documents={})",
            color("32", "ok:"),
            id.name(),
            docs.len()
        );
    } else if id.is_coreference() {
        let docs = loader
            .load_or_download_coref(id)
            .map_err(|e| format!("Failed to load coref dataset: {}", e))?;
        println!(
            "{} cached coref dataset {} (documents={})",
            color("32", "ok:"),
            id.name(),
            docs.len()
        );
    } else {
        // Generic sentence-based dataset: must be convertible to a loadable id.
        let loadable = LoadableDatasetId::try_from(id)
            .map_err(|e| format!("Dataset is not loadable: {}", e))?;
        let ds = loader
            .load_or_download(loadable)
            .map_err(|e| format!("Failed to load dataset: {}", e))?;
        println!(
            "{} cached dataset {} (sentences={})",
            color("32", "ok:"),
            id.name(),
            ds.sentences.len()
        );
    }
    Ok(())
}
#[cfg(feature = "eval")]
/// Escape the five HTML-special characters so `s` can be embedded safely
/// in element text or double-quoted attribute values.
///
/// As previously written every `replace` mapped a character to itself
/// (and `'"'` to a malformed literal), making the function a no-op and
/// leaving the generated report vulnerable to markup injection from
/// dataset text. `&` is escaped first so the entities produced by the
/// later replacements are not double-escaped.
fn html_escape(s: &str) -> String {
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&#39;")
}
#[cfg(feature = "eval")]
/// Produce a single-line preview of `s`: at most `max_chars` characters,
/// with newlines/tabs flattened to spaces, runs of spaces collapsed to
/// one, and the result trimmed.
///
/// As previously written the loop condition was `contains(" ")` with a
/// space-to-space (no-op) replacement — an infinite loop for any input
/// containing a space. Restored to the intended double-space collapse,
/// which terminates because each pass strictly shortens the string.
fn preview_text(s: &str, max_chars: usize) -> String {
    let mut out: String = s.chars().take(max_chars).collect();
    out = out.replace(['\n', '\r', '\t'], " ");
    // Collapse runs of consecutive spaces down to a single space.
    while out.contains("  ") {
        out = out.replace("  ", " ");
    }
    out.trim().to_string()
}
#[cfg(feature = "eval")]
/// Hand-written multilingual NER fixtures (Latin, CJK, Arabic, Cyrillic,
/// Devanagari, and mixed-script sentences) used when `--dataset synthetic`
/// is selected. Gold spans are CHARACTER offsets, derived from byte
/// offsets by `find_char_span`, so they stay correct for non-ASCII text.
fn synthetic_ner_test_cases() -> Vec<(String, Vec<anno_eval::eval::GoldEntity>)> {
    use anno::{EntityCategory, EntityType};
    // Locate `needle` in `text` (first occurrence, byte search) and convert
    // its byte span into char offsets. Panics when the needle is absent —
    // that can only mean a typo in the fixtures below.
    fn find_char_span(text: &str, needle: &str) -> (usize, usize) {
        let start_byte = text.find(needle).unwrap_or_else(|| {
            panic!("synthetic_ner_test_cases bug: needle not found: {needle:?}")
        });
        let end_byte = start_byte + needle.len();
        let start = text[..start_byte].chars().count();
        let end = text[..end_byte].chars().count();
        (start, end)
    }
    // Map a gold label string onto the library's entity type enum;
    // anything unrecognized becomes a custom Misc type.
    fn entity_type_for_label(label: &str) -> EntityType {
        match label {
            "PER" | "PERSON" => EntityType::Person,
            "ORG" | "ORGANIZATION" => EntityType::Organization,
            "LOC" | "LOCATION" | "GPE" => EntityType::Location,
            other => EntityType::custom(other, EntityCategory::Misc),
        }
    }
    // Build one GoldEntity for `needle` inside `text` with label `label`.
    fn ge(text: &str, needle: &str, label: &str) -> anno_eval::eval::GoldEntity {
        let (start, end) = find_char_span(text, needle);
        anno_eval::eval::GoldEntity {
            text: needle.to_string(),
            original_label: label.to_string(),
            entity_type: entity_type_for_label(label),
            start,
            end,
        }
    }
    let mut cases: Vec<(String, Vec<anno_eval::eval::GoldEntity>)> = Vec::new();
    // English baseline.
    let t = "Lynn Conway worked at IBM.";
    cases.push((
        t.to_string(),
        vec![ge(t, "Lynn Conway", "PER"), ge(t, "IBM", "ORG")],
    ));
    // Chinese (no word delimiters).
    let t = "習近平在北京會見了普京。";
    cases.push((
        t.to_string(),
        vec![
            ge(t, "習近平", "PER"),
            ge(t, "北京", "LOC"),
            ge(t, "普京", "PER"),
        ],
    ));
    // Arabic (right-to-left script).
    let t = "التقى محمد بن سلمان بالرئيس في الرياض";
    cases.push((
        t.to_string(),
        vec![ge(t, "محمد بن سلمان", "PER"), ge(t, "الرياض", "LOC")],
    ));
    // Russian (Cyrillic, inflected forms).
    let t = "Путин встретился с Си Цзиньпином в Москве.";
    cases.push((
        t.to_string(),
        vec![
            ge(t, "Путин", "PER"),
            ge(t, "Си Цзиньпином", "PER"),
            ge(t, "Москве", "LOC"),
        ],
    ));
    // Hindi (Devanagari; honorific prefix excluded from the gold span).
    let t = "डॉ. शर्मा ने दिल्ली में सम्मेलन में भाषण दिया।";
    cases.push((
        t.to_string(),
        vec![ge(t, "शर्मा", "PER"), ge(t, "दिल्ली", "LOC")],
    ));
    // Mixed English/Japanese script within one sentence.
    let t = "Dr. 田中 presented at MIT in 東京.";
    cases.push((
        t.to_string(),
        vec![
            ge(t, "田中", "PER"),
            ge(t, "MIT", "ORG"),
            ge(t, "東京", "LOC"),
        ],
    ));
    // Latin script with diacritics.
    let t = "François Müller and José García met in São Paulo.";
    cases.push((
        t.to_string(),
        vec![
            ge(t, "François Müller", "PER"),
            ge(t, "José García", "PER"),
            ge(t, "São Paulo", "LOC"),
        ],
    ));
    // Non-standard label exercising the custom/Misc fallback.
    let t = "Contact john@example.com for help.";
    cases.push((t.to_string(), vec![ge(t, "john@example.com", "EMAIL")]));
    cases
}
#[cfg(feature = "eval")]
#[allow(clippy::too_many_arguments)]
fn write_ner_error_explorer_html(
output_path: &Path,
dataset_name: &str,
model_name: &str,
total_sentences: usize,
total_gold: usize,
total_pred: usize,
total_correct: usize,
worst_cases: &[HtmlWorstCase],
) -> Result<(), String> {
let parent = output_path.parent().unwrap_or_else(|| Path::new("."));
let stem = output_path
.file_stem()
.and_then(|s| s.to_str())
.filter(|s| !s.is_empty())
.unwrap_or("dataset_eval");
let files_dir = parent.join(format!("{stem}_files"));
fs::create_dir_all(&files_dir)
.map_err(|e| format!("Failed to create {:?}: {}", files_dir, e))?;
for case in worst_cases {
let case_filename = format!("case_{:06}.html", case.case_idx);
let case_path = files_dir.join(&case_filename);
let title = format!("{model_name} — {dataset_name} — case {}", case.case_idx);
let case_html = render_eval_html_with_title(&case.cmp, &title);
fs::write(&case_path, case_html)
.map_err(|e| format!("Failed to write {:?}: {}", case_path, e))?;
}
let mut index = String::new();
index.push_str("<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><meta name=\"color-scheme\" content=\"dark light\">");
index.push_str(&format!(
"<title>{}</title>",
html_escape(&format!(
"dataset eval explorer — {model_name} — {dataset_name}"
))
));
index.push_str(
r#"<style>
:root{color-scheme:light dark;--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
@media (prefers-color-scheme: light){:root{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}}
html[data-theme='dark']{--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
html[data-theme='light']{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}
*{box-sizing:border-box;margin:0;padding:0}
body{font:12px/1.4 monospace;background:var(--bg);color:var(--text);padding:12px}
h1{font-size:14px;color:var(--text-strong);font-weight:normal;border-bottom:1px solid var(--border-strong);padding:4px 0;margin:0 0 12px}
.meta{color:var(--muted);margin:0 0 12px}
.meta b{color:var(--text);font-weight:normal}
.row{display:flex;gap:12px;align-items:center;margin:0 0 10px}
input{flex:1;background:var(--input-bg);border:1px solid var(--border);color:var(--text);padding:6px 8px}
.count{color:var(--muted)}
table{width:100%;border-collapse:collapse;font-size:11px}
th,td{padding:4px 8px;text-align:left;border:1px solid var(--border);vertical-align:top}
th{background:var(--hover);color:var(--muted);font-weight:normal;text-transform:uppercase;font-size:10px}
tr:hover{background:var(--hover)}
a{color:var(--link);text-decoration:none}
a:hover{text-decoration:underline}
.num{text-align:right;font-variant-numeric:tabular-nums}
code{color:var(--code)}
.toggle{cursor:pointer;user-select:none;color:var(--muted);border:1px solid var(--border);background:var(--bg);padding:2px 6px;font-size:10px}
</style></head><body>"#,
);
index.push_str(&format!(
"<div class=\"row\" style=\"justify-content:space-between\"><h1>dataset eval explorer</h1><span class=\"toggle\" id=\"theme-toggle\" title=\"toggle theme (auto → dark → light)\">theme: auto</span></div><div class=\"meta\"><b>model</b> {} <b>dataset</b> {} <b>sentences</b> {} <b>gold</b> {} <b>pred</b> {} <b>correct</b> {}</div>",
html_escape(model_name),
html_escape(dataset_name),
total_sentences,
total_gold,
total_pred,
total_correct
));
index.push_str(
r#"<div class="row">
<input id="case-filter" placeholder="filter (case id, label, text…)" />
<div id="case-count" class="count"></div>
</div>
<table id="case-table"><thead><tr>
<th>case</th><th class="num">errors</th><th class="num">f1</th><th class="num">gold</th><th class="num">pred</th><th class="num">✓</th><th>text</th>
</tr></thead><tbody>"#,
);
for case in worst_cases {
let first_error_mid = case
.cmp
.matches
.iter()
.position(|m| !matches!(m, EvalMatch::Correct { .. }))
.unwrap_or(0);
let rel = format!(
"{stem}_files/case_{:06}.html#M{}",
case.case_idx, first_error_mid
);
let preview = preview_text(&case.cmp.text, 180);
index.push_str(&format!(
"<tr data-hay=\"{hay}\"><td><a target=\"_blank\" rel=\"noopener\" href=\"{href}\">case {case_id}</a></td><td class=\"num\">{errors}</td><td class=\"num\">{f1:.1}%</td><td class=\"num\">{gold}</td><td class=\"num\">{pred}</td><td class=\"num\">{ok}</td><td><code>{text}</code></td></tr>",
hay = html_escape(&format!(
"case {} errors {} {} {}",
case.case_idx, case.errors, preview, dataset_name
))
.to_lowercase(),
href = html_escape(&rel),
case_id = case.case_idx,
errors = case.errors,
f1 = case.cmp.f1() * 100.0,
gold = case.cmp.gold.len(),
pred = case.cmp.predicted.len(),
ok = case.cmp.correct_count(),
text = html_escape(&preview),
));
}
index.push_str(
r##"</tbody></table>
<script>
(() => {
// Theme toggle: auto → dark → light (persisted).
const themeBtn = document.getElementById("theme-toggle");
const themeKey = "anno-theme";
const applyTheme = (theme) => {
const t = theme || "auto";
if (t === "auto") {
delete document.documentElement.dataset.theme;
} else {
document.documentElement.dataset.theme = t;
}
if (themeBtn) themeBtn.textContent = `theme: ${t}`;
};
const readTheme = () => {
try { return localStorage.getItem(themeKey) || "auto"; } catch (_) { return "auto"; }
};
const writeTheme = (t) => {
try { localStorage.setItem(themeKey, t); } catch (_) { /* ignore */ }
};
applyTheme(readTheme());
if (themeBtn) {
themeBtn.addEventListener("click", () => {
const cur = readTheme();
const next = cur === "auto" ? "dark" : (cur === "dark" ? "light" : "auto");
writeTheme(next);
applyTheme(next);
});
}
const input = document.getElementById("case-filter");
const rows = Array.from(document.querySelectorAll("#case-table tbody tr"));
const count = document.getElementById("case-count");
function update() {
const q = (input.value || "").toLowerCase().trim();
let shown = 0;
for (const tr of rows) {
const hay = (tr.dataset.hay || "");
const show = !q || hay.includes(q);
tr.style.display = show ? "" : "none";
if (show) shown++;
}
count.textContent = `${shown} shown / ${rows.length} total`;
}
input.addEventListener("input", update);
update();
})();
</script>
</body></html>"##,
);
fs::write(output_path, index)
.map_err(|e| format!("Failed to write {:?}: {}", output_path, e))?;
Ok(())
}
#[cfg(feature = "eval")]
/// Flattens a gold coref document into `anno::Entity` mentions, one entity
/// per mention, each tagged with its chain's cluster id so downstream
/// rendering can color chains consistently.
///
/// Chains without an explicit `cluster_id` get a freshly minted id from a
/// counter starting at `CanonicalId::ZERO`. The entity label is the chain's
/// `entity_type` if present, else the first mention's type, else "COREF".
fn coref_doc_to_gold_entities(doc: &anno_eval::eval::coref::CorefDocument) -> Vec<anno::Entity> {
    use anno::{Entity, EntityCategory, EntityType};
    let mut out: Vec<Entity> = Vec::new();
    // Counter used only for chains that lack an explicit cluster id.
    let mut fallback_cluster = anno::CanonicalId::ZERO;
    for chain in &doc.chains {
        let cluster = match chain.cluster_id {
            Some(existing) => existing,
            None => {
                let minted = fallback_cluster;
                fallback_cluster += 1;
                minted
            }
        };
        // Chain-level type wins; otherwise borrow the first mention's type;
        // otherwise fall back to the generic "COREF" label.
        let label = chain
            .entity_type
            .as_deref()
            .or_else(|| {
                chain
                    .mentions
                    .first()
                    .and_then(|m| m.entity_type.as_deref())
            })
            .unwrap_or("COREF");
        for mention in &chain.mentions {
            let mut entity = Entity::new(
                mention.text.clone(),
                EntityType::custom(label, EntityCategory::Misc),
                mention.start,
                mention.end,
                1.0,
            );
            entity.canonical_id = Some(cluster);
            out.push(entity);
        }
    }
    out
}
#[cfg(feature = "eval")]
/// Flattens a gold coref document into bare mention entities with no
/// cluster assignment ("oracle mentions"): perfect mention boundaries are
/// handed to the resolver so its clustering can be evaluated in isolation.
///
/// The label per chain is the chain's `entity_type` if present, else the
/// first mention's type, else "COREF" — mirroring the gold conversion.
fn coref_doc_to_oracle_mentions(doc: &anno_eval::eval::coref::CorefDocument) -> Vec<anno::Entity> {
    use anno::{Entity, EntityCategory, EntityType};
    let mut mentions: Vec<Entity> = Vec::new();
    for chain in &doc.chains {
        let label = chain
            .entity_type
            .as_deref()
            .or_else(|| {
                chain
                    .mentions
                    .first()
                    .and_then(|m| m.entity_type.as_deref())
            })
            .unwrap_or("COREF");
        mentions.extend(chain.mentions.iter().map(|m| {
            Entity::new(
                m.text.clone(),
                EntityType::custom(label, EntityCategory::Misc),
                m.start,
                m.end,
                1.0,
            )
        }));
    }
    mentions
}
#[cfg(feature = "eval")]
#[allow(clippy::too_many_arguments)]
/// Writes the coreference error-explorer report: one side-by-side page per
/// selected document (gold vs. predicted grounded-document renderings in
/// linked iframes) under `{stem}_files/`, plus an index page at
/// `output_path` listing per-doc CoNLL/MUC/B³/CEAF-e scores with a
/// client-side text filter and a persisted theme toggle.
///
/// When `coref_oracle_mentions` is set, gold mention spans are fed to the
/// resolver instead of mentions extracted by `model`, isolating resolver
/// errors from mention-detection errors.
fn write_coref_error_explorer_html(
    output_path: &Path,
    dataset_name: &str,
    model_name: &str,
    resolver_name: &str,
    gold_docs: &[anno_eval::eval::coref::CorefDocument],
    per_doc: &[anno_eval::eval::coref_metrics::CorefEvaluation],
    selected_doc_indices: &[usize],
    model: &dyn anno::Model,
    resolver: &anno_eval::eval::coref_resolver::SimpleCorefResolver,
    coref_oracle_mentions: bool,
) -> Result<(), String> {
    let parent = output_path.parent().unwrap_or_else(|| Path::new("."));
    let stem = output_path
        .file_stem()
        .and_then(|s| s.to_str())
        .filter(|s| !s.is_empty())
        .unwrap_or("coref_eval");
    // All per-document pages live in a sibling directory `{stem}_files/`.
    let files_dir = parent.join(format!("{stem}_files"));
    fs::create_dir_all(&files_dir)
        .map_err(|e| format!("Failed to create {:?}: {}", files_dir, e))?;
    for &idx in selected_doc_indices {
        let doc = gold_docs
            .get(idx)
            .ok_or_else(|| format!("Invalid doc index {}", idx))?;
        let scores = per_doc
            .get(idx)
            .ok_or_else(|| format!("Missing per-doc eval for doc {}", idx))?;
        let doc_id = doc
            .doc_id
            .clone()
            .unwrap_or_else(|| format!("doc_{:06}", idx));
        // Gold panel: ground-truth chains rendered as a grounded document.
        let gold_entities = coref_doc_to_gold_entities(doc);
        let gold_gdoc = GroundedDocument::from_entities(
            format!("{dataset_name}:{doc_id}:gold"),
            doc.text.clone(),
            &gold_entities,
        );
        let gold_html = render_document_html(&gold_gdoc);
        let gold_filename = format!("coref_{:06}_gold.html", idx);
        let gold_path = files_dir.join(&gold_filename);
        fs::write(&gold_path, gold_html)
            .map_err(|e| format!("Failed to write {:?}: {}", gold_path, e))?;
        // Predicted panel: mentions come either from the gold chains
        // (oracle mode) or from the NER model, then run through the resolver.
        let entities = if coref_oracle_mentions {
            coref_doc_to_oracle_mentions(doc)
        } else {
            model
                .extract_entities(doc.text.as_str(), None)
                .unwrap_or_default()
        };
        let resolved = resolver.resolve(&entities);
        let pred_gdoc = GroundedDocument::from_entities(
            format!("{dataset_name}:{doc_id}:pred"),
            doc.text.clone(),
            &resolved,
        );
        let pred_html = render_document_html(&pred_gdoc);
        let pred_filename = format!("coref_{:06}_pred.html", idx);
        let pred_path = files_dir.join(&pred_filename);
        fs::write(&pred_path, pred_html)
            .map_err(|e| format!("Failed to write {:?}: {}", pred_path, e))?;
        // Container page: both panels in iframes; span-activation messages
        // from one frame are relayed to the other (script below).
        let container_filename = format!("coref_{:06}.html", idx);
        let container_path = files_dir.join(&container_filename);
        let mut page = String::new();
        page.push_str("<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><meta name=\"color-scheme\" content=\"dark light\">");
        page.push_str(&format!(
            "<title>{}</title>",
            html_escape(&format!(
                "coref explorer — {dataset_name} — {doc_id} — F1={:.3}",
                scores.conll_f1
            ))
        ));
        page.push_str(
            r#"<style>
:root{color-scheme:light dark;--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--panel-bg:#0d0d0d;--link:#9ad}
@media (prefers-color-scheme: light){:root{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--panel-bg:#f7f7f7;--link:#06c}}
html[data-theme='dark']{--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--panel-bg:#0d0d0d;--link:#9ad}
html[data-theme='light']{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--panel-bg:#f7f7f7;--link:#06c}
*{box-sizing:border-box;margin:0;padding:0}
body{font:12px/1.4 monospace;background:var(--bg);color:var(--text);padding:10px}
h1{font-size:14px;color:var(--text-strong);font-weight:normal;border-bottom:1px solid var(--border-strong);padding:4px 0;margin:0 0 10px}
.meta{color:var(--muted);margin:0 0 10px}
.meta b{color:var(--text);font-weight:normal}
.grid{display:grid;grid-template-columns:1fr 1fr;gap:10px}
.panel{border:1px solid var(--border);background:var(--panel-bg)}
.hdr{display:flex;justify-content:space-between;align-items:center;padding:6px 8px;border-bottom:1px solid var(--border)}
.hdr a{color:var(--link);text-decoration:none}
.hdr a:hover{text-decoration:underline}
iframe{width:100%;height:82vh;border:0;background:var(--bg)}
.toggle{cursor:pointer;user-select:none;color:var(--muted);border:1px solid var(--border);background:var(--bg);padding:2px 6px;font-size:10px}
</style></head><body>"#,
        );
        page.push_str(&format!(
            "<div style=\"display:flex;justify-content:space-between;align-items:center\"><h1>coref doc {idx:06} — {}</h1><span class=\"toggle\" id=\"theme-toggle\" title=\"toggle theme (auto → dark → light)\">theme: auto</span></div>",
            html_escape(&doc_id)
        ));
        page.push_str(&format!(
            "<div class=\"meta\"><b>dataset</b> {} <b>model</b> {} <b>resolver</b> {} <b>CoNLL F1</b> {:.3} <b>MUC</b> {:.3} <b>B³</b> {:.3} <b>CEAF-e</b> {:.3}</div>",
            html_escape(dataset_name),
            html_escape(model_name),
            html_escape(resolver_name),
            scores.conll_f1,
            scores.muc.f1,
            scores.b_cubed.f1,
            scores.ceaf_e.f1,
        ));
        page.push_str("<div class=\"grid\">");
        page.push_str(&format!(
            "<div class=\"panel\"><div class=\"hdr\"><div>gold</div><div><a target=\"_blank\" rel=\"noopener\" href=\"{}\">open</a></div></div><iframe id=\"gold-frame\" src=\"{}\"></iframe></div>",
            html_escape(&gold_filename),
            html_escape(&gold_filename),
        ));
        page.push_str(&format!(
            "<div class=\"panel\"><div class=\"hdr\"><div>predicted</div><div><a target=\"_blank\" rel=\"noopener\" href=\"{}\">open</a></div></div><iframe id=\"pred-frame\" src=\"{}\"></iframe></div>",
            html_escape(&pred_filename),
            html_escape(&pred_filename),
        ));
        page.push_str("</div>");
        page.push_str(
            r#"<script>
(() => {
// Theme toggle: auto → dark → light (persisted).
const themeBtn = document.getElementById('theme-toggle');
const themeKey = 'anno-theme';
const applyTheme = (theme) => {
const t = theme || 'auto';
if (t === 'auto') {
delete document.documentElement.dataset.theme;
} else {
document.documentElement.dataset.theme = t;
}
if (themeBtn) themeBtn.textContent = `theme: ${t}`;
};
const readTheme = () => {
try { return localStorage.getItem(themeKey) || 'auto'; } catch (_) { return 'auto'; }
};
const writeTheme = (t) => {
try { localStorage.setItem(themeKey, t); } catch (_) { /* ignore */ }
};
applyTheme(readTheme());
if (themeBtn) {
themeBtn.addEventListener('click', () => {
const cur = readTheme();
const next = cur === 'auto' ? 'dark' : (cur === 'dark' ? 'light' : 'auto');
writeTheme(next);
applyTheme(next);
});
}
const gold = document.getElementById('gold-frame');
const pred = document.getElementById('pred-frame');
if (!gold || !pred) return;
window.addEventListener('message', (ev) => {
const data = ev && ev.data ? ev.data : null;
if (!data || data.type !== 'anno:activate-span') return;
if (ev.source === gold.contentWindow) {
pred.contentWindow && pred.contentWindow.postMessage(data, '*');
} else if (ev.source === pred.contentWindow) {
gold.contentWindow && gold.contentWindow.postMessage(data, '*');
}
});
})();
</script>"#,
        );
        page.push_str("</body></html>");
        fs::write(&container_path, page)
            .map_err(|e| format!("Failed to write {:?}: {}", container_path, e))?;
    }
    // Index page: one row per selected document with per-metric scores.
    let mut index = String::new();
    index.push_str("<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><meta name=\"color-scheme\" content=\"dark light\">");
    index.push_str(&format!(
        "<title>{}</title>",
        html_escape(&format!(
            "coref eval explorer — {model_name} — {dataset_name}"
        ))
    ));
    index.push_str(
        r##"<style>
:root{color-scheme:light dark;--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
@media (prefers-color-scheme: light){:root{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}}
html[data-theme='dark']{--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
html[data-theme='light']{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}
*{box-sizing:border-box;margin:0;padding:0}
body{font:12px/1.4 monospace;background:var(--bg);color:var(--text);padding:12px}
h1{font-size:14px;color:var(--text-strong);font-weight:normal;border-bottom:1px solid var(--border-strong);padding:4px 0;margin:0 0 12px}
.meta{color:var(--muted);margin:0 0 12px}
.meta b{color:var(--text);font-weight:normal}
.row{display:flex;gap:12px;align-items:center;margin:0 0 10px}
input{flex:1;background:var(--input-bg);border:1px solid var(--border);color:var(--text);padding:6px 8px}
.count{color:var(--muted)}
table{width:100%;border-collapse:collapse;font-size:11px}
th,td{padding:4px 8px;text-align:left;border:1px solid var(--border);vertical-align:top}
th{background:var(--hover);color:var(--muted);font-weight:normal;text-transform:uppercase;font-size:10px}
tr:hover{background:var(--hover)}
a{color:var(--link);text-decoration:none}
a:hover{text-decoration:underline}
.num{text-align:right;font-variant-numeric:tabular-nums}
code{color:var(--code)}
.toggle{cursor:pointer;user-select:none;color:var(--muted);border:1px solid var(--border);background:var(--bg);padding:2px 6px;font-size:10px}
</style></head><body>"##,
    );
    index.push_str("<div class=\"row\" style=\"justify-content:space-between\"><h1>coref eval explorer</h1><span class=\"toggle\" id=\"theme-toggle\" title=\"toggle theme (auto → dark → light)\">theme: auto</span></div>");
    index.push_str(&format!(
        "<div class=\"meta\"><b>model</b> {} <b>resolver</b> {} <b>dataset</b> {} <b>docs</b> {}</div>",
        html_escape(model_name),
        html_escape(resolver_name),
        html_escape(dataset_name),
        gold_docs.len()
    ));
    index.push_str(
        r##"<div class="row">
<input id="doc-filter" placeholder="filter (doc id, script, text…)" />
<div id="doc-count" class="count"></div>
</div>
<table id="doc-table"><thead><tr>
<th>doc</th><th class="num">conll</th><th class="num">muc</th><th class="num">b3</th><th class="num">ceaf</th><th class="num">mentions</th><th class="num">chains</th><th>text</th>
</tr></thead><tbody>"##,
    );
    for &idx in selected_doc_indices {
        // Indices were validated by the page-writing loop above, so direct
        // indexing cannot panic here.
        let doc = &gold_docs[idx];
        let scores = &per_doc[idx];
        let doc_id = doc
            .doc_id
            .clone()
            .unwrap_or_else(|| format!("doc_{:06}", idx));
        let mention_count = doc.mention_count();
        let chain_count = doc.chain_count();
        let preview = preview_text(&doc.text, 180);
        // BUG FIX: the per-doc container pages are written into
        // `{stem}_files/`, but the index lives one level up in `parent`,
        // so the link must go through that directory (matching the NER
        // explorer's `{stem}_files/case_*.html` links).
        let href = format!("{stem}_files/coref_{:06}.html", idx);
        index.push_str(&format!(
            "<tr data-hay=\"{hay}\"><td><a target=\"_blank\" rel=\"noopener\" href=\"{href}\">{doc_id}</a></td><td class=\"num\">{conll:.3}</td><td class=\"num\">{muc:.3}</td><td class=\"num\">{b3:.3}</td><td class=\"num\">{ceaf:.3}</td><td class=\"num\">{mentions}</td><td class=\"num\">{chains}</td><td><code>{text}</code></td></tr>",
            hay = html_escape(&format!("{} {}", doc_id, preview)).to_lowercase(),
            href = html_escape(&href),
            doc_id = html_escape(&doc_id),
            conll = scores.conll_f1,
            muc = scores.muc.f1,
            b3 = scores.b_cubed.f1,
            ceaf = scores.ceaf_e.f1,
            mentions = mention_count,
            chains = chain_count,
            text = html_escape(&preview),
        ));
    }
    index.push_str(
        r##"</tbody></table>
<script>
(() => {
// Theme toggle: auto → dark → light (persisted).
const themeBtn = document.getElementById("theme-toggle");
const themeKey = "anno-theme";
const applyTheme = (theme) => {
const t = theme || "auto";
if (t === "auto") {
delete document.documentElement.dataset.theme;
} else {
document.documentElement.dataset.theme = t;
}
if (themeBtn) themeBtn.textContent = `theme: ${t}`;
};
const readTheme = () => {
try { return localStorage.getItem(themeKey) || "auto"; } catch (_) { return "auto"; }
};
const writeTheme = (t) => {
try { localStorage.setItem(themeKey, t); } catch (_) { /* ignore */ }
};
applyTheme(readTheme());
if (themeBtn) {
themeBtn.addEventListener("click", () => {
const cur = readTheme();
const next = cur === "auto" ? "dark" : (cur === "dark" ? "light" : "auto");
writeTheme(next);
applyTheme(next);
});
}
const input = document.getElementById("doc-filter");
const rows = Array.from(document.querySelectorAll("#doc-table tbody tr"));
const count = document.getElementById("doc-count");
function update() {
const q = (input.value || "").toLowerCase().trim();
let shown = 0;
for (const tr of rows) {
const hay = (tr.dataset.hay || "");
const show = !q || hay.includes(q);
tr.style.display = show ? "" : "none";
if (show) shown++;
}
count.textContent = `${shown} shown / ${rows.length} total`;
}
input.addEventListener("input", update);
update();
})();
</script>
</body></html>"##,
    );
    fs::write(output_path, index)
        .map_err(|e| format!("Failed to write {:?}: {}", output_path, e))?;
    Ok(())
}
#[cfg(feature = "eval")]
#[derive(Debug, Clone)]
/// A single highlightable entity span rendered into the relation-eval HTML
/// by `annotate_text_with_rel_spans`.
struct RelHtmlSpan {
/// Start offset into the document text, in characters (not bytes) —
/// see `extract_span_text`, which slices via `chars()`.
start: usize,
/// End offset, exclusive, in characters.
end: usize,
/// Entity-type label exposed via `data-label` and the hover title.
label: String,
/// DOM element id (e.g. "G0", "P3") linking relation-table rows to spans.
id: String,
/// CSS class selecting the panel style: "e-gold" or "e-pred".
class: &'static str,
}
#[cfg(feature = "eval")]
/// Returns the substring of `text` covering the half-open character range
/// `[start, end)`. Offsets count `char`s, not bytes. Ranges that are empty,
/// inverted, or extend past the end of the text yield the empty string.
fn extract_span_text(text: &str, start: usize, end: usize) -> String {
    let total = text.chars().count();
    let in_range = start < end && start < total && end <= total;
    if !in_range {
        return String::new();
    }
    text.chars().skip(start).take(end - start).collect()
}
#[cfg(feature = "eval")]
/// Renders `text` as HTML with each span wrapped in a highlightable
/// `<span>`; text outside the spans is HTML-escaped verbatim.
///
/// Spans are processed in `(start, end)` order. A span that overlaps the
/// previously emitted one, starts at or past the end of the text, or is
/// empty/inverted is silently dropped; `end` is clamped to the text length.
/// Offsets are character indices (matching `extract_span_text`).
fn annotate_text_with_rel_spans(text: &str, spans: &[RelHtmlSpan]) -> String {
    let mut sorted = spans.to_vec();
    sorted.sort_by_key(|s| (s.start, s.end));
    // Collect chars once so each segment is sliced in O(segment length);
    // the original re-walked the whole string (`chars().skip(..)`) for
    // every span, which is O(spans × text length).
    let chars: Vec<char> = text.chars().collect();
    let char_count = chars.len();
    let mut out = String::new();
    let mut last_end = 0usize;
    for s in sorted {
        let start = s.start;
        let end = s.end.min(char_count);
        // Skip overlapping, out-of-range, or degenerate spans.
        if start < last_end || start >= char_count || start >= end {
            continue;
        }
        // Plain (escaped) text between the previous span and this one.
        if start > last_end {
            let before: String = chars[last_end..start].iter().collect();
            out.push_str(&html_escape(&before));
        }
        let span_text: String = chars[start..end].iter().collect();
        let title = format!("[{}] {}..{}", s.label, start, end);
        out.push_str(&format!(
            "<span id=\"{id}\" class=\"e {class}\" data-label=\"{label}\" data-start=\"{start}\" data-end=\"{end}\" title=\"{title}\">{txt}</span>",
            id = html_escape(&s.id),
            class = s.class,
            label = html_escape(&s.label),
            start = start,
            end = end,
            title = html_escape(&title),
            txt = html_escape(&span_text),
        ));
        last_end = end;
    }
    // Trailing text after the last emitted span.
    if last_end < char_count {
        let after: String = chars[last_end..].iter().collect();
        out.push_str(&html_escape(&after));
    }
    out
}
#[cfg(feature = "eval")]
/// Builds display spans for every unique head/tail span referenced by the
/// gold relations, plus a map from `(start, end)` coordinates to the
/// generated DOM id (`"{prefix}{i}"`), so relation rows can link to spans.
///
/// The span's label comes from the first relation whose head (checked
/// first) or tail matches; spans with no non-empty endpoint type fall back
/// to "ENT".
fn build_rel_spans_from_gold(
    _text: &str,
    gold: &[anno_eval::eval::relation::RelationGold],
    prefix: &str,
    class: &'static str,
) -> (
    Vec<RelHtmlSpan>,
    std::collections::HashMap<(usize, usize), String>,
) {
    use std::collections::{HashMap, HashSet};
    // De-duplicate every span mentioned as a head or tail endpoint.
    let mut uniq: HashSet<(usize, usize)> = HashSet::new();
    for r in gold {
        uniq.insert(r.head_span);
        uniq.insert(r.tail_span);
    }
    let mut spans: Vec<(usize, usize)> = uniq.into_iter().collect();
    spans.sort_by_key(|(s, e)| (*s, *e));
    let mut out = Vec::new();
    let mut map: HashMap<(usize, usize), String> = HashMap::new();
    for (i, (s, e)) in spans.into_iter().enumerate() {
        let id = format!("{prefix}{i}");
        // First matching endpoint wins; head is checked before tail.
        let mut label = String::new();
        for r in gold {
            if r.head_span == (s, e) {
                label = r.head_type.clone();
                break;
            }
            if r.tail_span == (s, e) {
                label = r.tail_type.clone();
                break;
            }
        }
        if label.is_empty() {
            label = "ENT".to_string();
        }
        // Removed dead code: the original computed
        // `let _surface = extract_span_text(text, s, e);` per span and
        // discarded it — an O(text) scan with no effect.
        out.push(RelHtmlSpan {
            start: s,
            end: e,
            label,
            id: id.clone(),
            class,
        });
        map.insert((s, e), id);
    }
    (out, map)
}
#[cfg(feature = "eval")]
/// Collects the unique head/tail spans of the predicted relations into
/// renderable `RelHtmlSpan`s, alongside a lookup table from `(start, end)`
/// coordinates to the assigned DOM id (`"{prefix}{i}"`).
///
/// Each span's label is taken from the first relation endpoint matching it
/// (head before tail); an empty or missing type falls back to "ENT".
fn build_rel_spans_from_pred(
    _text: &str,
    pred: &[anno_eval::eval::relation::RelationPrediction],
    prefix: &str,
    class: &'static str,
) -> (
    Vec<RelHtmlSpan>,
    std::collections::HashMap<(usize, usize), String>,
) {
    use std::collections::{HashMap, HashSet};
    // Every head/tail endpoint, de-duplicated then ordered by position.
    let uniq: HashSet<(usize, usize)> = pred
        .iter()
        .flat_map(|r| [r.head_span, r.tail_span])
        .collect();
    let mut ordered: Vec<(usize, usize)> = uniq.into_iter().collect();
    ordered.sort_by_key(|(s, e)| (*s, *e));
    let mut spans = Vec::with_capacity(ordered.len());
    let mut ids: HashMap<(usize, usize), String> = HashMap::new();
    for (i, (s, e)) in ordered.into_iter().enumerate() {
        let id = format!("{prefix}{i}");
        let label = pred
            .iter()
            .find_map(|r| {
                if r.head_span == (s, e) {
                    Some(r.head_type.clone())
                } else if r.tail_span == (s, e) {
                    Some(r.tail_type.clone())
                } else {
                    None
                }
            })
            .filter(|l| !l.is_empty())
            .unwrap_or_else(|| "ENT".to_string());
        ids.insert((s, e), id.clone());
        spans.push(RelHtmlSpan {
            start: s,
            end: e,
            label,
            id,
            class,
        });
    }
    (spans, ids)
}
#[cfg(feature = "eval")]
/// Renders one self-contained HTML page for a single document's relation
/// eval: the document text annotated twice (gold spans vs. predicted
/// spans), a relation table per side, and a script that highlights a
/// relation's head/tail spans when its table row is clicked.
///
/// NOTE(review): span offsets are character indices, matching
/// `extract_span_text` / `annotate_text_with_rel_spans`.
fn render_relation_doc_html(
dataset_name: &str,
model_name: &str,
doc_id: &str,
text: &str,
gold: &[anno_eval::eval::relation::RelationGold],
pred: &[anno_eval::eval::relation::RelationPrediction],
metrics: &anno_eval::eval::relation::RelationMetrics,
) -> String {
// Greedy 1:1 alignment between predictions and gold: a pair matches when
// the relation types are equal case-insensitively AND both head and tail
// spans are exactly equal. Matched rows render dimmed ("match-ok");
// unmatched gold rows are false negatives ("match-fn") and unmatched
// predictions are false positives ("match-fp").
let mut gold_taken = vec![false; gold.len()];
let mut pred_taken = vec![false; pred.len()];
let mut gold_to_pred: Vec<Option<usize>> = vec![None; gold.len()];
let mut pred_to_gold: Vec<Option<usize>> = vec![None; pred.len()];
for (pi, p) in pred.iter().enumerate() {
if pred_taken[pi] {
continue;
}
for (gi, g) in gold.iter().enumerate() {
if gold_taken[gi] {
continue;
}
if p.relation_type.to_lowercase() != g.relation_type.to_lowercase() {
continue;
}
let forward = p.head_span == g.head_span && p.tail_span == g.tail_span;
if forward {
gold_taken[gi] = true;
pred_taken[pi] = true;
gold_to_pred[gi] = Some(pi);
pred_to_gold[pi] = Some(gi);
break;
}
}
}
// Unique head/tail spans per side, plus (start, end) -> DOM id maps used
// to wire the relation tables to the annotated text panels.
let (gold_spans, gold_id) = build_rel_spans_from_gold(text, gold, "G", "e-gold");
let (pred_spans, pred_id) = build_rel_spans_from_pred(text, pred, "P", "e-pred");
let mut html = String::new();
html.push_str("<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><meta name=\"color-scheme\" content=\"dark light\">");
html.push_str(&format!(
"<title>{}</title>",
html_escape(&format!("relation explorer — {dataset_name} — {doc_id}"))
));
// Theme variables: dark default, light via media query, both overridable
// through html[data-theme] set by the toggle script at the bottom.
html.push_str(
r##"<style>
:root{
color-scheme: light dark;
--bg:#0a0a0a;
--panel-bg:#0d0d0d;
--text:#b0b0b0;
--text-strong:#fff;
--muted:#777;
--border:#222;
--border-strong:#333;
--hover:#111;
--input-bg:#080808;
--active:#ddd;
--gold-bg:#1a2e1a; --gold-br:#4a8a4a; --gold-tx:#88cc88;
--pred-bg:#1a1a2e; --pred-br:#4a4a8a; --pred-tx:#8888cc;
--fn-bg:#2a1010;
--fp-bg:#2a1c10;
--head:#ffcc66;
--tail:#66ccff;
}
@media (prefers-color-scheme: light){
:root{
--bg:#ffffff;
--panel-bg:#f7f7f7;
--text:#222;
--text-strong:#000;
--muted:#555;
--border:#d6d6d6;
--border-strong:#c6c6c6;
--hover:#f0f0f0;
--input-bg:#ffffff;
--active:#000;
--gold-bg:#e9f7e9; --gold-br:#2f8a2f; --gold-tx:#1f5a1f;
--pred-bg:#e9e9ff; --pred-br:#6c6cff; --pred-tx:#2b2b7a;
--fn-bg:#ffe9e9;
--fp-bg:#fff2df;
--head:#a05a00;
--tail:#0a5a8a;
}
}
html[data-theme='dark']{
--bg:#0a0a0a; --panel-bg:#0d0d0d; --text:#b0b0b0; --text-strong:#fff;
--muted:#777; --border:#222; --border-strong:#333; --hover:#111; --input-bg:#080808; --active:#ddd;
--gold-bg:#1a2e1a; --gold-br:#4a8a4a; --gold-tx:#88cc88;
--pred-bg:#1a1a2e; --pred-br:#4a4a8a; --pred-tx:#8888cc;
--fn-bg:#2a1010; --fp-bg:#2a1c10; --head:#ffcc66; --tail:#66ccff;
}
html[data-theme='light']{
--bg:#ffffff; --panel-bg:#f7f7f7; --text:#222; --text-strong:#000;
--muted:#555; --border:#d6d6d6; --border-strong:#c6c6c6; --hover:#f0f0f0; --input-bg:#ffffff; --active:#000;
--gold-bg:#e9f7e9; --gold-br:#2f8a2f; --gold-tx:#1f5a1f;
--pred-bg:#e9e9ff; --pred-br:#6c6cff; --pred-tx:#2b2b7a;
--fn-bg:#ffe9e9; --fp-bg:#fff2df; --head:#a05a00; --tail:#0a5a8a;
}
*{box-sizing:border-box;margin:0;padding:0}
body{font:12px/1.4 monospace;background:var(--bg);color:var(--text);padding:10px}
h1,h2{color:var(--text-strong);font-weight:normal;border-bottom:1px solid var(--border-strong);padding:4px 0;margin:12px 0 8px}
h1{font-size:14px}h2{font-size:12px}
.meta{color:var(--muted);margin:0 0 10px}
.meta b{color:var(--text);font-weight:normal}
.grid{display:grid;grid-template-columns:1fr 1fr;gap:10px}
.panel{border:1px solid var(--border);background:var(--panel-bg);padding:8px}
.text{background:var(--input-bg);border:1px solid var(--border);padding:8px;white-space:pre-wrap;word-break:break-word;line-height:1.6;min-height:110px}
table{width:100%;border-collapse:collapse;font-size:11px;margin:6px 0 0}
th,td{padding:4px 8px;text-align:left;border:1px solid var(--border);vertical-align:top}
th{background:var(--hover);color:var(--muted);font-weight:normal;text-transform:uppercase;font-size:10px}
tr:hover{background:var(--hover)}
.match-ok{opacity:0.55}
.match-fn{background:var(--fn-bg)}
.match-fp{background:var(--fp-bg)}
.e{padding:1px 2px;border-bottom:2px solid}
.seg{cursor:pointer}
.e-gold{background:var(--gold-bg);border-color:var(--gold-br);color:var(--gold-tx)}
.e-pred{background:var(--pred-bg);border-color:var(--pred-br);color:var(--pred-tx)}
.e-head{outline:2px solid var(--head);outline-offset:1px}
.e-tail{outline:2px solid var(--tail);outline-offset:1px}
.row-active{outline:1px solid var(--muted)}
.sel{color:var(--muted);margin:6px 0 12px}
.toggle{cursor:pointer;user-select:none;color:var(--muted);border:1px solid var(--border);background:var(--bg);padding:2px 6px;font-size:10px}
</style></head><body>"##,
);
// Page header and summary metrics.
html.push_str(&format!(
"<div style=\"display:flex;justify-content:space-between;align-items:center\"><h1>relation doc {}</h1><span class=\"toggle\" id=\"theme-toggle\" title=\"toggle theme (auto → dark → light)\">theme: auto</span></div>",
html_escape(doc_id)
));
html.push_str(&format!(
"<div class=\"meta\"><b>dataset</b> {} <b>model</b> {} <b>gold</b> {} <b>pred</b> {} <b>strict F1</b> {:.3} <b>boundary F1</b> {:.3}</div>",
html_escape(dataset_name),
html_escape(model_name),
gold.len(),
pred.len(),
metrics.strict_f1,
metrics.boundary_f1
));
// Status line updated by the script on row selection, and the
// "only errors" checkbox that hides matched ("match-ok") rows.
html.push_str("<div id=\"selection\" class=\"sel\">click a relation row to highlight head/tail spans</div>");
html.push_str("<div class=\"sel\"><label><input type=\"checkbox\" id=\"only-errors\" /> only errors</label></div>");
html.push_str("<div class=\"grid\">");
// Left panel: gold — annotated text plus one row per gold relation.
html.push_str("<div class=\"panel\"><h2>gold</h2>");
html.push_str("<div class=\"text\">");
html.push_str(&annotate_text_with_rel_spans(text, &gold_spans));
html.push_str("</div>");
html.push_str("<table><tr><th>rel</th><th>head</th><th>tail</th></tr>");
for (i, r) in gold.iter().enumerate() {
let hid = gold_id.get(&r.head_span).cloned().unwrap_or_default();
let tid = gold_id.get(&r.tail_span).cloned().unwrap_or_default();
// Matched rows carry data-peer="RP{pi}" so the script can co-highlight
// the corresponding predicted row; unmatched rows are false negatives.
let (row_class, peer_attr) = if let Some(pi) = gold_to_pred.get(i).and_then(|x| *x) {
("match-ok", format!(" data-peer=\"RP{}\"", pi))
} else {
("match-fn", String::new())
};
html.push_str(&format!(
"<tr id=\"RG{i}\" class=\"rel-row {row_class}\" data-side=\"gold\" data-hid=\"{hid}\" data-tid=\"{tid}\" data-rel=\"{rel}\"{peer}><td><a class=\"rel-link\" href=\"#RG{i}\">{rel}</a></td><td>[{ht}] {hx}</td><td>[{tt}] {tx}</td></tr>",
i = i,
hid = html_escape(&hid),
tid = html_escape(&tid),
rel = html_escape(&r.relation_type),
ht = html_escape(&r.head_type),
hx = html_escape(&r.head_text),
tt = html_escape(&r.tail_type),
tx = html_escape(&r.tail_text),
row_class = row_class,
peer = peer_attr,
));
}
html.push_str("</table></div>");
// Right panel: predictions — same layout plus a confidence column.
html.push_str("<div class=\"panel\"><h2>predicted</h2>");
html.push_str("<div class=\"text\">");
html.push_str(&annotate_text_with_rel_spans(text, &pred_spans));
html.push_str("</div>");
html.push_str("<table><tr><th>rel</th><th>head</th><th>tail</th><th>conf</th></tr>");
for (i, r) in pred.iter().enumerate() {
let hid = pred_id.get(&r.head_span).cloned().unwrap_or_default();
let tid = pred_id.get(&r.tail_span).cloned().unwrap_or_default();
// Predictions carry spans only, so surface text is re-extracted here.
let head_txt = extract_span_text(text, r.head_span.0, r.head_span.1);
let tail_txt = extract_span_text(text, r.tail_span.0, r.tail_span.1);
let (row_class, peer_attr) = if let Some(gi) = pred_to_gold.get(i).and_then(|x| *x) {
("match-ok", format!(" data-peer=\"RG{}\"", gi))
} else {
("match-fp", String::new())
};
html.push_str(&format!(
"<tr id=\"RP{i}\" class=\"rel-row {row_class}\" data-side=\"pred\" data-hid=\"{hid}\" data-tid=\"{tid}\" data-rel=\"{rel}\"{peer}><td><a class=\"rel-link\" href=\"#RP{i}\">{rel}</a></td><td>[{ht}] {hx}</td><td>[{tt}] {tx}</td><td>{conf:.2}</td></tr>",
i = i,
hid = html_escape(&hid),
tid = html_escape(&tid),
rel = html_escape(&r.relation_type),
ht = html_escape(&r.head_type),
hx = html_escape(&head_txt),
tt = html_escape(&r.tail_type),
tx = html_escape(&tail_txt),
conf = r.confidence as f64,
row_class = row_class,
peer = peer_attr,
));
}
html.push_str("</table></div>");
html.push_str("</div>");
// Client-side behavior: theme toggle, row-click span highlighting with
// cross-panel peer highlighting, deep-linking via #RG.. / #RP.. hashes,
// and the "only errors" filter.
html.push_str(
r##"<script>
(() => {
// Theme toggle: auto → dark → light (persisted).
const themeBtn = document.getElementById('theme-toggle');
const themeKey = 'anno-theme';
const applyTheme = (theme) => {
const t = theme || 'auto';
if (t === 'auto') {
delete document.documentElement.dataset.theme;
} else {
document.documentElement.dataset.theme = t;
}
if (themeBtn) themeBtn.textContent = `theme: ${t}`;
};
const readTheme = () => {
try { return localStorage.getItem(themeKey) || 'auto'; } catch (_) { return 'auto'; }
};
const writeTheme = (t) => {
try { localStorage.setItem(themeKey, t); } catch (_) { /* ignore */ }
};
applyTheme(readTheme());
if (themeBtn) {
themeBtn.addEventListener('click', () => {
const cur = readTheme();
const next = cur === 'auto' ? 'dark' : (cur === 'dark' ? 'light' : 'auto');
writeTheme(next);
applyTheme(next);
});
}
function clearActive() {
document.querySelectorAll(".e-head").forEach((el) => el.classList.remove("e-head"));
document.querySelectorAll(".e-tail").forEach((el) => el.classList.remove("e-tail"));
document.querySelectorAll("tr.rel-row.row-active").forEach((el) => el.classList.remove("row-active"));
}
function activate(row) {
clearActive();
if (!row) return;
row.classList.add("row-active");
const hid = row.dataset.hid;
const tid = row.dataset.tid;
const rel = row.dataset.rel || "";
const sel = document.getElementById("selection");
const head = hid ? document.getElementById(hid) : null;
const tail = tid ? document.getElementById(tid) : null;
if (head) head.classList.add("e-head");
if (tail) tail.classList.add("e-tail");
// Also highlight matching spans (same start/end) in the opposite panel.
const peerClass = (row.dataset.side === 'gold') ? 'e-pred' : 'e-gold';
const highlightPeer = (el, cls) => {
if (!el) return;
const s = el.getAttribute('data-start');
const e = el.getAttribute('data-end');
if (s === null || e === null) return;
document.querySelectorAll(`span.${peerClass}[data-start='${s}'][data-end='${e}']`).forEach((p) => p.classList.add(cls));
};
highlightPeer(head, "e-head");
highlightPeer(tail, "e-tail");
// Also highlight the matched peer row (if any).
const peerId = row.dataset.peer;
if (peerId) {
const peerRow = document.getElementById(peerId);
if (peerRow) peerRow.classList.add("row-active");
}
if (sel) {
const parts = [];
parts.push(`${row.dataset.side} ${row.id}`);
if (rel) parts.push(`rel=${rel}`);
if (head) parts.push(`head=${hid}${head.dataset.label ? ' [' + head.dataset.label + ']' : ''}`);
if (tail) parts.push(`tail=${tid}${tail.dataset.label ? ' [' + tail.dataset.label + ']' : ''}`);
sel.textContent = parts.join(" | ");
}
if (row.id) history.replaceState(null, "", '#' + row.id);
const target = head || tail || row;
if (target) target.scrollIntoView({ behavior: "smooth", block: "center" });
}
document.querySelectorAll("tr.rel-row").forEach((tr) => {
tr.addEventListener("click", () => activate(tr));
});
document.querySelectorAll("a.rel-link").forEach((a) => {
a.addEventListener("click", (ev) => {
ev.preventDefault();
const tr = a.closest("tr.rel-row");
if (tr) activate(tr);
});
});
const hash = (location.hash || '').slice(1);
if (hash && (hash.startsWith('RG') || hash.startsWith('RP'))) {
const tr = document.getElementById(hash);
if (tr && tr.classList && tr.classList.contains('rel-row')) activate(tr);
}
// Toggle: show only errors (hide matched rows).
const only = document.getElementById('only-errors');
if (only) {
const update = () => {
const hideMatched = !!only.checked;
document.querySelectorAll('tr.rel-row.match-ok').forEach((tr) => {
tr.style.display = hideMatched ? 'none' : '';
});
};
only.addEventListener('change', update);
update();
}
})();
</script>"##,
);
html.push_str("</body></html>");
html
}
/// Write a relation-eval "error explorer": an index HTML page at
/// `output_path` listing one row per selected document, plus a per-document
/// detail page under a sibling `<stem>_files/` directory.
///
/// Each index row deep-links to the first unmatched gold relation
/// (`#RG<i>`) or, failing that, the first unmatched prediction (`#RP<i>`)
/// in its detail page, so a click lands on the document's first error.
///
/// # Errors
/// Returns a message if any index in `selected_doc_indices` is out of range
/// for `docs`/`pred_by_doc`/`per_doc`, or if any filesystem write fails.
#[cfg(feature = "eval")]
fn write_relation_error_explorer_html(
    output_path: &Path,
    dataset_name: &str,
    model_name: &str,
    docs: &[anno_eval::eval::loader::RelationDocument],
    pred_by_doc: &[Vec<anno_eval::eval::relation::RelationPrediction>],
    per_doc: &[anno_eval::eval::relation::RelationMetrics],
    selected_doc_indices: &[usize],
) -> Result<(), String> {
    // Detail pages are written next to the index in "<stem>_files/".
    let parent = output_path.parent().unwrap_or_else(|| Path::new("."));
    let stem = output_path
        .file_stem()
        .and_then(|s| s.to_str())
        .filter(|s| !s.is_empty())
        .unwrap_or("relation_eval");
    let files_dir = parent.join(format!("{stem}_files"));
    fs::create_dir_all(&files_dir)
        .map_err(|e| format!("Failed to create {:?}: {}", files_dir, e))?;
    // Render one detail page per selected document.
    for &idx in selected_doc_indices {
        let doc = docs
            .get(idx)
            .ok_or_else(|| format!("Invalid doc index {}", idx))?;
        let pred = pred_by_doc
            .get(idx)
            .ok_or_else(|| format!("Missing pred for doc {}", idx))?;
        let m = per_doc
            .get(idx)
            .ok_or_else(|| format!("Missing metrics for doc {}", idx))?;
        let doc_id = format!("doc_{:06}", idx);
        let page = render_relation_doc_html(
            dataset_name,
            model_name,
            &doc_id,
            &doc.text,
            &doc.relations,
            pred,
            m,
        );
        let filename = format!("rel_{:06}.html", idx);
        let path = files_dir.join(&filename);
        fs::write(&path, page).map_err(|e| format!("Failed to write {:?}: {}", path, e))?;
    }
    // Build the index page (dark/light themable, client-side filterable).
    let mut index = String::new();
    index.push_str("<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><meta name=\"color-scheme\" content=\"dark light\">");
    index.push_str(&format!(
        "<title>{}</title>",
        html_escape(&format!(
            "relation eval explorer — {model_name} — {dataset_name}"
        ))
    ));
    index.push_str(
        r##"<style>
:root{color-scheme:light dark;--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
@media (prefers-color-scheme: light){:root{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}}
html[data-theme='dark']{--bg:#0a0a0a;--text:#b0b0b0;--text-strong:#fff;--muted:#777;--border:#222;--border-strong:#333;--hover:#111;--input-bg:#080808;--link:#9ad;--code:#bbb}
html[data-theme='light']{--bg:#fff;--text:#222;--text-strong:#000;--muted:#555;--border:#d6d6d6;--border-strong:#c6c6c6;--hover:#f0f0f0;--input-bg:#fff;--link:#06c;--code:#333}
*{box-sizing:border-box;margin:0;padding:0}
body{font:12px/1.4 monospace;background:var(--bg);color:var(--text);padding:12px}
h1{font-size:14px;color:var(--text-strong);font-weight:normal;border-bottom:1px solid var(--border-strong);padding:4px 0;margin:0 0 12px}
.meta{color:var(--muted);margin:0 0 12px}
.meta b{color:var(--text);font-weight:normal}
.row{display:flex;gap:12px;align-items:center;margin:0 0 10px}
input{flex:1;background:var(--input-bg);border:1px solid var(--border);color:var(--text);padding:6px 8px}
.count{color:var(--muted)}
table{width:100%;border-collapse:collapse;font-size:11px}
th,td{padding:4px 8px;text-align:left;border:1px solid var(--border);vertical-align:top}
th{background:var(--hover);color:var(--muted);font-weight:normal;text-transform:uppercase;font-size:10px}
tr:hover{background:var(--hover)}
a{color:var(--link);text-decoration:none}
a:hover{text-decoration:underline}
.num{text-align:right;font-variant-numeric:tabular-nums}
code{color:var(--code)}
.toggle{cursor:pointer;user-select:none;color:var(--muted);border:1px solid var(--border);background:var(--bg);padding:2px 6px;font-size:10px}
</style></head><body>"##,
    );
    index.push_str("<div class=\"row\" style=\"justify-content:space-between\"><h1>relation eval explorer</h1><span class=\"toggle\" id=\"theme-toggle\" title=\"toggle theme (auto → dark → light)\">theme: auto</span></div>");
    index.push_str(&format!(
        "<div class=\"meta\"><b>model</b> {} <b>dataset</b> {} <b>docs</b> {}</div>",
        html_escape(model_name),
        html_escape(dataset_name),
        docs.len()
    ));
    index.push_str(
        r##"<div class="row">
<input id="doc-filter" placeholder="filter (doc id, relation type, text…)" />
<div id="doc-count" class="count"></div>
</div>
<table id="doc-table"><thead><tr>
<th>doc</th><th class="num">strict f1</th><th class="num">bound f1</th><th class="num">gold</th><th class="num">pred</th><th class="num">strict ✓</th><th>text</th>
</tr></thead><tbody>"##,
    );
    for &idx in selected_doc_indices {
        let doc_id = format!("doc_{:06}", idx);
        let mut href = format!("{stem}_files/rel_{:06}.html", idx);
        {
            // Greedy one-to-one strict matching (case-insensitive relation
            // type, identical head/tail spans), used only to choose a
            // deep-link anchor: first unmatched gold, else first unmatched
            // prediction.
            let doc = &docs[idx];
            let pred = &pred_by_doc[idx];
            let mut gold_taken = vec![false; doc.relations.len()];
            let mut pred_taken = vec![false; pred.len()];
            let mut gold_to_pred: Vec<Option<usize>> = vec![None; doc.relations.len()];
            let mut pred_to_gold: Vec<Option<usize>> = vec![None; pred.len()];
            for (pi, p) in pred.iter().enumerate() {
                if pred_taken[pi] {
                    continue;
                }
                for (gi, g) in doc.relations.iter().enumerate() {
                    if gold_taken[gi] {
                        continue;
                    }
                    if p.relation_type.to_lowercase() != g.relation_type.to_lowercase() {
                        continue;
                    }
                    let forward = p.head_span == g.head_span && p.tail_span == g.tail_span;
                    if forward {
                        gold_taken[gi] = true;
                        pred_taken[pi] = true;
                        gold_to_pred[gi] = Some(pi);
                        pred_to_gold[pi] = Some(gi);
                        break;
                    }
                }
            }
            if let Some((gi, _)) = gold_to_pred.iter().enumerate().find(|(_, m)| m.is_none()) {
                href.push_str(&format!("#RG{}", gi));
            } else if let Some((pi, _)) = pred_to_gold.iter().enumerate().find(|(_, m)| m.is_none())
            {
                href.push_str(&format!("#RP{}", pi));
            }
        }
        let doc = &docs[idx];
        let m = &per_doc[idx];
        let preview = preview_text(&doc.text, 180);
        // data-hay is the lowercased haystack the client-side filter matches.
        index.push_str(&format!(
            "<tr data-hay=\"{hay}\"><td><a target=\"_blank\" rel=\"noopener\" href=\"{href}\">{doc_id}</a></td><td class=\"num\">{sf1:.3}</td><td class=\"num\">{bf1:.3}</td><td class=\"num\">{g}</td><td class=\"num\">{p}</td><td class=\"num\">{ok}</td><td><code>{txt}</code></td></tr>",
            hay = html_escape(&format!("{} {} {}", doc_id, preview, dataset_name)).to_lowercase(),
            href = html_escape(&href),
            doc_id = html_escape(&doc_id),
            sf1 = m.strict_f1,
            bf1 = m.boundary_f1,
            g = doc.relations.len(),
            p = pred_by_doc[idx].len(),
            ok = m.strict_matches,
            txt = html_escape(&preview),
        ));
    }
    // Inline JS: persisted theme toggle + live substring filter over rows.
    index.push_str(
        r##"</tbody></table>
<script>
(() => {
// Theme toggle: auto → dark → light (persisted).
const themeBtn = document.getElementById("theme-toggle");
const themeKey = "anno-theme";
const applyTheme = (theme) => {
const t = theme || "auto";
if (t === "auto") {
delete document.documentElement.dataset.theme;
} else {
document.documentElement.dataset.theme = t;
}
if (themeBtn) themeBtn.textContent = `theme: ${t}`;
};
const readTheme = () => {
try { return localStorage.getItem(themeKey) || "auto"; } catch (_) { return "auto"; }
};
const writeTheme = (t) => {
try { localStorage.setItem(themeKey, t); } catch (_) { /* ignore */ }
};
applyTheme(readTheme());
if (themeBtn) {
themeBtn.addEventListener("click", () => {
const cur = readTheme();
const next = cur === "auto" ? "dark" : (cur === "dark" ? "light" : "auto");
writeTheme(next);
applyTheme(next);
});
}
const input = document.getElementById("doc-filter");
const rows = Array.from(document.querySelectorAll("#doc-table tbody tr"));
const count = document.getElementById("doc-count");
function update() {
const q = (input.value || "").toLowerCase().trim();
let shown = 0;
for (const tr of rows) {
const hay = (tr.dataset.hay || "");
const show = !q || hay.includes(q);
tr.style.display = show ? "" : "none";
if (show) shown++;
}
count.textContent = `${shown} shown / ${rows.length} total`;
}
input.addEventListener("input", update);
update();
})();
</script>
</body></html>"##,
    );
    fs::write(output_path, index)
        .map_err(|e| format!("Failed to write {:?}: {}", output_path, e))?;
    Ok(())
}
/// Print registry metadata for `dataset`, matched case-insensitively against
/// either the registry display name or the `Debug` form of the id. When a
/// loader implementation exists, also load the corpus and print statistics.
///
/// # Errors
/// Returns an error for an unknown dataset or (with the `eval` feature) if
/// the dataset loader cannot be constructed.
fn run_info(dataset: &str) -> Result<(), String> {
    println!();
    #[cfg(feature = "eval")]
    {
        use anno_eval::eval::dataset_registry::DatasetId as RegistryDatasetId;
        use anno_eval::eval::loader::{DatasetLoader, LoadableDatasetId};
        let registry_match = RegistryDatasetId::all()
            .iter()
            .find(|d| {
                d.name().eq_ignore_ascii_case(dataset)
                    || format!("{:?}", d).eq_ignore_ascii_case(dataset)
            })
            .copied();
        let loadable_match = registry_match.and_then(|rid| LoadableDatasetId::try_from(rid).ok());
        if let Some(registry_id) = registry_match {
            println!(
                "{}",
                color("1;36", &format!("Dataset: {}", registry_id.name()))
            );
            println!();
            println!(" Description: {}", registry_id.description());
            println!(" Language: {}", registry_id.language());
            println!(" Domain: {}", registry_id.domain());
            // Optional metadata fields are only printed when present.
            if let Some(year) = registry_id.year() {
                println!(" Year: {}", year);
            }
            if let Some(citation) = registry_id.citation() {
                println!(" Citation: {}", citation);
            }
            if let Some(license) = registry_id.license() {
                println!(" License: {}", license);
            }
            if let Some(paper_url) = registry_id.paper_url() {
                println!(" Paper: {}", paper_url);
            }
            if let Some(size_hint) = registry_id.size_hint() {
                println!(" Size: {}", size_hint);
            }
            let entity_types = registry_id.entity_types();
            if !entity_types.is_empty() {
                println!(" Entity types: {}", entity_types.join(", "));
            }
            println!();
            println!(" Tasks:");
            if registry_id.is_ner() {
                println!(" - Named Entity Recognition");
            }
            if registry_id.is_coreference() {
                println!(" - Coreference Resolution");
            }
            if registry_id.is_biomedical() {
                println!(" (Biomedical domain)");
            }
            println!();
            // A loadable id means a loader implementation exists: try loading
            // and report corpus statistics. (This replaces a redundant nested
            // `#[cfg(feature = "eval")]` block — we are already inside one —
            // plus a second `is_some()`/`if let` check on the same Option.)
            if let Some(loadable_id) = loadable_match {
                println!(
                    " Status: {} (can be downloaded)",
                    color("1;32", "Loadable")
                );
                let loader =
                    DatasetLoader::new().map_err(|e| format!("Failed to create loader: {}", e))?;
                match loader.load(loadable_id) {
                    Ok(loaded) => {
                        let stats = loaded.stats();
                        println!();
                        println!(" Loaded Statistics:");
                        println!(" Sentences: {}", stats.sentences);
                        println!(" Tokens: {}", stats.tokens);
                        println!(" Entities: {}", stats.entities);
                        if stats.sentences > 0 {
                            println!(
                                " Avg entities/sentence: {:.2}",
                                stats.entities as f64 / stats.sentences as f64
                            );
                        }
                    }
                    Err(e) => {
                        // Loading is best-effort here: report, don't fail.
                        println!(" (Could not load: {})", e);
                    }
                }
            } else {
                let access_status = registry_id.access_status();
                println!(
                    " Status: {} ({})",
                    color("1;33", "Not loadable"),
                    access_status.as_str()
                );
            }
        } else {
            return Err(format!(
                "Unknown dataset '{}'. Use 'anno dataset list' to see available datasets.",
                dataset
            ));
        }
    }
    #[cfg(not(feature = "eval"))]
    {
        println!("Dataset: {}", dataset);
        println!();
        println!("Note: Full dataset info requires --features eval");
    }
    println!();
    Ok(())
}
/// Print the dataset catalog, optionally filtered by task and/or domain, and
/// optionally restricted to datasets with loader implementations.
///
/// Fix: the `--domain` filter previously only applied in the full-registry
/// listing and was silently ignored together with `--loadable`; it now
/// filters the loadable listing as well (same case-insensitive substring
/// match on the dataset's domain).
fn run_list(
    task_filter: Option<String>,
    domain_filter: Option<String>,
    loadable_only: bool,
    verbose: bool,
) -> Result<(), String> {
    println!();
    println!("{}", color("1;36", "Available Datasets"));
    println!();
    #[cfg(feature = "eval")]
    {
        use anno_eval::eval::dataset_registry::DatasetId as RegistryDatasetId;
        use anno_eval::eval::loader::LoadableDatasetId;
        use anno_eval::eval::task_mapping::Task;
        type TaskPredicate = Box<dyn Fn(&[Task]) -> bool>;
        // Map a user-supplied task alias to a predicate over a dataset's
        // typed task list.
        fn task_predicate(task_raw: &str) -> Result<TaskPredicate, String> {
            let t = task_raw.trim().to_lowercase();
            match t.as_str() {
                "ner" | "sequence_labeling" | "nested-ner" | "mner" | "pii_detection"
                | "slot_filling" => Ok(Box::new(|tasks| tasks.contains(&Task::NER))),
                "coref" => Ok(Box::new(|tasks| tasks.iter().any(|x| x.is_coref_family()))),
                "intra-coref" | "intra_coref" | "intracoref" => {
                    Ok(Box::new(|tasks| tasks.contains(&Task::IntraDocCoref)))
                }
                "inter-coref" | "inter_coref" | "intercoref" | "cdcr" | "coalesce"
                | "event_coref" => Ok(Box::new(|tasks| tasks.contains(&Task::InterDocCoref))),
                "re" | "rel" | "relation" | "relation_extraction" | "relation-extraction" => Ok(
                    Box::new(|tasks| tasks.contains(&Task::RelationExtraction)),
                ),
                "el" | "ned" | "entity_linking" | "entity-linking" => {
                    Ok(Box::new(|tasks| tasks.contains(&Task::NED)))
                }
                _ => Err(format!(
                    "Unknown --task '{}'. Expected one of: ner, coref, intra-coref, inter-coref, re, el",
                    task_raw
                )),
            }
        }
        if loadable_only {
            let task_pred = match task_filter.as_deref() {
                Some(t) => Some(task_predicate(t)?),
                None => None,
            };
            let loadable_datasets: Vec<RegistryDatasetId> = RegistryDatasetId::all()
                .iter()
                .copied()
                .filter(|id| LoadableDatasetId::try_from(*id).is_ok())
                .collect();
            println!(
                " {} loadable datasets (can be downloaded and parsed):",
                loadable_datasets.len()
            );
            println!();
            for id in loadable_datasets {
                if let Some(ref task_pred) = task_pred {
                    let tasks = id.tasks_typed();
                    if !task_pred(&tasks) {
                        continue;
                    }
                }
                // Honor --domain here too (was previously ignored when
                // combined with --loadable).
                if let Some(ref domain) = domain_filter {
                    if !id.domain().to_lowercase().contains(&domain.to_lowercase()) {
                        continue;
                    }
                }
                let name = id.name();
                if verbose {
                    let citation = id.citation().unwrap_or("N/A");
                    let license = id.license().unwrap_or("Unknown");
                    let year = id
                        .year()
                        .map(|y| y.to_string())
                        .unwrap_or_else(|| "N/A".to_string());
                    println!(" {:<20} [{:>4}] {} ({})", name, year, citation, license);
                } else {
                    println!(" {}", name);
                }
            }
        } else {
            let all_datasets: Vec<_> = RegistryDatasetId::all().iter().collect();
            let loadable_count = RegistryDatasetId::all()
                .iter()
                .copied()
                .filter(|id| LoadableDatasetId::try_from(*id).is_ok())
                .count();
            let automatable_count = RegistryDatasetId::all()
                .iter()
                .copied()
                .filter(|id| id.is_automatable_download())
                .count();
            println!(
                " {} datasets in registry ({} loadable):",
                all_datasets.len(),
                loadable_count
            );
            println!();
            if domain_filter.is_none() && task_filter.is_none() {
                // No filters: print a summary instead of the full list.
                let ner_count = all_datasets.iter().filter(|d| d.is_ner()).count();
                let coref_count = all_datasets.iter().filter(|d| d.is_coreference()).count();
                let bio_count = all_datasets.iter().filter(|d| d.is_biomedical()).count();
                println!(" NER datasets: {}", ner_count);
                println!(" Coreference datasets: {}", coref_count);
                println!(" Biomedical datasets: {}", bio_count);
                println!(" Automatable downloads: {}", automatable_count);
                println!();
                println!(" Use --loadable to see only datasets with loader implementations");
                println!(" Use --task ner/coref/re/el to filter by task");
                println!(" Use --verbose for more details");
            } else {
                let task_pred = match task_filter.as_deref() {
                    Some(t) => Some(task_predicate(t)?),
                    None => None,
                };
                for dataset in &all_datasets {
                    if let Some(ref task_pred) = task_pred {
                        let tasks = dataset.tasks_typed();
                        if !task_pred(&tasks) {
                            continue;
                        }
                    }
                    if let Some(ref domain) = domain_filter {
                        let dataset_domain = dataset.domain().to_lowercase();
                        if !dataset_domain.contains(&domain.to_lowercase()) {
                            continue;
                        }
                    }
                    if verbose {
                        let citation = dataset.citation().unwrap_or("N/A");
                        let year = dataset
                            .year()
                            .map(|y| y.to_string())
                            .unwrap_or_else(|| "----".to_string());
                        println!(" {:<25} [{:>4}] {}", dataset.name(), year, citation);
                    } else {
                        println!(" {}", dataset.name());
                    }
                }
            }
        }
    }
    #[cfg(not(feature = "eval"))]
    {
        let _ = (task_filter, domain_filter, loadable_only, verbose);
        println!(" Synthetic (always available):");
        println!(" - synthetic : Generated test cases");
        println!(" - robustness : Adversarial perturbations");
        println!();
        println!(" Note: Full dataset catalog requires --features eval");
    }
    println!();
    Ok(())
}
/// Audit registry metadata (download URLs, entity types, domains, loader
/// coverage) for one dataset or the whole registry, printing errors,
/// warnings, info notes, and summary statistics.
///
/// `_fix` is accepted but unused — no automatic fixes are applied here.
#[cfg(feature = "eval")]
fn run_check(issues_only: bool, dataset: Option<&str>, _fix: bool) -> Result<(), String> {
    use anno_eval::eval::dataset_registry::DatasetId as RegistryDatasetId;
    use anno_eval::eval::loader::LoadableDatasetId;
    println!();
    println!("{}", color("1;36", "Dataset Metadata Check"));
    println!();
    // NOTE(review): nothing below ever pushes to `errors` (it is not even
    // `mut`), so the error-report and non-zero-exit paths further down are
    // currently unreachable — confirm whether error-level checks are planned.
    let errors: Vec<String> = Vec::new();
    let mut warnings: Vec<String> = Vec::new();
    let mut info: Vec<String> = Vec::new();
    // Either the single requested dataset (matched case-insensitively by
    // name or Debug form) or the entire registry.
    let datasets_to_check: Vec<RegistryDatasetId> = if let Some(ds_name) = dataset {
        RegistryDatasetId::all()
            .iter()
            .find(|d| {
                d.name().eq_ignore_ascii_case(ds_name)
                    || format!("{:?}", d).eq_ignore_ascii_case(ds_name)
            })
            .copied()
            .map(|d| vec![d])
            .ok_or_else(|| format!("Dataset '{}' not found in registry", ds_name))?
    } else {
        RegistryDatasetId::all().to_vec()
    };
    for registry_id in &datasets_to_check {
        let is_loadable = LoadableDatasetId::try_from(*registry_id).is_ok();
        // URL check: Local datasets legitimately have no URL; automatable
        // ones without a URL are flagged, others just noted.
        let url = registry_id.download_url();
        if url.is_empty() {
            let access_status = registry_id.access_status();
            if access_status == anno_eval::eval::dataset_registry::DatasetAccessibility::Local {
                // Local datasets need no download URL — nothing to report.
            } else if access_status.is_automatable() {
                warnings.push(format!(
                    "{}: Missing download URL but marked as automatable ({})",
                    registry_id.name(),
                    access_status.as_str()
                ));
            } else {
                info.push(format!(
                    "{}: No URL (requires {})",
                    registry_id.name(),
                    access_status.as_str()
                ));
            }
        }
        // Entity-type check: NER datasets should declare real entity types.
        let entity_types = registry_id.entity_types();
        if registry_id.supports_ner() && entity_types.is_empty() {
            warnings.push(format!("{}: Missing entity_types", registry_id.name()));
        } else if registry_id.supports_ner() && entity_types == ["ENTITY"] {
            warnings.push(format!(
                "{}: Using generic entity type 'ENTITY' (should specify actual types)",
                registry_id.name()
            ));
        }
        // Domain check: "general" is considered a placeholder unless the
        // dataset is multilingual.
        let domain = registry_id.domain();
        if domain == "general" && !registry_id.is_multilingual() {
            warnings.push(format!(
                "{}: Using generic domain 'general' (should specify actual domain)",
                registry_id.name()
            ));
        }
        // Loader-coverage check: automatable datasets with a loadable task
        // should have a loader implementation.
        let has_loadable_task =
            registry_id.supports_ner() || registry_id.supports_coref() || registry_id.supports_re();
        if registry_id.access_status().is_automatable()
            && registry_id.is_automatable_download()
            && !is_loadable
            && has_loadable_task
        {
            warnings.push(format!(
                "{}: Automatable access_status ({}) but not loadable (missing loader impl)",
                registry_id.name(),
                registry_id.access_status().as_str()
            ));
        }
    }
    if !issues_only {
        println!("Checked {} datasets", datasets_to_check.len());
        println!();
    }
    if !errors.is_empty() {
        println!("{} {} Errors:", color("31", "✗"), errors.len());
        for err in &errors {
            println!(" {}", err);
        }
        println!();
    }
    if !warnings.is_empty() {
        println!("{} {} Warnings:", color("33", "!"), warnings.len());
        for warn in &warnings {
            println!(" {}", warn);
        }
        println!();
    }
    // Info notes are suppressed in --issues-only mode.
    if !issues_only && !info.is_empty() {
        println!("{} {} Info:", color("36", "i"), info.len());
        for msg in &info {
            println!(" {}", msg);
        }
        println!();
    }
    if !issues_only {
        let registry_count = RegistryDatasetId::all().len();
        let loadable_count = LoadableDatasetId::all().len();
        let with_urls = datasets_to_check
            .iter()
            .filter(|d| !d.download_url().is_empty())
            .count();
        let with_entity_types = datasets_to_check
            .iter()
            .filter(|d| {
                let types = d.entity_types();
                !types.is_empty() && types != ["ENTITY"]
            })
            .count();
        println!("Statistics:");
        println!(" Registry datasets: {}", registry_count);
        println!(" Loadable datasets: {}", loadable_count);
        println!(" With download URLs: {}", with_urls);
        println!(" With entity types: {}", with_entity_types);
        println!();
    }
    // Errors (if ever populated) make the whole check fail.
    if !errors.is_empty() {
        return Err(format!("Found {} errors", errors.len()));
    }
    if issues_only && warnings.is_empty() && errors.is_empty() {
        println!("{} No issues found", color("32", "✓"));
    } else if !issues_only && errors.is_empty() && warnings.is_empty() {
        println!("{} All checks passed", color("32", "✓"));
    }
    Ok(())
}
/// Fallback when built without the `eval` feature: metadata checks need the
/// dataset registry, so just explain and succeed.
#[cfg(not(feature = "eval"))]
fn run_check(_issues_only: bool, _dataset: Option<&str>, _fix: bool) -> Result<(), String> {
    println!("Dataset checking requires --features eval");
    Ok(())
}
/// Probe dataset download URLs concurrently and summarize their health.
///
/// Selection: a single named dataset, or every registry dataset with a URL,
/// filtered (unless `ANNO_DATASET_ALLOW_MANUAL` is set) to automatable
/// downloads whose token requirements are satisfied, and capped at 50 unless
/// `all` is set. `workers` bounds in-flight probes; `timeout` is the
/// per-request timeout in seconds. With `relaxed`, failures on sources the
/// registry marks non-automatable are downgraded to warnings.
///
/// Fix: `check_single_url` reports non-HTTP URLs with status `"skip"`, but
/// this match had no `"skip"` arm, so such URLs fell into the catch-all and
/// were counted as errors/warnings. They are now printed as SKIP and not
/// counted against the summary.
///
/// # Errors
/// Returns an error for an unknown dataset name, or when URLs fail the
/// health check (strict-only failures under `relaxed`).
#[cfg(feature = "eval")]
fn run_check_health(
    dataset: Option<&str>,
    all: bool,
    relaxed: bool,
    verbose: bool,
    workers: usize,
    timeout: u64,
) -> Result<(), String> {
    use anno_eval::eval::dataset_registry::DatasetId;
    use std::sync::mpsc;
    use std::thread;
    println!();
    println!("{}", color("1;36", "Dataset URL Health Check"));
    println!();
    anno::env::load_dotenv();
    // ANNO_DATASET_ALLOW_MANUAL=1/true/yes disables the automatable-only
    // filtering below.
    let allow_manual = matches!(
        std::env::var("ANNO_DATASET_ALLOW_MANUAL").as_deref(),
        Ok("1") | Ok("true") | Ok("yes")
    );
    let datasets_to_check: Vec<DatasetId> = if let Some(ds_name) = dataset {
        ds_name
            .parse::<DatasetId>()
            .map(|id| vec![id])
            .map_err(|e| format!("Invalid dataset '{}': {}", ds_name, e))?
    } else {
        let total = DatasetId::all().len();
        let with_urls: Vec<DatasetId> = DatasetId::all()
            .iter()
            .copied()
            .filter(|d| !d.download_url().is_empty())
            .collect();
        if verbose {
            println!("Dataset filtering breakdown:");
            println!(" Total in registry: {}", total);
            println!(" With URLs: {}", with_urls.len());
        }
        // Track why candidates were excluded, for --verbose reporting.
        let mut excluded_not_automatable = 0;
        let mut excluded_not_download = 0;
        let mut excluded_no_token = 0;
        let mut candidates: Vec<DatasetId> = with_urls
            .iter()
            .copied()
            .filter(|d| {
                if allow_manual {
                    return true;
                }
                if !d.access_status().is_automatable() {
                    excluded_not_automatable += 1;
                    return false;
                }
                if !d.is_automatable_download() {
                    excluded_not_download += 1;
                    return false;
                }
                if d.requires_hf_token() && !anno::env::has_hf_token() {
                    excluded_no_token += 1;
                    return false;
                }
                true
            })
            .collect();
        if verbose {
            println!(" After filtering: {}", candidates.len());
            if excluded_not_automatable > 0 {
                println!(
                    " Excluded (not automatable): {}",
                    excluded_not_automatable
                );
            }
            if excluded_not_download > 0 {
                println!(
                    " Excluded (not automatable download): {}",
                    excluded_not_download
                );
            }
            if excluded_no_token > 0 {
                println!(" Excluded (requires HF_TOKEN): {}", excluded_no_token);
            }
        }
        if !all {
            let before_truncate = candidates.len();
            candidates.truncate(50);
            if verbose && before_truncate > 50 {
                println!(" After truncation (--all not set): {}", candidates.len());
            }
        }
        candidates
    };
    if datasets_to_check.is_empty() {
        println!("No datasets to check (no URLs or dataset not found)");
        return Ok(());
    }
    if verbose {
        println!();
        println!(
            "Final selection: {} datasets to check",
            datasets_to_check.len()
        );
        println!();
    }
    println!("Checking {} dataset URLs...", datasets_to_check.len());
    println!();
    // Probe concurrently; results come back over a channel.
    let (tx, rx) = mpsc::channel();
    let mut handles = Vec::new();
    for dataset_id in datasets_to_check {
        let tx = tx.clone();
        let url = dataset_id.download_url().to_string();
        let timeout_secs = timeout;
        let handle = thread::spawn(move || {
            let result = check_single_url(dataset_id.name(), &url, timeout_secs);
            tx.send((dataset_id, result)).ok();
        });
        handles.push(handle);
        // Crude throttle: once `workers` threads are in flight, join the
        // oldest before spawning more.
        if handles.len() >= workers {
            for handle in handles.drain(..1) {
                handle.join().ok();
            }
        }
    }
    for handle in handles {
        handle.join().ok();
    }
    // Drop the original sender so recv() terminates once all workers finish.
    drop(tx);
    let mut results: Vec<(DatasetId, URLHealthResult)> = Vec::new();
    while let Ok((dataset_id, result)) = rx.recv() {
        results.push((dataset_id, result));
    }
    results.sort_by_key(|(dataset_id, _)| dataset_id.name());
    let mut ok_count = 0;
    let mut strict_error_count = 0;
    let mut relaxed_error_count = 0;
    let mut redirect_count = 0;
    for (dataset_id, result) in &results {
        let name = dataset_id.name();
        // Per-URL detail lines are only shown for a single-dataset check.
        match result.status.as_str() {
            "ok" => {
                ok_count += 1;
                if dataset.is_some() {
                    println!(
                        " {} {} ({})",
                        color("32", "OK"),
                        name,
                        result.code.unwrap_or(0)
                    );
                }
            }
            "redirect" => {
                // Redirects still count as reachable.
                redirect_count += 1;
                ok_count += 1;
                if dataset.is_some() {
                    println!(
                        " {} {} ({}): {}",
                        color("33", "REDIRECT"),
                        name,
                        result.code.unwrap_or(0),
                        result.message
                    );
                }
            }
            "missing" => {
                if dataset.is_some() {
                    println!(" {} {}: No URL", color("33", "SKIP"), name);
                }
            }
            "skip" => {
                // Deliberately skipped (e.g. non-HTTP URL): nothing was
                // probed, so do not count it as a failure.
                if dataset.is_some() {
                    println!(" {} {}: {}", color("33", "SKIP"), name, result.message);
                }
            }
            _ => {
                // A failure only counts as strict when the registry claims we
                // should have been able to fetch this URL automatically.
                let expected_automatable = dataset_id.access_status().is_automatable()
                    && dataset_id.is_automatable_download()
                    && (!dataset_id.requires_hf_token() || anno::env::has_hf_token());
                if expected_automatable {
                    strict_error_count += 1;
                    println!(" {} {}: {}", color("31", "ERROR"), name, result.message);
                } else {
                    relaxed_error_count += 1;
                    println!(" {} {}: {}", color("33", "WARN"), name, result.message);
                    println!(" (non-automatable source per registry metadata)");
                }
                if let Some(code) = result.code {
                    println!(" HTTP {}", code);
                }
            }
        }
    }
    println!();
    if relaxed {
        println!(
            "Summary: {} OK ({} redirects), {} errors, {} warnings",
            ok_count, redirect_count, strict_error_count, relaxed_error_count
        );
        if strict_error_count > 0 {
            return Err(format!(
                "{} expected-automatable URLs failed health check",
                strict_error_count
            ));
        }
    } else {
        let total_errors = strict_error_count + relaxed_error_count;
        println!(
            "Summary: {} OK ({} redirects), {} errors",
            ok_count, redirect_count, total_errors
        );
        if total_errors > 0 {
            return Err(format!("{} URLs failed health check", total_errors));
        }
    }
    Ok(())
}
/// Fallback when built without the `eval` feature: URL health checks need
/// the dataset registry, so just explain and succeed.
#[cfg(not(feature = "eval"))]
fn run_check_health(
    _dataset: Option<&str>,
    _all: bool,
    _relaxed: bool,
    _verbose: bool,
    _workers: usize,
    _timeout: u64,
) -> Result<(), String> {
    println!("URL health checking requires --features eval");
    Ok(())
}
/// Outcome of probing a single dataset download URL.
#[cfg(feature = "eval")]
struct URLHealthResult {
    /// Coarse machine-readable outcome; matched by string in
    /// `run_check_health` ("ok", "redirect", "missing", "skip", "error").
    status: String,
    /// HTTP status code when a response (or an HTTP status error) was
    /// received; `None` for transport failures and skipped URLs.
    code: Option<u16>,
    /// Human-readable detail for display.
    message: String,
}
/// Probe `url` with a HEAD request (retrying with GET on 405) and classify
/// the outcome for `run_check_health`.
///
/// Empty URLs report "missing" and non-HTTP(S) URLs report "skip" without
/// touching the network; everything else is an "ok"/"redirect"/"error"
/// classification of the response.
#[cfg(feature = "eval")]
fn check_single_url(_name: &str, url: &str, timeout_secs: u64) -> URLHealthResult {
    // Small constructor to keep each outcome to a single expression.
    let outcome = |status: &str, code: Option<u16>, message: String| URLHealthResult {
        status: status.to_string(),
        code,
        message,
    };
    if url.is_empty() {
        return outcome("missing", None, "No URL defined".to_string());
    }
    let is_http = url.starts_with("http://") || url.starts_with("https://");
    if !is_http {
        return outcome("skip", None, "Non-HTTP URL".to_string());
    }
    let timeout = std::time::Duration::from_secs(timeout_secs);
    let head_response = ureq::AgentBuilder::new()
        .timeout(timeout)
        .build()
        .head(url)
        .call();
    match head_response {
        Ok(response) => {
            let status = response.status();
            if status == 200 {
                outcome("ok", Some(status), "OK".to_string())
            } else if (300..400).contains(&status) {
                let location = response.header("Location").unwrap_or("unknown");
                outcome(
                    "redirect",
                    Some(status),
                    format!("Redirects to {}", location),
                )
            } else if status == 405 {
                // Server rejects HEAD; fall back to a plain GET.
                let get_response = ureq::get(url).timeout(timeout).call();
                match get_response {
                    Ok(resp) => outcome(
                        "ok",
                        Some(resp.status()),
                        "OK (HEAD not allowed)".to_string(),
                    ),
                    Err(e) => outcome("error", None, format!("GET failed: {}", e)),
                }
            } else {
                outcome("error", Some(status), format!("HTTP {}", status))
            }
        }
        Err(ureq::Error::Status(code, _)) => outcome("error", Some(code), format!("HTTP {}", code)),
        Err(ureq::Error::Transport(e)) => {
            outcome("error", None, format!("Connection error: {}", e))
        }
    }
}
/// Summarize dataset-registry facet distributions (language, domain, access
/// status, categories, tasks), optionally contrasted against the subset of
/// datasets "touched" by a distribution report, and optionally list the most
/// under-touched facet values (`gaps`).
///
/// Fix: `gap_table` previously divided by `touched_total` (and `total`)
/// without a zero guard; an empty `chosen_dataset_counts` produced NaN
/// ratios and the `partial_cmp(..).unwrap()` sort then panicked. Ratios are
/// now zero-safe and the comparator cannot panic.
///
/// # Errors
/// Returns an error if the touched-report file cannot be read or parsed, or
/// on a non-BrokenPipe stdout failure (BrokenPipe terminates quietly).
#[cfg(feature = "eval")]
fn run_facets(touched_report: Option<&str>, gaps: bool) -> Result<(), String> {
    use std::collections::{BTreeMap, BTreeSet};
    use std::fs;
    use std::io::Write;
    use anno_eval::eval::loader::DatasetId;
    // Increment the counter for key `k`.
    fn bump(map: &mut BTreeMap<String, u64>, k: impl Into<String>) {
        *map.entry(k.into()).or_insert(0) += 1;
    }
    // writeln! that exits quietly on BrokenPipe (e.g. piping into `head`).
    macro_rules! outln {
        ($out:expr, $($arg:tt)*) => {{
            if let Err(e) = writeln!($out, $($arg)*) {
                if e.kind() == std::io::ErrorKind::BrokenPipe {
                    return Ok(());
                }
                return Err(format!("stdout: {}", e));
            }
        }};
    }
    // Zero-safe percentage.
    fn pct(n: u64, d: u64) -> f64 {
        if d == 0 {
            0.0
        } else {
            (n as f64) * 100.0 / (d as f64)
        }
    }
    // Parse a distribution report and return the keys of
    // `chosen_dataset_counts` (dataset Debug names) as the touched set.
    fn touched_set_from_report(path: &str) -> Result<BTreeSet<String>, String> {
        let content = fs::read_to_string(path).map_err(|e| format!("read {}: {}", path, e))?;
        let v: serde_json::Value =
            serde_json::from_str(&content).map_err(|e| format!("parse {}: {}", path, e))?;
        let obj = v
            .get("chosen_dataset_counts")
            .and_then(|x| x.as_object())
            .ok_or_else(|| "distribution JSON missing chosen_dataset_counts".to_string())?;
        Ok(obj.keys().cloned().collect())
    }
    let all: Vec<DatasetId> = DatasetId::all().to_vec();
    let total = all.len() as u64;
    let touched: Option<BTreeSet<String>> = match touched_report {
        None => None,
        Some(p) if p.trim().is_empty() => None,
        Some(p) => Some(touched_set_from_report(p.trim())?),
    };
    let touched_total = touched.as_ref().map(|set| set.len() as u64).unwrap_or(0);
    let mut out = std::io::BufWriter::new(std::io::stdout());
    // Facet counters over the whole registry and over the touched subset.
    let mut lang_all: BTreeMap<String, u64> = BTreeMap::new();
    let mut domain_all: BTreeMap<String, u64> = BTreeMap::new();
    let mut access_all: BTreeMap<String, u64> = BTreeMap::new();
    let mut cat_all: BTreeMap<String, u64> = BTreeMap::new();
    let mut tasks_all: BTreeMap<String, u64> = BTreeMap::new();
    let mut lang_touched: BTreeMap<String, u64> = BTreeMap::new();
    let mut domain_touched: BTreeMap<String, u64> = BTreeMap::new();
    let mut access_touched: BTreeMap<String, u64> = BTreeMap::new();
    let mut cat_touched: BTreeMap<String, u64> = BTreeMap::new();
    let mut tasks_touched: BTreeMap<String, u64> = BTreeMap::new();
    for ds in &all {
        bump(&mut lang_all, ds.language());
        bump(&mut domain_all, ds.domain());
        bump(&mut access_all, format!("{:?}", ds.access_status()));
        for &c in ds.categories() {
            bump(&mut cat_all, c);
        }
        for &t in ds.tasks() {
            bump(&mut tasks_all, t);
        }
        if let Some(tset) = touched.as_ref() {
            // The touched set stores the Debug form of the dataset id.
            let name = format!("{ds:?}");
            if tset.contains(&name) {
                bump(&mut lang_touched, ds.language());
                bump(&mut domain_touched, ds.domain());
                bump(&mut access_touched, format!("{:?}", ds.access_status()));
                for &c in ds.categories() {
                    bump(&mut cat_touched, c);
                }
                for &t in ds.tasks() {
                    bump(&mut tasks_touched, t);
                }
            }
        }
    }
    outln!(&mut out, "Datasets: {}", total);
    if let Some(p) = touched_report {
        if touched.is_some() {
            outln!(&mut out, "Touched: {} (from {})", touched_total, p.trim());
        }
    }
    // Print the top-N facet values by count (ties broken alphabetically).
    fn print_top(
        out: &mut impl Write,
        title: &str,
        map: &BTreeMap<String, u64>,
        top: usize,
        denom: u64,
    ) -> Result<(), String> {
        outln!(out, "");
        outln!(out, "{} (top {})", title, top);
        let mut items: Vec<(&String, &u64)> = map.iter().collect();
        items.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
        for (k, v) in items.into_iter().take(top) {
            outln!(out, " {:22} {:5} ({:4.1}%)", k, v, pct(*v, denom));
        }
        Ok(())
    }
    print_top(&mut out, "Languages (all)", &lang_all, 12, total)?;
    print_top(&mut out, "Domains (all)", &domain_all, 12, total)?;
    print_top(&mut out, "Access (all)", &access_all, 12, total)?;
    print_top(&mut out, "Categories (all)", &cat_all, 15, total)?;
    print_top(&mut out, "Tasks (all)", &tasks_all, 15, total)?;
    if touched.is_some() {
        print_top(
            &mut out,
            "Languages (touched)",
            &lang_touched,
            12,
            touched_total,
        )?;
        print_top(
            &mut out,
            "Domains (touched)",
            &domain_touched,
            12,
            touched_total,
        )?;
        print_top(
            &mut out,
            "Access (touched)",
            &access_touched,
            12,
            touched_total,
        )?;
        print_top(
            &mut out,
            "Categories (touched)",
            &cat_touched,
            15,
            touched_total,
        )?;
        print_top(
            &mut out,
            "Tasks (touched)",
            &tasks_touched,
            15,
            touched_total,
        )?;
    }
    if gaps && touched.is_some() {
        // Print the facet values (with at least 6 datasets overall) whose
        // touched share lags their registry share the most.
        fn gap_table(
            out: &mut impl Write,
            title: &str,
            all: &BTreeMap<String, u64>,
            touched: &BTreeMap<String, u64>,
            total: u64,
            touched_total: u64,
        ) -> Result<(), String> {
            // Zero-safe ratio: avoids NaN when a denominator is 0, which
            // previously poisoned the sort below via partial_cmp().unwrap().
            fn ratio(n: u64, d: u64) -> f64 {
                if d == 0 {
                    0.0
                } else {
                    (n as f64) / (d as f64)
                }
            }
            outln!(out, "");
            outln!(out, "Under-touched {} (min_all=6)", title);
            let mut rows: Vec<(f64, String, u64, u64, f64, f64)> = Vec::new();
            for (k, na) in all {
                if *na < 6 {
                    continue;
                }
                let nt = *touched.get(k).unwrap_or(&0);
                let pa = ratio(*na, total);
                let pt = ratio(nt, touched_total);
                rows.push((
                    (pt - pa) * 100.0,
                    k.clone(),
                    *na,
                    nt,
                    pa * 100.0,
                    pt * 100.0,
                ));
            }
            // Ratios are finite now, but keep the comparator non-panicking.
            rows.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
            for (gap_pp, k, na, nt, pa, pt) in rows.into_iter().take(12) {
                outln!(
                    out,
                    " {:22} all={:4} ({:4.1}%) touched={:4} ({:4.1}%) gap={:5.1}pp",
                    k,
                    na,
                    pa,
                    nt,
                    pt,
                    gap_pp
                );
            }
            Ok(())
        }
        gap_table(
            &mut out,
            "domains",
            &domain_all,
            &domain_touched,
            total,
            touched_total,
        )?;
        gap_table(
            &mut out,
            "access",
            &access_all,
            &access_touched,
            total,
            touched_total,
        )?;
        gap_table(
            &mut out,
            "categories",
            &cat_all,
            &cat_touched,
            total,
            touched_total,
        )?;
        gap_table(
            &mut out,
            "tasks",
            &tasks_all,
            &tasks_touched,
            total,
            touched_total,
        )?;
    }
    Ok(())
}