use clap::Parser;
use super::super::output::color;
use super::super::parser::ModelBackend;
// CLI arguments for the `explain` subcommand.
//
// Plain `//` comments are used deliberately: `///` doc comments on a
// clap-derive struct are picked up as `--help` text and would change the
// program's observable behavior.
#[derive(Parser, Debug)]
pub struct ExplainArgs {
// NOTE(review): not read by `run` in this file — possibly consumed by a
// caller or vestigial; confirm before removing.
#[arg(short, long)]
pub entity_id: Option<String>,
// Input text to analyze; falls back to `positional` joined by spaces.
#[arg(short, long)]
pub text: Option<String>,
// Optional "START:END" character-offset filter; only entities matching the
// span exactly are explained.
#[arg(short, long, value_name = "START:END")]
pub span: Option<String>,
// Model backend used for extraction.
#[arg(short, long, default_value = "stacked")]
pub model: ModelBackend,
// When set, also lists overlapping candidate entities that lost conflict
// resolution against the explained entity.
#[arg(long)]
pub show_all: bool,
// Free-form words treated as the input text when --text is absent.
pub positional: Vec<String>,
}
/// A single named heuristic signal and its additive weight toward the
/// entity-type decision, rendered in the "Features" section of the report.
#[derive(Debug, Clone)]
pub struct FeatureContribution {
// Feature identifier, e.g. "capitalization" or "org_suffix".
pub name: String,
// Human-readable feature value, e.g. "TitleCase" or "preceded by 'Dr.'".
pub value: String,
// Fixed heuristic score; positive values support the decision.
pub weight: f64,
}
pub fn run(args: ExplainArgs) -> Result<(), String> {
let text = if let Some(t) = args.text {
t
} else if !args.positional.is_empty() {
args.positional.join(" ")
} else {
return Err("No text provided. Use --text or provide text as positional argument.".into());
};
let span = if let Some(s) = &args.span {
let parts: Vec<&str> = s.split(':').collect();
if parts.len() != 2 {
return Err("Span must be in format START:END (e.g., 0:11)".into());
}
let start: usize = parts[0].parse().map_err(|_| "Invalid start offset")?;
let end: usize = parts[1].parse().map_err(|_| "Invalid end offset")?;
Some((start, end))
} else {
None
};
let model = args.model.create_model()?;
let entities = model
.extract_entities(&text, None)
.map_err(|e| format!("Extraction failed: {}", e))?;
if entities.is_empty() {
println!("No entities found in text.");
return Ok(());
}
let entities_to_explain: Vec<_> = if let Some((start, end)) = span {
entities
.iter()
.filter(|e| e.start() == start && e.end() == end)
.collect()
} else {
entities.iter().collect()
};
if entities_to_explain.is_empty() {
println!("No entities match the specified span.");
return Ok(());
}
for (idx, entity) in entities_to_explain.iter().enumerate() {
if idx > 0 {
println!();
}
let source = entity
.provenance
.as_ref()
.map(|p| p.source.to_string())
.unwrap_or_else(|| "unknown".to_string());
println!("{}", color("1;36", &format!("Entity: \"{}\"", entity.text)));
println!();
println!("{}:", color("1;33", "Type Decision"));
println!(
" {} ({:.0}%)",
color("32", entity.entity_type.as_label()),
entity.confidence * 100.0
);
println!();
println!("{}:", color("1;33", "Source Backend"));
println!(" {}", source);
println!();
println!("{}:", color("1;33", "Features"));
let features = analyze_features(&text, entity);
for feat in &features {
let sign = if feat.weight > 0.0 { "+" } else { "" };
println!(
" {} = {} ({}{:.2})",
color("90", &feat.name),
feat.value,
sign,
feat.weight
);
}
println!();
let ctx_start = entity.start().saturating_sub(30);
let ctx_end = (entity.end() + 30).min(text.chars().count());
let before: String = text
.chars()
.skip(ctx_start)
.take(entity.start() - ctx_start)
.collect();
let entity_text: String = text
.chars()
.skip(entity.start())
.take(entity.end() - entity.start())
.collect();
let after: String = text
.chars()
.skip(entity.end())
.take(ctx_end - entity.end())
.collect();
println!("{}:", color("1;33", "Context"));
println!(
" {}{}{}{}{}",
if ctx_start > 0 { "..." } else { "" },
color("90", &before),
color("1;33", &entity_text),
color("90", &after),
if ctx_end < text.chars().count() {
"..."
} else {
""
}
);
println!();
println!("{}:", color("1;33", "Span"));
println!(" start: {} (character offset)", entity.start());
println!(" end: {} (exclusive, character offset)", entity.end());
println!(" length: {} chars", entity.end() - entity.start());
if args.show_all && entities.len() > 1 {
println!();
println!("{}:", color("1;33", "Other Candidates"));
for other in &entities {
if other.start() == entity.start() && other.end() == entity.end() {
continue;
}
let overlaps = !(other.end() <= entity.start() || other.start() >= entity.end());
if overlaps {
let other_source = other
.provenance
.as_ref()
.map(|p| p.source.to_string())
.unwrap_or_else(|| "unknown".to_string());
println!(
" {} \"{}\" ({:.0}%) from {} - {}",
other.entity_type.as_label(),
other.text,
other.confidence * 100.0,
other_source,
color("31", "conflict resolved")
);
}
}
}
}
Ok(())
}
/// Builds the list of heuristic feature contributions explaining why
/// `entity` was recognized in `text`, sorted by descending weight.
///
/// `entity.start()` / `entity.end()` are treated as character offsets into
/// `text`, consistent with the rest of this file. Weights are fixed
/// heuristic scores, not learned model outputs.
fn analyze_features(text: &str, entity: &anno_core::Entity) -> Vec<FeatureContribution> {
    let mut features = Vec::new();
    let entity_text = &entity.text;

    // Title-case first character.
    if matches!(entity_text.chars().next(), Some(c) if c.is_uppercase()) {
        features.push(FeatureContribution {
            name: "capitalization".into(),
            value: "TitleCase".into(),
            weight: 0.15,
        });
    }

    // ALL-CAPS: every alphabetic char uppercase and more than one character.
    // Fixed: the length guard counts chars, not bytes — the original used
    // byte length, so a lone multi-byte uppercase letter (e.g. "É") was
    // wrongly flagged as all-caps.
    if entity_text.chars().count() > 1
        && entity_text.chars().all(|c| !c.is_alphabetic() || c.is_uppercase())
    {
        features.push(FeatureContribution {
            name: "all_caps".into(),
            value: "true".into(),
            weight: 0.10,
        });
    }

    if entity_text.contains('.') {
        features.push(FeatureContribution {
            name: "contains_period".into(),
            value: "true".into(),
            weight: 0.05,
        });
    }

    // Multi-word entities get a small boost; single words contribute 0.
    let word_count = entity_text.split_whitespace().count();
    features.push(FeatureContribution {
        name: "word_count".into(),
        value: word_count.to_string(),
        weight: if word_count > 1 { 0.05 } else { 0.0 },
    });

    // Up to three words immediately left of the entity, nearest first.
    let left_ctx: String = text.chars().take(entity.start()).collect();
    let left_words: Vec<&str> = left_ctx.split_whitespace().rev().take(3).collect();

    // Honorific just before the entity suggests a person.
    let titles = ["Dr.", "Mr.", "Mrs.", "Ms.", "Prof.", "Sir", "Lord", "Lady"];
    if let Some(title) = titles
        .iter()
        .copied()
        .find(|t| left_words.iter().any(|w| w.ends_with(t)))
    {
        features.push(FeatureContribution {
            name: "context_left".into(),
            value: format!("preceded by '{}'", title),
            weight: 0.20,
        });
    }

    // Speech verb right after the entity suggests a person.
    let right_ctx: String = text.chars().skip(entity.end()).take(50).collect();
    // Hoisted: the original re-allocated the lowercased context up to twice
    // per candidate verb inside the loop.
    let right_lower = right_ctx.to_lowercase();
    let person_verbs = ["said", "says", "told", "announced", "declared", "stated"];
    if let Some(verb) = person_verbs.iter().copied().find(|v| {
        right_lower.starts_with(&format!(" {}", v)) || right_lower.starts_with(&format!(", {}", v))
    }) {
        features.push(FeatureContribution {
            name: "context_right".into(),
            value: format!("followed by '{}'", verb),
            weight: 0.15,
        });
    }

    // Corporate suffix on the entity text itself suggests an organization.
    let org_suffixes = [
        "Inc.", "Corp.", "LLC", "Ltd.", "Co.", "Company", "Corporation",
    ];
    if let Some(suffix) = org_suffixes
        .iter()
        .copied()
        .find(|s| entity_text.ends_with(s))
    {
        features.push(FeatureContribution {
            name: "org_suffix".into(),
            value: format!("ends with '{}'", suffix),
            weight: 0.25,
        });
    }

    // Spatial preposition before the entity suggests a location.
    let loc_preps = ["in", "at", "from", "to", "near"];
    if let Some(prep) = loc_preps
        .iter()
        .copied()
        .find(|p| left_words.iter().any(|w| w.to_lowercase() == *p))
    {
        features.push(FeatureContribution {
            name: "location_preposition".into(),
            value: format!("preceded by '{}'", prep),
            weight: 0.18,
        });
    }

    // Regex-derived types are fully explained by their pattern match.
    let label = entity.entity_type.as_label();
    let pattern = if label == "EMAIL" {
        Some("email_regex")
    } else if label == "DATE" {
        Some("date_regex")
    } else if label == "MONEY" {
        Some("money_regex")
    } else {
        None
    };
    if let Some(value) = pattern {
        features.push(FeatureContribution {
            name: "pattern_match".into(),
            value: value.into(),
            weight: 1.0,
        });
    }

    // Strongest signals first. Weights are fixed constants (never NaN), so
    // the Equal fallback is only a formality.
    features.sort_by(|a, b| {
        b.weight
            .partial_cmp(&a.weight)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    features
}