use clap::{Parser, Subcommand, ValueEnum};
use crate::cli::commands;
// Top-level argument parser for the `anno` binary.
// Global options live here; per-subcommand arguments are defined in
// `crate::cli::commands`. Plain `//` comments are used on fields because
// `///` doc comments would be picked up by clap as help text.
#[derive(Parser)]
#[command(name = "anno")]
#[command(
    author,
    version,
    about = "Information extraction CLI (NER + coreference)",
    long_about = r#"
anno - Information extraction from text
EXAMPLES:
anno extract --text "Lynn Conway worked at IBM and Xerox PARC in California."
anno extract --model gliner --extract-types "DRUG,SYMPTOM" \
--text "Aspirin can treat headaches and reduce fever."
anno extract --extract-relations --relation-types "FOUNDED,WORKS_FOR" \
--text "Steve Jobs founded Apple in 1976."
anno debug --coref -t "Sophie Wilson designed the ARM processor. She revolutionized computing."
anno batch --dir ./docs --output ./results --format json
anno models download ...
anno info
OFFLINE:
anno models download ... # prefetch weights
ANNO_NO_DOWNLOADS=1 anno ... # cached-only mode
Run `anno help <command>` for details on any subcommand.
"#
)]
#[command(propagate_version = true)]
pub struct Cli {
    // The chosen subcommand; `None` means a bare `anno [TEXT...]` invocation.
    #[command(subcommand)]
    pub command: Option<Commands>,
    // Global `--color <WHEN>` flag, available on every subcommand.
    #[arg(long, global = true, default_value = "auto", value_name = "WHEN")]
    pub color: ColorMode,
    // Trailing free-form text captured when no subcommand matches.
    // NOTE(review): with `trailing_var_arg`, all remaining args land here.
    #[arg(trailing_var_arg = true)]
    pub text: Vec<String>,
}
// When to emit ANSI color codes, parsed from the global `--color` flag.
// `//` comments only: `///` docs on ValueEnum variants become CLI help text.
#[derive(Clone, Copy, Debug, Default, ValueEnum)]
pub enum ColorMode {
    // Detect terminal support at runtime (the default).
    #[default]
    Auto,
    // Force colors on.
    Always,
    // Force colors off.
    Never,
}
// All `anno` subcommands. Argument structs live in `crate::cli::commands`.
// Short `visible_alias`es provide quick shorthands; variants marked
// `hide = true` are internal/experimental and omitted from `--help`.
// `//` comments only: `///` docs on Subcommand variants become help text.
#[derive(Subcommand)]
pub enum Commands {
    // Core extraction entry point (`anno extract` / `anno x`).
    #[command(visible_alias = "x")]
    Extract(crate::cli::commands::ExtractArgs),
    #[command(visible_alias = "d")]
    Debug(commands::DebugArgs),
    #[command(visible_alias = "e")]
    Eval(commands::EvalArgs),
    #[command(visible_alias = "v")]
    Validate(commands::ValidateArgs),
    // Hidden commands below remain invocable but do not appear in help output.
    #[command(visible_alias = "a")]
    #[command(hide = true)]
    Analyze(commands::AnalyzeArgs),
    #[command(visible_alias = "ds")]
    #[command(hide = true)]
    Dataset(commands::DatasetArgs),
    // Only compiled in when the `eval` feature is enabled.
    #[command(visible_alias = "bench")]
    #[cfg(feature = "eval")]
    #[command(hide = true)]
    Benchmark(commands::BenchmarkArgs),
    // Exposed as `sampler` with alias `muxer`; variant name differs from CLI name.
    #[cfg(feature = "eval")]
    #[command(hide = true)]
    #[command(name = "sampler", visible_alias = "muxer")]
    Muxer(commands::MuxerArgs),
    #[command(visible_alias = "i")]
    Info,
    Models(commands::ModelsArgs),
    #[command(visible_alias = "coalesce")]
    #[cfg(feature = "eval")]
    #[command(hide = true)]
    CrossDoc(commands::CrossDocArgs),
    #[command(hide = true)]
    Enhance(commands::EnhanceArgs),
    #[command(visible_alias = "p")]
    #[command(hide = true)]
    Pipeline(commands::PipelineArgs),
    #[command(visible_alias = "q")]
    #[command(hide = true)]
    Query(commands::QueryArgs),
    #[command(hide = true)]
    Compare(commands::CompareArgs),
    #[command(hide = true)]
    Cache(commands::CacheArgs),
    #[cfg(feature = "eval")]
    #[command(hide = true)]
    History(commands::HistoryArgs),
    #[command(hide = true)]
    Config(commands::ConfigArgs),
    #[command(visible_alias = "b")]
    #[command(hide = true)]
    Batch(commands::BatchArgs),
    #[command(visible_alias = "priv")]
    #[command(hide = true)]
    Privacy(commands::PrivacyArgs),
    #[command(visible_alias = "w")]
    #[command(hide = true)]
    Watch(commands::WatchArgs),
    #[command(hide = true)]
    Domain(commands::DomainArgs),
    #[command(hide = true)]
    Explain(commands::ExplainArgs),
    #[command(hide = true)]
    Singleton(commands::SingletonArgs),
    #[command(visible_alias = "ex")]
    #[command(hide = true)]
    Export(commands::ExportArgs),
    // Emit shell completion scripts for the given shell.
    Completions {
        #[arg(value_enum)]
        shell: clap_complete::Shell,
    },
}
// Selectable NER backend for `--model`. `#[value(alias = ...)]` entries add
// alternative CLI spellings; ONNX- and Candle-backed variants are
// feature-gated so the enum shape varies with enabled features.
// `//` comments only: `///` docs on ValueEnum variants become help text.
#[derive(Debug, Clone, Copy, Default, ValueEnum)]
pub enum ModelBackend {
    // Regex-based extraction (also spelled `regex` on the CLI).
    #[value(alias = "regex")]
    Pattern,
    #[value(alias = "statistical")]
    Heuristic,
    Minimal,
    Auto,
    // Default backend when `--model` is not given.
    #[default]
    Stacked,
    Crf,
    Hmm,
    Ensemble,
    #[value(alias = "heuristic-crf")]
    HeuristicCrf,
    #[value(alias = "tplink")]
    Tplinker,
    // Hidden from `--help` value listings.
    #[value(alias = "universal-ner", hide = true)]
    UniversalNer,
    #[cfg(feature = "onnx")]
    Gliner,
    #[cfg(feature = "onnx")]
    GlinerMultitask,
    #[cfg(feature = "onnx")]
    Nuner,
    #[cfg(feature = "onnx")]
    W2ner,
    #[cfg(feature = "onnx")]
    #[value(alias = "bert")]
    BertOnnx,
    #[cfg(feature = "onnx")]
    #[value(alias = "deberta")]
    DebertaV3,
    #[cfg(feature = "onnx")]
    #[value(alias = "biomedical-ner")]
    Biomedical,
    #[cfg(feature = "onnx")]
    #[value(alias = "gliner-pii", alias = "pii")]
    GlinerPii,
    #[cfg(feature = "onnx")]
    #[value(alias = "gliner-relex", alias = "relex")]
    GlinerRelex,
    // Scaffolding only; `create_model` rejects it (see impl below).
    #[cfg(feature = "onnx")]
    #[value(alias = "gliner-poly")]
    #[value(hide = true)]
    GlinerPoly,
    #[cfg(feature = "candle")]
    GlinerCandle,
    #[cfg(feature = "candle")]
    CandleNer,
}
impl ModelBackend {
    /// Instantiate the NER model for this backend.
    ///
    /// With the `eval` feature enabled, construction is delegated to the
    /// shared `anno_eval` `BackendFactory` registry; otherwise the backend is
    /// built directly from the `anno` crate. Errors are human-readable strings
    /// that include a remediation tip for the CLI user.
    pub fn create_model(self) -> Result<Box<dyn anno::Model>, String> {
        // `gliner-poly` is selectable on the CLI but is scaffolding only:
        // reject it up front with a clear message rather than failing deeper
        // in the stack.
        #[cfg(feature = "onnx")]
        if matches!(self, Self::GlinerPoly) {
            return Err(
                "GLiNER Poly (`gliner-poly`) is scaffolding only and does not implement inference yet. \
                Use `--model gliner` instead."
                    .to_string(),
            );
        }
        #[cfg(feature = "eval")]
        {
            use anno_eval::eval::backend_factory::BackendFactory;
            // Map each CLI variant onto its registered factory name. Note:
            // `Minimal` intentionally shares the heuristic backend, and `Auto`
            // resolves to the stacked default.
            let factory_name = match self {
                Self::Pattern => "pattern",
                Self::Heuristic => "heuristic",
                Self::Minimal => "heuristic",
                Self::Auto => "stacked",
                Self::Stacked => "stacked",
                Self::Crf => "crf",
                Self::Hmm => "hmm",
                Self::Ensemble => "ensemble",
                Self::HeuristicCrf => "heuristic_crf",
                Self::Tplinker => "tplinker",
                Self::UniversalNer => "universal_ner",
                #[cfg(feature = "onnx")]
                Self::Gliner => "gliner_onnx",
                #[cfg(feature = "onnx")]
                Self::GlinerMultitask => "gliner_multitask",
                #[cfg(feature = "onnx")]
                Self::Nuner => "nuner",
                #[cfg(feature = "onnx")]
                Self::W2ner => "w2ner",
                #[cfg(feature = "onnx")]
                Self::BertOnnx => "bert_onnx",
                #[cfg(feature = "onnx")]
                Self::DebertaV3 => "deberta_v3",
                #[cfg(feature = "onnx")]
                Self::Biomedical => "biomedical",
                #[cfg(feature = "onnx")]
                Self::GlinerPii => "gliner_pii",
                #[cfg(feature = "onnx")]
                Self::GlinerRelex => "gliner_relex",
                #[cfg(feature = "onnx")]
                Self::GlinerPoly => "gliner_poly",
                // FIX: this cfg attribute was previously jammed onto the end
                // of the `GlinerPoly` arm's line, obscuring which arm it gates.
                #[cfg(feature = "candle")]
                Self::GlinerCandle => "gliner_candle",
                #[cfg(feature = "candle")]
                Self::CandleNer => "candle_ner",
            };
            BackendFactory::create(factory_name)
                .map_err(|e| format!("Failed to create model '{}': {}\n Tip: Run 'anno models list' to see available backends.", self.name(), e))
        }
        #[cfg(not(feature = "eval"))]
        {
            use anno::{HeuristicNER, RegexNER, StackedNER};
            match self {
                Self::Pattern => Ok(Box::new(RegexNER::new())),
                Self::Heuristic => Ok(Box::new(HeuristicNER::new())),
                // `Minimal` deliberately maps to the heuristic implementation.
                Self::Minimal => Ok(Box::new(HeuristicNER::new())),
                Self::Auto => Ok(Box::new(StackedNER::default())),
                Self::Stacked => Ok(Box::new(StackedNER::default())),
                Self::Crf => Ok(Box::new(anno::backends::crf::CrfNER::new())),
                Self::Hmm => Ok(Box::new(anno::backends::hmm::HmmNER::new())),
                Self::Ensemble => Ok(Box::new(anno::backends::ensemble::EnsembleNER::default())),
                Self::HeuristicCrf => Ok(Box::new(anno::backends::heuristic_crf::HeuristicCrfNER::new())),
                Self::Tplinker => anno::backends::tplinker::TPLinker::new()
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to create TPLinker: {}\n Tip: Use 'anno models info tplinker' to check model status.", e)),
                Self::UniversalNer => anno::backends::universal_ner::UniversalNER::new()
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to create UniversalNER: {}\n Tip: Check API key with OPENROUTER_API_KEY or use --model gliner for offline NER.", e)),
                #[cfg(feature = "onnx")]
                Self::Gliner => anno::GLiNEROnnx::new(anno::DEFAULT_GLINER_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to load GLiNER: {}\n Tip: Use 'anno models info gliner' to check model status.", e)),
                #[cfg(feature = "onnx")]
                Self::GlinerMultitask => anno::backends::gliner_multitask::GLiNERMultitaskOnnx::from_pretrained(anno::DEFAULT_GLINER_MULTITASK_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to load GLiNER multi-task: {}\n Tip: Use 'anno models info gliner_multitask' to check model status.", e)),
                #[cfg(feature = "onnx")]
                Self::Nuner => anno::backends::nuner::NuNER::from_pretrained(anno::DEFAULT_NUNER_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to load NuNER: {}\n Tip: Use 'anno models info nuner' to check model status.", e)),
                #[cfg(feature = "onnx")]
                Self::W2ner => {
                    // Local override first, then the bundled default model id.
                    let model_path = std::env::var("W2NER_MODEL_PATH")
                        .unwrap_or_else(|_| anno::DEFAULT_W2NER_MODEL.to_string());
                    anno::backends::w2ner::W2NER::from_pretrained(&model_path)
                        .map(|m| Box::new(m) as Box<dyn anno::Model>)
                        .map_err(|e| format!(
                            "W2NER model unavailable: {}\n\n\
                            Options:\n\
                            1. Set W2NER_MODEL_PATH to a local model directory\n\
                            2. Export your own model: uv run scripts/export_w2ner_to_onnx.py\n\
                            3. For HuggingFace models, set HF_TOKEN and request model access\n\n\
                            Alternatives:\n\
                            - Use --model gliner for zero-shot NER\n\
                            - Use --model gliner_multitask for nested entity support",
                            e
                        ))
                }
                #[cfg(feature = "onnx")]
                Self::BertOnnx => anno::backends::onnx::BertNEROnnx::new(anno::DEFAULT_BERT_ONNX_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("Failed to load BERT ONNX: {}\n Tip: Use 'anno models info bert-onnx' to check model status.", e)),
                #[cfg(feature = "onnx")]
                Self::DebertaV3 => {
                    // Search order: explicit env var, HF hub cache, app cache dir.
                    let candidates: Vec<String> = std::iter::once(std::env::var("DEBERTA_MODEL_PATH").ok())
                        .chain(std::iter::once(
                            dirs::home_dir().map(|h| {
                                h.join(".cache/huggingface/hub/models--deberta-v3-ner/onnx")
                                    .to_string_lossy().into_owned()
                            })
                        ))
                        .chain(std::iter::once(
                            dirs::cache_dir().map(|d| {
                                d.join("anno/models/deberta-ner")
                                    .to_string_lossy().into_owned()
                            })
                        ))
                        .flatten()
                        .collect();
                    // Use the first candidate directory that actually contains a model.
                    for path in &candidates {
                        if std::path::Path::new(path).join("model.onnx").exists() {
                            return anno::BertNEROnnx::new(path)
                                .map(|m| Box::new(m) as Box<dyn anno::Model>)
                                .map_err(|e| format!("DeBERTa-v3 failed to load from {path}: {e}"));
                        }
                    }
                    Err("DeBERTa-v3 requires ONNX export.\n\
                        Export: uv run scripts/export_deberta_ner_to_onnx.py\n\
                        Or set: DEBERTA_MODEL_PATH=/path/to/model\n\n\
                        Ready alternatives: --model bert-onnx, --model gliner".to_string())
                }
                #[cfg(feature = "onnx")]
                Self::Biomedical => {
                    // Env override, falling back to the per-user cache location.
                    let model_path = std::env::var("BIOMEDICAL_MODEL_PATH")
                        .unwrap_or_else(|_| {
                            dirs::home_dir()
                                .map(|h| h.join(".cache/anno/models/biomedical-ner").to_string_lossy().into_owned())
                                .unwrap_or_default()
                        });
                    if !model_path.is_empty() && std::path::Path::new(&model_path).join("model.onnx").exists() {
                        anno::BertNEROnnx::new(&model_path)
                            .map(|m| Box::new(m) as Box<dyn anno::Model>)
                            .map_err(|e| format!("Biomedical NER failed: {e}"))
                    } else {
                        Err("Biomedical NER requires ONNX export.\n\
                            Export: uv run scripts/export_biomedical_ner_to_onnx.py\n\
                            Or set: BIOMEDICAL_MODEL_PATH=/path/to/model\n\n\
                            Alternative: --model gliner (zero-shot, can detect biomedical entities)".to_string())
                    }
                }
                #[cfg(feature = "onnx")]
                Self::GlinerPii => anno::GLiNEROnnx::new(anno::models::GLINER_PII)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("GLiNER PII failed: {e}\n Tip: Use --model gliner for general NER.")),
                #[cfg(feature = "onnx")]
                Self::GlinerRelex => anno::GLiNEROnnx::new(anno::models::GLINER_RELEX)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!("GLiNER-RelEx failed: {e}\n Tip: Use --model gliner_multitask for multi-task NER+RE.")),
                // Rejected by the guard at the top of this function.
                #[cfg(feature = "onnx")]
                Self::GlinerPoly => unreachable!("rejected above"),
                #[cfg(feature = "candle")]
                Self::GlinerCandle => anno::backends::gliner_candle::GLiNERCandle::from_pretrained(anno::DEFAULT_GLINER_CANDLE_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!(
                        "GLiNER-Candle model unavailable: {}\n\n\
                        GLiNER Candle is experimental and has compatibility issues with\n\
                        most GLiNER models due to non-standard weight formats.\n\n\
                        Recommended: Use --model gliner (ONNX version) instead.\n\
                        It works with all GLiNER models and provides better performance.",
                        e
                    )),
                #[cfg(feature = "candle")]
                Self::CandleNer => anno::backends::candle::CandleNER::from_pretrained(anno::DEFAULT_CANDLE_MODEL)
                    .map(|m| Box::new(m) as Box<dyn anno::Model>)
                    .map_err(|e| format!(
                        "CandleNER model unavailable: {}\n\n\
                        The model may lack tokenizer.json or safetensors files.\n\n\
                        Alternatives:\n\
                        - Use --model bert-onnx (ONNX version, more compatible)\n\
                        - Use --model heuristic for pattern-based extraction",
                        e
                    )),
            }
        }
    }
    /// Instantiate a relation-extraction model for this backend, if it
    /// supports relation extraction.
    ///
    /// Returns `None` for backends without relation support, otherwise
    /// `Some(Ok(model))` or `Some(Err(message))`.
    pub fn try_create_relation_model(
        self,
    ) -> Option<Result<Box<dyn anno::RelationExtractor>, String>> {
        match self {
            Self::Tplinker => Some(
                anno::backends::tplinker::TPLinker::new()
                    .map(|m| Box::new(m) as Box<dyn anno::RelationExtractor>)
                    .map_err(|e| format!("Failed to create TPLinker: {}", e)),
            ),
            #[cfg(feature = "onnx")]
            Self::GlinerMultitask => Some(
                anno::backends::gliner_multitask::GLiNERMultitaskOnnx::from_pretrained(
                    anno::DEFAULT_GLINER_MULTITASK_MODEL,
                )
                .map(|m| Box::new(m) as Box<dyn anno::RelationExtractor>)
                .map_err(|e| {
                    format!(
                        "Failed to load GLiNER multi-task for relation extraction: {}\n \
                        Tip: Use 'anno models info gliner_multitask' to check model status.",
                        e
                    )
                }),
            ),
            _ => None,
        }
    }
    /// Canonical display name of the backend, used in user-facing messages.
    ///
    /// NOTE(review): these names are not always the same as the factory names
    /// in `create_model` (e.g. `minimal` vs `heuristic`); that appears
    /// intentional, since this is the CLI-facing spelling.
    pub fn name(self) -> &'static str {
        match self {
            Self::Pattern => "pattern",
            Self::Heuristic => "heuristic",
            Self::Minimal => "minimal",
            Self::Auto => "auto",
            Self::Stacked => "stacked",
            Self::Crf => "crf",
            Self::Hmm => "hmm",
            Self::Ensemble => "ensemble",
            Self::HeuristicCrf => "heuristic-crf",
            Self::Tplinker => "tplinker",
            Self::UniversalNer => "universal-ner",
            #[cfg(feature = "onnx")]
            Self::Gliner => "gliner",
            #[cfg(feature = "onnx")]
            Self::GlinerMultitask => "gliner_multitask",
            #[cfg(feature = "onnx")]
            Self::Nuner => "nuner",
            #[cfg(feature = "onnx")]
            Self::W2ner => "w2ner",
            #[cfg(feature = "onnx")]
            Self::BertOnnx => "bert-onnx",
            #[cfg(feature = "onnx")]
            Self::DebertaV3 => "deberta-v3",
            #[cfg(feature = "onnx")]
            Self::Biomedical => "biomedical",
            #[cfg(feature = "onnx")]
            Self::GlinerPii => "gliner-pii",
            #[cfg(feature = "onnx")]
            Self::GlinerRelex => "gliner-relex",
            #[cfg(feature = "onnx")]
            Self::GlinerPoly => "gliner-poly",
            #[cfg(feature = "candle")]
            Self::GlinerCandle => "gliner-candle",
            #[cfg(feature = "candle")]
            Self::CandleNer => "candle-ner",
        }
    }
}
// Output rendering formats for results. `Human` (the default) is the pretty
// terminal view; the others are machine-readable or alternate visualizations.
// `//` comments only: `///` docs on ValueEnum variants become help text.
#[derive(Debug, Clone, Copy, Default, ValueEnum)]
pub enum OutputFormat {
    #[default]
    Human,
    Json,
    Jsonl,
    Tsv,
    Inline,
    Grounded,
    Html,
    Tree,
    Summary,
}
// Evaluation task selector (presumably for `anno eval` — see `EvalArgs`):
// named-entity recognition, coreference resolution, or relation extraction.
#[derive(Debug, Clone, Copy, Default, ValueEnum)]
pub enum EvalTask {
    #[default]
    Ner,
    Coref,
    Relation,
}
#[cfg(test)]
mod tests {
    use super::*;

    // Table-driven spot check of representative backend display names.
    #[test]
    fn test_model_backend_names() {
        let expectations = [
            (ModelBackend::Pattern, "pattern"),
            (ModelBackend::Heuristic, "heuristic"),
            (ModelBackend::Stacked, "stacked"),
        ];
        for (backend, expected) in expectations {
            assert_eq!(backend.name(), expected);
        }
    }

    #[test]
    fn test_model_backend_default_is_stacked() {
        let default_backend = ModelBackend::default();
        assert!(matches!(default_backend, ModelBackend::Stacked));
    }

    // When ONNX is compiled in and the default stack has three layers, one of
    // them should be an ML-based backend (BERT or GLiNER).
    #[cfg(feature = "onnx")]
    #[test]
    fn test_stacked_default_has_ml_backend_when_available() {
        use anno::StackedNER;
        let stats = StackedNER::default().stats();
        if stats.layer_count != 3 {
            return;
        }
        let has_ml = stats
            .layer_names
            .iter()
            .map(|name| name.to_lowercase())
            .any(|n| n.contains("bert") || n.contains("gliner"));
        assert!(
            has_ml,
            "Default stacked with 3 layers should include ML backend. Layers: {:?}",
            stats.layer_names
        );
    }

    #[test]
    fn test_output_format_default_is_human() {
        let default_format = OutputFormat::default();
        assert!(matches!(default_format, OutputFormat::Human));
    }

    #[cfg(feature = "onnx")]
    #[test]
    fn test_zero_shot_models_exist() {
        assert_eq!(ModelBackend::Gliner.name(), "gliner");
        assert_eq!(ModelBackend::GlinerMultitask.name(), "gliner_multitask");
    }
}