use anno::{Model, Result};
/// Factory for constructing NER model backends (and, via the free functions
/// below, coreference resolvers) from case-insensitive string names.
pub struct BackendFactory;
impl BackendFactory {
pub fn create(backend_name: &str) -> Result<Box<dyn Model>> {
match backend_name.to_lowercase().as_str() {
"pattern" | "patternner" | "regex" | "regexner" => Ok(Box::new(anno::RegexNER::new())),
"heuristic" | "heuristicner" => Ok(Box::new(anno::HeuristicNER::new())),
"stacked" | "stackedner" => Ok(Box::new(anno::StackedNER::default())),
"crf" | "crfner" => Ok(Box::new(anno::backends::crf::CrfNER::new())),
"hmm" | "hmmner" => Ok(Box::new(anno::backends::hmm::HmmNER::new())),
"ensemble" | "ensemblener" => {
use anno::backends::ensemble::EnsembleNER;
Ok(Box::new(EnsembleNER::default()) as Box<dyn Model>)
}
"heuristic_crf" | "heuristic-crf" | "heuristiccrfner" => {
use anno::backends::heuristic_crf::HeuristicCrfNER;
Ok(Box::new(HeuristicCrfNER::new()) as Box<dyn Model>)
}
#[cfg(feature = "onnx")]
"bert_onnx" | "bertneronnx" => {
use anno::backends::onnx::BertNEROnnx;
use crate::DEFAULT_BERT_ONNX_MODEL;
BertNEROnnx::new(DEFAULT_BERT_ONNX_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
anno::Error::FeatureNotAvailable(format!(
"Failed to create BertNEROnnx: {}",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"bert_onnx" | "bertneronnx" => Err(anno::Error::FeatureNotAvailable(
"BertNEROnnx requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"gliner" => {
use anno::backends::gliner_onnx::GLiNEROnnx;
use crate::DEFAULT_GLINER_MODEL;
GLiNEROnnx::new(DEFAULT_GLINER_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
anno::Error::FeatureNotAvailable(format!(
"Failed to create GLiNER (onnx): {}",
e
))
})
}
#[cfg(all(not(feature = "onnx"), feature = "candle"))]
"gliner" => {
use anno::backends::gliner_candle::GLiNERCandle;
use crate::DEFAULT_GLINER_CANDLE_MODEL;
GLiNERCandle::from_pretrained(DEFAULT_GLINER_CANDLE_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
anno::Error::FeatureNotAvailable(format!(
"Failed to create GLiNER (candle): {}",
e
))
})
}
#[cfg(all(not(feature = "onnx"), not(feature = "candle")))]
"gliner" => Err(crate::Error::FeatureNotAvailable(
"GLiNER requires 'onnx' (preferred) or 'candle' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"gliner_onnx" | "glineronnx" => {
use crate::backends::gliner_onnx::GLiNEROnnx;
use crate::DEFAULT_GLINER_MODEL;
GLiNEROnnx::new(DEFAULT_GLINER_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"Failed to create GLiNEROnnx: {}",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"gliner_onnx" | "glineronnx" => Err(crate::Error::FeatureNotAvailable(
"GLiNEROnnx requires 'onnx' feature".to_string(),
)),
"b2ner" => Err(crate::Error::FeatureNotAvailable(
"B2NER only has LLM-scale models (7B/20B) on HuggingFace. \
Encoder-scale ONNX weights pending release."
.to_string(),
)),
#[cfg(feature = "onnx")]
"gliner_pii" | "pii_ml" => {
use crate::backends::gliner_onnx::GLiNEROnnx;
GLiNEROnnx::new(anno::models::GLINER_PII)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"GLiNER PII Edge model unavailable: {}",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"gliner_pii" | "pii_ml" => Err(crate::Error::FeatureNotAvailable(
"GLiNER PII requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"gliner_relex" | "relex" => {
use crate::backends::gliner_onnx::GLiNEROnnx;
GLiNEROnnx::new(anno::models::GLINER_RELEX)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"GLiNER-RelEx model unavailable: {}\n\n\
Export: uv run scripts/export_glirel_onnx.py",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"gliner_relex" | "relex" => Err(crate::Error::FeatureNotAvailable(
"GLiNER-RelEx requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"nuner" | "nunerzero" => {
use crate::backends::nuner::NuNER;
use crate::DEFAULT_NUNER_MODEL;
NuNER::from_pretrained(DEFAULT_NUNER_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!("Failed to create NuNER: {}", e))
})
}
#[cfg(not(feature = "onnx"))]
"nuner" | "nunerzero" => Err(crate::Error::FeatureNotAvailable(
"NuNER requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"nuner_4k" | "nunerzero4k" => {
use crate::backends::nuner::NuNER;
NuNER::from_pretrained("numind/NuNER_Zero-4k")
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"Failed to create NuNER 4k: {}",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"nuner_4k" | "nunerzero4k" => Err(crate::Error::FeatureNotAvailable(
"NuNER 4k requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"w2ner" => {
use crate::backends::w2ner::W2NER;
use crate::DEFAULT_W2NER_MODEL;
let model_path = std::env::var("W2NER_MODEL_PATH")
.unwrap_or_else(|_| DEFAULT_W2NER_MODEL.to_string());
W2NER::from_pretrained(&model_path)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"W2NER model unavailable: {}\n\n\
Options:\n\
1. Set W2NER_MODEL_PATH to a local model directory\n\
2. Export your own: uv run scripts/export_w2ner_to_onnx.py\n\
3. For HF models, set HF_TOKEN and request access",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"w2ner" => Err(crate::Error::FeatureNotAvailable(
"W2NER requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"gliner_multitask" | "gliner_multitask_onnx" => {
use crate::backends::gliner_multitask::GLiNERMultitaskOnnx;
use crate::DEFAULT_GLINER_MULTITASK_MODEL;
GLiNERMultitaskOnnx::from_pretrained(DEFAULT_GLINER_MULTITASK_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"Failed to create GLiNER multi-task (ONNX): {}",
e
))
})
}
#[cfg(not(feature = "onnx"))]
"gliner_multitask" | "gliner_multitask_onnx" => Err(crate::Error::FeatureNotAvailable(
"GLiNER multi-task (ONNX) requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "candle")]
"candle_ner" | "candlener" => {
use crate::backends::candle::CandleNER;
use crate::DEFAULT_CANDLE_MODEL;
CandleNER::from_pretrained(DEFAULT_CANDLE_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"CandleNER model unavailable: {}",
e
))
})
}
#[cfg(not(feature = "candle"))]
"candle_ner" | "candlener" => Err(crate::Error::FeatureNotAvailable(
"CandleNER requires 'candle' feature".to_string(),
)),
#[cfg(feature = "candle")]
"gliner_candle" | "glinercandle" => {
use crate::backends::gliner_candle::GLiNERCandle;
use crate::DEFAULT_GLINER_CANDLE_MODEL;
GLiNERCandle::from_pretrained(DEFAULT_GLINER_CANDLE_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"GLiNERCandle model unavailable: {}",
e
))
})
}
#[cfg(not(feature = "candle"))]
"gliner_candle" | "glinercandle" => Err(crate::Error::FeatureNotAvailable(
"GLiNERCandle requires 'candle' feature".to_string(),
)),
#[cfg(all(feature = "candle", feature = "onnx"))]
"gliner_multitask_candle" => {
use crate::backends::gliner_multitask::GLiNERMultitaskCandle;
use crate::DEFAULT_GLINER_MULTITASK_MODEL;
GLiNERMultitaskCandle::from_pretrained(DEFAULT_GLINER_MULTITASK_MODEL)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::FeatureNotAvailable(format!(
"Failed to create GLiNER multi-task (Candle): {}",
e
))
})
}
#[cfg(not(all(feature = "candle", feature = "onnx")))]
"gliner_multitask_candle" => Err(crate::Error::FeatureNotAvailable(
"GLiNER multi-task (Candle) requires both 'candle' and 'onnx' features".to_string(),
)),
"tplinker" | "tplink" => {
use anno::backends::tplinker::TPLinker;
Ok(Box::new(TPLinker::new()?) as Box<dyn Model>)
}
#[cfg(feature = "onnx")]
"gliner_poly" | "gliner-poly" | "poly_gliner" => {
use anno::backends::gliner_poly::GLiNERPoly;
use anno::DEFAULT_GLINER_POLY_MODEL;
GLiNERPoly::new(DEFAULT_GLINER_POLY_MODEL)
.map(|m| Box::new(m) as Box<dyn anno::Model>)
.map_err(|e| crate::Error::model_init(e.to_string()))
}
#[cfg(not(feature = "onnx"))]
"gliner_poly" | "gliner-poly" | "poly_gliner" => Err(crate::Error::FeatureNotAvailable(
"GLiNERPoly requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"deberta_v3" | "deberta-v3" | "deberta" => {
use anno::backends::onnx::BertNEROnnx;
let Ok(model_path) = std::env::var("DEBERTA_MODEL_PATH") else {
return Err(crate::Error::FeatureNotAvailable(
"DeBERTa-v3 backend requires a local ONNX export. Set DEBERTA_MODEL_PATH (e.g. after running `uv run scripts/export_deberta_ner_to_onnx.py`)."
.to_string(),
));
};
BertNEROnnx::new(&model_path)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::Retrieval(format!(
"DeBERTa-v3 model unavailable: {e}\n\n\
Options:\n\
1. Export your own: uv run scripts/export_deberta_ner_to_onnx.py\n\
2. Set DEBERTA_MODEL_PATH to a local model directory\n\
3. Use --model bert-onnx or --model candle-ner instead",
))
})
}
#[cfg(not(feature = "onnx"))]
"deberta_v3" | "deberta-v3" | "deberta" => Err(crate::Error::FeatureNotAvailable(
"DeBERTa-v3 NER requires 'onnx' feature".to_string(),
)),
#[cfg(feature = "onnx")]
"albert" | "albert_ner" => {
use anno::backends::onnx::BertNEROnnx;
let Ok(model_path) = std::env::var("ALBERT_MODEL_PATH") else {
return Err(crate::Error::FeatureNotAvailable(
"ALBERT backend requires a local ONNX export. Set ALBERT_MODEL_PATH to a local model directory containing ONNX weights."
.to_string(),
));
};
BertNEROnnx::new(&model_path)
.map(|m| Box::new(m) as Box<dyn Model>)
.map_err(|e| {
crate::Error::Retrieval(format!(
"ALBERT model unavailable: {e}\n\n\
Options:\n\
1. Export your own ONNX model\n\
2. Set ALBERT_MODEL_PATH to a local model directory\n\
3. Use --model bert-onnx or --model candle-ner instead",
))
})
}
#[cfg(not(feature = "onnx"))]
"albert" | "albert_ner" => Err(crate::Error::FeatureNotAvailable(
"ALBERT NER requires 'onnx' feature".to_string(),
)),
"universal_ner" | "universal-ner" | "universalner" => {
use anno::backends::universal_ner::UniversalNER;
let m = UniversalNER::new()?;
if !m.is_available() {
return Err(crate::Error::FeatureNotAvailable(
"UniversalNER requires the `llm` feature and a non-empty API key. Set one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or UNIVERSAL_NER_API_KEY."
.to_string(),
));
}
Ok(Box::new(m) as Box<dyn Model>)
}
_ => Err(crate::Error::InvalidInput(format!(
"Unknown backend: '{}'. Available: pattern, heuristic, stacked, crf, hmm, ensemble, heuristic_crf, tplinker{}",
backend_name,
if cfg!(feature = "onnx") {
", bert_onnx, gliner_onnx, nuner, w2ner, gliner_multitask"
} else {
""
}
))),
}
}
/// Lists backend names that are usable in the current build, in a stable
/// presentation order: core backends first, then LLM/ONNX/Candle extras
/// whose feature flags (and, where relevant, env vars) are satisfied.
#[must_use]
pub fn available_backends() -> Vec<&'static str> {
    // Feature-independent backends, always present.
    #[allow(unused_mut)]
    let mut backends: Vec<&'static str> = [
        "pattern",
        "heuristic",
        "stacked",
        "crf",
        "hmm",
        "ensemble",
        "heuristic_crf",
        "tplinker",
    ]
    .to_vec();
    if cfg!(feature = "llm") {
        anno::env::load_dotenv();
        let has_key =
            anno::env::has_llm_api_key() || std::env::var("UNIVERSAL_NER_API_KEY").is_ok();
        if has_key {
            backends.push("universal_ner");
        }
    }
    #[cfg(feature = "onnx")]
    {
        backends.extend([
            "bert_onnx",
            "gliner",
            "gliner_onnx",
            "nuner",
            "nuner_4k",
            "b2ner",
            "w2ner",
            "gliner_multitask",
            "gliner_pii",
            "gliner_relex",
            "gliner_poly",
        ]);
        // These two need a local export pointed at by an env var.
        if std::env::var("DEBERTA_MODEL_PATH").is_ok() {
            backends.push("deberta_v3");
        }
        if std::env::var("ALBERT_MODEL_PATH").is_ok() {
            backends.push("albert");
        }
    }
    #[cfg(feature = "candle")]
    {
        backends.extend(["candle_ner", "gliner_candle"]);
        // "gliner" is advertised by the onnx block when both are on.
        if !cfg!(feature = "onnx") {
            backends.push("gliner");
        }
    }
    #[cfg(all(feature = "candle", feature = "onnx"))]
    {
        backends.push("gliner_multitask_candle");
    }
    backends
}
/// Lists the coreference-resolver names accepted by `create_coref_resolver`.
#[must_use]
pub fn available_coref_resolvers() -> Vec<&'static str> {
    ["coref_resolver", "mention_ranking"].to_vec()
}
/// Returns `true` if `backend_name` (case-insensitive) is one of the names
/// reported by [`Self::available_backends`]. Note: only canonical names are
/// checked, not the extra aliases `create` accepts.
#[must_use]
pub fn is_available(backend_name: &str) -> bool {
    let needle = backend_name.to_lowercase();
    Self::available_backends().iter().any(|&b| b == needle)
}
}
/// Builds a coreference resolver from a case-insensitive name or alias.
///
/// # Errors
///
/// Returns `Error::InvalidInput` when the name is not recognized.
pub fn create_coref_resolver(
    name: &str,
) -> Result<Box<dyn crate::eval::coref_resolver::CoreferenceResolver>> {
    let key = name.to_lowercase();
    match key.as_str() {
        "coref_resolver" | "simplecorefresolver" | "simple" => {
            use crate::eval::coref_resolver::{CorefConfig, SimpleCorefResolver};
            let resolver = SimpleCorefResolver::new(CorefConfig::default());
            Ok(Box::new(resolver))
        }
        "mention_ranking" | "mention-ranking" | "mentionranking" => {
            use anno::backends::coref::mention_ranking::MentionRankingCoref;
            Ok(Box::new(MentionRankingCoref::new()))
        }
        _ => Err(crate::Error::InvalidInput(format!(
            "Unknown coreference resolver: '{}'. Available: coref_resolver, mention_ranking",
            name
        ))),
    }
}
/// Builds a [`anno::CorefBackend`] from a case-insensitive name or alias.
///
/// `fcoref` is only available with the `onnx` feature; without it the arm
/// returns `Error::FeatureNotAvailable` instead.
///
/// # Errors
///
/// `Error::InvalidInput` for an unrecognized name; model-loading failures
/// from `FCoref` are propagated via `?`.
pub fn create_coref_backend(name: &str) -> Result<Box<dyn anno::CorefBackend>> {
    match name.to_lowercase().as_str() {
        "mention_ranking" | "mention-ranking" | "mentionranking" => {
            use anno::backends::coref::mention_ranking::MentionRankingCoref;
            let backend = MentionRankingCoref::new();
            Ok(Box::new(backend))
        }
        #[cfg(feature = "onnx")]
        "fcoref" | "f-coref" | "fastcoref" => {
            use anno::backends::coref::fcoref::FCoref;
            // Honor a local override path before falling back to the hub model.
            let fcoref = match std::env::var("FCOREF_MODEL_PATH") {
                Ok(path) => FCoref::from_path(&path)?,
                Err(_) => FCoref::from_pretrained("biu-nlp/f-coref")?,
            };
            Ok(Box::new(fcoref))
        }
        #[cfg(not(feature = "onnx"))]
        "fcoref" | "f-coref" | "fastcoref" => Err(crate::Error::FeatureNotAvailable(
            "FCoref requires 'onnx' feature. Export: uv run scripts/export_fcoref.py".to_string(),
        )),
        _ => Err(crate::Error::InvalidInput(format!(
            "Unknown coref backend: '{}'. Available: mention_ranking, fcoref",
            name
        ))),
    }
}
/// Lists the coref backend names accepted by `create_coref_backend`
/// in the current build ("fcoref" only with the `onnx` feature).
pub fn available_coref_backends() -> Vec<&'static str> {
    #[allow(unused_mut)]
    let mut names: Vec<&'static str> = Vec::from(["mention_ranking"]);
    #[cfg(feature = "onnx")]
    {
        names.push("fcoref");
    }
    names
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each alias should construct a model whose reported name starts with
    /// the expected prefix (e.g. "pattern" -> a "regex"-named model).
    #[test]
    fn known_backends_construct_with_expected_name_prefix() {
        let cases = [
            ("pattern", "regex"),
            ("heuristic", "heuristic"),
            ("stacked", "stacked"),
        ];
        for (alias, expected_prefix) in cases {
            let model = BackendFactory::create(alias).unwrap();
            let name = model.name();
            assert!(
                name.starts_with(expected_prefix),
                "for alias {alias:?}: expected prefix {expected_prefix:?}, got {name:?}"
            );
        }
    }

    /// An unrecognized backend name must yield an Err, not a panic.
    #[test]
    fn test_unknown_backend() {
        let backend = BackendFactory::create("nonexistent");
        assert!(backend.is_err());
    }

    /// The feature-independent backends are always advertised.
    #[test]
    fn test_available_backends() {
        let backends = BackendFactory::available_backends();
        assert!(backends.contains(&"pattern"));
        assert!(backends.contains(&"heuristic"));
        assert!(backends.contains(&"stacked"));
    }
}
#[cfg(test)]
mod additional_tests {
    use super::*;

    /// The "pattern" alias must resolve to the pure regex backend only:
    /// its output should never contain Person/Location entities, which
    /// would indicate the heuristic backend leaked in.
    #[test]
    fn test_backend_factory_pattern_returns_regex_only() {
        let model = BackendFactory::create("pattern").unwrap();
        println!("Model name: {}", model.name());
        assert_eq!(model.name(), "regex", "pattern should return RegexNER");
        let entities = model
            .extract_entities("John Smith went to Paris", None)
            .unwrap();
        println!("Entities: {:?}", entities);
        // Person/Location come from heuristic extraction, not regex patterns.
        for e in &entities {
            assert!(
                !matches!(e.entity_type, crate::EntityType::Person),
                "Unexpected Person entity: {:?}",
                e
            );
            assert!(
                !matches!(e.entity_type, crate::EntityType::Location),
                "Unexpected Location entity: {:?}",
                e
            );
        }
    }
}