// Source path: anno/backends/catalog.rs

//! NER Backend Catalog
//!
//! This module is informational only: it describes the set of backends that live
//! under `crate::backends` through the static [`BACKEND_CATALOG`] table and gives
//! a few “where to start” pointers. Nothing in this module constructs or runs a
//! backend.
//!
//! Keep in mind:
//! - Many backends are **feature-gated** (`onnx`, `candle`, etc.).
//! - Any “speed” or “quality” comparisons belong in the eval harness, not in
//!   rustdoc prose.
//!
//! Paper pointers (context only):
//! - GLiNER: arXiv:2311.08526
//! - UniversalNER: arXiv:2308.03279
//! - W2NER: arXiv:2112.10070
//! - TPLinker: `https://aclanthology.org/2020.coling-main.138/`
//!
//! Common configuration knobs you will see across GLiNER-like implementations:
//! - `threshold`: score cutoff for accepting a span
//! - `max_width`: maximum span width considered
//! - `max_length`: maximum input length per window/chunk
//! - `flat_ner`: whether to enforce non-overlapping entities
22
/// Backend implementation status.
///
/// The [`Display`](std::fmt::Display) form of each variant is a short lowercase
/// label (`stable`, `beta`, `wip`, `planned`, `research`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendStatus {
    /// Fully implemented and tested
    Stable,
    /// Implemented but may have rough edges
    Beta,
    /// Work in progress
    WIP,
    /// Planned for future implementation
    Planned,
    /// Research only, not planned
    Research,
}

impl std::fmt::Display for BackendStatus {
    /// Writes the lowercase status label.
    ///
    /// Width, fill, alignment and precision flags from the format spec are
    /// honored, so `format!("{:8}", status)` yields a padded column cell.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            BackendStatus::Stable => "stable",
            BackendStatus::Beta => "beta",
            BackendStatus::WIP => "wip",
            BackendStatus::Planned => "planned",
            BackendStatus::Research => "research",
        };
        // `write!(f, "...")` would bypass the formatter's padding options;
        // `pad` applies them the same way `str`'s own `Display` impl does.
        f.pad(label)
    }
}
49
50/// Information about a backend implementation.
51#[derive(Debug, Clone)]
52pub struct BackendInfo {
53    /// Backend name
54    pub name: &'static str,
55    /// Cargo feature required (if any)
56    pub feature: Option<&'static str>,
57    /// Implementation status
58    pub status: BackendStatus,
59    /// Whether it supports zero-shot NER
60    pub zero_shot: bool,
61    /// Whether it supports GPU acceleration
62    pub gpu_support: bool,
63    /// Brief description
64    pub description: &'static str,
65    /// Recommended model IDs
66    pub recommended_models: &'static [&'static str],
67}
68
/// Catalog of all available and potential backends.
///
/// Entries are kept in a fixed order: [`BackendInfo::by_name`] and the filter
/// helpers on [`BackendInfo`] walk this slice front to back, and
/// [`print_catalog`] prints one row per entry in the same order.
pub static BACKEND_CATALOG: &[BackendInfo] = &[
    // =========================================================================
    // Implemented Backends
    // (entries without a Cargo feature come first, followed by the `llm`- and
    // `onnx`-gated ones; statuses in this section range from Stable to WIP)
    // =========================================================================
    BackendInfo {
        name: "pattern",
        feature: None,
        status: BackendStatus::Stable,
        zero_shot: false,
        gpu_support: false,
        description: "Regex-based extraction for structured entities (dates, money, emails)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "heuristic",
        feature: None,
        status: BackendStatus::Stable,
        zero_shot: false,
        gpu_support: false,
        description: "Heuristic NER baseline (capitalization + context)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "stacked",
        feature: None,
        status: BackendStatus::Stable,
        zero_shot: false,
        gpu_support: false,
        description: "Stacked NER (pattern + heuristic; default no-ML baseline)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "ensemble",
        feature: None,
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: false,
        description: "Ensemble NER (weighted voting across backends)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "crf",
        feature: None,
        status: BackendStatus::Stable,
        zero_shot: false,
        gpu_support: false,
        description: "CRF sequence labeling baseline (optional trained weights)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "hmm",
        feature: None,
        status: BackendStatus::Stable,
        zero_shot: false,
        gpu_support: false,
        description: "HMM sequence labeling baseline (optional bundled params)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "bilstm_crf",
        feature: None,
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: false,
        description: "BiLSTM+CRF neural baseline (no transformer deps)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "tplinker",
        feature: None,
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: false,
        description: "Relation extraction baseline (heuristic today; TPLinker neural model TBD)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "universal_ner",
        feature: Some("llm"),
        status: BackendStatus::Beta,
        zero_shot: true,
        gpu_support: true,
        description: "UniversalNER (LLM-backed zero-shot; requires API key)",
        recommended_models: &[],
    },
    // NOTE(review): described as an alias for `gliner_onnx`, yet the two entries
    // carry different statuses (Stable here, Beta below) and different model
    // lists — confirm this is intended.
    BackendInfo {
        name: "gliner",
        feature: Some("onnx"),
        status: BackendStatus::Stable,
        zero_shot: true,
        gpu_support: true,
        description: "GLiNER zero-shot NER (alias for gliner_onnx in this repo)",
        recommended_models: &[
            "onnx-community/gliner_small-v2.1",
            "onnx-community/gliner_large-v2.1",
        ],
    },
    BackendInfo {
        name: "gliner_onnx",
        feature: Some("onnx"),
        status: BackendStatus::Beta,
        zero_shot: true,
        gpu_support: true,
        description: "GLiNER via manual ONNX implementation",
        recommended_models: &["onnx-community/gliner_small-v2.1"],
    },
    BackendInfo {
        name: "bert_onnx",
        feature: Some("onnx"),
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: true,
        description: "BERT NER via ONNX Runtime (PER/ORG/LOC/MISC)",
        recommended_models: &["protectai/bert-base-NER-onnx"],
    },
    BackendInfo {
        name: "gliner2",
        feature: Some("onnx"),
        status: BackendStatus::Beta,
        zero_shot: true,
        gpu_support: true,
        description: "GLiNER2 multi-task (NER + heuristic relations + structure)",
        recommended_models: &["onnx-community/gliner-multitask-large-v0.5"],
    },
    BackendInfo {
        name: "w2ner",
        feature: Some("onnx"),
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: true,
        description: "W2NER nested entity extraction (grid-based)",
        recommended_models: &["ljynlp/w2ner-bert-base"],
    },
    BackendInfo {
        name: "deberta_v3",
        feature: Some("onnx"),
        status: BackendStatus::WIP,
        zero_shot: false,
        gpu_support: true,
        description: "DeBERTa-v3 NER (requires local ONNX export via DEBERTA_MODEL_PATH)",
        recommended_models: &[],
    },
    BackendInfo {
        name: "albert",
        feature: Some("onnx"),
        status: BackendStatus::WIP,
        zero_shot: false,
        gpu_support: true,
        description: "ALBERT NER (requires local ONNX export via ALBERT_MODEL_PATH)",
        recommended_models: &[],
    },
    // =========================================================================
    // Implemented Backends (Beta)
    // NOTE(review): `nuner` in this section is marked Stable, so the "(Beta)"
    // heading does not hold for every entry below — confirm heading or status.
    // =========================================================================
    BackendInfo {
        name: "gliner_candle",
        feature: Some("candle"),
        status: BackendStatus::Beta,
        zero_shot: true,
        gpu_support: true,
        description: "GLiNER via Candle (pure Rust, Metal/CUDA)",
        recommended_models: &[
            // Default factory model (kept small to reduce friction).
            "NeuML/gliner-bert-tiny",
        ],
    },
    BackendInfo {
        name: "nuner",
        feature: Some("onnx"),
        status: BackendStatus::Stable,
        zero_shot: true,
        gpu_support: true,
        description: "NuNER Zero (token classifier, arbitrary-length entities)",
        recommended_models: &["numind/NuNER_Zero", "numind/NuNER_Zero_4k"],
    },
    BackendInfo {
        name: "candle_ner",
        feature: Some("candle"),
        status: BackendStatus::Beta,
        zero_shot: false,
        gpu_support: true,
        description: "BERT NER via Candle (pure Rust; Metal/CUDA)",
        recommended_models: &["dslim/bert-base-NER"],
    },
    // =========================================================================
    // Planned Backends
    // (every entry below has `BackendStatus::Planned`)
    // =========================================================================
    BackendInfo {
        name: "rust_bert",
        feature: Some("rust-bert"),
        status: BackendStatus::Planned,
        zero_shot: false,
        gpu_support: true,
        description: "rust-bert integration (requires libtorch)",
        recommended_models: &[
            "bert-base-NER",
            "dbmdz/bert-large-cased-finetuned-conll03-english",
        ],
    },
    BackendInfo {
        name: "gliner_poly",
        feature: Some("onnx"),
        status: BackendStatus::Planned,
        zero_shot: true,
        gpu_support: true,
        description: "GLiNER Poly-encoder scaffolding (not wired for inference yet)",
        recommended_models: &[],
    },
];
279
280impl BackendInfo {
281    /// Get backend by name.
282    #[must_use]
283    pub fn by_name(name: &str) -> Option<&'static BackendInfo> {
284        BACKEND_CATALOG.iter().find(|b| b.name == name)
285    }
286
287    /// Get all stable backends.
288    #[must_use]
289    pub fn stable() -> Vec<&'static BackendInfo> {
290        BACKEND_CATALOG
291            .iter()
292            .filter(|b| b.status == BackendStatus::Stable)
293            .collect()
294    }
295
296    /// Get all zero-shot capable backends.
297    #[must_use]
298    pub fn zero_shot() -> Vec<&'static BackendInfo> {
299        BACKEND_CATALOG.iter().filter(|b| b.zero_shot).collect()
300    }
301
302    /// Get all GPU-capable backends.
303    #[must_use]
304    pub fn with_gpu() -> Vec<&'static BackendInfo> {
305        BACKEND_CATALOG.iter().filter(|b| b.gpu_support).collect()
306    }
307}
308
309/// Print a summary of available backends.
310pub fn print_catalog() {
311    println!("NER Backend Catalog");
312    println!("{}", "=".repeat(80));
313    println!(
314        "{:15} {:10} {:8} {:5} {:5} Description",
315        "Name", "Feature", "Status", "0-shot", "GPU"
316    );
317    println!("{}", "-".repeat(80));
318
319    for backend in BACKEND_CATALOG {
320        let feature = backend.feature.unwrap_or("-");
321        let zero_shot = if backend.zero_shot { "yes" } else { "no" };
322        let gpu = if backend.gpu_support { "yes" } else { "no" };
323
324        println!(
325            "{:15} {:10} {:8} {:5} {:5} {}",
326            backend.name, feature, backend.status, zero_shot, gpu, backend.description
327        );
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Known names resolve; unknown names do not.
    #[test]
    fn test_backend_lookup() {
        assert!(BackendInfo::by_name("pattern").is_some());
        assert!(BackendInfo::by_name("gliner").is_some());
        assert!(BackendInfo::by_name("nonexistent").is_none());
    }

    /// Every catalog entry can be found through its own name.
    #[test]
    fn test_lookup_returns_matching_entry() {
        for backend in BACKEND_CATALOG {
            let found = BackendInfo::by_name(backend.name)
                .expect("every catalog entry must be reachable by its own name");
            assert_eq!(found.name, backend.name);
        }
    }

    /// `by_name` returns the first match, so a duplicated name would make the
    /// later entry unreachable.
    #[test]
    fn test_backend_names_are_unique() {
        let mut seen = HashSet::new();
        for backend in BACKEND_CATALOG {
            assert!(
                seen.insert(backend.name),
                "duplicate backend name in catalog: {}",
                backend.name
            );
        }
    }

    /// `stable()` returns only, and all of, the stable entries.
    #[test]
    fn test_stable_backends() {
        let stable = BackendInfo::stable();
        assert!(!stable.is_empty());
        assert!(stable.iter().all(|b| b.status == BackendStatus::Stable));

        let expected = BACKEND_CATALOG
            .iter()
            .filter(|b| b.status == BackendStatus::Stable)
            .count();
        assert_eq!(stable.len(), expected);
    }

    /// `zero_shot()` returns only, and all of, the zero-shot entries.
    #[test]
    fn test_zero_shot_backends() {
        let zero_shot = BackendInfo::zero_shot();
        assert!(!zero_shot.is_empty());
        assert!(zero_shot.iter().all(|b| b.zero_shot));

        let expected = BACKEND_CATALOG.iter().filter(|b| b.zero_shot).count();
        assert_eq!(zero_shot.len(), expected);
    }

    /// `with_gpu()` returns only, and all of, the GPU-capable entries.
    #[test]
    fn test_gpu_backends() {
        let gpu = BackendInfo::with_gpu();
        assert!(!gpu.is_empty());
        assert!(gpu.iter().all(|b| b.gpu_support));

        let expected = BACKEND_CATALOG.iter().filter(|b| b.gpu_support).count();
        assert_eq!(gpu.len(), expected);
    }

    /// The plain `Display` form of each status is its lowercase label.
    #[test]
    fn test_status_display_labels() {
        assert_eq!(BackendStatus::Stable.to_string(), "stable");
        assert_eq!(BackendStatus::Beta.to_string(), "beta");
        assert_eq!(BackendStatus::WIP.to_string(), "wip");
        assert_eq!(BackendStatus::Planned.to_string(), "planned");
        assert_eq!(BackendStatus::Research.to_string(), "research");
    }
}