Skip to main content

anno/backends/
albert.rs

1//! ALBERT NER Backend
2//!
3//! ALBERT (A Lite BERT) is an efficient, smaller model that achieves competitive
4//! performance on NER tasks, especially in domain-specific scenarios.
5//!
6//! # Architecture
7//!
8//! ALBERT improves efficiency over BERT with:
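//! - **Factorized embedding parameterization**: Splits the large vocabulary
//!   embedding matrix into two smaller matrices, decoupling embedding size from hidden size
//! - **Cross-layer parameter sharing**: Reuses the same weights in every transformer
//!   layer, which reduces model size significantly
//! - **Smaller model size**: roughly 12M parameters vs. 110M for BERT-base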
12//! - **Domain-specific performance**: Excellent for biomedical and specialized domains
13//!
14//! # Research
15//!
16//! Treat ALBERT as a size/latency trade-off option; any quality claims should be
17//! established via the `anno` eval harness for the specific dataset/task mix.
18//!
19//! # Usage
20//!
21//! ```rust,ignore
//! use anno::backends::albert::ALBERTNER;
//! use anno::Model; // brings `extract_entities` into scope
23//!
24//! let model = ALBERTNER::new("albert-base-v2")?;
25//! let entities = model.extract_entities("Steve Jobs founded Apple.", None)?;
26//! ```
27
28use crate::{Entity, EntityType, Model, Result};
29
30#[cfg(feature = "onnx")]
31use crate::backends::onnx::BertNEROnnx;
32
/// Named-entity recognizer for ALBERT checkpoints, executed through ONNX Runtime.
///
/// This is currently a thin wrapper around `BertNEROnnx`: ALBERT models use the
/// same ONNX interface as BERT, so the BERT backend does the actual work.
pub struct ALBERTNER {
    /// Wrapped BERT-style ONNX backend; the field only exists when the `onnx`
    /// feature is enabled.
    #[cfg(feature = "onnx")]
    inner: BertNEROnnx,
    /// Identifier the model was created from (e.g., "albert-base-v2").
    #[allow(dead_code)] // Not read yet; kept for future logging/debugging
    model_name: String,
}
44
45impl ALBERTNER {
46    /// Create a new ALBERT NER model.
47    ///
48    /// # Arguments
49    /// * `model_name` - HuggingFace model ID (e.g., "albert-base-v2")
50    pub fn new(model_name: &str) -> Result<Self> {
51        #[cfg(feature = "onnx")]
52        {
53            // ALBERT uses same ONNX interface as BERT
54            let inner = BertNEROnnx::new(model_name)?;
55            Ok(Self {
56                inner,
57                model_name: model_name.to_string(),
58            })
59        }
60        #[cfg(not(feature = "onnx"))]
61        {
62            Err(crate::Error::FeatureNotAvailable(
63                "ALBERT NER requires 'onnx' feature".to_string(),
64            ))
65        }
66    }
67}
68
69impl Model for ALBERTNER {
70    fn extract_entities(&self, text: &str, language: Option<&str>) -> Result<Vec<Entity>> {
71        #[cfg(feature = "onnx")]
72        {
73            self.inner.extract_entities(text, language)
74        }
75        #[cfg(not(feature = "onnx"))]
76        {
77            Err(crate::Error::FeatureNotAvailable(
78                "ALBERT NER requires 'onnx' feature".to_string(),
79            ))
80        }
81    }
82
83    fn supported_types(&self) -> Vec<EntityType> {
84        vec![
85            EntityType::Person,
86            EntityType::Organization,
87            EntityType::Location,
88        ]
89    }
90
91    fn is_available(&self) -> bool {
92        #[cfg(feature = "onnx")]
93        {
94            self.inner.is_available()
95        }
96        #[cfg(not(feature = "onnx"))]
97        {
98            false
99        }
100    }
101
102    fn name(&self) -> &'static str {
103        "albert"
104    }
105
106    fn description(&self) -> &'static str {
107        "ALBERT NER - efficient, small model (11MB) with competitive performance"
108    }
109}
110
111impl crate::BatchCapable for ALBERTNER {
112    fn extract_entities_batch(
113        &self,
114        texts: &[&str],
115        language: Option<&str>,
116    ) -> Result<Vec<Vec<Entity>>> {
117        #[cfg(feature = "onnx")]
118        {
119            self.inner.extract_entities_batch(texts, language)
120        }
121        #[cfg(not(feature = "onnx"))]
122        {
123            Err(crate::Error::FeatureNotAvailable(
124                "ALBERT NER requires 'onnx' feature".to_string(),
125            ))
126        }
127    }
128}
129
130impl crate::StreamingCapable for ALBERTNER {
131    fn extract_entities_streaming(&self, chunk: &str, offset: usize) -> Result<Vec<Entity>> {
132        #[cfg(feature = "onnx")]
133        {
134            self.inner.extract_entities_streaming(chunk, offset)
135        }
136        #[cfg(not(feature = "onnx"))]
137        {
138            Err(crate::Error::FeatureNotAvailable(
139                "ALBERT NER requires 'onnx' feature".to_string(),
140            ))
141        }
142    }
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// When a model can be created, it must report the stable backend name.
    #[test]
    fn test_albert_name() {
        // Creation can fail (e.g., the `onnx` feature is not enabled); in that
        // case there is nothing to check here and the test passes vacuously.
        if let Ok(model) = ALBERTNER::new("albert-base-v2") {
            assert_eq!(model.name(), "albert");
        }
    }

    /// Without the `onnx` feature, construction must fail with
    /// `FeatureNotAvailable`, so the fallback path has a real assertion.
    #[cfg(not(feature = "onnx"))]
    #[test]
    fn test_albert_requires_onnx_feature() {
        assert!(matches!(
            ALBERTNER::new("albert-base-v2"),
            Err(crate::Error::FeatureNotAvailable(_))
        ));
    }
}