Skip to main content

anno/backends/
albert.rs

1//! ALBERT NER Backend
2//!
3//! ALBERT (A Lite BERT) is an efficient, smaller model that achieves competitive
4//! performance on NER tasks, especially in domain-specific scenarios.
5//!
6//! # Architecture
7//!
8//! ALBERT improves efficiency over BERT with:
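//! - **Factorized embedding parameterization**: Splits the large vocabulary
//!   embedding matrix into two smaller matrices, decoupling embedding size
//!   from hidden size
//! - **Cross-layer parameter sharing**: Reuses the same weights across
//!   transformer layers, reducing model size significantly
//! - **Smaller model size**: ~11M parameters vs ~110M for BERT-base
//! - **Domain-specific performance**: Reported to work well in biomedical and
//!   other specialized domains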
13//!
14//! # Research
15//!
16//! Treat ALBERT as a size/latency trade-off option; any quality claims should be
17//! established via the `anno` eval harness for the specific dataset/task mix.
18//!
19//! # Usage
20//!
21//! ```rust,ignore
22//! use anno::backends::albert::ALBERTNER;
23//!
24//! let model = ALBERTNER::new("albert-base-v2")?;
25//! let entities = model.extract_entities("Steve Jobs founded Apple.", None)?;
26//! ```
27
28use crate::{Entity, EntityType, Model, Result};
29
30#[cfg(feature = "onnx")]
31use crate::backends::onnx::BertNEROnnx;
32
/// Named-entity recognizer for ALBERT checkpoints, run through ONNX Runtime.
///
/// This is a thin wrapper: in `onnx` builds it owns a `BertNEROnnx`, because
/// ALBERT models use the same ONNX interface as BERT.
pub struct ALBERTNER {
    /// Wrapped BERT ONNX backend; the field only exists in `onnx` builds.
    #[cfg(feature = "onnx")]
    inner: BertNEROnnx,
    /// Model identifier supplied at construction (e.g., "albert-base-v2").
    // Nothing reads this yet; it is kept for future logging/debugging,
    // which is why the dead-code lint is silenced here.
    #[allow(dead_code)]
    model_name: String,
}
44
45impl ALBERTNER {
46    /// Create a new ALBERT NER model.
47    ///
48    /// # Arguments
49    /// * `model_name` - HuggingFace model ID (e.g., "albert-base-v2")
50    pub fn new(model_name: &str) -> Result<Self> {
51        #[cfg(feature = "onnx")]
52        {
53            // ALBERT uses same ONNX interface as BERT
54            let inner = BertNEROnnx::new(model_name)?;
55            Ok(Self {
56                inner,
57                model_name: model_name.to_string(),
58            })
59        }
60        #[cfg(not(feature = "onnx"))]
61        {
62            Err(crate::Error::FeatureNotAvailable(
63                "ALBERT NER requires 'onnx' feature".to_string(),
64            ))
65        }
66    }
67}
68
69impl Model for ALBERTNER {
70    fn extract_entities(&self, text: &str, language: Option<&str>) -> Result<Vec<Entity>> {
71        #[cfg(feature = "onnx")]
72        {
73            self.inner.extract_entities(text, language)
74        }
75        #[cfg(not(feature = "onnx"))]
76        {
77            Err(crate::Error::FeatureNotAvailable(
78                "ALBERT NER requires 'onnx' feature".to_string(),
79            ))
80        }
81    }
82
83    fn supported_types(&self) -> Vec<EntityType> {
84        vec![
85            EntityType::Person,
86            EntityType::Organization,
87            EntityType::Location,
88        ]
89    }
90
91    fn is_available(&self) -> bool {
92        #[cfg(feature = "onnx")]
93        {
94            self.inner.is_available()
95        }
96        #[cfg(not(feature = "onnx"))]
97        {
98            false
99        }
100    }
101
102    fn name(&self) -> &'static str {
103        "albert"
104    }
105
106    fn description(&self) -> &'static str {
107        "ALBERT NER - efficient, small model (11MB) with competitive performance"
108    }
109
110    fn capabilities(&self) -> crate::ModelCapabilities {
111        crate::ModelCapabilities {
112            batch_capable: true,
113            streaming_capable: true,
114            ..Default::default()
115        }
116    }
117}
118
119impl crate::NamedEntityCapable for ALBERTNER {}
120
121impl crate::BatchCapable for ALBERTNER {
122    fn extract_entities_batch(
123        &self,
124        texts: &[&str],
125        language: Option<&str>,
126    ) -> Result<Vec<Vec<Entity>>> {
127        #[cfg(feature = "onnx")]
128        {
129            self.inner.extract_entities_batch(texts, language)
130        }
131        #[cfg(not(feature = "onnx"))]
132        {
133            Err(crate::Error::FeatureNotAvailable(
134                "ALBERT NER requires 'onnx' feature".to_string(),
135            ))
136        }
137    }
138}
139
140impl crate::StreamingCapable for ALBERTNER {
141    fn extract_entities_streaming(&self, chunk: &str, offset: usize) -> Result<Vec<Entity>> {
142        #[cfg(feature = "onnx")]
143        {
144            self.inner.extract_entities_streaming(chunk, offset)
145        }
146        #[cfg(not(feature = "onnx"))]
147        {
148            Err(crate::Error::FeatureNotAvailable(
149                "ALBERT NER requires 'onnx' feature".to_string(),
150            ))
151        }
152    }
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// When the model can be constructed, it reports the static backend name.
    #[test]
    fn test_albert_name() {
        if let Ok(model) = ALBERTNER::new("albert-base-v2") {
            assert_eq!(model.name(), "albert");
        }
        // If model creation fails (e.g., feature not enabled), this test has
        // nothing to check; the feature-off contract is pinned separately below.
    }

    /// Without the `onnx` feature, construction must fail deterministically
    /// with `FeatureNotAvailable` rather than passing vacuously.
    #[cfg(not(feature = "onnx"))]
    #[test]
    fn test_new_fails_without_onnx() {
        assert!(matches!(
            ALBERTNER::new("albert-base-v2"),
            Err(crate::Error::FeatureNotAvailable(_))
        ));
    }
}