#![cfg(feature = "gliner2-fastino")]
pub(crate) mod config;
pub(crate) mod decoder;
pub mod errors;
pub(crate) mod nms;
pub(crate) mod pipeline;
pub(crate) mod pipeline_iobinding;
pub(crate) mod processor;
pub mod schema;
pub(crate) mod sessions;
/// Selects how the inference pipeline is executed.
///
/// The chosen mode is handed to session loading and to the dispatch functions
/// in the `pipeline_iobinding` module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ExecutionMode {
/// The default mode.
#[default]
Standard,
/// Alternative execution path. Presumably ONNX Runtime IO binding, judging by
/// the `pipeline_iobinding` module name — TODO confirm.
IoBinding,
}
/// Construction-time options for [`GLiNER2Fastino`].
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build it
/// with a struct literal; start from [`Default::default`] and refine it with the
/// `with_*` builder methods.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct GLiNER2FastinoConfig {
/// ONNX session settings forwarded to session loading.
pub onnx: crate::backends::hf_loader::OnnxSessionConfig,
/// Execution mode used when loading sessions and when running the pipeline.
pub execution_mode: ExecutionMode,
}
impl Default for GLiNER2FastinoConfig {
    /// Default ONNX session settings combined with [`ExecutionMode::Standard`].
    fn default() -> Self {
        let onnx = crate::backends::hf_loader::OnnxSessionConfig::default();
        let execution_mode = ExecutionMode::Standard;
        Self {
            onnx,
            execution_mode,
        }
    }
}
impl GLiNER2FastinoConfig {
    /// Returns the config with its execution mode replaced by `mode`.
    #[must_use]
    pub fn with_execution_mode(self, mode: ExecutionMode) -> Self {
        Self {
            execution_mode: mode,
            ..self
        }
    }

    /// Returns the config with its ONNX session settings replaced by `onnx`.
    #[must_use]
    pub fn with_onnx(self, onnx: crate::backends::hf_loader::OnnxSessionConfig) -> Self {
        Self { onnx, ..self }
    }

    /// Returns the config with the three execution-provider flags overwritten.
    ///
    /// Only `use_cpu_provider`, `prefer_coreml` and `prefer_cuda` of the ONNX
    /// settings are touched; every other setting is kept as-is.
    #[must_use]
    pub fn with_onnx_provider_preferences(
        mut self,
        use_cpu_provider: bool,
        prefer_coreml: bool,
        prefer_cuda: bool,
    ) -> Self {
        let onnx = &mut self.onnx;
        onnx.use_cpu_provider = use_cpu_provider;
        onnx.prefer_coreml = prefer_coreml;
        onnx.prefer_cuda = prefer_cuda;
        self
    }
}
/// How entity labels are supplied to
/// [`GLiNER2Fastino::batch_extract_with_schema_mode`].
///
/// Both variants only hold shared references, so the value is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub enum BatchSchemaMode<'a> {
    /// One label set applied to every text of the batch.
    Shared(&'a [&'a str]),
    /// One label set per text; the outer slice must have exactly as many
    /// entries as there are texts, otherwise the batch call returns an error.
    PerSample(&'a [Vec<&'a str>]),
}
/// GLiNER2 (fastino) inference engine.
///
/// Bundles the tokenizer, schema transformer, model configuration and sessions
/// that the extraction and classification methods operate on.
pub struct GLiNER2Fastino {
/// Tokenizer loaded from `tokenizer.json`; the field is not read in this file.
#[allow(dead_code)] pub(crate) tokenizer: tokenizers::Tokenizer,
/// Special-token ids resolved from the tokenizer at load time; the field is not read in this file.
#[allow(dead_code)] pub(crate) special: processor::SpecialTokenIds,
/// Turns a text plus schema tasks into the record fed to the pipeline.
pub(crate) transformer: processor::SchemaTransformer,
/// Model configuration read from `config.json`, or the default when that file is absent.
pub(crate) config: config::FastinoConfig,
/// Sessions handed to the pipeline dispatch functions.
pub(crate) sessions: sessions::Sessions,
/// Directory name for local loads (or `"gliner2_fastino_local"` when the path has
/// no final component); overwritten with the requested id by `from_pretrained*`.
pub(crate) model_id: String,
/// Execution mode the sessions were loaded with; also passed to every pipeline dispatch.
pub(crate) execution_mode: ExecutionMode,
}
impl std::fmt::Debug for GLiNER2Fastino {
    /// Prints only the model id and the configured hidden size; the tokenizer,
    /// transformer and sessions are left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("GLiNER2Fastino");
        out.field("model_id", &self.model_id);
        out.field("hidden_size", &self.config.hidden_size);
        out.finish()
    }
}
use std::path::Path;
impl GLiNER2Fastino {
pub fn from_local_with_config(
model_dir: &Path,
cfg: GLiNER2FastinoConfig,
) -> crate::Result<Self> {
if model_dir.join("adapter_config.json").exists() {
return Err(errors::Error::LoraAdapterNotSupported {
path: model_dir.to_path_buf(),
}
.into());
}
let (sessions, subdir) = sessions::Sessions::from_dir_with_cfg_mode(
model_dir,
cfg.onnx.clone(),
cfg.execution_mode,
)?;
let tokenizer_path = if subdir.join("tokenizer.json").exists() {
subdir.join("tokenizer.json")
} else {
model_dir.join("tokenizer.json")
};
if !tokenizer_path.exists() {
return Err(errors::Error::TokenizerMissing(tokenizer_path).into());
}
let tokenizer = crate::backends::hf_loader::load_tokenizer(&tokenizer_path)
.map_err(|e| crate::Error::Backend(format!("gliner2_fastino: tokenizer: {e}")))?;
let special = processor::SpecialTokenIds::resolve(&tokenizer)?;
let transformer = processor::SchemaTransformer::new(tokenizer.clone())?;
let config_path = if subdir.join("config.json").exists() {
subdir.join("config.json")
} else {
model_dir.join("config.json")
};
let model_config = if config_path.exists() {
config::FastinoConfig::from_path(&config_path)?
} else {
config::FastinoConfig::default()
};
Ok(Self {
tokenizer,
special,
transformer,
config: model_config,
sessions,
model_id: model_dir
.file_name()
.map(|s| s.to_string_lossy().into_owned())
.unwrap_or_else(|| "gliner2_fastino_local".to_string()),
execution_mode: cfg.execution_mode,
})
}
/// Loads an engine from a local directory with custom ONNX session settings.
///
/// Always uses [`ExecutionMode::Standard`]; call `from_local_with_config` to
/// pick a different execution mode.
pub fn from_local_with_options(
    model_dir: &Path,
    cfg: crate::backends::hf_loader::OnnxSessionConfig,
) -> crate::Result<Self> {
    let full_config = GLiNER2FastinoConfig {
        execution_mode: ExecutionMode::Standard,
        onnx: cfg,
    };
    Self::from_local_with_config(model_dir, full_config)
}
/// Loads an engine from a local directory with default ONNX session settings.
pub fn from_local(model_dir: &Path) -> crate::Result<Self> {
    let onnx = crate::backends::hf_loader::OnnxSessionConfig::default();
    Self::from_local_with_options(model_dir, onnx)
}
/// Extracts entities of the requested `types` from `text`.
///
/// Returns an empty list when `types` is empty, when the transformed record
/// contains no words, or when the pipeline reports a prediction count of zero.
/// `threshold` is forwarded unchanged to `pipeline::decode_entities`.
///
/// # Errors
/// Propagates transformer and pipeline failures, and returns a backend error
/// when the transformer produced no task mapping.
pub(crate) fn extract_ner(
    &self,
    text: &str,
    types: &[&str],
    threshold: f32,
) -> crate::Result<Vec<crate::Entity>> {
    if types.is_empty() {
        return Ok(vec![]);
    }
    // The label vector is moved straight into the task; no second copy is needed.
    let labels: Vec<String> = types.iter().map(|label| (*label).to_owned()).collect();
    let task = processor::SchemaTask::Entities(labels);
    let record = self.transformer.transform(text, &[task])?;
    let num_words = record.word_to_char_maps.len();
    if num_words == 0 {
        return Ok(vec![]);
    }
    let task_map = record.tasks.first().ok_or_else(|| {
        crate::Error::Backend("gliner2_fastino: transformer produced no task mapping".into())
    })?;
    let (scorer_out, pred_count) = pipeline_iobinding::run_pipeline_dispatch(
        &self.sessions,
        &record,
        task_map,
        self.execution_mode,
    )?;
    if pred_count == 0 {
        return Ok(vec![]);
    }
    Ok(pipeline::decode_entities(
        text,
        &record,
        task_map,
        &scorer_out,
        pred_count,
        threshold,
        false,
    ))
}
/// Downloads `model_id` and loads it with the default configuration.
pub fn from_pretrained(model_id: &str) -> crate::Result<Self> {
    let cfg = GLiNER2FastinoConfig::default();
    Self::from_pretrained_with_config(model_id, cfg)
}
pub fn from_pretrained_with_config(
model_id: &str,
cfg: GLiNER2FastinoConfig,
) -> crate::Result<Self> {
let api = crate::backends::hf_loader::hf_api()
.map_err(|e| crate::Error::Backend(format!("gliner2_fastino: hf_api: {e}")))?;
let repo = api.model(model_id.to_string());
let tokenizer_path = crate::backends::hf_loader::download_model_file(
&repo,
&[
"fp32_v2/tokenizer.json",
"fp16_v2/tokenizer.json",
"tokenizer.json",
],
)
.map_err(|e| crate::Error::Backend(format!("gliner2_fastino: download tokenizer: {e}")))?;
let _ = crate::backends::hf_loader::download_model_file(
&repo,
&["fp32_v2/config.json", "fp16_v2/config.json", "config.json"],
);
let bases = [
"encoder",
"token_gather",
"span_rep",
"schema_gather",
"count_pred_argmax",
"count_lstm_fixed",
"scorer",
"classifier",
];
for base in &bases {
let candidates = [
format!("fp32_v2/{base}_fp32.onnx"),
format!("fp16_v2/{base}_fp16.onnx"),
];
let candidate_refs: Vec<&str> = candidates.iter().map(String::as_str).collect();
crate::backends::hf_loader::download_model_file(&repo, &candidate_refs).map_err(
|e| crate::Error::Backend(format!("gliner2_fastino: download {base}: {e}")),
)?;
}
let mut snapshot_dir = tokenizer_path.parent().ok_or_else(|| {
crate::Error::Backend("gliner2_fastino: tokenizer has no parent".into())
})?;
loop {
let has_dtype_subdir = ["fp32_v2", "fp16_v2", "fp32", "fp16"]
.iter()
.any(|sub| snapshot_dir.join(sub).is_dir());
if has_dtype_subdir {
break;
}
match snapshot_dir.parent() {
Some(p) => snapshot_dir = p,
None => break, }
}
let mut model = Self::from_local_with_config(snapshot_dir, cfg)?;
model.model_id = model_id.to_string();
Ok(model)
}
}
use crate::backends::inference::ZeroShotNER;
use crate::{EntityCategory, EntityType, Language};
impl crate::Model for GLiNER2Fastino {
fn extract_entities(
&self,
text: &str,
_language: Option<Language>,
) -> crate::Result<Vec<crate::Entity>> {
self.extract_ner(text, &["person", "organization", "location", "date"], 0.5)
}
fn supported_types(&self) -> Vec<EntityType> {
vec![
EntityType::Person,
EntityType::Organization,
EntityType::Location,
EntityType::Date,
EntityType::custom("misc", EntityCategory::Misc),
]
}
fn is_available(&self) -> bool {
true
}
fn name(&self) -> &'static str {
"GLiNER2Fastino"
}
fn description(&self) -> &'static str {
"fastino-ai GLiNER2 (NER + classification, ONNX, experimental)"
}
fn capabilities(&self) -> crate::ModelCapabilities {
crate::ModelCapabilities {
zero_shot: true,
..Default::default()
}
}
fn as_zero_shot(&self) -> Option<&dyn ZeroShotNER> {
Some(self)
}
}
impl ZeroShotNER for GLiNER2Fastino {
    /// Labels used when the caller does not supply any.
    fn default_types(&self) -> &[&'static str] {
        const DEFAULT_TYPES: &[&str] = &["person", "organization", "location", "date", "event"];
        DEFAULT_TYPES
    }

    /// Zero-shot extraction for caller-chosen labels.
    fn extract_with_types(
        &self,
        text: &str,
        types: &[&str],
        threshold: f32,
    ) -> crate::Result<Vec<crate::Entity>> {
        self.extract_ner(text, types, threshold)
    }

    /// Zero-shot extraction where each description is passed to the model as if
    /// it were a label.
    fn extract_with_descriptions(
        &self,
        text: &str,
        descriptions: &[&str],
        threshold: f32,
    ) -> crate::Result<Vec<crate::Entity>> {
        self.extract_ner(text, descriptions, threshold)
    }
}
impl GLiNER2Fastino {
pub fn extract_with_label_descriptions(
&self,
text: &str,
labeled: &[(&str, &str)],
threshold: f32,
) -> crate::Result<Vec<crate::Entity>> {
if labeled.is_empty() {
return Ok(vec![]);
}
let owned: Vec<(String, String)> = labeled
.iter()
.map(|(l, d)| (l.to_string(), d.to_string()))
.collect();
let task = processor::SchemaTask::EntitiesDescribed(owned);
let record = self.transformer.transform(text, &[task])?;
let num_words = record.word_to_char_maps.len();
if num_words == 0 {
return Ok(vec![]);
}
let task_map = record.tasks.first().ok_or_else(|| {
crate::Error::Backend("gliner2_fastino: transformer produced no task mapping".into())
})?;
let (scorer_out, pred_count) = pipeline_iobinding::run_pipeline_dispatch(
&self.sessions,
&record,
task_map,
self.execution_mode,
)?;
if pred_count == 0 {
return Ok(vec![]);
}
Ok(pipeline::decode_entities(
text,
&record,
task_map,
&scorer_out,
pred_count,
threshold,
false,
))
}
pub fn extract_with_label_thresholds(
&self,
text: &str,
label_thresholds: &[(&str, f32)],
) -> crate::Result<Vec<crate::Entity>> {
if label_thresholds.is_empty() {
return Ok(vec![]);
}
let labels: Vec<String> = label_thresholds
.iter()
.map(|(l, _)| l.to_string())
.collect();
let task = processor::SchemaTask::Entities(labels);
let record = self.transformer.transform(text, &[task])?;
let num_words = record.word_to_char_maps.len();
if num_words == 0 {
return Ok(vec![]);
}
let task_map = record.tasks.first().ok_or_else(|| {
crate::Error::Backend("gliner2_fastino: transformer produced no task mapping".into())
})?;
let (scorer_out, pred_count) = pipeline_iobinding::run_pipeline_dispatch(
&self.sessions,
&record,
task_map,
self.execution_mode,
)?;
if pred_count == 0 {
return Ok(vec![]);
}
Ok(pipeline::decode_entities_with_thresholds(
text,
&record,
task_map,
&scorer_out,
pred_count,
label_thresholds,
false,
))
}
pub fn extract_structure(
&self,
text: &str,
schema: &schema::TaskSchema,
threshold: f32,
) -> crate::Result<Vec<schema::ExtractedStructure>> {
if schema.structures.is_empty() {
return Ok(vec![]);
}
let mut all_results: Vec<schema::ExtractedStructure> = Vec::new();
for st in &schema.structures {
if st.fields.is_empty() {
continue; }
let fields_owned: Vec<(String, schema::FieldType)> = st
.fields
.iter()
.map(|f| (f.name.clone(), f.field_type))
.collect();
let task = processor::SchemaTask::Structures(st.name.clone(), fields_owned.clone());
let record = self.transformer.transform(text, &[task])?;
let num_words = record.word_to_char_maps.len();
if num_words == 0 {
continue;
}
let task_map = record.tasks.first().ok_or_else(|| {
crate::Error::Backend(
"gliner2_fastino: transformer produced no task mapping".into(),
)
})?;
let (scorer_out, pred_count) = pipeline_iobinding::run_pipeline_dispatch(
&self.sessions,
&record,
task_map,
self.execution_mode,
)?;
if pred_count == 0 {
continue;
}
let task_results = pipeline::decode_structure(
text,
&record,
task_map,
&scorer_out,
pred_count,
threshold,
&fields_owned,
);
all_results.extend(task_results);
}
Ok(all_results)
}
pub fn classify(
&self,
text: &str,
labels: &[&str],
_threshold: f32,
) -> crate::Result<Vec<(String, f32)>> {
if labels.is_empty() {
return Ok(vec![]);
}
let label_strings: Vec<String> = labels.iter().map(|s| s.to_string()).collect();
let task = processor::SchemaTask::Classifications(
"classification".to_string(),
label_strings.clone(),
);
let record = self.transformer.transform(text, &[task])?;
let task_map = record.tasks.first().ok_or_else(|| {
crate::Error::Backend("gliner2_fastino: transformer produced no task mapping".into())
})?;
let probs = pipeline_iobinding::run_classify_dispatch(
&self.sessions,
&record,
task_map,
self.execution_mode,
)?;
let mut out: Vec<(String, f32)> = label_strings.into_iter().zip(probs).collect();
out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(out)
}
/// Runs NER over every text of `texts`, with labels chosen by `schema`.
///
/// * `BatchSchemaMode::Shared` applies one label set to every text.
/// * `BatchSchemaMode::PerSample` applies the i-th label set to the i-th text.
///
/// The result has one entry per text, in input order. Texts are processed one
/// after another.
///
/// # Errors
/// Returns a backend error when the number of per-sample label sets differs
/// from the number of texts, and stops at the first text whose extraction
/// fails.
pub fn batch_extract_with_schema_mode(
    &self,
    texts: &[&str],
    schema: BatchSchemaMode<'_>,
    threshold: f32,
) -> crate::Result<Vec<Vec<crate::Entity>>> {
    let mut out: Vec<Vec<crate::Entity>> = Vec::with_capacity(texts.len());
    match schema {
        BatchSchemaMode::Shared(labels) => {
            for text in texts {
                out.push(self.extract_ner(text, labels, threshold)?);
            }
        }
        BatchSchemaMode::PerSample(per_text_labels) => {
            if per_text_labels.len() != texts.len() {
                return Err(crate::Error::Backend(format!(
                    "gliner2_fastino: PerSample label count {} != texts count {}",
                    per_text_labels.len(),
                    texts.len()
                )));
            }
            // Each label set is borrowed as a slice; no per-text copy is made.
            for (text, labels) in texts.iter().zip(per_text_labels) {
                out.push(self.extract_ner(text, labels, threshold)?);
            }
        }
    }
    Ok(out)
}
/// Runs NER over `texts` chunk by chunk and reports results through a callback.
///
/// `on_batch` is invoked once per text — not once per chunk — with the text's
/// index in `texts` and the entities found in it, in input order.
///
/// # Errors
/// Returns a backend error when `batch_size` is zero, and stops at the first
/// text whose extraction fails; callbacks already made are not undone.
pub fn batch_extract_streaming<F>(
    &self,
    texts: &[&str],
    types: &[&str],
    threshold: f32,
    batch_size: usize,
    mut on_batch: F,
) -> crate::Result<()>
where
    F: FnMut(usize, &[crate::Entity]),
{
    if batch_size == 0 {
        return Err(crate::Error::Backend(
            "gliner2_fastino: batch_size must be > 0".into(),
        ));
    }

    // Index of the first text of the chunk currently being processed.
    let mut chunk_start = 0;
    for chunk in texts.chunks(batch_size) {
        for (offset, text) in chunk.iter().enumerate() {
            let found = self.extract_ner(text, types, threshold)?;
            on_batch(chunk_start + offset, &found);
        }
        chunk_start += chunk.len();
    }
    Ok(())
}
}
#[cfg(test)]
mod streaming_tests {
    /// Reproduces the cursor arithmetic of the streaming loop and returns the
    /// `(start, end)` bounds of every chunk for `total` items.
    fn chunk_bounds(total: usize, batch_size: usize) -> Vec<(usize, usize)> {
        let mut bounds = Vec::new();
        let mut cursor = 0;
        while cursor < total {
            let end = (cursor + batch_size).min(total);
            bounds.push((cursor, end));
            cursor = end;
        }
        bounds
    }

    #[test]
    fn streaming_chunking_indices_are_contiguous_and_complete() {
        let texts: Vec<&str> = vec![
            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
        ];
        let batch_size = 3;

        // Ten items: three full chunks plus a trailing chunk of one.
        assert_eq!(
            chunk_bounds(texts.len(), batch_size),
            vec![(0, 3), (3, 6), (6, 9), (9, 10)]
        );
        // Exact multiple of the batch size: no trailing partial chunk.
        assert_eq!(chunk_bounds(9, batch_size), vec![(0, 3), (3, 6), (6, 9)]);
        // Fewer items than one batch.
        assert_eq!(chunk_bounds(1, batch_size), vec![(0, 1)]);
    }
}
#[cfg(test)]
mod from_local_tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Text every LoRA-rejection message is expected to mention.
    const EXPORT_SCRIPT: &str = "scripts/gliner2_export_onnx.py";

    /// Creates a temporary directory containing only an `adapter_config.json`.
    fn lora_adapter_dir() -> tempfile::TempDir {
        let dir = tempdir().unwrap();
        fs::write(dir.path().join("adapter_config.json"), "{}").unwrap();
        dir
    }

    #[test]
    fn from_local_rejects_lora_adapter_dir() {
        let dir = lora_adapter_dir();
        let msg = GLiNER2Fastino::from_local(dir.path())
            .unwrap_err()
            .to_string();
        assert!(msg.contains(EXPORT_SCRIPT), "missing script path: {msg}");
        assert!(msg.contains("--lora-adapter"), "missing flag: {msg}");
    }

    #[test]
    fn from_local_missing_tokenizer_returns_typed_error() {
        let dir = tempdir().unwrap();
        let msg = GLiNER2Fastino::from_local(dir.path())
            .unwrap_err()
            .to_string();
        let mentions_tokenizer = msg.contains("tokenizer");
        let mentions_sessions = msg.contains("no complete v2 session set");
        assert!(mentions_tokenizer || mentions_sessions, "got {msg}");
    }

    #[test]
    fn from_local_empty_dir_returns_session_set_error() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        // Provide a tokenizer and a config so that only the sessions are missing.
        let fixture = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("../../testdata/gliner2_fastino/stub_tokenizer.json");
        fs::copy(&fixture, root.join("tokenizer.json")).unwrap();
        let config_json = r#"{"hidden_size": 768, "counting_layer": "count_lstm_v2"}"#;
        fs::write(root.join("config.json"), config_json).unwrap();

        let msg = GLiNER2Fastino::from_local(root).unwrap_err().to_string();
        assert!(
            msg.contains("no complete v2 session set"),
            "Phase 3 should report missing sessions, not 'Phase 3 needed'. Got: {msg}"
        );
    }

    #[test]
    fn schema_types_reachable_via_gliner2_fastino_path() {
        use crate::backends::gliner2_fastino::schema::{
            ExtractedStructure, FieldType, StructureTask, TaskSchema,
        };
        let invoice = StructureTask::new("invoice")
            .with_field("vendor", FieldType::String)
            .with_field("amount", FieldType::String);
        let _schema: TaskSchema = TaskSchema::new().with_structure(invoice);
        let _es: ExtractedStructure = ExtractedStructure {
            structure_type: "invoice".to_string(),
            fields: std::collections::HashMap::new(),
        };
    }

    #[test]
    fn config_defaults_are_standard_mode() {
        let GLiNER2FastinoConfig {
            onnx,
            execution_mode,
        } = GLiNER2FastinoConfig::default();
        assert_eq!(execution_mode, ExecutionMode::Standard);
        assert!(!onnx.prefer_cuda);
        assert!(!onnx.prefer_coreml);
    }

    #[test]
    fn execution_mode_default_is_standard() {
        let mode = ExecutionMode::default();
        assert_eq!(mode, ExecutionMode::Standard);
    }

    #[test]
    fn from_local_with_options_delegates_to_config_with_standard_mode() {
        let dir = lora_adapter_dir();
        let via_options = GLiNER2Fastino::from_local_with_options(
            dir.path(),
            crate::backends::hf_loader::OnnxSessionConfig::default(),
        )
        .unwrap_err();
        let via_config =
            GLiNER2Fastino::from_local_with_config(dir.path(), GLiNER2FastinoConfig::default())
                .unwrap_err();
        // Both entry points must fail with the same LoRA guidance.
        assert!(via_options.to_string().contains(EXPORT_SCRIPT));
        assert!(via_config.to_string().contains(EXPORT_SCRIPT));
    }

    #[test]
    fn engine_is_send_and_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<GLiNER2Fastino>();
    }
}