#![allow(missing_docs)]
pub mod addresses;
pub mod dates;
pub mod iban_intl;
pub mod orgs;
use crate::backends::inference::ZeroShotNER;
use crate::core::entity::{Entity, EntityType};
/// French-specific heuristic named-entity recognizer.
///
/// The type carries no fields: it is a unit struct, so every instance is
/// interchangeable and cloning or defaulting it is free.
#[derive(Clone, Debug, Default)]
pub struct HeuristicFrNer;
impl HeuristicFrNer {
pub fn new() -> Self {
Self
}
}
/// Type labels recognised by this backend, in the order the extractors run.
///
/// Returned from `default_types` and used as the label set when the model is
/// invoked through `Model::extract_entities`.
static DEFAULT_TYPES: &[&str] = &[
    "organization",
    "address",
    "date",
    "date_of_birth",
    "iban",
];
impl ZeroShotNER for HeuristicFrNer {
    /// Returns the labels this backend knows how to extract.
    fn default_types(&self) -> &[&'static str] {
        DEFAULT_TYPES
    }

    /// Delegates to `extract_with_types`, passing `descriptions` through
    /// unchanged as the list of type labels.
    fn extract_with_descriptions(
        &self,
        text: &str,
        descriptions: &[&str],
        threshold: f32,
    ) -> crate::Result<Vec<Entity>> {
        self.extract_with_types(text, descriptions, threshold)
    }

    /// Runs the heuristic extractors selected by `types` over `text`.
    ///
    /// Only the exact labels `"organization"`, `"address"`, `"date"`,
    /// `"date_of_birth"` and `"iban"` trigger an extractor; any other label
    /// is ignored. Entities whose confidence is below `threshold` are
    /// dropped. Results are appended in a fixed order: organizations,
    /// addresses, dates, then IBANs.
    ///
    /// This implementation always returns `Ok`.
    fn extract_with_types(
        &self,
        text: &str,
        types: &[&str],
        threshold: f32,
    ) -> crate::Result<Vec<Entity>> {
        // Resolve the requested labels once up front; each check is a pure
        // slice lookup.
        let want_orgs = types.contains(&"organization");
        let want_addresses = types.contains(&"address");
        let want_dates = types.contains(&"date");
        let want_birth_dates = types.contains(&"date_of_birth");
        let want_ibans = types.contains(&"iban");

        // Shared confidence gate applied to every extractor's output.
        let confident = |entity: &Entity| f32::from(entity.confidence) >= threshold;

        let mut found = Vec::new();

        if want_orgs {
            found.extend(orgs::extract_orgs(text).into_iter().filter(&confident));
        }

        if want_addresses {
            found.extend(
                addresses::extract_addresses(text)
                    .into_iter()
                    .filter(&confident),
            );
        }

        if want_dates || want_birth_dates {
            // The date extractor is run once for both labels, so its output
            // is narrowed here to the entity types that were actually
            // requested.
            let dated = dates::extract_dates(text)
                .into_iter()
                .filter(&confident)
                .filter(|entity| match &entity.entity_type {
                    EntityType::Date => want_dates,
                    EntityType::Custom { name, .. } if name == "date_of_birth" => {
                        want_birth_dates
                    }
                    _ => false,
                });
            found.extend(dated);
        }

        if want_ibans {
            found.extend(
                iban_intl::extract_iban_intl(text)
                    .into_iter()
                    .filter(&confident),
            );
        }

        Ok(found)
    }
}
impl crate::Model for HeuristicFrNer {
    /// Extracts every default entity type from `text`.
    ///
    /// The language hint is ignored, and the confidence threshold is fixed
    /// at `0.0`.
    fn extract_entities(
        &self,
        text: &str,
        _language: Option<crate::Language>,
    ) -> crate::Result<Vec<Entity>> {
        let min_confidence = 0.0;
        self.extract_with_types(text, DEFAULT_TYPES, min_confidence)
    }

    /// Lists the entity types this model reports: two built-in variants
    /// followed by three custom ones.
    fn supported_types(&self) -> Vec<EntityType> {
        use crate::core::entity::EntityCategory;

        let address = EntityType::custom("address", EntityCategory::Place);
        let date_of_birth = EntityType::custom("date_of_birth", EntityCategory::Temporal);
        let iban = EntityType::custom("iban", EntityCategory::Numeric);

        vec![
            EntityType::Organization,
            EntityType::Date,
            address,
            date_of_birth,
            iban,
        ]
    }

    /// Always reports the model as available.
    fn is_available(&self) -> bool {
        true
    }

    /// Identifier of this model.
    fn name(&self) -> &'static str {
        "heuristic_fr"
    }

    /// One-line human-readable summary of the model.
    fn description(&self) -> &'static str {
        "French-specific heuristic NER (orgs, addresses, dates, intl IBANs)"
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Sentence shared by all tests.
    const SAMPLE: &str = "Acme Tech SAS est ici.";

    /// Requesting organizations yields exactly one entity for the sample.
    #[test]
    fn detects_org_when_asked() {
        let ner = HeuristicFrNer::new();
        let hits = ner
            .extract_with_types(SAMPLE, &["organization"], 0.5)
            .unwrap();
        assert_eq!(hits.len(), 1);
    }

    /// Requesting only addresses yields nothing for the sample.
    #[test]
    fn respects_type_filter() {
        let ner = HeuristicFrNer::new();
        let hits = ner.extract_with_types(SAMPLE, &["address"], 0.5).unwrap();
        assert!(hits.is_empty());
    }

    /// An empty label list runs no extractor at all.
    #[test]
    fn empty_types_returns_empty() {
        let ner = HeuristicFrNer::new();
        let hits = ner.extract_with_types(SAMPLE, &[], 0.5).unwrap();
        assert!(hits.is_empty());
    }
}