use crate::capabilities::Capabilities;
use serde::{Deserialize, Serialize};
use std::fmt;
/// Newtype wrapper around an owned `String` holding a language code.
///
/// Derives equality and hashing, so values can be compared and used as map
/// keys, and derives serde (de)serialization.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct Language(String);
impl Language {
pub fn new(code: impl Into<String>) -> Self {
Self(code.into())
}
pub fn as_str(&self) -> &str {
&self.0
}
}
/// Formats a [`Language`] as its underlying code string.
///
/// Formatter flags such as width, alignment and precision are honored,
/// because formatting is delegated to the `Display` implementation of `str`.
impl fmt::Display for Language {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully-qualified call: resolution does not depend on the `Display`
        // trait being imported, and it stays unambiguous if `Debug` (which
        // also has a `fmt` method) is brought into scope.
        fmt::Display::fmt(self.0.as_str(), f)
    }
}
impl From<&str> for Language {
fn from(value: &str) -> Self {
Self::new(value)
}
}
impl From<String> for Language {
fn from(value: String) -> Self {
Self::new(value)
}
}
/// Language code string `"en"`.
/// NOTE(review): presumably English, in a two-letter scheme such as ISO 639-1 — confirm.
pub const LANGUAGE_EN: &str = "en";
/// Language code string `"de"`.
/// NOTE(review): presumably German — confirm.
pub const LANGUAGE_DE: &str = "de";
/// Language code string `"es"`.
/// NOTE(review): presumably Spanish — confirm.
pub const LANGUAGE_ES: &str = "es";
/// Kind of entity attached to a [`NerSpan`] or a [`Detection`].
///
/// Built-in kinds are unit variants; [`EntityType::Custom`] carries an
/// arbitrary name for kinds that have no dedicated variant. The type derives
/// equality and hashing, plus serde (de)serialization.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum EntityType {
Email,
Phone,
IpAddress,
Ipv6,
CreditCard,
Iban,
Ssn,
Itin,
TaxId,
Passport,
DriverLicense,
BankAccount,
RoutingNumber,
CryptoAddress,
MacAddress,
Uuid,
Vin,
Imei,
Url,
Domain,
Hostname,
Person,
Location,
Organization,
/// An entity kind identified only by the wrapped name.
Custom(String),
}
impl EntityType {
pub fn as_str(&self) -> String {
match self {
EntityType::Email => "Email".to_string(),
EntityType::Phone => "Phone".to_string(),
EntityType::IpAddress => "IpAddress".to_string(),
EntityType::Ipv6 => "Ipv6".to_string(),
EntityType::CreditCard => "CreditCard".to_string(),
EntityType::Iban => "Iban".to_string(),
EntityType::Ssn => "Ssn".to_string(),
EntityType::Itin => "Itin".to_string(),
EntityType::TaxId => "TaxId".to_string(),
EntityType::Passport => "Passport".to_string(),
EntityType::DriverLicense => "DriverLicense".to_string(),
EntityType::BankAccount => "BankAccount".to_string(),
EntityType::RoutingNumber => "RoutingNumber".to_string(),
EntityType::CryptoAddress => "CryptoAddress".to_string(),
EntityType::MacAddress => "MacAddress".to_string(),
EntityType::Uuid => "Uuid".to_string(),
EntityType::Vin => "Vin".to_string(),
EntityType::Imei => "Imei".to_string(),
EntityType::Url => "Url".to_string(),
EntityType::Domain => "Domain".to_string(),
EntityType::Hostname => "Hostname".to_string(),
EntityType::Person => "Person".to_string(),
EntityType::Location => "Location".to_string(),
EntityType::Organization => "Organization".to_string(),
EntityType::Custom(name) => name.clone(),
}
}
}
/// A single token together with its span and optional annotations.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Token {
/// The token's text.
pub text: String,
/// Start of the token's span.
/// NOTE(review): presumably an offset into the analyzed text; the unit
/// (bytes vs. chars) is not visible here — confirm.
pub start: usize,
/// End of the token's span.
/// NOTE(review): presumably exclusive — confirm against the producer.
pub end: usize,
/// Lemma of the token, when one is available.
pub lemma: Option<String>,
/// NOTE(review): presumably a part-of-speech tag; tag set not visible here — confirm.
pub pos: Option<String>,
}
/// Bundle of NLP outputs: language, tokens, sentence spans and NER spans,
/// together with a `Capabilities` value.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct NlpArtifacts {
/// Language associated with these artifacts.
pub language: Language,
/// NOTE(review): presumably the length of the analyzed text; unit
/// (bytes vs. chars) not visible here — confirm.
pub text_len: usize,
/// Tokens, each with its own span and optional annotations.
pub tokens: Vec<Token>,
/// Sentence spans as pairs of `usize`.
/// NOTE(review): presumably `(start, end)` offsets — confirm.
pub sentences: Vec<(usize, usize)>,
/// Named-entity spans.
pub ner: Vec<NerSpan>,
/// Capabilities value (type defined in `crate::capabilities`).
pub capabilities: Capabilities,
}
/// A span labelled with an [`EntityType`], a score and a model name.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct NerSpan {
/// Entity kind assigned to the span.
pub entity_type: EntityType,
/// Start of the span.
/// NOTE(review): offset unit (bytes vs. chars) not visible here — confirm.
pub start: usize,
/// End of the span.
/// NOTE(review): presumably exclusive — confirm.
pub end: usize,
/// Score attached to the span.
/// NOTE(review): presumably a confidence in `[0, 1]` — confirm.
pub score: f32,
/// Model name string.
/// NOTE(review): presumably the model that produced the span — confirm.
pub model: String,
}
/// A detected entity: its kind, span, score, the recognizer name and a
/// [`DetectionExplanation`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Detection {
/// Entity kind of the detection.
pub entity_type: EntityType,
/// Start of the detected span.
/// NOTE(review): offset unit (bytes vs. chars) not visible here — confirm.
pub start: usize,
/// End of the detected span.
/// NOTE(review): presumably exclusive — confirm.
pub end: usize,
/// Score attached to the detection.
/// NOTE(review): presumably a confidence in `[0, 1]` — confirm.
pub score: f32,
/// Recognizer name string.
/// NOTE(review): presumably the recognizer that produced this detection — confirm.
pub recognizer: String,
/// Structured explanation accompanying the detection.
pub explanation: DetectionExplanation,
}
/// Structured explanation carried by a [`Detection`].
///
/// Each variant holds the data relevant to one kind of evidence.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum DetectionExplanation {
/// Explanation referring to a named regex pattern.
Regex { pattern_name: String },
/// Explanation referring to a named validator and whether it passed.
Validator { validator: String, passed: bool },
/// Explanation referring to a dictionary source.
Dictionary { source: String },
/// Explanation referring to an NER model and its raw score.
Ner { model: String, raw_score: f32 },
/// Explanation recording a base value, a boost and the matched terms.
/// NOTE(review): how `base` and `boost` combine into the final score is
/// not visible here — confirm against the scoring code.
ContextBoost {
base: f32,
boost: f32,
matched_terms: Vec<String>,
},
}
/// Result of an analysis: the language, the detected entities and a
/// `Capabilities` value.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnalyzeResult {
/// Language associated with the result.
pub language: Language,
/// Detections contained in the result.
/// NOTE(review): ordering and overlap guarantees are not visible here — confirm.
pub entities: Vec<Detection>,
/// Capabilities value (type defined in `crate::capabilities`).
pub capabilities: Capabilities,
}
/// Pairs a [`Detection`] with a replacement string.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnonymizedItem {
/// The detection this item refers to.
pub entity: Detection,
/// Replacement string for the entity.
/// NOTE(review): presumably the text substituted for the detected span — confirm.
pub replacement: String,
}
/// Result of an anonymization: a text and the list of anonymized items.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnonymizeResult {
/// Resulting text.
/// NOTE(review): presumably the input with replacements applied — confirm.
pub text: String,
/// Items pairing each detection with its replacement.
pub items: Vec<AnonymizedItem>,
}