1use crate::{CloakError, Result};
4use serde::{Deserialize, Serialize};
5use std::cmp::Ordering;
6use std::fmt;
7use std::hash::{Hash, Hasher};
8
9#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct PiiEntity {
28 pub entity_type: EntityType,
30 pub span: Span,
32 pub text: String,
34 pub confidence: Confidence,
36 pub recognizer_id: String,
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
52pub struct Span {
53 pub start: usize,
55 pub end: usize,
57}
58
59impl Span {
60 #[must_use]
62 pub const fn new(start: usize, end: usize) -> Self {
63 Self { start, end }
64 }
65
66 #[must_use]
68 pub const fn len(self) -> usize {
69 self.end.saturating_sub(self.start)
70 }
71
72 #[must_use]
74 pub const fn is_empty(self) -> bool {
75 self.start >= self.end
76 }
77
78 #[must_use]
80 pub const fn overlaps(self, other: Self) -> bool {
81 self.start < other.end && other.start < self.end
82 }
83}
84
85#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
96pub struct Confidence(f64);
97
98impl Confidence {
99 pub const ZERO: Self = Self(0.0);
101
102 pub const ONE: Self = Self(1.0);
104
105 pub fn new(value: f64) -> Result<Self> {
107 if value.is_finite() && (0.0..=1.0).contains(&value) {
108 Ok(Self(value))
109 } else {
110 Err(CloakError::InvalidConfidence(value))
111 }
112 }
113
114 #[must_use]
116 pub const fn value(self) -> f64 {
117 self.0
118 }
119}
120
121impl Default for Confidence {
122 fn default() -> Self {
123 Self::ONE
124 }
125}
126
127impl fmt::Display for Confidence {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 write!(f, "{:.3}", self.0)
130 }
131}
132
133impl PartialEq for Confidence {
134 fn eq(&self, other: &Self) -> bool {
135 self.0.to_bits() == other.0.to_bits()
136 }
137}
138
139impl Eq for Confidence {}
140
141impl PartialOrd for Confidence {
142 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
143 Some(self.cmp(other))
144 }
145}
146
147impl Ord for Confidence {
148 fn cmp(&self, other: &Self) -> Ordering {
149 self.0.total_cmp(&other.0)
150 }
151}
152
153impl Hash for Confidence {
154 fn hash<H: Hasher>(&self, state: &mut H) {
155 self.0.to_bits().hash(state);
156 }
157}
158
159#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
169pub enum EntityType {
170 Email,
172 PhoneNumber,
174 CreditCard,
176 Iban,
178 IpAddress,
180 Url,
182 DateOfBirth,
184 ApiKey,
186 Jwt,
188 AwsAccessKey,
190 CryptoAddress,
192 MacAddress,
194 Hostname,
196 UserPath,
198 PassportNumber,
200 DriversLicense,
202 Ssn,
204 Bsn,
206 Nino,
208 NhsNumber,
210 Aadhaar,
212 Pan,
214 Cpf,
216 Cnpj,
218 SteuerID,
220 InseeNir,
222 Custom(String),
224}
225
226impl EntityType {
227 #[must_use]
229 pub fn redaction_tag(&self) -> String {
230 match self {
231 Self::Email => "[EMAIL]".to_string(),
232 Self::PhoneNumber => "[PHONE]".to_string(),
233 Self::CreditCard => "[CREDIT_CARD]".to_string(),
234 Self::Iban => "[IBAN]".to_string(),
235 Self::IpAddress => "[IP_ADDRESS]".to_string(),
236 Self::Url => "[URL]".to_string(),
237 Self::DateOfBirth => "[DOB]".to_string(),
238 Self::ApiKey => "[API_KEY]".to_string(),
239 Self::Jwt => "[JWT]".to_string(),
240 Self::AwsAccessKey => "[AWS_KEY]".to_string(),
241 Self::CryptoAddress => "[CRYPTO_ADDR]".to_string(),
242 Self::MacAddress => "[MAC_ADDR]".to_string(),
243 Self::Hostname => "[HOSTNAME]".to_string(),
244 Self::UserPath => "[USER_PATH]".to_string(),
245 Self::PassportNumber => "[PASSPORT]".to_string(),
246 Self::DriversLicense => "[DRIVERS_LICENSE]".to_string(),
247 Self::Ssn => "[SSN]".to_string(),
248 Self::Bsn => "[BSN]".to_string(),
249 Self::Nino => "[NINO]".to_string(),
250 Self::NhsNumber => "[NHS_NUMBER]".to_string(),
251 Self::Aadhaar => "[AADHAAR]".to_string(),
252 Self::Pan => "[PAN]".to_string(),
253 Self::Cpf => "[CPF]".to_string(),
254 Self::Cnpj => "[CNPJ]".to_string(),
255 Self::SteuerID => "[STEUER_ID]".to_string(),
256 Self::InseeNir => "[INSEE_NIR]".to_string(),
257 Self::Custom(name) => format!("[{}]", upper_snake(name)),
258 }
259 }
260}
261
/// Converts `value` into an upper-case, underscore-separated ASCII token.
///
/// ASCII letters are upper-cased, ASCII digits are kept, and every other
/// character (whitespace, punctuation, any non-ASCII character) becomes a
/// single `_`. Consecutive replacements are not collapsed.
fn upper_snake(value: &str) -> String {
    // Every input char yields exactly one ASCII char, so the byte length of
    // the input is an upper bound for the output.
    let mut token = String::with_capacity(value.len());
    for ch in value.chars() {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_uppercase()
        } else {
            '_'
        };
        token.push(mapped);
    }
    token
}
274
275#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
285pub enum Locale {
286 Universal,
288 US,
290 NL,
292 UK,
294 DE,
296 FR,
298 IN,
300 BR,
302 EU,
304 Custom(String),
306}
307
308impl Locale {
309 #[must_use]
311 pub fn matches(&self, candidate: Self) -> bool {
312 candidate == Self::Universal || self == &candidate
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    // --- Confidence -------------------------------------------------------

    /// An in-range value is accepted and returned unchanged.
    #[test]
    fn test_confidence_new_valid_value_constructs() {
        let parsed = Confidence::new(0.75);
        assert_eq!(parsed.unwrap().value(), 0.75);
    }

    /// Values above the upper bound are refused.
    #[test]
    fn test_confidence_new_above_one_rejects() {
        let rejected = Confidence::new(1.1);
        assert!(rejected.is_err());
    }

    /// NaN is refused.
    #[test]
    fn test_confidence_new_nan_rejects() {
        let rejected = Confidence::new(f64::NAN);
        assert!(rejected.is_err());
    }

    /// Scores order numerically.
    #[test]
    fn test_confidence_ordering_sorts_low_to_high() {
        let [low, high] = [0.2, 0.9].map(|score| Confidence::new(score).unwrap());
        assert!(low < high);
    }

    // --- Span -------------------------------------------------------------

    /// `len` is `end - start` for a well-ordered span.
    #[test]
    fn test_span_len_with_ordered_offsets_returns_difference() {
        let span = Span::new(4, 10);
        assert_eq!(span.len(), 6);
    }

    /// Partially intersecting ranges overlap.
    #[test]
    fn test_span_overlaps_when_ranges_intersect() {
        let first = Span::new(4, 10);
        let second = Span::new(8, 12);
        assert!(first.overlaps(second));
    }

    // --- EntityType -------------------------------------------------------

    /// Custom names are upper-cased and separators become underscores.
    #[test]
    fn test_entity_type_redaction_tag_for_custom_uppercases_name() {
        let custom = EntityType::Custom("customer id".to_string());
        assert_eq!(custom.redaction_tag(), "[CUSTOMER_ID]");
    }

    // --- PiiEntity --------------------------------------------------------

    /// The JSON form of an entity includes its matched text.
    #[test]
    fn test_pii_entity_serializes_to_json() {
        let matched = "user@example.com".to_string();
        let entity = PiiEntity {
            entity_type: EntityType::Email,
            span: Span::new(0, 16),
            text: matched,
            confidence: Confidence::new(0.95).unwrap(),
            recognizer_id: "email_regex_v1".to_string(),
        };

        let json = serde_json::to_string(&entity).unwrap();
        assert!(json.contains("user@example.com"));
    }
}