Skip to main content

cloakrs_core/
finding.rs

1//! Detection result types.
2
3use crate::{CloakError, Result};
4use serde::{Deserialize, Serialize};
5use std::cmp::Ordering;
6use std::fmt;
7use std::hash::{Hash, Hasher};
8
9/// A detected PII entity within text.
10///
11/// # Examples
12///
13/// ```
14/// use cloakrs_core::{Confidence, EntityType, PiiEntity, Span};
15///
16/// let entity = PiiEntity {
17///     entity_type: EntityType::Email,
18///     span: Span::new(11, 27),
19///     text: "user@example.com".to_string(),
20///     confidence: Confidence::new(0.95).unwrap(),
21///     recognizer_id: "email_regex_v1".to_string(),
22/// };
23///
24/// assert_eq!(entity.span.len(), 16);
25/// ```
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct PiiEntity {
28    /// The type of PII detected.
29    pub entity_type: EntityType,
30    /// Byte offset range in the source text, using `[start, end)`.
31    pub span: Span,
32    /// The matched value.
33    pub text: String,
34    /// Confidence score from `0.0` to `1.0`.
35    pub confidence: Confidence,
36    /// Identifier of the recognizer that produced this finding.
37    pub recognizer_id: String,
38}
39
40/// Byte offset range in source text, using `[start, end)`.
41///
42/// # Examples
43///
44/// ```
45/// use cloakrs_core::Span;
46///
47/// let span = Span::new(3, 8);
48/// assert_eq!(span.len(), 5);
49/// assert!(!span.is_empty());
50/// ```
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
52pub struct Span {
53    /// Inclusive start byte offset.
54    pub start: usize,
55    /// Exclusive end byte offset.
56    pub end: usize,
57}
58
59impl Span {
60    /// Creates a new span.
61    #[must_use]
62    pub const fn new(start: usize, end: usize) -> Self {
63        Self { start, end }
64    }
65
66    /// Returns the span length in bytes.
67    #[must_use]
68    pub const fn len(self) -> usize {
69        self.end.saturating_sub(self.start)
70    }
71
72    /// Returns `true` when this span contains no bytes.
73    #[must_use]
74    pub const fn is_empty(self) -> bool {
75        self.start >= self.end
76    }
77
78    /// Returns `true` if the two spans overlap.
79    #[must_use]
80    pub const fn overlaps(self, other: Self) -> bool {
81        self.start < other.end && other.start < self.end
82    }
83}
84
85/// Confidence score wrapper guaranteed to contain a value from `0.0` to `1.0`.
86///
87/// # Examples
88///
89/// ```
90/// use cloakrs_core::Confidence;
91///
92/// let confidence = Confidence::new(0.8).unwrap();
93/// assert_eq!(confidence.value(), 0.8);
94/// ```
95#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
96pub struct Confidence(f64);
97
98impl Confidence {
99    /// The lowest possible confidence value.
100    pub const ZERO: Self = Self(0.0);
101
102    /// The highest possible confidence value.
103    pub const ONE: Self = Self(1.0);
104
105    /// Creates a confidence score if the value is finite and within `0.0..=1.0`.
106    pub fn new(value: f64) -> Result<Self> {
107        if value.is_finite() && (0.0..=1.0).contains(&value) {
108            Ok(Self(value))
109        } else {
110            Err(CloakError::InvalidConfidence(value))
111        }
112    }
113
114    /// Returns the wrapped numeric value.
115    #[must_use]
116    pub const fn value(self) -> f64 {
117        self.0
118    }
119}
120
121impl Default for Confidence {
122    fn default() -> Self {
123        Self::ONE
124    }
125}
126
127impl fmt::Display for Confidence {
128    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129        write!(f, "{:.3}", self.0)
130    }
131}
132
133impl PartialEq for Confidence {
134    fn eq(&self, other: &Self) -> bool {
135        self.0.to_bits() == other.0.to_bits()
136    }
137}
138
139impl Eq for Confidence {}
140
141impl PartialOrd for Confidence {
142    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
143        Some(self.cmp(other))
144    }
145}
146
147impl Ord for Confidence {
148    fn cmp(&self, other: &Self) -> Ordering {
149        self.0.total_cmp(&other.0)
150    }
151}
152
153impl Hash for Confidence {
154    fn hash<H: Hasher>(&self, state: &mut H) {
155        self.0.to_bits().hash(state);
156    }
157}
158
159/// All supported PII entity types.
160///
161/// # Examples
162///
163/// ```
164/// use cloakrs_core::EntityType;
165///
166/// assert_eq!(EntityType::Email.redaction_tag(), "[EMAIL]");
167/// ```
168#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
169pub enum EntityType {
170    /// Email address.
171    Email,
172    /// Phone number.
173    PhoneNumber,
174    /// Payment card number.
175    CreditCard,
176    /// International Bank Account Number.
177    Iban,
178    /// IP address.
179    IpAddress,
180    /// URL.
181    Url,
182    /// Date of birth.
183    DateOfBirth,
184    /// Generic API key.
185    ApiKey,
186    /// JSON Web Token.
187    Jwt,
188    /// AWS access key.
189    AwsAccessKey,
190    /// Cryptocurrency wallet address.
191    CryptoAddress,
192    /// MAC address.
193    MacAddress,
194    /// Internal hostname or machine name.
195    Hostname,
196    /// User home-directory path.
197    UserPath,
198    /// Passport number.
199    PassportNumber,
200    /// Driver's license number.
201    DriversLicense,
202    /// United States Social Security Number.
203    Ssn,
204    /// Dutch Burgerservicenummer.
205    Bsn,
206    /// UK National Insurance number.
207    Nino,
208    /// UK NHS number.
209    NhsNumber,
210    /// Indian Aadhaar number.
211    Aadhaar,
212    /// Indian PAN card number.
213    Pan,
214    /// Brazilian CPF.
215    Cpf,
216    /// Brazilian CNPJ.
217    Cnpj,
218    /// German tax identifier.
219    SteuerID,
220    /// French INSEE/NIR number.
221    InseeNir,
222    /// User-defined entity type.
223    Custom(String),
224}
225
226impl EntityType {
227    /// Returns the redaction tag for this entity type.
228    #[must_use]
229    pub fn redaction_tag(&self) -> String {
230        match self {
231            Self::Email => "[EMAIL]".to_string(),
232            Self::PhoneNumber => "[PHONE]".to_string(),
233            Self::CreditCard => "[CREDIT_CARD]".to_string(),
234            Self::Iban => "[IBAN]".to_string(),
235            Self::IpAddress => "[IP_ADDRESS]".to_string(),
236            Self::Url => "[URL]".to_string(),
237            Self::DateOfBirth => "[DOB]".to_string(),
238            Self::ApiKey => "[API_KEY]".to_string(),
239            Self::Jwt => "[JWT]".to_string(),
240            Self::AwsAccessKey => "[AWS_KEY]".to_string(),
241            Self::CryptoAddress => "[CRYPTO_ADDR]".to_string(),
242            Self::MacAddress => "[MAC_ADDR]".to_string(),
243            Self::Hostname => "[HOSTNAME]".to_string(),
244            Self::UserPath => "[USER_PATH]".to_string(),
245            Self::PassportNumber => "[PASSPORT]".to_string(),
246            Self::DriversLicense => "[DRIVERS_LICENSE]".to_string(),
247            Self::Ssn => "[SSN]".to_string(),
248            Self::Bsn => "[BSN]".to_string(),
249            Self::Nino => "[NINO]".to_string(),
250            Self::NhsNumber => "[NHS_NUMBER]".to_string(),
251            Self::Aadhaar => "[AADHAAR]".to_string(),
252            Self::Pan => "[PAN]".to_string(),
253            Self::Cpf => "[CPF]".to_string(),
254            Self::Cnpj => "[CNPJ]".to_string(),
255            Self::SteuerID => "[STEUER_ID]".to_string(),
256            Self::InseeNir => "[INSEE_NIR]".to_string(),
257            Self::Custom(name) => format!("[{}]", upper_snake(name)),
258        }
259    }
260}
261
/// Converts `value` into an upper-case, underscore-separated ASCII label.
///
/// ASCII letters and digits are kept (letters upper-cased); every other
/// character, including each non-ASCII character, becomes a single `_`.
fn upper_snake(value: &str) -> String {
    let mut label = String::with_capacity(value.len());
    for ch in value.chars() {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_uppercase()
        } else {
            '_'
        };
        label.push(mapped);
    }
    label
}
274
275/// Locale selector used to choose locale-specific recognizers.
276///
277/// # Examples
278///
279/// ```
280/// use cloakrs_core::Locale;
281///
282/// assert!(Locale::US.matches(Locale::Universal));
283/// ```
284#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
285pub enum Locale {
286    /// Universal recognizers that apply to all locales.
287    Universal,
288    /// United States.
289    US,
290    /// Netherlands.
291    NL,
292    /// United Kingdom.
293    UK,
294    /// Germany.
295    DE,
296    /// France.
297    FR,
298    /// India.
299    IN,
300    /// Brazil.
301    BR,
302    /// European Union meta-locale.
303    EU,
304    /// Custom BCP-47-like locale string.
305    Custom(String),
306}
307
308impl Locale {
309    /// Returns true if `candidate` is universal or equals this locale.
310    #[must_use]
311    pub fn matches(&self, candidate: Self) -> bool {
312        candidate == Self::Universal || self == &candidate
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// An in-range score is accepted and read back unchanged.
    #[test]
    fn test_confidence_new_valid_value_constructs() {
        let score = Confidence::new(0.75).unwrap().value();
        assert_eq!(score, 0.75);
    }

    /// Scores above `1.0` are refused.
    #[test]
    fn test_confidence_new_above_one_rejects() {
        let outcome = Confidence::new(1.1);
        assert!(outcome.is_err());
    }

    /// NaN is refused.
    #[test]
    fn test_confidence_new_nan_rejects() {
        let outcome = Confidence::new(f64::NAN);
        assert!(outcome.is_err());
    }

    /// A smaller score compares as less than a larger one.
    #[test]
    fn test_confidence_ordering_sorts_low_to_high() {
        let (low, high) = (
            Confidence::new(0.2).unwrap(),
            Confidence::new(0.9).unwrap(),
        );
        assert!(low < high);
    }

    /// For ordered offsets the length is `end - start`.
    #[test]
    fn test_span_len_with_ordered_offsets_returns_difference() {
        let span = Span::new(4, 10);
        assert_eq!(span.len(), 6);
    }

    /// Two spans that share bytes are reported as overlapping.
    #[test]
    fn test_span_overlaps_when_ranges_intersect() {
        let first = Span::new(4, 10);
        let second = Span::new(8, 12);
        assert!(first.overlaps(second));
    }

    /// Custom names are upper-cased and spaces become underscores.
    #[test]
    fn test_entity_type_redaction_tag_for_custom_uppercases_name() {
        let custom = EntityType::Custom("customer id".to_string());
        assert_eq!(custom.redaction_tag(), "[CUSTOMER_ID]");
    }

    /// The JSON encoding of a finding contains the matched text.
    #[test]
    fn test_pii_entity_serializes_to_json() {
        let finding = PiiEntity {
            entity_type: EntityType::Email,
            span: Span::new(0, 16),
            text: "user@example.com".to_string(),
            confidence: Confidence::new(0.95).unwrap(),
            recognizer_id: "email_regex_v1".to_string(),
        };

        let encoded = serde_json::to_string(&finding).unwrap();
        assert!(encoded.contains("user@example.com"));
    }
}
374}