Skip to main content

sanitize_engine/
category.rs

//! Data category types for classifying sensitive values.
//!
//! Each detected sensitive value belongs to a `Category`, which determines
//! the format of its replacement. For example, emails are replaced with
//! syntactically valid emails, IPv4 addresses with valid IPv4 addresses, etc.
6
7use compact_str::CompactString;
8use std::fmt;
9
10/// Classification of a sensitive data value. Determines the replacement format.
11#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12#[non_exhaustive]
13pub enum Category {
14    // ── PII ────────────────────────────────────────────────────────────
15    /// Email addresses → preserve domain, hex username
16    Email,
17    /// Person names → synthetic name from hash-indexed table
18    Name,
19    /// Phone numbers → format-preserving numeric replacement
20    Phone,
21    /// Credit card numbers → format-preserving numeric replacement (fails Luhn)
22    CreditCard,
23    /// Social Security Numbers → `000-<hash>`-formatted replacement
24    Ssn,
25
26    // ── Network & Infrastructure ───────────────────────────────────────
27    /// IPv4 addresses → preserve dots, replace digit groups
28    IpV4,
29    /// IPv6 addresses → preserve colons/`::`, replace hex digits
30    IpV6,
31    /// MAC addresses → preserve `:` or `-` separators, replace hex digits
32    MacAddress,
33    /// Hostname / FQDN → preserve domain suffix, hex prefix
34    Hostname,
35    /// Docker / container hex IDs → replace hex digits
36    ContainerId,
37
38    // ── Application & Identity ─────────────────────────────────────────
39    /// UUIDs → preserve `-` dashes, replace hex digits
40    Uuid,
41    /// JSON Web Tokens → preserve `.` separators, replace base64url chars
42    Jwt,
43    /// Opaque auth tokens / API keys / bearer tokens
44    AuthToken,
45
46    // ── System & OS ────────────────────────────────────────────────────
47    /// File paths → preserve `/`, `\`, and extension; replace segment content
48    FilePath,
49    /// Windows Security Identifiers → preserve `S-` prefix and `-` separators
50    WindowsSid,
51
52    // ── Web ────────────────────────────────────────────────────────────
53    /// URLs → preserve scheme and structural chars (`://`, `/`, `?`, `=`, `&`)
54    Url,
55
56    // ── Cloud ──────────────────────────────────────────────────────────
57    /// AWS ARNs → preserve `:` and `/` separators, replace content segments
58    AwsArn,
59    /// Azure Resource IDs → preserve `/` structure and well-known segment names
60    AzureResourceId,
61
62    // ── Catch-all ──────────────────────────────────────────────────────
63    /// Arbitrary / user-defined category
64    Custom(CompactString),
65}
66
67impl fmt::Display for Category {
68    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69        match self {
70            Category::Custom(name) => write!(f, "custom:{name}"),
71            other => f.write_str(other.as_str()),
72        }
73    }
74}
75
76impl Category {
77    /// Return the canonical string representation for this category.
78    #[must_use]
79    pub fn as_str(&self) -> &str {
80        match self {
81            Category::Email => "email",
82            Category::Name => "name",
83            Category::Phone => "phone",
84            Category::CreditCard => "credit_card",
85            Category::Ssn => "ssn",
86            Category::IpV4 => "ipv4",
87            Category::IpV6 => "ipv6",
88            Category::MacAddress => "mac_address",
89            Category::Hostname => "hostname",
90            Category::ContainerId => "container_id",
91            Category::Uuid => "uuid",
92            Category::Jwt => "jwt",
93            Category::AuthToken => "auth_token",
94            Category::FilePath => "file_path",
95            Category::WindowsSid => "windows_sid",
96            Category::Url => "url",
97            Category::AwsArn => "aws_arn",
98            Category::AzureResourceId => "azure_resource_id",
99            Category::Custom(name) => name.as_str(),
100        }
101    }
102
103    /// Return a stable string key used for HMAC domain separation.
104    ///
105    /// Equivalent to [`as_str()`](Self::as_str) for backward compatibility.
106    /// For HMAC domain separation use [`domain_tag_hmac()`](Self::domain_tag_hmac)
107    /// which prefixes custom categories to prevent collisions.
108    #[must_use]
109    pub fn domain_tag(&self) -> &str {
110        self.as_str()
111    }
112
113    /// Return a collision-safe string key for HMAC domain separation.
114    ///
115    /// For `Custom` categories, this includes the `custom:` prefix to
116    /// prevent collisions with built-in category tags (e.g. a
117    /// `Custom("email")` won't collide with the built-in `Email` tag).
118    #[must_use]
119    pub fn domain_tag_hmac(&self) -> String {
120        match self {
121            Category::Custom(name) => format!("custom:{name}"),
122            other => other.as_str().to_owned(),
123        }
124    }
125}