Skip to main content

sanitize_engine/
category.rs

1//! Data category types for classifying sensitive values.
2//!
3//! Each sensitive value detected belongs to a `Category`, which determines
4//! the format of its replacement. For example, emails are replaced with
5//! syntactically valid emails, IPv4 addresses with valid IPv4 addresses, etc.
6
7use compact_str::CompactString;
8use std::borrow::Cow;
9use std::fmt;
10
11/// Classification of a sensitive data value. Determines the replacement format.
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13#[non_exhaustive]
14pub enum Category {
15    // ── PII ────────────────────────────────────────────────────────────
16    /// Email addresses → preserve domain, hex username
17    Email,
18    /// Person names → synthetic name from hash-indexed table
19    Name,
20    /// Phone numbers → format-preserving numeric replacement
21    Phone,
22    /// Credit card numbers → format-preserving numeric replacement (fails Luhn)
23    CreditCard,
24    /// Social Security Numbers → `000-<hash>`-formatted replacement
25    Ssn,
26
27    // ── Network & Infrastructure ───────────────────────────────────────
28    /// IPv4 addresses → preserve dots, replace digit groups
29    IpV4,
30    /// IPv6 addresses → preserve colons/`::`, replace hex digits
31    IpV6,
32    /// MAC addresses → preserve `:` or `-` separators, replace hex digits
33    MacAddress,
34    /// Hostname / FQDN → preserve domain suffix, hex prefix
35    Hostname,
36    /// Docker / container hex IDs → replace hex digits
37    ContainerId,
38
39    // ── Application & Identity ─────────────────────────────────────────
40    /// UUIDs → preserve `-` dashes, replace hex digits
41    Uuid,
42    /// JSON Web Tokens → preserve `.` separators, replace base64url chars
43    Jwt,
44    /// Opaque auth tokens / API keys / bearer tokens
45    AuthToken,
46
47    // ── System & OS ────────────────────────────────────────────────────
48    /// File paths → preserve `/`, `\`, and extension; replace segment content
49    FilePath,
50    /// Windows Security Identifiers → preserve `S-` prefix and `-` separators
51    WindowsSid,
52
53    // ── Web ────────────────────────────────────────────────────────────
54    /// URLs → preserve scheme and structural chars (`://`, `/`, `?`, `=`, `&`)
55    Url,
56
57    // ── Cloud ──────────────────────────────────────────────────────────
58    /// AWS ARNs → preserve `:` and `/` separators, replace content segments
59    AwsArn,
60    /// Azure Resource IDs → preserve `/` structure and well-known segment names
61    AzureResourceId,
62
63    // ── Catch-all ──────────────────────────────────────────────────────
64    /// Arbitrary / user-defined category
65    Custom(CompactString),
66}
67
68impl fmt::Display for Category {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        match self {
71            Category::Custom(name) => write!(f, "custom:{name}"),
72            other => f.write_str(other.as_str()),
73        }
74    }
75}
76
77impl Category {
78    /// Return the canonical string representation for this category.
79    #[must_use]
80    pub fn as_str(&self) -> &str {
81        match self {
82            Category::Email => "email",
83            Category::Name => "name",
84            Category::Phone => "phone",
85            Category::CreditCard => "credit_card",
86            Category::Ssn => "ssn",
87            Category::IpV4 => "ipv4",
88            Category::IpV6 => "ipv6",
89            Category::MacAddress => "mac_address",
90            Category::Hostname => "hostname",
91            Category::ContainerId => "container_id",
92            Category::Uuid => "uuid",
93            Category::Jwt => "jwt",
94            Category::AuthToken => "auth_token",
95            Category::FilePath => "file_path",
96            Category::WindowsSid => "windows_sid",
97            Category::Url => "url",
98            Category::AwsArn => "aws_arn",
99            Category::AzureResourceId => "azure_resource_id",
100            Category::Custom(name) => name.as_str(),
101        }
102    }
103
104    /// Return a collision-safe key for HMAC domain separation.
105    ///
106    /// For `Custom` categories this includes the `custom:` prefix so that
107    /// `Custom("email")` cannot collide with the built-in `Email` tag.
108    /// Returns `Borrowed` for all built-in variants (zero allocation) and
109    /// `Owned` only for `Custom` (one allocation per HMAC call).
110    #[must_use]
111    pub fn domain_tag_hmac(&self) -> Cow<'_, str> {
112        match self {
113            Category::Custom(name) => Cow::Owned(format!("custom:{name}")),
114            other => Cow::Borrowed(other.as_str()),
115        }
116    }
117}