sanitize_engine/category.rs
1//! Data category types for classifying sensitive values.
2//!
3//! Each sensitive value detected belongs to a `Category`, which determines
4//! the format of its replacement. For example, emails are replaced with
5//! syntactically valid emails, IPv4 addresses with valid IPv4 addresses, etc.
6
7use compact_str::CompactString;
8use std::fmt;
9
10/// Classification of a sensitive data value. Determines the replacement format.
11#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12#[non_exhaustive]
13pub enum Category {
14 // ── PII ────────────────────────────────────────────────────────────
15 /// Email addresses → preserve domain, hex username
16 Email,
17 /// Person names → synthetic name from hash-indexed table
18 Name,
19 /// Phone numbers → format-preserving numeric replacement
20 Phone,
21 /// Credit card numbers → format-preserving numeric replacement (fails Luhn)
22 CreditCard,
23 /// Social Security Numbers → `000-<hash>`-formatted replacement
24 Ssn,
25
26 // ── Network & Infrastructure ───────────────────────────────────────
27 /// IPv4 addresses → preserve dots, replace digit groups
28 IpV4,
29 /// IPv6 addresses → preserve colons/`::`, replace hex digits
30 IpV6,
31 /// MAC addresses → preserve `:` or `-` separators, replace hex digits
32 MacAddress,
33 /// Hostname / FQDN → preserve domain suffix, hex prefix
34 Hostname,
35 /// Docker / container hex IDs → replace hex digits
36 ContainerId,
37
38 // ── Application & Identity ─────────────────────────────────────────
39 /// UUIDs → preserve `-` dashes, replace hex digits
40 Uuid,
41 /// JSON Web Tokens → preserve `.` separators, replace base64url chars
42 Jwt,
43 /// Opaque auth tokens / API keys / bearer tokens
44 AuthToken,
45
46 // ── System & OS ────────────────────────────────────────────────────
47 /// File paths → preserve `/`, `\`, and extension; replace segment content
48 FilePath,
49 /// Windows Security Identifiers → preserve `S-` prefix and `-` separators
50 WindowsSid,
51
52 // ── Web ────────────────────────────────────────────────────────────
53 /// URLs → preserve scheme and structural chars (`://`, `/`, `?`, `=`, `&`)
54 Url,
55
56 // ── Cloud ──────────────────────────────────────────────────────────
57 /// AWS ARNs → preserve `:` and `/` separators, replace content segments
58 AwsArn,
59 /// Azure Resource IDs → preserve `/` structure and well-known segment names
60 AzureResourceId,
61
62 // ── Catch-all ──────────────────────────────────────────────────────
63 /// Arbitrary / user-defined category
64 Custom(CompactString),
65}
66
67impl fmt::Display for Category {
68 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69 match self {
70 Category::Custom(name) => write!(f, "custom:{name}"),
71 other => f.write_str(other.as_str()),
72 }
73 }
74}
75
76impl Category {
77 /// Return the canonical string representation for this category.
78 #[must_use]
79 pub fn as_str(&self) -> &str {
80 match self {
81 Category::Email => "email",
82 Category::Name => "name",
83 Category::Phone => "phone",
84 Category::CreditCard => "credit_card",
85 Category::Ssn => "ssn",
86 Category::IpV4 => "ipv4",
87 Category::IpV6 => "ipv6",
88 Category::MacAddress => "mac_address",
89 Category::Hostname => "hostname",
90 Category::ContainerId => "container_id",
91 Category::Uuid => "uuid",
92 Category::Jwt => "jwt",
93 Category::AuthToken => "auth_token",
94 Category::FilePath => "file_path",
95 Category::WindowsSid => "windows_sid",
96 Category::Url => "url",
97 Category::AwsArn => "aws_arn",
98 Category::AzureResourceId => "azure_resource_id",
99 Category::Custom(name) => name.as_str(),
100 }
101 }
102
103 /// Return a stable string key used for HMAC domain separation.
104 ///
105 /// Equivalent to [`as_str()`](Self::as_str) for backward compatibility.
106 /// For HMAC domain separation use [`domain_tag_hmac()`](Self::domain_tag_hmac)
107 /// which prefixes custom categories to prevent collisions.
108 #[must_use]
109 pub fn domain_tag(&self) -> &str {
110 self.as_str()
111 }
112
113 /// Return a collision-safe string key for HMAC domain separation.
114 ///
115 /// For `Custom` categories, this includes the `custom:` prefix to
116 /// prevent collisions with built-in category tags (e.g. a
117 /// `Custom("email")` won't collide with the built-in `Email` tag).
118 #[must_use]
119 pub fn domain_tag_hmac(&self) -> String {
120 match self {
121 Category::Custom(name) => format!("custom:{name}"),
122 other => other.as_str().to_owned(),
123 }
124 }
125}