sanitize_engine/category.rs
1//! Data category types for classifying sensitive values.
2//!
3//! Each sensitive value detected belongs to a `Category`, which determines
4//! the format of its replacement. For example, emails are replaced with
5//! syntactically valid emails, IPv4 addresses with valid IPv4 addresses, etc.
6
7use compact_str::CompactString;
8use std::borrow::Cow;
9use std::fmt;
10
11/// Classification of a sensitive data value. Determines the replacement format.
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13#[non_exhaustive]
14pub enum Category {
15 // ── PII ────────────────────────────────────────────────────────────
16 /// Email addresses → preserve domain, hex username
17 Email,
18 /// Person names → synthetic name from hash-indexed table
19 Name,
20 /// Phone numbers → format-preserving numeric replacement
21 Phone,
22 /// Credit card numbers → format-preserving numeric replacement (fails Luhn)
23 CreditCard,
24 /// Social Security Numbers → `000-<hash>`-formatted replacement
25 Ssn,
26
27 // ── Network & Infrastructure ───────────────────────────────────────
28 /// IPv4 addresses → preserve dots, replace digit groups
29 IpV4,
30 /// IPv6 addresses → preserve colons/`::`, replace hex digits
31 IpV6,
32 /// MAC addresses → preserve `:` or `-` separators, replace hex digits
33 MacAddress,
34 /// Hostname / FQDN → preserve domain suffix, hex prefix
35 Hostname,
36 /// Docker / container hex IDs → replace hex digits
37 ContainerId,
38
39 // ── Application & Identity ─────────────────────────────────────────
40 /// UUIDs → preserve `-` dashes, replace hex digits
41 Uuid,
42 /// JSON Web Tokens → preserve `.` separators, replace base64url chars
43 Jwt,
44 /// Opaque auth tokens / API keys / bearer tokens
45 AuthToken,
46
47 // ── System & OS ────────────────────────────────────────────────────
48 /// File paths → preserve `/`, `\`, and extension; replace segment content
49 FilePath,
50 /// Windows Security Identifiers → preserve `S-` prefix and `-` separators
51 WindowsSid,
52
53 // ── Web ────────────────────────────────────────────────────────────
54 /// URLs → preserve scheme and structural chars (`://`, `/`, `?`, `=`, `&`)
55 Url,
56
57 // ── Cloud ──────────────────────────────────────────────────────────
58 /// AWS ARNs → preserve `:` and `/` separators, replace content segments
59 AwsArn,
60 /// Azure Resource IDs → preserve `/` structure and well-known segment names
61 AzureResourceId,
62
63 // ── Catch-all ──────────────────────────────────────────────────────
64 /// Arbitrary / user-defined category
65 Custom(CompactString),
66}
67
68impl fmt::Display for Category {
69 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70 match self {
71 Category::Custom(name) => write!(f, "custom:{name}"),
72 other => f.write_str(other.as_str()),
73 }
74 }
75}
76
77impl Category {
78 /// Return the canonical string representation for this category.
79 #[must_use]
80 pub fn as_str(&self) -> &str {
81 match self {
82 Category::Email => "email",
83 Category::Name => "name",
84 Category::Phone => "phone",
85 Category::CreditCard => "credit_card",
86 Category::Ssn => "ssn",
87 Category::IpV4 => "ipv4",
88 Category::IpV6 => "ipv6",
89 Category::MacAddress => "mac_address",
90 Category::Hostname => "hostname",
91 Category::ContainerId => "container_id",
92 Category::Uuid => "uuid",
93 Category::Jwt => "jwt",
94 Category::AuthToken => "auth_token",
95 Category::FilePath => "file_path",
96 Category::WindowsSid => "windows_sid",
97 Category::Url => "url",
98 Category::AwsArn => "aws_arn",
99 Category::AzureResourceId => "azure_resource_id",
100 Category::Custom(name) => name.as_str(),
101 }
102 }
103
104 /// Return a collision-safe key for HMAC domain separation.
105 ///
106 /// For `Custom` categories this includes the `custom:` prefix so that
107 /// `Custom("email")` cannot collide with the built-in `Email` tag.
108 /// Returns `Borrowed` for all built-in variants (zero allocation) and
109 /// `Owned` only for `Custom` (one allocation per HMAC call).
110 #[must_use]
111 pub fn domain_tag_hmac(&self) -> Cow<'_, str> {
112 match self {
113 Category::Custom(name) => Cow::Owned(format!("custom:{name}")),
114 other => Cow::Borrowed(other.as_str()),
115 }
116 }
117}