Skip to main content

mkt_core/
pii.rs

//! Normalization and hashing of personally identifiable information (PII)
//! for audience uploads.
//!
//! Every ad platform (Meta, Google Customer Match, TikTok DMP, LinkedIn DMP)
//! requires user identifiers to be SHA-256 hashed after normalization.
//! The normalization rules implemented here follow the strictest common
//! contract so a single hasher works across all providers:
//!
//! - **Emails**: trim whitespace, lowercase.
//! - **Phones**: digits only (strip `+`, spaces, dashes, parentheses),
//!   drop leading zeros. Callers should pass numbers with country code.
//!
//! Values that already look like SHA-256 hex digests (64 hex chars) are
//! not hashed again; they are returned in lowercase so callers can mix
//! pre-hashed and raw data.
15
16use sha2::{Digest, Sha256};
17
/// Reports whether `value` has the shape of a SHA-256 hex digest:
/// exactly 64 bytes long, every one of them an ASCII hex digit
/// (upper- or lowercase).
fn is_sha256_hex(value: &str) -> bool {
    // A SHA-256 digest is 32 bytes, which renders as 64 hex characters.
    const DIGEST_HEX_LEN: usize = 64;

    if value.len() != DIGEST_HEX_LEN {
        return false;
    }
    // Bytes of multi-byte UTF-8 sequences are >= 0x80 and never hex digits,
    // so checking bytes gives the same answer as checking chars.
    value.bytes().all(|byte| byte.is_ascii_hexdigit())
}
22
23/// SHA-256 hash a value, returning the lowercase hex digest.
24#[must_use]
25pub fn sha256_hex(value: &str) -> String {
26    let mut hasher = Sha256::new();
27    hasher.update(value.as_bytes());
28    hex::encode(hasher.finalize())
29}
30
/// Canonicalizes an email address for hashing under the cross-platform
/// contract: surrounding whitespace is removed, then the remainder is
/// lowercased.
#[must_use]
pub fn normalize_email(email: &str) -> String {
    let stripped = email.trim();
    stripped.to_lowercase()
}
37
/// Canonicalizes a phone number for hashing under the cross-platform
/// contract: every character that is not an ASCII digit is discarded, and
/// any leading `0` digits of what remains are dropped.
///
/// Input with no digits, or only zeros, yields an empty string.
#[must_use]
pub fn normalize_phone(phone: &str) -> String {
    phone
        .chars()
        .filter(|ch| ch.is_ascii_digit())
        // Leading zeros are judged on the digit stream, i.e. after symbols
        // such as `+` or spaces have already been removed.
        .skip_while(|&digit| digit == '0')
        .collect()
}
46
47/// Normalize and hash an email address.
48///
49/// Already-hashed values (64 hex chars) are passed through unchanged.
50#[must_use]
51pub fn hash_email(email: &str) -> String {
52    if is_sha256_hex(email) {
53        return email.to_lowercase();
54    }
55    sha256_hex(&normalize_email(email))
56}
57
58/// Normalize and hash a phone number.
59///
60/// Already-hashed values (64 hex chars) are passed through unchanged.
61#[must_use]
62pub fn hash_phone(phone: &str) -> String {
63    if is_sha256_hex(phone) {
64        return phone.to_lowercase();
65    }
66    sha256_hex(&normalize_phone(phone))
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference digest: SHA-256 of the string "test@example.com".
    const TEST_EMAIL_HASH: &str =
        "973dfe463ec85785f5f95af5ba3906eedb2d931c24e69824a89ea65dba4e813b";

    #[test]
    fn sha256_hex_known_vector() {
        let digest = sha256_hex("test@example.com");
        assert_eq!(digest, TEST_EMAIL_HASH);
    }

    #[test]
    fn normalize_email_trims_and_lowercases() {
        let normalized = normalize_email("  John.Doe@Example.COM  ");
        assert_eq!(normalized, "john.doe@example.com");
    }

    #[test]
    fn normalize_phone_strips_symbols_and_leading_zeros() {
        // (raw input, expected normalized digits)
        let cases = [
            ("+1 (555) 123-4567", "15551234567"),
            ("0044 20 7946 0958", "442079460958"),
        ];
        for &(raw, expected) in &cases {
            assert_eq!(normalize_phone(raw), expected);
        }
    }

    #[test]
    fn hash_email_normalizes_before_hashing() {
        // The padded, mixed-case input normalizes to "test@example.com".
        let digest = hash_email("  Test@Example.COM ");
        assert_eq!(digest, TEST_EMAIL_HASH);
    }

    #[test]
    fn hash_email_passes_through_existing_hash() {
        // Both the lowercase digest and its uppercase spelling must come
        // back as the lowercase digest, without being hashed again.
        let spellings = [TEST_EMAIL_HASH.to_string(), TEST_EMAIL_HASH.to_uppercase()];
        for spelling in &spellings {
            assert_eq!(hash_email(spelling), TEST_EMAIL_HASH);
        }
    }

    #[test]
    fn hash_phone_normalizes_before_hashing() {
        let expected = sha256_hex("15551234567");
        assert_eq!(hash_phone("+1 (555) 123-4567"), expected);
    }

    #[test]
    fn hash_phone_passes_through_existing_hash() {
        let prehashed = sha256_hex("15551234567");
        let result = hash_phone(&prehashed);
        assert_eq!(result, prehashed);
    }

    #[test]
    fn non_hash_64_char_value_is_hashed_not_passed_through() {
        // Right length, wrong alphabet: the value must be hashed rather
        // than mistaken for an existing digest.
        let not_hex = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
        assert_ne!(hash_email(not_hex), not_hex);
    }
}