Skip to main content

stackforge_core/anonymize/
hash.rs

1//! Salted consistent hashing for identifiers.
2//!
3//! Provides deterministic hashing of MAC addresses, QUIC Connection IDs,
4//! and other byte-string identifiers using a session-specific salt.
5//! The same salt + identifier always produces the same output within a
6//! session, allowing ML models to track entities over time.
7
8use std::hash::{Hash, Hasher};
9
/// Deterministic, salt-mixing hasher used to pseudonymize identifiers.
///
/// The salt is stored privately and is fed into every hash computation, so
/// two hashers built from the same salt map equal inputs to equal outputs.
#[derive(Debug, Clone)]
pub struct SaltedHasher {
    salt: [u8; 32],
}

impl SaltedHasher {
    /// Build a hasher around a caller-supplied 32-byte salt.
    #[must_use]
    pub fn new(salt: [u8; 32]) -> Self {
        SaltedHasher { salt }
    }

    /// Map a 6-byte MAC address to a 6-byte pseudonym.
    ///
    /// The address is passed through [`Self::hash_bytes`]; the result is the
    /// first six bytes of the little-endian encoding of that 64-bit digest,
    /// i.e. its low 48 bits.
    #[must_use]
    pub fn hash_mac(&self, mac: &[u8; 6]) -> [u8; 6] {
        let digest = self.hash_bytes(mac).to_le_bytes();
        let mut pseudonym = [0u8; 6];
        pseudonym.copy_from_slice(&digest[..6]);
        pseudonym
    }

    /// Pseudonymize a MAC address while keeping its OUI intact.
    ///
    /// The first three bytes (the OUI prefix) are copied through unchanged;
    /// the last three bytes are taken from the same positions of
    /// [`Self::hash_mac`] applied to the full address.
    #[must_use]
    pub fn hash_mac_preserve_oui(&self, mac: &[u8; 6]) -> [u8; 6] {
        // Start from the fully hashed address, then restore the original prefix.
        let mut pseudonym = self.hash_mac(mac);
        pseudonym[..3].copy_from_slice(&mac[..3]);
        pseudonym
    }

    /// Reduce an arbitrary byte slice to a 64-bit digest.
    ///
    /// A fresh standard-library `DefaultHasher` is fed the salt first and
    /// then `data`, both through their `Hash` implementations.
    ///
    /// NOTE: the standard library does not specify `DefaultHasher`'s
    /// algorithm, so digests should not be assumed stable across Rust
    /// releases.
    #[must_use]
    pub fn hash_bytes(&self, data: &[u8]) -> u64 {
        use std::collections::hash_map::DefaultHasher;

        let mut state = DefaultHasher::new();
        // Order matters: salt goes in ahead of the payload.
        Hash::hash(&self.salt, &mut state);
        Hash::hash(data, &mut state);
        state.finish()
    }
}
53
#[cfg(test)]
mod tests {
    use super::*;

    /// MAC fixture used by the determinism, "output changes" and OUI tests.
    const MAC_A: [u8; 6] = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF];
    /// MAC fixture used by the salt- and input-sensitivity tests.
    const MAC_B: [u8; 6] = [0x00, 0x11, 0x22, 0x33, 0x44, 0x55];

    /// Fixed salt whose byte at index `i` is `i * 7 + 3` (at most 220, so it fits in a `u8`).
    fn test_salt() -> [u8; 32] {
        std::array::from_fn(|i| (i * 7 + 3) as u8)
    }

    /// Hasher built from the fixed test salt.
    fn hasher() -> SaltedHasher {
        SaltedHasher::new(test_salt())
    }

    #[test]
    fn test_mac_hash_deterministic() {
        let h = hasher();
        let first = h.hash_mac(&MAC_A);
        let second = h.hash_mac(&MAC_A);
        assert_eq!(first, second);
    }

    #[test]
    fn test_mac_hash_changes_output() {
        // A pseudonym equal to its input would be an astronomically unlikely coincidence.
        let pseudonym = hasher().hash_mac(&MAC_A);
        assert_ne!(MAC_A, pseudonym);
    }

    #[test]
    fn test_different_salt_different_output() {
        // The second salt differs from the first in its leading byte only.
        let other_salt = {
            let mut s = test_salt();
            s[0] = 0xFF;
            s
        };
        let with_base = hasher().hash_mac(&MAC_B);
        let with_other = SaltedHasher::new(other_salt).hash_mac(&MAC_B);
        assert_ne!(with_base, with_other);
    }

    #[test]
    fn test_preserve_oui() {
        let pseudonym = hasher().hash_mac_preserve_oui(&MAC_A);
        // The three-byte OUI prefix must come through untouched.
        assert_eq!(pseudonym[..3], [0xAA_u8, 0xBB, 0xCC]);
    }

    #[test]
    fn test_hash_bytes_consistent() {
        let h = hasher();
        let payload = b"test-connection-id";
        let first = h.hash_bytes(payload);
        let second = h.hash_bytes(payload);
        assert_eq!(first, second);
    }

    #[test]
    fn test_different_inputs_different_hashes() {
        let h = hasher();
        // Same as MAC_B except for the final byte.
        let neighbour: [u8; 6] = [0x00, 0x11, 0x22, 0x33, 0x44, 0x56];
        assert_ne!(h.hash_mac(&MAC_B), h.hash_mac(&neighbour));
    }
}