stackforge_core/anonymize/policy.rs
1//! Anonymization policy configuration.
2//!
3//! Defines the strategies applied to each protocol field during flow
4//! anonymization. Users construct an [`AnonymizationPolicy`] describing
5//! the desired privacy-utility trade-off, and pass it to the
6//! [`AnonymizationEngine`](super::engine::AnonymizationEngine).
7
/// Strategy selector for IPv4 and IPv6 address anonymization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IpAnonymizationMode {
    /// Anonymization disabled: addresses pass through untouched.
    None,
    /// Crypto-PAn (AES-128) prefix-preserving anonymization.
    ///
    /// Whenever two input addresses agree on their leading *k* bits, their
    /// anonymized forms agree on the leading *k* bits as well, so subnet
    /// topology stays visible to ML models.
    CryptoPan,
}
19
/// Strategy selector for MAC address anonymization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MacAnonymizationMode {
    /// Anonymization disabled: MAC addresses are left as they are.
    None,
    /// Salted hash over all 6 bytes; OUI information is destroyed.
    SaltedHash,
    /// Keep the OUI (first 3 bytes) and hash only the NIC-specific
    /// remainder, so ML models can still identify device manufacturers.
    SaltedHashPreserveOui,
}
31
/// Strategy selector for transport port anonymization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PortAnonymizationMode {
    /// Anonymization disabled: ports are left as they are.
    None,
    /// Keep well-known destination ports (0-1023) so services remain
    /// identifiable, while source/ephemeral ports are generalized to
    /// category sentinels: 0 = well-known, 1024 = registered,
    /// 49152 = ephemeral.
    PreserveWellKnown,
    /// Replace every port with its category sentinel.
    Categorize,
}
44
/// Strategy selector for timestamp anonymization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TimestampAnonymizationMode {
    /// Anonymization disabled: timestamps are left as they are.
    None,
    /// Apply one random epoch offset, generated once per engine session,
    /// to every timestamp. Ordering and all relative durations are
    /// preserved exactly.
    EpochShift,
    /// Epoch shift combined with bounded uniform noise on each timestamp;
    /// `jitter_ms` is the maximum amount of noise added.
    ///
    /// **Warning**: timestamps that lie very close together may swap
    /// order once jitter is applied. Small bounds (1-10 ms) are safest.
    EpochShiftWithJitter {
        /// Upper bound on the per-timestamp jitter, in milliseconds.
        jitter_ms: u32,
    },
}
64
/// Strategy selector for TCP sequence/acknowledgment number handling.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TcpSeqAnonymizationMode {
    /// Anonymization disabled: numbers are left as they are.
    None,
    /// Shift every sequence and acknowledgment number by a random offset
    /// chosen per flow. Absolute values are hidden, while relative
    /// differences (bytes in flight, retransmission detection) survive.
    RandomOffset,
}
75
/// Strategy selector for reassembled payload data.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PayloadAnonymizationMode {
    /// Anonymization disabled: the full payload is retained.
    None,
    /// Drop all reassembled payload data.
    TruncateAll,
    /// Retain only the first *n* bytes of the reassembled stream in each
    /// direction.
    TruncateTo(usize),
}
86
87/// Master anonymization policy.
88///
89/// Controls which cryptographic primitives and strategies are applied to
90/// each protocol field category during flow export.
91///
92/// # Example
93///
94/// ```rust
95/// use stackforge_core::anonymize::AnonymizationPolicy;
96///
97/// // ML-optimized defaults: prefix-preserving IPs, hashed MACs,
98/// // well-known ports preserved, epoch-shifted timestamps, payload stripped.
99/// let policy = AnonymizationPolicy::ml_optimized();
100/// ```
101#[derive(Debug, Clone)]
102pub struct AnonymizationPolicy {
103 /// IP address anonymization strategy.
104 pub ip_mode: IpAnonymizationMode,
105 /// MAC address anonymization strategy (applied if MAC data is
106 /// present in flow metadata -- currently informational for future
107 /// packet-level anonymization).
108 pub mac_mode: MacAnonymizationMode,
109 /// Transport port anonymization strategy.
110 pub port_mode: PortAnonymizationMode,
111 /// Timestamp anonymization strategy.
112 pub timestamp_mode: TimestampAnonymizationMode,
113 /// TCP sequence number anonymization strategy.
114 pub tcp_seq_mode: TcpSeqAnonymizationMode,
115 /// Reassembled payload handling.
116 pub payload_mode: PayloadAnonymizationMode,
117 /// 32-byte key for Crypto-PAn. First 16 bytes = AES-128 key,
118 /// last 16 bytes = padding material.
119 ///
120 /// If `None` and `ip_mode` is `CryptoPan`, a random key is generated.
121 pub crypto_pan_key: Option<[u8; 32]>,
122 /// 32-byte salt for consistent hashing (MAC addresses, connection IDs).
123 ///
124 /// If `None`, a random salt is generated per engine session.
125 pub hash_salt: Option<[u8; 32]>,
126}
127
128impl Default for AnonymizationPolicy {
129 /// Default policy: no anonymization.
130 fn default() -> Self {
131 Self {
132 ip_mode: IpAnonymizationMode::None,
133 mac_mode: MacAnonymizationMode::None,
134 port_mode: PortAnonymizationMode::None,
135 timestamp_mode: TimestampAnonymizationMode::None,
136 tcp_seq_mode: TcpSeqAnonymizationMode::None,
137 payload_mode: PayloadAnonymizationMode::None,
138 crypto_pan_key: None,
139 hash_salt: None,
140 }
141 }
142}
143
144impl AnonymizationPolicy {
145 /// Policy optimized for machine learning on network flows.
146 ///
147 /// - IPs: Crypto-PAn (preserves subnet topology)
148 /// - Ports: well-known destination ports preserved
149 /// - Timestamps: epoch shift (perfect ordering)
150 /// - TCP seq: random per-flow offset
151 /// - Payloads: fully truncated
152 #[must_use]
153 pub fn ml_optimized() -> Self {
154 Self {
155 ip_mode: IpAnonymizationMode::CryptoPan,
156 mac_mode: MacAnonymizationMode::SaltedHash,
157 port_mode: PortAnonymizationMode::PreserveWellKnown,
158 timestamp_mode: TimestampAnonymizationMode::EpochShift,
159 tcp_seq_mode: TcpSeqAnonymizationMode::RandomOffset,
160 payload_mode: PayloadAnonymizationMode::TruncateAll,
161 crypto_pan_key: None,
162 hash_salt: None,
163 }
164 }
165
166 /// Maximum privacy policy. Generalizes all ports, hashes all MACs,
167 /// and strips payloads.
168 #[must_use]
169 pub fn maximum_privacy() -> Self {
170 Self {
171 ip_mode: IpAnonymizationMode::CryptoPan,
172 mac_mode: MacAnonymizationMode::SaltedHash,
173 port_mode: PortAnonymizationMode::Categorize,
174 timestamp_mode: TimestampAnonymizationMode::EpochShiftWithJitter { jitter_ms: 5 },
175 tcp_seq_mode: TcpSeqAnonymizationMode::RandomOffset,
176 payload_mode: PayloadAnonymizationMode::TruncateAll,
177 crypto_pan_key: None,
178 hash_salt: None,
179 }
180 }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// The default policy must leave every field category untouched and
    /// must not carry a key or salt.
    #[test]
    fn test_default_is_noop() {
        let p = AnonymizationPolicy::default();
        assert_eq!(p.ip_mode, IpAnonymizationMode::None);
        assert_eq!(p.mac_mode, MacAnonymizationMode::None);
        assert_eq!(p.port_mode, PortAnonymizationMode::None);
        assert_eq!(p.timestamp_mode, TimestampAnonymizationMode::None);
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::None);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::None);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }

    /// Pins every field of the ML preset so an accidental change to any
    /// strategy is caught.
    #[test]
    fn test_ml_optimized_preset() {
        let p = AnonymizationPolicy::ml_optimized();
        assert_eq!(p.ip_mode, IpAnonymizationMode::CryptoPan);
        assert_eq!(p.mac_mode, MacAnonymizationMode::SaltedHash);
        assert_eq!(p.port_mode, PortAnonymizationMode::PreserveWellKnown);
        assert_eq!(p.timestamp_mode, TimestampAnonymizationMode::EpochShift);
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::RandomOffset);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::TruncateAll);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }

    /// Pins every field of the maximum-privacy preset, including the
    /// exact jitter bound.
    #[test]
    fn test_maximum_privacy_preset() {
        let p = AnonymizationPolicy::maximum_privacy();
        assert_eq!(p.ip_mode, IpAnonymizationMode::CryptoPan);
        assert_eq!(p.mac_mode, MacAnonymizationMode::SaltedHash);
        assert_eq!(p.port_mode, PortAnonymizationMode::Categorize);
        assert_eq!(
            p.timestamp_mode,
            TimestampAnonymizationMode::EpochShiftWithJitter { jitter_ms: 5 }
        );
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::RandomOffset);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::TruncateAll);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }
}
212}