Skip to main content

stackforge_core/anonymize/
policy.rs

//! Anonymization policy configuration.
//!
//! Defines the strategies applied to each protocol field during flow
//! anonymization. Users construct an [`AnonymizationPolicy`] describing
//! the desired privacy-utility trade-off and pass it to the
//! [`AnonymizationEngine`](super::engine::AnonymizationEngine).
7
/// How to anonymize IPv4/IPv6 addresses.
///
/// The [`Default`] value is [`IpAnonymizationMode::None`] (pass-through).
/// The type is `Copy` and `Hash`, so it can be used directly as a map or
/// set key.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IpAnonymizationMode {
    /// No anonymization -- IPs pass through unchanged.
    #[default]
    None,
    /// Prefix-preserving anonymization via Crypto-PAn (AES-128).
    ///
    /// Two addresses sharing a *k*-bit prefix will still share a *k*-bit
    /// prefix after anonymization, preserving subnet topology for ML models.
    CryptoPan,
}
19
/// How to anonymize MAC addresses.
///
/// The [`Default`] value is [`MacAnonymizationMode::None`] (pass-through).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MacAnonymizationMode {
    /// No anonymization.
    #[default]
    None,
    /// Full salted hash (all 6 bytes). Destroys OUI information.
    SaltedHash,
    /// Preserve the OUI (first 3 bytes) and hash only the NIC-specific
    /// portion. Allows ML models to identify device manufacturers.
    SaltedHashPreserveOui,
}
31
/// How to anonymize transport ports.
///
/// The [`Default`] value is [`PortAnonymizationMode::None`] (pass-through).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PortAnonymizationMode {
    /// No anonymization.
    #[default]
    None,
    /// Preserve well-known destination ports (0-1023) for service
    /// identification; generalize source/ephemeral ports to category
    /// sentinels (0 = well-known, 1024 = registered, 49152 = ephemeral).
    PreserveWellKnown,
    /// Generalize all ports to category sentinels.
    Categorize,
}
44
/// How to anonymize timestamps.
///
/// The [`Default`] value is [`TimestampAnonymizationMode::None`]
/// (pass-through).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimestampAnonymizationMode {
    /// No anonymization.
    #[default]
    None,
    /// Shift all timestamps by a random epoch offset (preserves perfect
    /// ordering and all relative durations). The offset is generated once
    /// per engine session.
    EpochShift,
    /// Epoch shift plus bounded per-timestamp jitter. The `jitter_ms`
    /// value is the maximum uniform noise added to each timestamp.
    ///
    /// **Warning**: jitter may invert ordering of very close timestamps.
    /// Use small values (1-10 ms) for safety.
    EpochShiftWithJitter {
        /// Maximum jitter in milliseconds.
        jitter_ms: u32,
    },
}
64
/// How to handle TCP sequence/acknowledgment numbers.
///
/// The [`Default`] value is [`TcpSeqAnonymizationMode::None`]
/// (pass-through).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TcpSeqAnonymizationMode {
    /// No anonymization.
    #[default]
    None,
    /// Add a random per-flow offset to all sequence and acknowledgment
    /// numbers. Preserves relative differences (bytes in flight,
    /// retransmission detection) while hiding absolute values.
    RandomOffset,
}
75
/// How to handle reassembled payload data.
///
/// The [`Default`] value is [`PayloadAnonymizationMode::None`] (full
/// payload retained).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PayloadAnonymizationMode {
    /// No anonymization -- full payload retained.
    #[default]
    None,
    /// Remove all reassembled payload data.
    TruncateAll,
    /// Keep only the first *n* bytes of each direction's reassembled stream.
    TruncateTo(usize),
}
86
87/// Master anonymization policy.
88///
89/// Controls which cryptographic primitives and strategies are applied to
90/// each protocol field category during flow export.
91///
92/// # Example
93///
94/// ```rust
95/// use stackforge_core::anonymize::AnonymizationPolicy;
96///
97/// // ML-optimized defaults: prefix-preserving IPs, hashed MACs,
98/// // well-known ports preserved, epoch-shifted timestamps, payload stripped.
99/// let policy = AnonymizationPolicy::ml_optimized();
100/// ```
101#[derive(Debug, Clone)]
102pub struct AnonymizationPolicy {
103    /// IP address anonymization strategy.
104    pub ip_mode: IpAnonymizationMode,
105    /// MAC address anonymization strategy (applied if MAC data is
106    /// present in flow metadata -- currently informational for future
107    /// packet-level anonymization).
108    pub mac_mode: MacAnonymizationMode,
109    /// Transport port anonymization strategy.
110    pub port_mode: PortAnonymizationMode,
111    /// Timestamp anonymization strategy.
112    pub timestamp_mode: TimestampAnonymizationMode,
113    /// TCP sequence number anonymization strategy.
114    pub tcp_seq_mode: TcpSeqAnonymizationMode,
115    /// Reassembled payload handling.
116    pub payload_mode: PayloadAnonymizationMode,
117    /// 32-byte key for Crypto-PAn. First 16 bytes = AES-128 key,
118    /// last 16 bytes = padding material.
119    ///
120    /// If `None` and `ip_mode` is `CryptoPan`, a random key is generated.
121    pub crypto_pan_key: Option<[u8; 32]>,
122    /// 32-byte salt for consistent hashing (MAC addresses, connection IDs).
123    ///
124    /// If `None`, a random salt is generated per engine session.
125    pub hash_salt: Option<[u8; 32]>,
126}
127
128impl Default for AnonymizationPolicy {
129    /// Default policy: no anonymization.
130    fn default() -> Self {
131        Self {
132            ip_mode: IpAnonymizationMode::None,
133            mac_mode: MacAnonymizationMode::None,
134            port_mode: PortAnonymizationMode::None,
135            timestamp_mode: TimestampAnonymizationMode::None,
136            tcp_seq_mode: TcpSeqAnonymizationMode::None,
137            payload_mode: PayloadAnonymizationMode::None,
138            crypto_pan_key: None,
139            hash_salt: None,
140        }
141    }
142}
143
144impl AnonymizationPolicy {
145    /// Policy optimized for machine learning on network flows.
146    ///
147    /// - IPs: Crypto-PAn (preserves subnet topology)
148    /// - Ports: well-known destination ports preserved
149    /// - Timestamps: epoch shift (perfect ordering)
150    /// - TCP seq: random per-flow offset
151    /// - Payloads: fully truncated
152    #[must_use]
153    pub fn ml_optimized() -> Self {
154        Self {
155            ip_mode: IpAnonymizationMode::CryptoPan,
156            mac_mode: MacAnonymizationMode::SaltedHash,
157            port_mode: PortAnonymizationMode::PreserveWellKnown,
158            timestamp_mode: TimestampAnonymizationMode::EpochShift,
159            tcp_seq_mode: TcpSeqAnonymizationMode::RandomOffset,
160            payload_mode: PayloadAnonymizationMode::TruncateAll,
161            crypto_pan_key: None,
162            hash_salt: None,
163        }
164    }
165
166    /// Maximum privacy policy. Generalizes all ports, hashes all MACs,
167    /// and strips payloads.
168    #[must_use]
169    pub fn maximum_privacy() -> Self {
170        Self {
171            ip_mode: IpAnonymizationMode::CryptoPan,
172            mac_mode: MacAnonymizationMode::SaltedHash,
173            port_mode: PortAnonymizationMode::Categorize,
174            timestamp_mode: TimestampAnonymizationMode::EpochShiftWithJitter { jitter_ms: 5 },
175            tcp_seq_mode: TcpSeqAnonymizationMode::RandomOffset,
176            payload_mode: PayloadAnonymizationMode::TruncateAll,
177            crypto_pan_key: None,
178            hash_salt: None,
179        }
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// The default policy must leave every field category untouched and
    /// carry no key material.
    #[test]
    fn test_default_is_noop() {
        let p = AnonymizationPolicy::default();
        assert_eq!(p.ip_mode, IpAnonymizationMode::None);
        assert_eq!(p.mac_mode, MacAnonymizationMode::None);
        assert_eq!(p.port_mode, PortAnonymizationMode::None);
        assert_eq!(p.timestamp_mode, TimestampAnonymizationMode::None);
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::None);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::None);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }

    /// Pins every field of the ML preset so an accidental change to any
    /// one strategy is caught.
    #[test]
    fn test_ml_optimized_preset() {
        let p = AnonymizationPolicy::ml_optimized();
        assert_eq!(p.ip_mode, IpAnonymizationMode::CryptoPan);
        assert_eq!(p.mac_mode, MacAnonymizationMode::SaltedHash);
        assert_eq!(p.port_mode, PortAnonymizationMode::PreserveWellKnown);
        assert_eq!(p.timestamp_mode, TimestampAnonymizationMode::EpochShift);
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::RandomOffset);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::TruncateAll);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }

    /// Pins every field of the maximum-privacy preset, including the
    /// exact jitter bound.
    #[test]
    fn test_maximum_privacy_preset() {
        let p = AnonymizationPolicy::maximum_privacy();
        assert_eq!(p.ip_mode, IpAnonymizationMode::CryptoPan);
        assert_eq!(p.mac_mode, MacAnonymizationMode::SaltedHash);
        assert_eq!(p.port_mode, PortAnonymizationMode::Categorize);
        // `assert_eq!` prints both values on failure, unlike `matches!`.
        assert_eq!(
            p.timestamp_mode,
            TimestampAnonymizationMode::EpochShiftWithJitter { jitter_ms: 5 }
        );
        assert_eq!(p.tcp_seq_mode, TcpSeqAnonymizationMode::RandomOffset);
        assert_eq!(p.payload_mode, PayloadAnonymizationMode::TruncateAll);
        assert!(p.crypto_pan_key.is_none());
        assert!(p.hash_salt.is_none());
    }
}