Skip to main content

sanitize_engine/
strategy.rs

1//! Pluggable replacement strategies.
2//!
3//! This module provides the [`Strategy`] trait and five built-in
4//! implementations that can be composed with the mapping engine via
5//! [`StrategyGenerator`], an adapter that implements
6//! [`ReplacementGenerator`].
7//!
8//! # Design Note
9//!
10//! This is the **extensibility layer** for library consumers who need custom
11//! replacement logic. The CLI binary uses [`crate::generator::HmacGenerator`]
12//! and [`crate::generator::RandomGenerator`] directly with category-aware
13//! formatters for performance and simplicity. Both paths share the same
14//! [`ReplacementGenerator`] interface. See `ARCHITECTURE.md` section 2 for
15//! details on the dual-path design.
16//!
17//! # Architecture
18//!
19//! ```text
20//! ┌──────────────────────┐
21//! │    MappingStore       │  ← owns Arc<dyn ReplacementGenerator>
22//! └──────────┬───────────┘
23//!            │ calls generate(category, original)
24//!            ▼
25//! ┌──────────────────────┐
26//! │  StrategyGenerator   │  ← adapter: produces entropy, delegates to Strategy
27//! │  (ReplacementGenerator)│
28//! └──────────┬───────────┘
29//!            │ calls replace(original, &entropy)
30//!            ▼
31//! ┌──────────────────────┐
32//! │   dyn Strategy       │  ← pure function of (original, entropy) → String
33//! │                      │
34//! │  RandomString        │
35//! │  RandomUuid          │
36//! │  FakeIp              │
37//! │  PreserveLength      │
38//! │  HmacHash            │
39//! └──────────────────────┘
40//! ```
41//!
42//! # Deterministic Mode
43//!
44//! Strategies are pure functions of `(original, entropy)`. Determinism is
45//! controlled by the **entropy source** inside [`StrategyGenerator`]:
46//!
47//! - **Deterministic** (`EntropyMode::Deterministic`): entropy is derived
48//!   via HMAC-SHA256 keyed with a fixed seed — same seed + same input →
49//!   same replacement across runs.
50//! - **Random** (`EntropyMode::Random`): entropy comes from OS CSPRNG —
51//!   each call produces a fresh value (but the `MappingStore` still caches
52//!   the first result per unique input for per-run consistency).
53//!
54//! The [`HmacHash`] strategy is an exception: it carries its own HMAC key
55//! and is deterministic by construction regardless of the entropy mode.
56//!
57//! # Extensibility
58//!
59//! To add a new replacement strategy:
60//!
61//! 1. Create a struct implementing [`Strategy`].
62//! 2. Return a unique name from [`Strategy::name`].
63//! 3. Implement [`Strategy::replace`] as a pure function of `(original, entropy)`.
64//! 4. Wrap it in a [`StrategyGenerator`] to use with `MappingStore`.
65//!
66//! Third-party crates can implement `Strategy` without modifying this crate,
67//! since the trait is public and object-safe.
68
69use crate::category::Category;
70use crate::generator::ReplacementGenerator;
71use hmac::{Hmac, Mac};
72use rand::Rng;
73use sha2::Sha256;
74use zeroize::Zeroize;
75
76// ---------------------------------------------------------------------------
77// Strategy trait
78// ---------------------------------------------------------------------------
79
/// Contract for a replacement strategy that can be plugged into the engine.
///
/// An implementation turns an original sensitive value into a sanitized
/// stand-in, driven by 32 bytes of entropy supplied by the caller.
/// Implementations MUST behave as **pure functions**: an identical
/// `(original, entropy)` pair always yields an identical result.
///
/// How the entropy was obtained (HMAC-derived or drawn from a CSPRNG) is
/// deliberately invisible to a strategy; [`StrategyGenerator`] owns that
/// decision.
pub trait Strategy: Send + Sync {
    /// Unique, human-readable identifier of the strategy
    /// (for example `"random_string"`).
    fn name(&self) -> &'static str;

    /// Build the sanitized replacement for `original` from `entropy`.
    ///
    /// # Contract
    ///
    /// - Deterministic: equal `(original, entropy)` inputs give equal output.
    /// - No I/O and no access to external mutable state.
    /// - The result should be recognizably synthetic / non-sensitive.
    fn replace(&self, original: &str, entropy: &[u8; 32]) -> String;
}
102
103// ---------------------------------------------------------------------------
104// Entropy mode (used by StrategyGenerator)
105// ---------------------------------------------------------------------------
106
/// How entropy is produced for strategies.
///
/// The `Debug` representation never prints the key material: the seed is a
/// secret, so it is rendered as `"<redacted>"` to keep it out of logs and
/// panic messages.
pub enum EntropyMode {
    /// Deterministic: `entropy = HMAC-SHA256(key, category || '\0' || original)`.
    Deterministic {
        /// 32-byte HMAC key (seed).
        key: [u8; 32],
    },
    /// Random: entropy is drawn from OS CSPRNG on every call.
    Random,
}

impl std::fmt::Debug for EntropyMode {
    /// Format the variant name only; the key bytes are replaced by a
    /// placeholder so the secret cannot leak through `{:?}`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Deterministic { .. } => f
                .debug_struct("Deterministic")
                .field("key", &"<redacted>")
                .finish(),
            Self::Random => f.write_str("Random"),
        }
    }
}
118
119impl Drop for EntropyMode {
120    fn drop(&mut self) {
121        if let EntropyMode::Deterministic { ref mut key } = self {
122            key.zeroize();
123        }
124    }
125}
126
127// ---------------------------------------------------------------------------
128// StrategyGenerator — adapter from Strategy → ReplacementGenerator
129// ---------------------------------------------------------------------------
130
131/// Adapter that bridges a [`Strategy`] into the [`ReplacementGenerator`]
132/// interface consumed by [`MappingStore`](crate::store::MappingStore).
133///
134/// It produces entropy according to the configured [`EntropyMode`] and
135/// delegates replacement formatting to the wrapped strategy.
136pub struct StrategyGenerator {
137    strategy: Box<dyn Strategy>,
138    mode: EntropyMode,
139}
140
141impl StrategyGenerator {
142    /// Create a new adapter.
143    ///
144    /// # Arguments
145    ///
146    /// - `strategy` — the replacement strategy to use.
147    /// - `mode` — how to produce entropy (deterministic seed or random).
148    #[must_use]
149    pub fn new(strategy: Box<dyn Strategy>, mode: EntropyMode) -> Self {
150        Self { strategy, mode }
151    }
152
153    /// Produce 32 bytes of entropy for `(category, original)`.
154    fn entropy(&self, category: &Category, original: &str) -> [u8; 32] {
155        match &self.mode {
156            EntropyMode::Deterministic { key } => {
157                type HmacSha256 = Hmac<Sha256>;
158                let mut mac = HmacSha256::new_from_slice(key).expect("HMAC accepts any key length");
159                let tag = category.domain_tag_hmac();
160                mac.update(tag.as_bytes());
161                mac.update(b"\x00");
162                mac.update(original.as_bytes());
163                let result = mac.finalize();
164                let mut out = [0u8; 32];
165                out.copy_from_slice(&result.into_bytes());
166                out
167            }
168            EntropyMode::Random => {
169                let mut buf = [0u8; 32];
170                rand::rng().fill(&mut buf);
171                buf
172            }
173        }
174    }
175
176    /// Access the underlying strategy.
177    #[must_use]
178    pub fn strategy(&self) -> &dyn Strategy {
179        &*self.strategy
180    }
181}
182
183impl ReplacementGenerator for StrategyGenerator {
184    fn generate(&self, category: &Category, original: &str) -> String {
185        let entropy = self.entropy(category, original);
186        self.strategy.replace(original, &entropy)
187    }
188}
189
190// ===========================================================================
191// Built-in strategies
192// ===========================================================================
193
/// Derive the initial state of a 64-bit xorshift PRNG from 32 entropy bytes.
///
/// The buffer is read as four little-endian `u64` words that are summed with
/// wrapping addition, so every one of the 256 input bits can affect the
/// state. A sum of zero is replaced by a fixed non-zero constant, because an
/// all-zero xorshift64 state only ever yields zeros.
#[inline]
fn xorshift64_seed(entropy: &[u8; 32]) -> u64 {
    let folded = entropy
        .chunks_exact(8)
        .map(|word| {
            let bytes: [u8; 8] = word
                .try_into()
                .expect("chunks_exact(8) yields 8-byte slices");
            u64::from_le_bytes(bytes)
        })
        .fold(0u64, u64::wrapping_add);

    match folded {
        0 => 0xDEAD_BEEF_CAFE_BABE,
        seed => seed,
    }
}
213
214// ---------------------------------------------------------------------------
215// 1. RandomString
216// ---------------------------------------------------------------------------
217
/// Generates an alphanumeric string from entropy bytes.
///
/// The output length defaults to 16 characters and can be configured with
/// [`RandomString::with_length`]. Characters are drawn from `[a-zA-Z0-9]`.
#[derive(Debug, Clone)]
pub struct RandomString {
    /// Desired output length (always within `1..=64`).
    len: usize,
}

impl RandomString {
    /// Create with the default output length (16).
    #[must_use]
    pub fn new() -> Self {
        Self { len: 16 }
    }

    /// Create with a specific output length.
    ///
    /// Values outside `1..=64` are clamped into that range, so `0` becomes
    /// `1` and anything above `64` becomes `64`.
    #[must_use]
    pub fn with_length(len: usize) -> Self {
        Self {
            len: len.clamp(1, 64),
        }
    }
}

impl Default for RandomString {
    /// Same as [`RandomString::new`]: a 16-character output length.
    fn default() -> Self {
        Self::new()
    }
}
248
249impl Strategy for RandomString {
250    fn name(&self) -> &'static str {
251        "random_string"
252    }
253
254    fn replace(&self, _original: &str, entropy: &[u8; 32]) -> String {
255        const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz\
256                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ\
257                                  0123456789";
258        let mut chars = String::with_capacity(self.len);
259        let mut state = xorshift64_seed(entropy);
260
261        for _ in 0..self.len {
262            // xorshift64
263            state ^= state << 13;
264            state ^= state >> 7;
265            state ^= state << 17;
266            #[allow(clippy::cast_possible_truncation)]
267            // truncation is intentional for index mapping
268            let idx = (state as usize) % CHARSET.len();
269            chars.push(CHARSET[idx] as char);
270        }
271        chars
272    }
273}
274
275// ---------------------------------------------------------------------------
276// 2. RandomUuid
277// ---------------------------------------------------------------------------
278
/// Generates a UUID v4–formatted string from entropy bytes.
///
/// The output looks like `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx` where
/// `x` is a hex digit derived from entropy and `y ∈ {8,9,a,b}` per RFC 4122.
/// When backed by deterministic entropy, the UUID is stable.
#[derive(Debug, Clone, Copy, Default)]
pub struct RandomUuid;

impl RandomUuid {
    /// Create the strategy; it carries no configuration.
    #[must_use]
    pub fn new() -> Self {
        Self
    }
}
298
299impl Strategy for RandomUuid {
300    fn name(&self) -> &'static str {
301        "random_uuid"
302    }
303
304    fn replace(&self, _original: &str, entropy: &[u8; 32]) -> String {
305        // Take the first 16 bytes to form a UUID.
306        let mut bytes = [0u8; 16];
307        bytes.copy_from_slice(&entropy[..16]);
308
309        // Set version = 4 (bits 4-7 of byte 6).
310        bytes[6] = (bytes[6] & 0x0F) | 0x40;
311        // Set variant = RFC 4122 (bits 6-7 of byte 8).
312        bytes[8] = (bytes[8] & 0x3F) | 0x80;
313
314        format!(
315            "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
316            bytes[0], bytes[1], bytes[2], bytes[3],
317            bytes[4], bytes[5],
318            bytes[6], bytes[7],
319            bytes[8], bytes[9],
320            bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15],
321        )
322    }
323}
324
325// ---------------------------------------------------------------------------
326// 3. FakeIp
327// ---------------------------------------------------------------------------
328
/// Generates a length-preserving fake IP address.
///
/// Dots (`.`) stay in their original positions; every other position is
/// filled with a decimal digit derived from `entropy`. For ASCII input such
/// as a dotted IPv4 address, the output has the same byte length as
/// `original`, preserving column widths and log formatting.
///
/// Octet values are not range-checked, so a group such as `987` can appear
/// in the output.
#[derive(Debug, Clone, Copy, Default)]
pub struct FakeIp;

impl FakeIp {
    /// Create the strategy; it carries no configuration.
    #[must_use]
    pub fn new() -> Self {
        Self
    }
}
349
350impl Strategy for FakeIp {
351    fn name(&self) -> &'static str {
352        "fake_ip"
353    }
354
355    fn replace(&self, original: &str, entropy: &[u8; 32]) -> String {
356        // Preserve dots; replace every other character with a deterministic
357        // digit so the output has the same byte length as the original.
358        let mut buf = String::with_capacity(original.len());
359        let mut hi = 0usize;
360        for ch in original.chars() {
361            if ch == '.' {
362                buf.push('.');
363            } else {
364                buf.push((b'0' + entropy[hi % 32] % 10) as char);
365                hi += 1;
366            }
367        }
368        buf
369    }
370}
371
372// ---------------------------------------------------------------------------
373// 4. PreserveLength
374// ---------------------------------------------------------------------------
375
/// Generates a replacement with the **same byte length** as the original.
///
/// Useful when column widths, fixed-length fields, or alignment must be
/// maintained. The output consists of lowercase ASCII letters and digits
/// (`[a-z0-9]`) derived from entropy.
#[derive(Debug, Clone, Copy, Default)]
pub struct PreserveLength;

impl PreserveLength {
    /// Create the strategy; it carries no configuration.
    #[must_use]
    pub fn new() -> Self {
        Self
    }
}
394
395impl Strategy for PreserveLength {
396    fn name(&self) -> &'static str {
397        "preserve_length"
398    }
399
400    fn replace(&self, original: &str, entropy: &[u8; 32]) -> String {
401        const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
402
403        let target_len = original.len();
404        if target_len == 0 {
405            return String::new();
406        }
407
408        let mut state = xorshift64_seed(entropy);
409        let mut result = String::with_capacity(target_len);
410        for _ in 0..target_len {
411            state ^= state << 13;
412            state ^= state >> 7;
413            state ^= state << 17;
414            #[allow(clippy::cast_possible_truncation)]
415            // truncation is intentional for index mapping
416            let idx = (state as usize) % CHARSET.len();
417            result.push(CHARSET[idx] as char);
418        }
419        result
420    }
421}
422
423// ---------------------------------------------------------------------------
424// 5. HmacHash
425// ---------------------------------------------------------------------------
426
427/// HMAC-SHA256 hash strategy — deterministic by construction.
428///
429/// Unlike the other strategies, `HmacHash` carries its own 32-byte key and
430/// computes `HMAC-SHA256(key, original)` directly. The caller-provided
431/// entropy is **ignored**. This makes the output deterministic regardless
432/// of the [`EntropyMode`] used by [`StrategyGenerator`].
433///
434/// The output is a lowercase hex string, optionally truncated to
435/// `output_len` characters (default: 32).
436pub struct HmacHash {
437    key: [u8; 32],
438    /// Number of hex characters to emit (max 64).
439    output_len: usize,
440}
441
442impl HmacHash {
443    /// Create with both a key and a default output length (32 hex chars).
444    #[must_use]
445    pub fn new(key: [u8; 32]) -> Self {
446        Self {
447            key,
448            output_len: 32,
449        }
450    }
451
452    /// Create with a custom output length (clamped to 1..=64).
453    #[must_use]
454    pub fn with_output_len(key: [u8; 32], output_len: usize) -> Self {
455        Self {
456            key,
457            output_len: output_len.clamp(1, 64),
458        }
459    }
460}
461
462impl Strategy for HmacHash {
463    fn name(&self) -> &'static str {
464        "hmac_hash"
465    }
466
467    fn replace(&self, original: &str, _entropy: &[u8; 32]) -> String {
468        use std::fmt::Write;
469
470        type HmacSha256 = Hmac<Sha256>;
471        let mut mac = HmacSha256::new_from_slice(&self.key).expect("HMAC accepts any key length");
472        mac.update(original.as_bytes());
473        let result = mac.finalize();
474        let hash_bytes: [u8; 32] = {
475            let mut buf = [0u8; 32];
476            buf.copy_from_slice(&result.into_bytes());
477            buf
478        };
479        let mut hex = String::with_capacity(64);
480        for b in &hash_bytes {
481            let _ = write!(hex, "{:02x}", b);
482        }
483        hex[..self.output_len].to_string()
484    }
485}
486
487// ===========================================================================
488// Tests
489// ===========================================================================
490
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use std::sync::Arc;

    /// Reproducible entropy fixture: byte `i` holds `i * 37 + 7` (wrapping).
    #[allow(clippy::cast_possible_truncation)] // indices are < 32 and always fit in u8
    fn test_entropy() -> [u8; 32] {
        std::array::from_fn(|i| (i as u8).wrapping_mul(37).wrapping_add(7))
    }

    // ---- Strategy trait: purity / determinism ----

    #[test]
    fn strategies_are_deterministic() {
        let entropy = test_entropy();
        let all: [Box<dyn Strategy>; 5] = [
            Box::new(RandomString::new()),
            Box::new(RandomUuid::new()),
            Box::new(FakeIp::new()),
            Box::new(PreserveLength::new()),
            Box::new(HmacHash::new([42u8; 32])),
        ];
        for strategy in all.iter() {
            let first = strategy.replace("hello world", &entropy);
            let second = strategy.replace("hello world", &entropy);
            assert_eq!(
                first,
                second,
                "strategy '{}' must be deterministic",
                strategy.name()
            );
        }
    }

    #[test]
    fn different_entropy_different_output() {
        let ones = [1u8; 32];
        let twos = [2u8; 32];
        // HmacHash is left out on purpose: it ignores the entropy argument.
        let entropy_driven: [Box<dyn Strategy>; 4] = [
            Box::new(RandomString::new()),
            Box::new(RandomUuid::new()),
            Box::new(FakeIp::new()),
            Box::new(PreserveLength::new()),
        ];
        for strategy in entropy_driven.iter() {
            let with_ones = strategy.replace("test", &ones);
            let with_twos = strategy.replace("test", &twos);
            assert_ne!(
                with_ones,
                with_twos,
                "strategy '{}' should differ with different entropy",
                strategy.name()
            );
        }
    }

    // ---- RandomString ----

    #[test]
    fn random_string_default_length() {
        let out = RandomString::new().replace("anything", &test_entropy());
        assert_eq!(out.len(), 16);
        let only_alnum = out.chars().all(|c| c.is_ascii_alphanumeric());
        assert!(only_alnum, "output must be alphanumeric: {}", out);
    }

    #[test]
    fn random_string_custom_length() {
        let out = RandomString::with_length(8).replace("anything", &test_entropy());
        assert_eq!(out.len(), 8);
    }

    #[test]
    fn random_string_clamped_length() {
        let too_long = RandomString::with_length(999);
        assert_eq!(too_long.len, 64);
        let too_short = RandomString::with_length(0);
        assert_eq!(too_short.len, 1);
    }

    // ---- RandomUuid ----

    #[test]
    fn random_uuid_format() {
        let out = RandomUuid::new().replace("anything", &test_entropy());
        // 8-4-4-4-12 = 36 chars
        assert_eq!(out.len(), 36, "UUID must be 36 chars: {}", out);

        let parts: Vec<&str> = out.split('-').collect();
        let group_lengths: Vec<usize> = parts.iter().map(|p| p.len()).collect();
        assert_eq!(group_lengths, [8, 4, 4, 4, 12]);

        // Version nibble = 4
        assert!(parts[2].starts_with('4'), "version must be 4");
        // Variant nibble ∈ {8,9,a,b}
        let variant = &parts[3][0..1];
        assert!(
            matches!(variant, "8" | "9" | "a" | "b"),
            "variant nibble must be 8/9/a/b, got {}",
            variant,
        );
    }

    // ---- FakeIp ----

    #[test]
    fn fake_ip_format() {
        let dot_positions =
            |s: &str| -> Vec<usize> { s.match_indices('.').map(|(at, _)| at).collect() };

        let input = "192.168.1.1";
        let out = FakeIp::new().replace(input, &test_entropy());

        // Length preserved.
        assert_eq!(
            out.len(),
            input.len(),
            "FakeIp must preserve length: {}",
            out
        );
        // Dot positions preserved.
        assert_eq!(
            dot_positions(&out),
            dot_positions(input),
            "FakeIp must preserve dot positions"
        );
        // Non-dot characters must be ASCII digits.
        let digits_and_dots = out.chars().all(|c| c == '.' || c.is_ascii_digit());
        assert!(
            digits_and_dots,
            "FakeIp output must contain only digits and dots: {}",
            out
        );
        // Must differ from input.
        assert_ne!(out, input, "FakeIp must change the IP");
    }

    // ---- PreserveLength ----

    #[test]
    fn preserve_length_matches() {
        let strategy = PreserveLength::new();
        let entropy = test_entropy();
        for input in ["a", "hello", "this is a fairly long string indeed", ""] {
            let out = strategy.replace(input, &entropy);
            assert_eq!(
                out.len(),
                input.len(),
                "length mismatch for input '{}'",
                input,
            );
        }
    }

    #[test]
    fn preserve_length_characters() {
        let out = PreserveLength::new().replace("hello!", &test_entropy());
        let only_alnum = out.chars().all(|c| c.is_ascii_alphanumeric());
        assert!(only_alnum, "output must be alphanumeric: {}", out);
    }

    // ---- HmacHash ----

    #[test]
    fn hmac_hash_deterministic_with_key() {
        let strategy = HmacHash::new([42u8; 32]);
        let with_zero_entropy = strategy.replace("secret", &[0u8; 32]);
        let with_ff_entropy = strategy.replace("secret", &[0xFF; 32]);
        // Entropy is ignored — result depends only on key + original.
        assert_eq!(
            with_zero_entropy, with_ff_entropy,
            "HmacHash must ignore entropy"
        );
    }

    #[test]
    fn hmac_hash_default_length() {
        let out = HmacHash::new([0u8; 32]).replace("test", &[0u8; 32]);
        assert_eq!(out.len(), 32, "default output is 32 hex chars");
        let only_hex = out.chars().all(|c| c.is_ascii_hexdigit());
        assert!(only_hex, "output must be hex: {}", out);
    }

    #[test]
    fn hmac_hash_custom_length() {
        let out = HmacHash::with_output_len([0u8; 32], 12).replace("test", &[0u8; 32]);
        assert_eq!(out.len(), 12);
    }

    #[test]
    fn hmac_hash_different_keys() {
        let entropy = [0u8; 32];
        let under_key_one = HmacHash::new([1u8; 32]).replace("test", &entropy);
        let under_key_two = HmacHash::new([2u8; 32]).replace("test", &entropy);
        assert_ne!(
            under_key_one, under_key_two,
            "different keys must produce different output"
        );
    }

    #[test]
    fn hmac_hash_different_inputs() {
        let strategy = HmacHash::new([42u8; 32]);
        let entropy = [0u8; 32];
        assert_ne!(
            strategy.replace("alice", &entropy),
            strategy.replace("bob", &entropy)
        );
    }

    // ---- StrategyGenerator integration ----

    #[test]
    fn strategy_generator_deterministic() {
        let generator = StrategyGenerator::new(
            Box::new(RandomString::new()),
            EntropyMode::Deterministic { key: [42u8; 32] },
        );
        let first = generator.generate(&Category::Email, "alice@corp.com");
        let second = generator.generate(&Category::Email, "alice@corp.com");
        assert_eq!(first, second, "deterministic mode must be repeatable");
    }

    #[test]
    fn strategy_generator_different_categories() {
        let generator = StrategyGenerator::new(
            Box::new(RandomString::new()),
            EntropyMode::Deterministic { key: [42u8; 32] },
        );
        let as_email = generator.generate(&Category::Email, "test");
        let as_name = generator.generate(&Category::Name, "test");
        assert_ne!(
            as_email, as_name,
            "different categories must produce different entropy"
        );
    }

    #[test]
    fn strategy_generator_with_store() {
        let generator = Arc::new(StrategyGenerator::new(
            Box::new(RandomUuid::new()),
            EntropyMode::Deterministic { key: [99u8; 32] },
        ));
        let store = crate::store::MappingStore::new(generator, None);

        let first = store
            .get_or_insert(&Category::Email, "alice@corp.com")
            .unwrap();
        let second = store
            .get_or_insert(&Category::Email, "alice@corp.com")
            .unwrap();
        assert_eq!(first, second, "store must cache strategy output");
        assert_eq!(first.len(), 36, "output must be UUID-formatted");
    }

    #[test]
    fn strategy_generator_random_cached_in_store() {
        let generator = Arc::new(StrategyGenerator::new(
            Box::new(FakeIp::new()),
            EntropyMode::Random,
        ));
        let store = crate::store::MappingStore::new(generator, None);

        let first = store.get_or_insert(&Category::IpV4, "192.168.1.1").unwrap();
        let second = store.get_or_insert(&Category::IpV4, "192.168.1.1").unwrap();
        // Random entropy, but store caches first result.
        assert_eq!(first, second);
        assert_eq!(
            first.len(),
            "192.168.1.1".len(),
            "FakeIp must preserve input length"
        );
    }

    #[test]
    fn all_strategies_implement_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<RandomString>();
        assert_send_sync::<RandomUuid>();
        assert_send_sync::<FakeIp>();
        assert_send_sync::<PreserveLength>();
        assert_send_sync::<HmacHash>();
        assert_send_sync::<StrategyGenerator>();
    }

    #[test]
    fn strategy_names_unique() {
        use std::collections::BTreeSet;

        let all: [Box<dyn Strategy>; 5] = [
            Box::new(RandomString::new()),
            Box::new(RandomUuid::new()),
            Box::new(FakeIp::new()),
            Box::new(PreserveLength::new()),
            Box::new(HmacHash::new([0u8; 32])),
        ];
        // A set drops duplicates, so equal sizes mean every name is distinct.
        let distinct: BTreeSet<&str> = all.iter().map(|s| s.name()).collect();
        assert_eq!(distinct.len(), all.len(), "strategy names must be unique");
    }

    // ---- Concurrent use via StrategyGenerator + MappingStore ----

    #[test]
    fn concurrent_strategy_generator() {
        let generator = Arc::new(StrategyGenerator::new(
            Box::new(PreserveLength::new()),
            EntropyMode::Deterministic { key: [7u8; 32] },
        ));
        let store = Arc::new(crate::store::MappingStore::new(generator, None));

        // Four workers, each inserting 500 values that are unique to it.
        let workers: Vec<_> = (0..4)
            .map(|t| {
                let store = Arc::clone(&store);
                std::thread::spawn(move || {
                    for i in 0..500 {
                        let val = format!("thread{}-val{}", t, i);
                        let result = store.get_or_insert(&Category::Name, &val).unwrap();
                        assert_eq!(
                            result.len(),
                            val.len(),
                            "PreserveLength must match input length",
                        );
                    }
                })
            })
            .collect();
        for worker in workers {
            worker.join().unwrap();
        }
        assert_eq!(store.len(), 2000);
    }

    // ---- Property tests: structural invariants on generated values ----

    mod property {
        use super::*;
        use crate::store::MappingStore;
        use proptest::prelude::*;

        /// Fresh store backed by an `HmacGenerator` with a fixed key.
        fn hmac_store() -> MappingStore {
            MappingStore::new(
                Arc::new(crate::generator::HmacGenerator::new([77u8; 32])),
                None,
            )
        }

        proptest! {
            #[test]
            fn email_output_is_email_shaped(
                local in "[a-z]{3,8}",
                domain in "[a-z]{3,8}",
                tld in "[a-z]{2,4}",
            ) {
                let input = format!("{local}@{domain}.{tld}");
                let out = hmac_store().get_or_insert(&Category::Email, &input).unwrap();
                let at_signs = out.chars().filter(|&c| c == '@').count();
                prop_assert_eq!(at_signs, 1);
                let domain_part = out.split('@').nth(1).unwrap_or("");
                prop_assert!(domain_part.contains('.'), "no dot in domain part: {out}");
                prop_assert_eq!(out.len(), input.len());
            }

            #[test]
            fn ipv4_output_preserves_dot_structure(
                a in 0u8..=255u8,
                b in 0u8..=255u8,
                c in 0u8..=255u8,
                d in 0u8..=255u8,
            ) {
                let input = format!("{a}.{b}.{c}.{d}");
                let out = hmac_store().get_or_insert(&Category::IpV4, &input).unwrap();
                // Dot positions and per-octet digit counts are kept, but octet
                // values are not clamped to 0-255 (e.g. 114 → 987 is valid).
                // Invariant: four dot-separated groups, digits only, each as
                // long as the matching input octet.
                let out_groups: Vec<&str> = out.split('.').collect();
                prop_assert_eq!(out_groups.len(), 4);
                for (expected, actual) in input.split('.').zip(out_groups.iter()) {
                    prop_assert_eq!(expected.len(), actual.len());
                    prop_assert!(actual.chars().all(|ch| ch.is_ascii_digit()));
                }
            }

            #[test]
            fn same_input_always_same_output(s in "[a-z0-9]{4,12}@[a-z]{4,8}\\.com") {
                let store = hmac_store();
                let first = store.get_or_insert(&Category::Email, &s).unwrap();
                let second = store.get_or_insert(&Category::Email, &s).unwrap();
                prop_assert_eq!(first, second);
            }

            #[test]
            fn different_categories_produce_different_outputs(s in "[a-z]{6,10}") {
                let store = hmac_store();
                let value = format!("{s}@corp.com");
                let as_email = store.get_or_insert(&Category::Email, &value).unwrap();
                let as_name = store.get_or_insert(&Category::Name, &value).unwrap();
                prop_assert_ne!(as_email, as_name);
            }
        }
    }
}