sql_splitter/redactor/strategy/
shuffle.rs

1//! Shuffle strategy - redistribute values within a column.
2//!
3//! This strategy collects all values for a column and randomly redistributes them.
4//! This preserves the statistical distribution while breaking the row correlation.
5//!
//! Note: This requires holding every value of the column in memory (plus a second,
//! shuffled copy once `shuffle()` has run), so it may not be suitable for very
//! large tables. Consider using the hash strategy for large datasets.
8
9use super::{RedactValue, Strategy, StrategyKind};
10use rand::seq::SliceRandom;
11
12/// Strategy that shuffles values within a column
13#[derive(Debug, Clone, Default)]
14pub struct ShuffleStrategy {
15    /// Collected values for shuffling
16    values: Vec<RedactValue>,
17    /// Shuffled values (populated after shuffle())
18    shuffled: Vec<RedactValue>,
19    /// Current index into shuffled values
20    index: usize,
21}
22
23impl ShuffleStrategy {
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    /// Collect a value for later shuffling
29    pub fn collect(&mut self, value: RedactValue) {
30        self.values.push(value);
31    }
32
33    /// Shuffle the collected values
34    pub fn shuffle(&mut self, rng: &mut impl rand::Rng) {
35        self.shuffled = self.values.clone();
36        self.shuffled.shuffle(rng);
37        self.index = 0;
38    }
39
40    /// Get the next shuffled value
41    pub fn next_value(&mut self) -> Option<RedactValue> {
42        if self.index < self.shuffled.len() {
43            let value = self.shuffled[self.index].clone();
44            self.index += 1;
45            Some(value)
46        } else {
47            None
48        }
49    }
50
51    /// Check if values have been collected
52    pub fn has_values(&self) -> bool {
53        !self.values.is_empty()
54    }
55
56    /// Get the number of collected values
57    pub fn len(&self) -> usize {
58        self.values.len()
59    }
60
61    /// Check if the strategy is empty
62    pub fn is_empty(&self) -> bool {
63        self.values.is_empty()
64    }
65}
66
67impl Strategy for ShuffleStrategy {
68    fn apply(&self, value: &RedactValue, _rng: &mut dyn rand::RngCore) -> RedactValue {
69        // Note: For shuffle strategy, we need a two-pass approach:
70        // 1. First pass: collect all values
71        // 2. Shuffle
72        // 3. Second pass: return shuffled values in order
73        //
74        // This trait-based apply() is called during the second pass.
75        // The actual shuffling logic is handled by the Redactor.
76        //
77        // For now, we just return the value unchanged - the Redactor
78        // will handle the shuffle logic specially.
79        value.clone()
80    }
81
82    fn kind(&self) -> StrategyKind {
83        StrategyKind::Shuffle
84    }
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;

    /// Shuffling five integers must give back exactly those five integers,
    /// in whatever order.
    #[test]
    fn test_shuffle_strategy() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        let mut strategy = ShuffleStrategy::new();

        // First pass: feed the column values in.
        for n in 1..=5 {
            strategy.collect(RedactValue::Integer(n));
        }
        assert_eq!(strategy.len(), 5);

        strategy.shuffle(&mut rng);

        // Second pass: drain until the strategy reports exhaustion.
        let results: Vec<RedactValue> = std::iter::from_fn(|| strategy.next_value()).collect();
        assert_eq!(results.len(), 5);

        // Order is random, so compare as a sorted multiset.
        let mut ints: Vec<i64> = Vec::with_capacity(results.len());
        for v in &results {
            match v {
                RedactValue::Integer(i) => ints.push(*i),
                _ => panic!("Expected Integer"),
            }
        }
        ints.sort();
        assert_eq!(ints, vec![1, 2, 3, 4, 5]);
    }

    /// NULLs take part in the shuffle like any other value: the number of
    /// NULLs and of strings is the same before and after.
    #[test]
    fn test_shuffle_with_nulls() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        let mut strategy = ShuffleStrategy::new();

        strategy.collect(RedactValue::String("a".to_string()));
        strategy.collect(RedactValue::Null);
        strategy.collect(RedactValue::String("b".to_string()));

        strategy.shuffle(&mut rng);

        let drained: Vec<RedactValue> = std::iter::from_fn(|| strategy.next_value()).collect();

        let null_count = drained
            .iter()
            .filter(|v| matches!(v, RedactValue::Null))
            .count();
        let string_count = drained
            .iter()
            .filter(|v| matches!(v, RedactValue::String(_)))
            .count();

        assert_eq!(null_count, 1);
        assert_eq!(string_count, 2);
    }
}