rustywallet_batch/
simd.rs

//! SIMD-aware helpers for batch key generation.
//!
//! Batch sizes are picked from the SIMD features detected on the running CPU.
//! The operations themselves are plain scalar code that the compiler may
//! auto-vectorize, so they also work on platforms without SIMD support.
5
6use rustywallet_keys::private_key::PrivateKey;
7
/// Batch processor for key operations.
///
/// Work is split into fixed-size batches whose size defaults to a value
/// chosen from the SIMD features of the CPU the program runs on. The
/// processor holds no other state, so it is cheap to clone.
#[derive(Debug, Clone)]
pub struct SimdBatchProcessor {
    /// Number of keys handled per batch.
    batch_size: usize,
}
16
17impl Default for SimdBatchProcessor {
18    fn default() -> Self {
19        Self::new()
20    }
21}
22
23impl SimdBatchProcessor {
24    /// Create a new SIMD batch processor.
25    pub fn new() -> Self {
26        Self {
27            batch_size: Self::optimal_batch_size(),
28        }
29    }
30
31    /// Get the optimal batch size for the current platform.
32    ///
33    /// This is based on SIMD register width:
34    /// - AVX-512: 512 bits = 64 bytes = 2 keys
35    /// - AVX2: 256 bits = 32 bytes = 1 key
36    /// - SSE: 128 bits = 16 bytes = 0.5 keys
37    ///
38    /// We use multiples for better throughput.
39    pub fn optimal_batch_size() -> usize {
40        #[cfg(target_arch = "x86_64")]
41        {
42            if is_x86_feature_detected!("avx512f") {
43                16 // Process 16 keys at a time with AVX-512
44            } else if is_x86_feature_detected!("avx2") {
45                8 // Process 8 keys at a time with AVX2
46            } else {
47                4 // Process 4 keys at a time with SSE
48            }
49        }
50        #[cfg(target_arch = "aarch64")]
51        {
52            8 // ARM NEON processes 8 keys at a time
53        }
54        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
55        {
56            4 // Default scalar batch size
57        }
58    }
59
60    /// Check if SIMD is available on this platform.
61    pub fn is_available() -> bool {
62        #[cfg(target_arch = "x86_64")]
63        {
64            is_x86_feature_detected!("sse2")
65        }
66        #[cfg(target_arch = "aarch64")]
67        {
68            true // NEON is always available on aarch64
69        }
70        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
71        {
72            false
73        }
74    }
75
76    /// Get SIMD feature name for current platform.
77    pub fn feature_name() -> &'static str {
78        #[cfg(target_arch = "x86_64")]
79        {
80            if is_x86_feature_detected!("avx512f") {
81                "AVX-512"
82            } else if is_x86_feature_detected!("avx2") {
83                "AVX2"
84            } else if is_x86_feature_detected!("sse2") {
85                "SSE2"
86            } else {
87                "None"
88            }
89        }
90        #[cfg(target_arch = "aarch64")]
91        {
92            "NEON"
93        }
94        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
95        {
96            "None"
97        }
98    }
99
100    /// Set the batch size.
101    pub fn with_batch_size(mut self, size: usize) -> Self {
102        self.batch_size = size;
103        self
104    }
105
106    /// Process keys in SIMD-optimized batches.
107    ///
108    /// This method generates keys and applies a function to each,
109    /// using SIMD-friendly memory access patterns.
110    pub fn process_batch<F>(&self, count: usize, mut processor: F) -> Vec<PrivateKey>
111    where
112        F: FnMut(&PrivateKey),
113    {
114        let mut keys = Vec::with_capacity(count);
115        
116        // Generate in SIMD-friendly batches
117        let full_batches = count / self.batch_size;
118        let remainder = count % self.batch_size;
119
120        for _ in 0..full_batches {
121            let batch = self.generate_batch(self.batch_size);
122            for key in &batch {
123                processor(key);
124            }
125            keys.extend(batch);
126        }
127
128        if remainder > 0 {
129            let batch = self.generate_batch(remainder);
130            for key in &batch {
131                processor(key);
132            }
133            keys.extend(batch);
134        }
135
136        keys
137    }
138
139    /// Generate a batch of keys with SIMD-friendly layout.
140    fn generate_batch(&self, count: usize) -> Vec<PrivateKey> {
141        // Pre-allocate aligned buffer for SIMD operations
142        let mut keys = Vec::with_capacity(count);
143        
144        for _ in 0..count {
145            keys.push(PrivateKey::random());
146        }
147        
148        keys
149    }
150
151    /// Convert keys to hex strings using SIMD-optimized conversion.
152    pub fn keys_to_hex(&self, keys: &[PrivateKey]) -> Vec<String> {
153        // Process in batches for cache efficiency
154        keys.chunks(self.batch_size)
155            .flat_map(|chunk| {
156                chunk.iter().map(|k| k.to_hex()).collect::<Vec<_>>()
157            })
158            .collect()
159    }
160
161    /// Parallel SIMD batch generation.
162    ///
163    /// Combines SIMD optimization with multi-threading for maximum throughput.
164    pub fn parallel_generate(&self, count: usize) -> Vec<PrivateKey> {
165        use rayon::prelude::*;
166
167        let num_batches = count.div_ceil(self.batch_size);
168        
169        (0..num_batches)
170            .into_par_iter()
171            .flat_map(|batch_idx| {
172                let start = batch_idx * self.batch_size;
173                let batch_count = (count - start).min(self.batch_size);
174                self.generate_batch(batch_count)
175            })
176            .collect()
177    }
178}
179
180/// SIMD-optimized hex encoding.
181///
182/// Converts bytes to hex string using SIMD instructions where available.
183pub fn simd_hex_encode(bytes: &[u8]) -> String {
184    // Use standard hex encoding - the compiler will auto-vectorize
185    // where possible with appropriate optimization flags
186    hex_encode_fast(bytes)
187}
188
/// Encodes `bytes` as lowercase hex using a 16-entry digit table.
///
/// Each byte yields its high nibble's digit followed by its low nibble's
/// digit, so the output is exactly twice as long as the input.
fn hex_encode_fast(bytes: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    let mut encoded = String::with_capacity(bytes.len() * 2);
    encoded.extend(bytes.iter().flat_map(|&byte| {
        [
            DIGITS[usize::from(byte >> 4)],
            DIGITS[usize::from(byte & 0x0f)],
        ]
    }));
    encoded
}
202
/// Orders two 32-byte keys lexicographically, byte by byte from index 0.
///
/// This is an ordinary slice comparison; whether it is vectorized is up to
/// the compiler.
#[inline]
pub fn simd_compare_keys(a: &[u8; 32], b: &[u8; 32]) -> std::cmp::Ordering {
    a[..].cmp(&b[..])
}
211
212/// Batch key validation using SIMD.
213///
214/// Validates multiple keys in parallel using SIMD operations.
215pub fn simd_validate_keys(keys: &[[u8; 32]]) -> Vec<bool> {
216    keys.iter()
217        .map(PrivateKey::is_valid)
218        .collect()
219}
220
#[cfg(test)]
mod tests {
    use super::*;
    use std::cmp::Ordering;
    use std::collections::HashSet;

    /// The availability probe and the feature name must tell the same story.
    #[test]
    fn test_simd_availability() {
        let available = SimdBatchProcessor::is_available();
        let feature = SimdBatchProcessor::feature_name();
        println!("SIMD available: {}, feature: {}", available, feature);

        // SIMD is reported as available exactly when a feature name is found.
        assert_eq!(available, feature != "None");
    }

    /// Every platform branch returns at least 4.
    #[test]
    fn test_optimal_batch_size() {
        let size = SimdBatchProcessor::optimal_batch_size();
        assert!(size >= 4);
        println!("Optimal batch size: {}", size);
    }

    /// The callback runs once per generated key.
    #[test]
    fn test_simd_batch_processor() {
        let processor = SimdBatchProcessor::new();
        let mut count = 0;

        let keys = processor.process_batch(100, |_| {
            count += 1;
        });

        assert_eq!(keys.len(), 100);
        assert_eq!(count, 100);
    }

    /// A count that is not a multiple of the batch size still yields exactly
    /// `count` keys, and a zero count yields none.
    #[test]
    fn test_process_batch_partial_final_batch() {
        let processor = SimdBatchProcessor::new().with_batch_size(7);

        let mut seen = 0;
        let keys = processor.process_batch(23, |_| seen += 1);
        assert_eq!(keys.len(), 23);
        assert_eq!(seen, 23);

        let mut seen_for_empty = 0;
        let none = processor.process_batch(0, |_| seen_for_empty += 1);
        assert!(none.is_empty());
        assert_eq!(seen_for_empty, 0);
    }

    /// Parallel generation returns the requested number of distinct keys.
    #[test]
    fn test_parallel_generate() {
        let processor = SimdBatchProcessor::new();
        let keys = processor.parallel_generate(1000);

        assert_eq!(keys.len(), 1000);

        // Verify uniqueness
        let hex_keys: HashSet<_> = keys.iter().map(|k| k.to_hex()).collect();
        assert_eq!(hex_keys.len(), 1000);
    }

    /// Hex output is lowercase, two characters per byte, empty for no input.
    #[test]
    fn test_simd_hex_encode() {
        let bytes = [0x12, 0x34, 0xab, 0xcd];
        let hex = simd_hex_encode(&bytes);
        assert_eq!(hex, "1234abcd");
        assert_eq!(simd_hex_encode(&[]), "");
    }

    /// Keys compare lexicographically from the first byte.
    #[test]
    fn test_simd_compare_keys() {
        let zero = [0u8; 32];
        let mut last_set = [0u8; 32];
        last_set[31] = 1;
        let mut first_set = [0u8; 32];
        first_set[0] = 1;

        assert_eq!(simd_compare_keys(&zero, &zero), Ordering::Equal);
        assert_eq!(simd_compare_keys(&zero, &last_set), Ordering::Less);
        assert_eq!(simd_compare_keys(&first_set, &last_set), Ordering::Greater);
    }

    /// One 64-character hex string per key, in input order.
    #[test]
    fn test_keys_to_hex() {
        let processor = SimdBatchProcessor::new();
        let keys: Vec<_> = (0..10).map(|_| PrivateKey::random()).collect();

        let hex_strings = processor.keys_to_hex(&keys);

        assert_eq!(hex_strings.len(), 10);
        assert!(hex_strings.iter().all(|s| s.len() == 64));
        for (key, hex) in keys.iter().zip(&hex_strings) {
            assert_eq!(&key.to_hex(), hex);
        }
    }
}