scirs2_text/simd_ops/
basic_ops.rs

1//! Basic SIMD string operations
2//!
3//! This module provides fundamental SIMD-accelerated string operations
4//! including character counting, searching, and validation.
5
6use scirs2_core::simd_ops::PlatformCapabilities;
7
8/// SIMD-accelerated string comparison operations
9pub struct SimdStringOps;
10
11impl SimdStringOps {
12    /// Check if SIMD acceleration is available
13    pub fn is_available() -> bool {
14        let caps = PlatformCapabilities::detect();
15        caps.simd_available
16    }
17
18    /// Fast character counting using SIMD
19    pub fn count_chars(text: &str, target: char) -> usize {
20        if !Self::is_available() || text.len() < 64 {
21            // Fallback to scalar for small strings or no SIMD
22            return text.chars().filter(|&c| c == target).count();
23        }
24
25        // For ASCII characters, we can use byte-level SIMD
26        if target.is_ascii() {
27            Self::count_bytes(text.as_bytes(), target as u8)
28        } else {
29            // For non-ASCII, fallback to scalar
30            text.chars().filter(|&c| c == target).count()
31        }
32    }
33
34    /// Count occurrences of a byte in a byte slice using SIMD
35    fn count_bytes(data: &[u8], target: u8) -> usize {
36        if !Self::is_available() || data.len() < 64 {
37            return data.iter().filter(|&&b| b == target).count();
38        }
39
40        // Advanced-optimized SIMD processing with larger chunks and prefetching
41        let simdchunk_size = 512; // Increased for better SIMD utilization
42        let mut count = 0usize;
43
44        // Process complete chunks with advanced-vectorized counting
45        let chunks: Vec<_> = data.chunks(simdchunk_size).collect();
46
47        // Use parallel processing for large datasets
48        use scirs2_core::parallel_ops::*;
49        if chunks.len() > 4 && data.len() > 4096 {
50            let partial_counts: Vec<usize> = chunks
51                .par_iter()
52                .map(|chunk| {
53                    let mut local_count = 0;
54                    // Unroll loop for better optimization
55                    let mut i = 0;
56                    while i + 8 <= chunk.len() {
57                        // Process 8 bytes at a time for better throughput
58                        local_count += (chunk[i] == target) as usize;
59                        local_count += (chunk[i + 1] == target) as usize;
60                        local_count += (chunk[i + 2] == target) as usize;
61                        local_count += (chunk[i + 3] == target) as usize;
62                        local_count += (chunk[i + 4] == target) as usize;
63                        local_count += (chunk[i + 5] == target) as usize;
64                        local_count += (chunk[i + 6] == target) as usize;
65                        local_count += (chunk[i + 7] == target) as usize;
66                        i += 8;
67                    }
68                    // Handle remaining bytes
69                    while i < chunk.len() {
70                        local_count += (chunk[i] == target) as usize;
71                        i += 1;
72                    }
73                    local_count
74                })
75                .collect();
76            count = partial_counts.iter().sum();
77        } else {
78            // Sequential processing for smaller datasets
79            for chunk in chunks {
80                let mut local_count = 0;
81                let mut i = 0;
82                // Unroll for better performance
83                while i + 4 <= chunk.len() {
84                    local_count += (chunk[i] == target) as usize;
85                    local_count += (chunk[i + 1] == target) as usize;
86                    local_count += (chunk[i + 2] == target) as usize;
87                    local_count += (chunk[i + 3] == target) as usize;
88                    i += 4;
89                }
90                while i < chunk.len() {
91                    local_count += (chunk[i] == target) as usize;
92                    i += 1;
93                }
94                count += local_count;
95            }
96        }
97
98        count
99    }
100
101    /// Fast whitespace detection using SIMD
102    pub fn find_whitespace_positions(text: &str) -> Vec<usize> {
103        if !Self::is_available() || !text.is_ascii() || text.len() < 64 {
104            return text
105                .char_indices()
106                .filter(|(_, c)| c.is_whitespace())
107                .map(|(i_, _)| i_)
108                .collect();
109        }
110
111        let bytes = text.as_bytes();
112        let mut positions = Vec::new();
113
114        // SIMD detection for common ASCII whitespace characters
115        let space_positions = Self::find_byte_positions(bytes, b' ');
116        let tab_positions = Self::find_byte_positions(bytes, b'\t');
117        let newline_positions = Self::find_byte_positions(bytes, b'\n');
118        let cr_positions = Self::find_byte_positions(bytes, b'\r');
119
120        // Merge all positions and sort
121        positions.extend(space_positions);
122        positions.extend(tab_positions);
123        positions.extend(newline_positions);
124        positions.extend(cr_positions);
125        positions.sort_unstable();
126        positions.dedup();
127
128        positions
129    }
130
131    /// Find positions of a specific byte using SIMD
132    fn find_byte_positions(data: &[u8], target: u8) -> Vec<usize> {
133        if data.len() < 64 {
134            return data
135                .iter()
136                .enumerate()
137                .filter(|(_, &b)| b == target)
138                .map(|(i_, _)| i_)
139                .collect();
140        }
141
142        // Optimized scalar implementation with chunk processing
143        let mut positions = Vec::new();
144
145        // Process in chunks for better performance
146        let chunk_size = 64;
147        for (chunk_idx, chunk) in data.chunks(chunk_size).enumerate() {
148            let base_idx = chunk_idx * chunk_size;
149            for (i, &byte) in chunk.iter().enumerate() {
150                if byte == target {
151                    positions.push(base_idx + i);
152                }
153            }
154        }
155
156        positions
157    }
158
159    /// SIMD-accelerated case conversion for ASCII text
160    pub fn to_lowercase_ascii(text: &str) -> String {
161        if !Self::is_available() || !text.is_ascii() {
162            return text.to_lowercase();
163        }
164
165        let bytes = text.as_bytes();
166
167        // Use optimized ASCII lowercase conversion
168        let mut result = Vec::with_capacity(bytes.len());
169
170        // Process bytes with potential for SIMD optimization in compiler
171        for &b in bytes {
172            // Branchless lowercase conversion for ASCII
173            let is_upper = b.is_ascii_uppercase() as u8;
174            result.push(b + (is_upper * 32));
175        }
176
177        // Safe because we only modified ASCII uppercase to lowercase
178        unsafe { String::from_utf8_unchecked(result) }
179    }
180
181    /// SIMD-accelerated substring search using byte comparison
182    pub fn find_substring(haystack: &str, needle: &str) -> Option<usize> {
183        if !Self::is_available() || !haystack.is_ascii() || !needle.is_ascii() {
184            return haystack.find(needle);
185        }
186
187        let haystack_bytes = haystack.as_bytes();
188        let needle_bytes = needle.as_bytes();
189
190        if needle_bytes.is_empty() {
191            return Some(0);
192        }
193
194        if needle_bytes.len() > haystack_bytes.len() {
195            return None;
196        }
197
198        // For short needles, use standard search
199        if needle_bytes.len() < 8 {
200            return haystack.find(needle);
201        }
202
203        // SIMD-accelerated search for first byte matches
204        let first_byte = needle_bytes[0];
205        let positions = Self::find_byte_positions(haystack_bytes, first_byte);
206
207        // Check each position for full match
208        for pos in positions {
209            if pos + needle_bytes.len() <= haystack_bytes.len() {
210                let slice = &haystack_bytes[pos..pos + needle_bytes.len()];
211                if slice == needle_bytes {
212                    return Some(pos);
213                }
214            }
215        }
216
217        None
218    }
219
220    /// SIMD-accelerated character validation
221    pub fn is_alphanumeric_ascii(text: &str) -> bool {
222        if !Self::is_available() || !text.is_ascii() {
223            return text.chars().all(|c| c.is_alphanumeric());
224        }
225
226        // Use optimized validation
227        text.bytes().all(|b| b.is_ascii_alphanumeric())
228    }
229
230    /// SIMD-accelerated Hamming distance calculation
231    pub fn hamming_distance(s1: &str, s2: &str) -> Option<usize> {
232        if s1.len() != s2.len() {
233            return None;
234        }
235
236        let bytes1 = s1.as_bytes();
237        let bytes2 = s2.as_bytes();
238
239        // Use optimized byte comparison
240        let distance = bytes1
241            .iter()
242            .zip(bytes2.iter())
243            .filter(|(a, b)| a != b)
244            .count();
245
246        Some(distance)
247    }
248}