1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
//! Basic SIMD string operations
//!
//! This module provides fundamental SIMD-accelerated string operations
//! including character counting, searching, and validation.
use scirs2_core::simd_ops::PlatformCapabilities;
/// SIMD-accelerated string comparison operations
pub struct SimdStringOps;
impl SimdStringOps {
/// Check if SIMD acceleration is available
pub fn is_available() -> bool {
let caps = PlatformCapabilities::detect();
caps.simd_available
}
/// Fast character counting using SIMD
pub fn count_chars(text: &str, target: char) -> usize {
if !Self::is_available() || text.len() < 64 {
// Fallback to scalar for small strings or no SIMD
return text.chars().filter(|&c| c == target).count();
}
// For ASCII characters, we can use byte-level SIMD
if target.is_ascii() {
Self::count_bytes(text.as_bytes(), target as u8)
} else {
// For non-ASCII, fallback to scalar
text.chars().filter(|&c| c == target).count()
}
}
/// Count occurrences of a byte in a byte slice using SIMD
fn count_bytes(data: &[u8], target: u8) -> usize {
if !Self::is_available() || data.len() < 64 {
return data.iter().filter(|&&b| b == target).count();
}
// Advanced-optimized SIMD processing with larger chunks and prefetching
let simdchunk_size = 512; // Increased for better SIMD utilization
let mut count = 0usize;
// Process complete chunks with advanced-vectorized counting
let chunks: Vec<_> = data.chunks(simdchunk_size).collect();
// Use parallel processing for large datasets
use scirs2_core::parallel_ops::*;
if chunks.len() > 4 && data.len() > 4096 {
let partial_counts: Vec<usize> = chunks
.par_iter()
.map(|chunk| {
let mut local_count = 0;
// Unroll loop for better optimization
let mut i = 0;
while i + 8 <= chunk.len() {
// Process 8 bytes at a time for better throughput
local_count += (chunk[i] == target) as usize;
local_count += (chunk[i + 1] == target) as usize;
local_count += (chunk[i + 2] == target) as usize;
local_count += (chunk[i + 3] == target) as usize;
local_count += (chunk[i + 4] == target) as usize;
local_count += (chunk[i + 5] == target) as usize;
local_count += (chunk[i + 6] == target) as usize;
local_count += (chunk[i + 7] == target) as usize;
i += 8;
}
// Handle remaining bytes
while i < chunk.len() {
local_count += (chunk[i] == target) as usize;
i += 1;
}
local_count
})
.collect();
count = partial_counts.iter().sum();
} else {
// Sequential processing for smaller datasets
for chunk in chunks {
let mut local_count = 0;
let mut i = 0;
// Unroll for better performance
while i + 4 <= chunk.len() {
local_count += (chunk[i] == target) as usize;
local_count += (chunk[i + 1] == target) as usize;
local_count += (chunk[i + 2] == target) as usize;
local_count += (chunk[i + 3] == target) as usize;
i += 4;
}
while i < chunk.len() {
local_count += (chunk[i] == target) as usize;
i += 1;
}
count += local_count;
}
}
count
}
/// Fast whitespace detection using SIMD
pub fn find_whitespace_positions(text: &str) -> Vec<usize> {
if !Self::is_available() || !text.is_ascii() || text.len() < 64 {
return text
.char_indices()
.filter(|(_, c)| c.is_whitespace())
.map(|(i_, _)| i_)
.collect();
}
let bytes = text.as_bytes();
let mut positions = Vec::new();
// SIMD detection for common ASCII whitespace characters
let space_positions = Self::find_byte_positions(bytes, b' ');
let tab_positions = Self::find_byte_positions(bytes, b'\t');
let newline_positions = Self::find_byte_positions(bytes, b'\n');
let cr_positions = Self::find_byte_positions(bytes, b'\r');
// Merge all positions and sort
positions.extend(space_positions);
positions.extend(tab_positions);
positions.extend(newline_positions);
positions.extend(cr_positions);
positions.sort_unstable();
positions.dedup();
positions
}
/// Find positions of a specific byte using SIMD
fn find_byte_positions(data: &[u8], target: u8) -> Vec<usize> {
if data.len() < 64 {
return data
.iter()
.enumerate()
.filter(|(_, &b)| b == target)
.map(|(i_, _)| i_)
.collect();
}
// Optimized scalar implementation with chunk processing
let mut positions = Vec::new();
// Process in chunks for better performance
let chunk_size = 64;
for (chunk_idx, chunk) in data.chunks(chunk_size).enumerate() {
let base_idx = chunk_idx * chunk_size;
for (i, &byte) in chunk.iter().enumerate() {
if byte == target {
positions.push(base_idx + i);
}
}
}
positions
}
/// SIMD-accelerated case conversion for ASCII text
pub fn to_lowercase_ascii(text: &str) -> String {
if !Self::is_available() || !text.is_ascii() {
return text.to_lowercase();
}
let bytes = text.as_bytes();
// Use optimized ASCII lowercase conversion
let mut result = Vec::with_capacity(bytes.len());
// Process bytes with potential for SIMD optimization in compiler
for &b in bytes {
// Branchless lowercase conversion for ASCII
let is_upper = b.is_ascii_uppercase() as u8;
result.push(b + (is_upper * 32));
}
// Safe because we only modified ASCII uppercase to lowercase
unsafe { String::from_utf8_unchecked(result) }
}
/// SIMD-accelerated substring search using byte comparison
pub fn find_substring(haystack: &str, needle: &str) -> Option<usize> {
if !Self::is_available() || !haystack.is_ascii() || !needle.is_ascii() {
return haystack.find(needle);
}
let haystack_bytes = haystack.as_bytes();
let needle_bytes = needle.as_bytes();
if needle_bytes.is_empty() {
return Some(0);
}
if needle_bytes.len() > haystack_bytes.len() {
return None;
}
// For short needles, use standard search
if needle_bytes.len() < 8 {
return haystack.find(needle);
}
// SIMD-accelerated search for first byte matches
let first_byte = needle_bytes[0];
let positions = Self::find_byte_positions(haystack_bytes, first_byte);
// Check each position for full match
for pos in positions {
if pos + needle_bytes.len() <= haystack_bytes.len() {
let slice = &haystack_bytes[pos..pos + needle_bytes.len()];
if slice == needle_bytes {
return Some(pos);
}
}
}
None
}
/// SIMD-accelerated character validation
pub fn is_alphanumeric_ascii(text: &str) -> bool {
if !Self::is_available() || !text.is_ascii() {
return text.chars().all(|c| c.is_alphanumeric());
}
// Use optimized validation
text.bytes().all(|b| b.is_ascii_alphanumeric())
}
/// SIMD-accelerated Hamming distance calculation
pub fn hamming_distance(s1: &str, s2: &str) -> Option<usize> {
if s1.len() != s2.len() {
return None;
}
let bytes1 = s1.as_bytes();
let bytes2 = s2.as_bytes();
// Use optimized byte comparison
let distance = bytes1
.iter()
.zip(bytes2.iter())
.filter(|(a, b)| a != b)
.count();
Some(distance)
}
}