gnu_sort/adaptive_sort.rs

use std::cmp::Ordering;
use std::collections::HashMap;
use std::sync::Arc;

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Adaptive sorting algorithm that selects an optimal strategy based on
/// detected data patterns.
pub struct AdaptiveSort {
    // Configuration flags, currently unused by the associated helpers below.
    #[allow(dead_code)]
    enable_simd: bool,
    #[allow(dead_code)]
    enable_adaptive: bool,
    #[allow(dead_code)]
    enable_pattern_detection: bool,
    #[allow(dead_code)]
    enable_compression: bool,
}

impl Default for AdaptiveSort {
    fn default() -> Self {
        Self::new()
    }
}

impl AdaptiveSort {
    pub fn new() -> Self {
        Self {
            #[cfg(target_arch = "x86_64")]
            enable_simd: is_x86_feature_detected!("avx2"),
            #[cfg(not(target_arch = "x86_64"))]
            enable_simd: false,
            enable_adaptive: true,
            enable_pattern_detection: true,
            enable_compression: true,
        }
    }

    /// SIMD-accelerated string comparison: compares 32 bytes per step with
    /// AVX2 and 16 bytes per step with SSE2 before falling back to scalar.
    ///
    /// # Safety
    /// The caller must ensure the CPU supports AVX2, e.g. via
    /// `is_x86_feature_detected!("avx2")`.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    #[allow(dead_code)]
    unsafe fn simd_compare_strings(a: &[u8], b: &[u8]) -> Ordering {
        let min_len = a.len().min(b.len());
        let mut i = 0;

        // Process 32 bytes at a time with AVX2.
        while i + 32 <= min_len {
            let va = _mm256_loadu_si256(a.as_ptr().add(i) as *const __m256i);
            let vb = _mm256_loadu_si256(b.as_ptr().add(i) as *const __m256i);

            let eq = _mm256_cmpeq_epi8(va, vb);
            let mask = _mm256_movemask_epi8(eq);

            // All 32 lanes equal yields an all-ones mask (-1 as i32).
            if mask != -1 {
                // Locate the first differing byte.
                for j in 0..32 {
                    if a[i + j] != b[i + j] {
                        return a[i + j].cmp(&b[i + j]);
                    }
                }
            }
            i += 32;
        }

        // Process 16 bytes at a time with SSE2.
        while i + 16 <= min_len {
            let va = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
            let vb = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);

            let eq = _mm_cmpeq_epi8(va, vb);
            let mask = _mm_movemask_epi8(eq);

            if mask != 0xFFFF {
                // Locate the first differing byte.
                for j in 0..16 {
                    if a[i + j] != b[i + j] {
                        return a[i + j].cmp(&b[i + j]);
                    }
                }
            }
            i += 16;
        }

        // Handle remaining bytes one at a time.
        while i < min_len {
            match a[i].cmp(&b[i]) {
                Ordering::Equal => i += 1,
                other => return other,
            }
        }

        // Common prefix is equal: the shorter slice sorts first.
        a.len().cmp(&b.len())
    }

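    // Possible safe entry point (illustrative, not part of the original API):
    // verify AVX2 at runtime before dispatching to the SIMD comparator, and
    // fall back to the standard lexicographic slice comparison otherwise.
    #[cfg(target_arch = "x86_64")]
    #[allow(dead_code)]
    fn compare_strings(a: &[u8], b: &[u8]) -> Ordering {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 availability was just checked.
            unsafe { Self::simd_compare_strings(a, b) }
        } else {
            a.cmp(b)
        }
    }
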
    /// Pattern detection: samples adjacent pairs to find pre-sorted, reversed,
    /// or duplicate-heavy inputs so later stages can skip unnecessary work.
    pub fn detect_patterns<T: Ord>(data: &[T]) -> DataPattern {
        // Too few elements to sample reliably; treat as random.
        if data.len() < 100 {
            return DataPattern::Random;
        }

        let sample_size = (data.len() / 100).clamp(10, 1000);
        let mut ascending = 0;
        let mut descending = 0;
        let mut equal = 0;

        // Compare evenly spaced adjacent pairs across the slice.
        for i in 0..sample_size {
            let idx = i * (data.len() / sample_size);
            if idx + 1 < data.len() {
                match data[idx].cmp(&data[idx + 1]) {
                    Ordering::Less => ascending += 1,
                    Ordering::Greater => descending += 1,
                    Ordering::Equal => equal += 1,
                }
            }
        }

        // Classify by majority: more than 80% ascending or descending pairs,
        // or more than 50% equal pairs; otherwise assume a random order.
        let total = ascending + descending + equal;
        if ascending > total * 8 / 10 {
            DataPattern::MostlySorted
        } else if descending > total * 8 / 10 {
            DataPattern::MostlyReversed
        } else if equal > total * 5 / 10 {
            DataPattern::ManyDuplicates
        } else {
            DataPattern::Random
        }
    }

    /// Adaptive algorithm selection based on data characteristics.
    pub fn select_optimal_algorithm(
        data_len: usize,
        pattern: DataPattern,
        data_type: DataType,
    ) -> SortAlgorithm {
        match pattern {
            DataPattern::MostlySorted => {
                // TimSort exploits existing runs (O(n) best case).
                SortAlgorithm::TimSort
            }
            DataPattern::MostlyReversed => {
                // Reverse in O(n), then TimSort the nearly sorted result.
                SortAlgorithm::ReverseTimSort
            }
            DataPattern::ManyDuplicates => {
                // Three-way quicksort handles equal keys in a single pass.
                SortAlgorithm::ThreeWayQuickSort
            }
            DataPattern::Random => {
                match data_type {
                    DataType::Integer if data_len < 1_000_000 => {
                        // Counting sort for small integer datasets.
                        SortAlgorithm::CountingSort
                    }
                    DataType::Integer => {
                        // Radix sort for large integer datasets.
                        SortAlgorithm::RadixSort
                    }
                    DataType::Float => {
                        // Specialized radix sort over float bit patterns.
                        SortAlgorithm::FloatRadixSort
                    }
                    DataType::String if data_len < 10_000 => {
                        // Quicksort for small string datasets.
                        SortAlgorithm::QuickSort
                    }
                    DataType::String => {
                        // MSD radix sort for large string datasets.
                        SortAlgorithm::MSDRadixSort
                    }
                    _ => {
                        // Default to introsort for mixed data.
                        SortAlgorithm::IntroSort
                    }
                }
            }
        }
    }

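    // Sketch of how detection and selection compose for one concrete data
    // type; this driver is illustrative and not part of the original API.
    #[allow(dead_code)]
    fn choose_for_integers(data: &[i32]) -> SortAlgorithm {
        let pattern = Self::detect_patterns(data);
        Self::select_optimal_algorithm(data.len(), pattern, DataType::Integer)
    }
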
    /// Counting sort for small integer ranges (O(n + k) time, O(k) space).
    ///
    /// Assumes every element lies in `[min, max]`.
    pub fn counting_sort(data: &mut [i32], min: i32, max: i32) {
        // Compute the range in i64 so `max - min + 1` cannot overflow i32.
        let range = (max as i64 - min as i64 + 1) as usize;
        if range > 1_000_000 {
            // Fall back to the standard sort for large ranges.
            data.sort_unstable();
            return;
        }

        let mut counts = vec![0usize; range];

        // Count occurrences of each value.
        for &value in data.iter() {
            counts[(value - min) as usize] += 1;
        }

        // Rewrite the slice in sorted order from the counts.
        let mut idx = 0;
        for (i, &count) in counts.iter().enumerate() {
            for _ in 0..count {
                data[idx] = min + i as i32;
                idx += 1;
            }
        }
    }

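    // Convenience sketch (illustrative, not part of the original API): derive
    // the bounds with a scalar scan, then hand them to `counting_sort`.
    #[allow(dead_code)]
    fn counting_sort_auto(data: &mut [i32]) {
        if data.is_empty() {
            return;
        }
        let min = *data.iter().min().expect("checked non-empty above");
        let max = *data.iter().max().expect("checked non-empty above");
        Self::counting_sort(data, min, max);
    }
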
    /// String interning for repeated values: maps each distinct string to a
    /// single index, reducing memory use and later comparison costs.
    pub fn intern_strings(strings: Vec<String>) -> (Vec<usize>, Vec<Arc<String>>) {
        let mut string_map: HashMap<String, usize> = HashMap::new();
        let mut interned: Vec<Arc<String>> = Vec::new();
        let mut indices = Vec::with_capacity(strings.len());

        for s in strings {
            // Look up first so already-interned strings are not cloned.
            let idx = match string_map.get(&s) {
                Some(&idx) => idx,
                None => {
                    let idx = interned.len();
                    string_map.insert(s.clone(), idx);
                    interned.push(Arc::new(s));
                    idx
                }
            };
            indices.push(idx);
        }

        (indices, interned)
    }

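    // Sketch (not in the original API) of why interning pays off when
    // sorting: rank the distinct strings once, then sort the indices by
    // integer rank instead of re-comparing repeated strings.
    #[allow(dead_code)]
    fn sort_interned(indices: &mut [usize], interned: &[Arc<String>]) {
        let mut order: Vec<usize> = (0..interned.len()).collect();
        order.sort_unstable_by(|&a, &b| interned[a].cmp(&interned[b]));
        let mut rank = vec![0usize; interned.len()];
        for (r, &id) in order.iter().enumerate() {
            rank[id] = r;
        }
        indices.sort_unstable_by_key(|&id| rank[id]);
    }
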
    /// Cache-optimized merge with software prefetching.
    ///
    /// `output` must hold at least `left.len() + right.len()` elements.
    #[cfg(target_arch = "x86_64")]
    pub fn cache_optimized_merge<T: Ord + Copy>(left: &[T], right: &[T], output: &mut [T]) {
        debug_assert!(output.len() >= left.len() + right.len());

        let mut i = 0;
        let mut j = 0;
        let mut k = 0;

        while i < left.len() && j < right.len() {
            // Prefetch upcoming elements into L1 before they are compared.
            // Note: `_mm_prefetch` takes its hint as a const generic argument.
            if i + 8 < left.len() {
                unsafe {
                    _mm_prefetch::<{ _MM_HINT_T0 }>(&left[i + 8] as *const T as *const i8);
                }
            }
            if j + 8 < right.len() {
                unsafe {
                    _mm_prefetch::<{ _MM_HINT_T0 }>(&right[j + 8] as *const T as *const i8);
                }
            }

            // `<=` keeps the merge stable: ties are taken from `left`.
            if left[i] <= right[j] {
                output[k] = left[i];
                i += 1;
            } else {
                output[k] = right[j];
                j += 1;
            }
            k += 1;
        }

        // Copy whichever side has elements remaining.
        while i < left.len() {
            output[k] = left[i];
            i += 1;
            k += 1;
        }
        while j < right.len() {
            output[k] = right[j];
            j += 1;
            k += 1;
        }
    }

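    // Usage sketch (illustrative, not part of the original API): merge two
    // sorted runs into a freshly allocated output buffer of exactly the
    // required size.
    #[cfg(target_arch = "x86_64")]
    #[allow(dead_code)]
    fn merge_runs<T: Ord + Copy + Default>(left: &[T], right: &[T]) -> Vec<T> {
        let mut output = vec![T::default(); left.len() + right.len()];
        Self::cache_optimized_merge(left, right, &mut output);
        output
    }
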
    /// Parallel file reading: splits the file into byte ranges and reads each
    /// range on its own thread. Chunk boundaries fall at arbitrary offsets,
    /// so line-oriented callers must re-join records split across chunks.
    pub fn parallel_read_file(
        path: &std::path::Path,
        num_threads: usize,
    ) -> std::io::Result<Vec<Vec<u8>>> {
        use std::fs::File;
        use std::io::{Read, Seek, SeekFrom};
        use std::thread;

        // Guard against a zero thread count to avoid division by zero.
        let num_threads = num_threads.max(1);
        let file_size = std::fs::metadata(path)?.len() as usize;
        let chunk_size = file_size / num_threads;

        let mut handles = vec![];
        let path = path.to_path_buf();

        for i in 0..num_threads {
            let path = path.clone();
            let start = i * chunk_size;
            // The last thread also reads the remainder left by the division.
            let end = if i == num_threads - 1 {
                file_size
            } else {
                (i + 1) * chunk_size
            };

            let handle = thread::spawn(move || -> std::io::Result<Vec<u8>> {
                let mut file = File::open(path)?;
                file.seek(SeekFrom::Start(start as u64))?;

                let mut buffer = vec![0u8; end - start];
                file.read_exact(&mut buffer)?;
                Ok(buffer)
            });

            handles.push(handle);
        }

        let mut results = Vec::new();
        for handle in handles {
            results.push(
                handle
                    .join()
                    .expect("Reader thread panicked during parallel read")?,
            );
        }

        Ok(results)
    }

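    // Usage sketch (illustrative): size the thread count from the machine
    // rather than hard-coding it.
    #[allow(dead_code)]
    fn parallel_read_auto(path: &std::path::Path) -> std::io::Result<Vec<Vec<u8>>> {
        let num_threads = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1);
        Self::parallel_read_file(path, num_threads)
    }
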
    /// Three-way (Dutch national flag) partitioning for duplicate-heavy data.
    ///
    /// `data` must be non-empty and `pivot_idx` in bounds. Returns `(lt, gt)`
    /// such that `data[..lt] < pivot`, `data[lt..gt] == pivot`, and
    /// `data[gt..] > pivot`.
    pub fn three_way_partition<T: Ord + Clone>(data: &mut [T], pivot_idx: usize) -> (usize, usize) {
        data.swap(0, pivot_idx);
        let pivot = data[0].clone(); // Clone so the slice can be mutated freely.

        let mut lt = 0; // End of the `< pivot` region.
        let mut i = 1; // Next unexamined element.
        let mut gt = data.len(); // Start of the `> pivot` region.

        while i < gt {
            match data[i].cmp(&pivot) {
                Ordering::Less => {
                    data.swap(i, lt);
                    lt += 1;
                    i += 1;
                }
                Ordering::Greater => {
                    gt -= 1;
                    data.swap(i, gt);
                    // Do not advance `i`: the swapped-in element is unexamined.
                }
                Ordering::Equal => {
                    i += 1;
                }
            }
        }

        (lt, gt)
    }
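
    // Illustrative recursive driver (not in the original API) built on
    // `three_way_partition`: the equal region is already in place, so only
    // the strictly-smaller and strictly-larger regions recurse.
    #[allow(dead_code)]
    fn three_way_quicksort<T: Ord + Clone>(data: &mut [T]) {
        if data.len() <= 1 {
            return;
        }
        let (lt, gt) = Self::three_way_partition(data, data.len() / 2);
        let (left, rest) = data.split_at_mut(lt);
        let (_equal, right) = rest.split_at_mut(gt - lt);
        Self::three_way_quicksort(left);
        Self::three_way_quicksort(right);
    }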
}

#[derive(Debug, Clone, Copy)]
pub enum DataPattern {
    MostlySorted,
    MostlyReversed,
    ManyDuplicates,
    Random,
}

#[derive(Debug, Clone, Copy)]
pub enum DataType {
    Integer,
    Float,
    String,
    Mixed,
}

#[derive(Debug, Clone, Copy)]
pub enum SortAlgorithm {
    QuickSort,
    MergeSort,
    HeapSort,
    IntroSort,
    TimSort,
    ReverseTimSort,
    RadixSort,
    MSDRadixSort,
    FloatRadixSort,
    CountingSort,
    ThreeWayQuickSort,
}

/// Branch-free comparison for integers (eliminates branch misprediction).
#[inline(always)]
pub fn branchless_compare(a: i32, b: i32) -> i32 {
    // Returns -1, 0, or 1 without branches.
    ((a > b) as i32) - ((a < b) as i32)
}

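/// Illustrative adapter (the helper name is ours, not from the original
/// code): feed the branch-free result into the standard sort API by mapping
/// -1/0/1 back to an `Ordering`.
#[allow(dead_code)]
pub fn sort_i32_branchless(data: &mut [i32]) {
    data.sort_unstable_by(|&a, &b| branchless_compare(a, b).cmp(&0));
}
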
/// SIMD-accelerated minimum/maximum finding, eight lanes at a time.
///
/// # Safety
/// This function requires AVX2 CPU support and must be called with valid data.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub unsafe fn simd_find_min_max(data: &[i32]) -> (i32, i32) {
    if data.is_empty() {
        // Sentinel for empty input: the identities of min and max.
        return (i32::MAX, i32::MIN);
    }

    let mut min_vec = _mm256_set1_epi32(i32::MAX);
    let mut max_vec = _mm256_set1_epi32(i32::MIN);

    let chunks = data.chunks_exact(8);
    let remainder = chunks.remainder();

    for chunk in chunks {
        let v = _mm256_loadu_si256(chunk.as_ptr() as *const __m256i);
        min_vec = _mm256_min_epi32(min_vec, v);
        max_vec = _mm256_max_epi32(max_vec, v);
    }

    // Reduce the eight lanes of each accumulator to scalars.
    let min_arr: [i32; 8] = std::mem::transmute(min_vec);
    let max_arr: [i32; 8] = std::mem::transmute(max_vec);

    let mut min = *min_arr.iter().min().expect("array of 8 lanes is never empty");
    let mut max = *max_arr.iter().max().expect("array of 8 lanes is never empty");

    // Fold in the up-to-seven elements that did not fill a full chunk.
    for &val in remainder {
        min = min.min(val);
        max = max.max(val);
    }

    (min, max)
}

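/// Possible safe wrapper (illustrative, not part of the original API): check
/// for AVX2 at runtime and fall back to a scalar fold otherwise.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
pub fn find_min_max(data: &[i32]) -> (i32, i32) {
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was just checked.
        unsafe { simd_find_min_max(data) }
    } else {
        data.iter()
            .fold((i32::MAX, i32::MIN), |(lo, hi), &v| (lo.min(v), hi.max(v)))
    }
}
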
/// Scalar fallback for non-x86_64 architectures.
#[cfg(not(target_arch = "x86_64"))]
pub fn simd_find_min_max(data: &[i32]) -> (i32, i32) {
    if data.is_empty() {
        return (i32::MAX, i32::MIN);
    }

    let mut min = data[0];
    let mut max = data[0];

    for &val in &data[1..] {
        min = min.min(val);
        max = max.max(val);
    }

    (min, max)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_counting_sort() {
        let mut data = vec![5, 2, 8, 1, 9, 3, 7, 4, 6];
        AdaptiveSort::counting_sort(&mut data, 1, 9);
        assert_eq!(data, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
    }

    #[test]
    fn test_pattern_detection() {
        // Pattern detection needs at least 100 elements.
        let sorted: Vec<i32> = (1..=100).collect();
        assert!(matches!(
            AdaptiveSort::detect_patterns(&sorted),
            DataPattern::MostlySorted
        ));

        let reversed: Vec<i32> = (1..=100).rev().collect();
        assert!(matches!(
            AdaptiveSort::detect_patterns(&reversed),
            DataPattern::MostlyReversed
        ));

        // Duplicate-heavy input: two runs of 50 equal values.
        let mut duplicates = vec![1; 50];
        duplicates.extend(vec![2; 50]);
        assert!(matches!(
            AdaptiveSort::detect_patterns(&duplicates),
            DataPattern::ManyDuplicates
        ));

        // Arrays below the sampling threshold are reported as Random.
        let small = vec![1, 2, 3, 4, 5];
        assert!(matches!(
            AdaptiveSort::detect_patterns(&small),
            DataPattern::Random
        ));
    }
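
    // Additional sanity checks (ours, not from the original suite).
    #[test]
    fn test_branchless_compare() {
        assert_eq!(branchless_compare(1, 2), -1);
        assert_eq!(branchless_compare(2, 2), 0);
        assert_eq!(branchless_compare(3, 2), 1);
    }

    #[test]
    fn test_three_way_partition() {
        let mut data = vec![3, 1, 3, 2, 3, 5, 3];
        let (lt, gt) = AdaptiveSort::three_way_partition(&mut data, 0);
        assert!(data[..lt].iter().all(|&x| x < 3));
        assert!(data[lt..gt].iter().all(|&x| x == 3));
        assert!(data[gt..].iter().all(|&x| x > 3));
    }

    #[test]
    fn test_intern_strings() {
        let strings = vec!["b".to_string(), "a".to_string(), "b".to_string()];
        let (indices, interned) = AdaptiveSort::intern_strings(strings);
        assert_eq!(indices, vec![0, 1, 0]);
        assert_eq!(interned.len(), 2);
        assert_eq!(*interned[0], "b");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_cache_optimized_merge() {
        let left = [1, 3, 5, 7];
        let right = [2, 4, 6, 8];
        let mut out = [0; 8];
        AdaptiveSort::cache_optimized_merge(&left, &right, &mut out);
        assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
    }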
}