Skip to main content

libsais_rs/
libsais16x64.rs

1//! Rust translation of upstream [libsais](https://github.com/IlyaGrebnov/libsais)
2//! 2.10.4 by Ilya Grebnov.
3//!
4//! This module exposes the 16-bit alphabet, 64-bit suffix array, BWT, unBWT,
5//! PLCP and LCP entry points (mirroring `libsais16x64.h`).
6
7use std::mem;
8
/// Signed integer type used for suffix-array entries and lengths in this module.
pub type SaSint = i64;
/// Unsigned integer type with the same width as [`SaSint`].
pub type SaUint = u64;

/// Number of distinct symbols in a 16-bit alphabet.
pub const ALPHABET_SIZE: usize = 1 << 16;
/// Largest value representable by [`SaSint`]; also used as a mask that clears the sign bit.
const SAINT_MAX: SaSint = i64::MAX;
/// Smallest value representable by [`SaSint`]; only the sign bit is set.
const SAINT_MIN: SaSint = i64::MIN;
/// Width of [`SaSint`] in bits.
const SAINT_BIT: u32 = 64;
/// Bit index derived from [`SAINT_BIT`]; feeds [`SUFFIX_GROUP_MARKER`].
const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Single-bit mask at position `SUFFIX_GROUP_BIT - 1`.
const SUFFIX_GROUP_MARKER: SaSint = 1 << (SUFFIX_GROUP_BIT - 1);
/// Flag bit 1; presumably requests BWT output (inferred from the name; confirm against callers).
const LIBSAIS_FLAGS_BWT: SaSint = 1;
/// Flag bit 2; presumably requests a generalized suffix array (inferred from the name; confirm).
const LIBSAIS_FLAGS_GSA: SaSint = 2;
/// Free-space figure assumed for bucket striding when local buckets are in use.
const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 2000;
/// Exponent sizing the reverse-BWT `fastbits` table (`1 + 2^UNBWT_FASTBITS` entries).
const UNBWT_FASTBITS: usize = 17;
/// Number of [`ThreadCache`] entries allocated for every worker thread.
const PER_THREAD_CACHE_SIZE: usize = 2_097_184;
23
24#[repr(C)]
25#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
26struct ThreadCache {
27    symbol: SaSint,
28    index: SaSint,
29}
30
31#[derive(Clone, Debug, Default, PartialEq, Eq)]
32pub struct ThreadState {
33    position: SaSint,
34    m: SaSint,
35    last_lms_suffix: SaSint,
36    count: SaSint,
37    buckets: Vec<SaSint>,
38    cache: Vec<ThreadCache>,
39    cache_entries: usize,
40}
41
42#[derive(Clone, Debug, Default, PartialEq, Eq)]
43pub struct Context {
44    buckets: Vec<SaSint>,
45    thread_state: Option<Vec<ThreadState>>,
46    threads: SaSint,
47}
48
49#[derive(Clone, Debug, Default, PartialEq, Eq)]
50pub struct UnbwtContext {
51    bucket2: Vec<usize>,
52    fastbits: Vec<u16>,
53    buckets: Option<Vec<usize>>,
54    threads: SaSint,
55}
56
57/// Creates the libsais16x64 context that allows reusing allocated memory with each libsais16x64 operation.
58///
59/// In multi-threaded environments, use one context per thread for parallel executions.
60///
61/// Returns the context, or `None` on allocation failure.
62pub fn create_ctx() -> Option<Context> {
63    create_ctx_main(1)
64}
65
66/// Creates the libsais16x64 context for parallel operations using OpenMP-style threading.
67///
68/// In multi-threaded environments, use one context per thread for parallel executions.
69///
70/// - `threads`: number of worker threads (can be 0 for the implementation default).
71///
72/// Returns the context, or `None` on allocation failure.
73pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
74    if threads < 0 {
75        None
76    } else {
77        create_ctx_main(normalize_threads(threads))
78    }
79}
80
81/// Destroys the libsais16x64 context and frees previously allocated memory.
82pub fn free_ctx(_ctx: Context) {}
83
84/// Creates the libsais16x64 reverse-BWT context that allows reusing allocated memory with each `libsais16x64_unbwt_*` operation.
85///
86/// In multi-threaded environments, use one context per thread for parallel executions.
87///
88/// Returns the context, or `None` on allocation failure.
89pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
90    unbwt_create_ctx_main(1)
91}
92
93/// Creates the libsais16x64 reverse-BWT context for parallel `libsais16x64_unbwt_*` operations using OpenMP-style threading.
94///
95/// In multi-threaded environments, use one context per thread for parallel executions.
96///
97/// - `threads`: number of worker threads (can be 0 for the implementation default).
98///
99/// Returns the context, or `None` on allocation failure.
100pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
101    if threads < 0 {
102        None
103    } else {
104        unbwt_create_ctx_main(normalize_threads(threads))
105    }
106}
107
108/// Destroys the libsais16x64 reverse-BWT context and frees previously allocated memory.
109pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
110
111fn normalize_threads(threads: SaSint) -> SaSint {
112    if threads > 0 {
113        threads
114    } else {
115        1
116    }
117}
118
/// Rounds `value` up to the next multiple of `alignment`.
///
/// The mask arithmetic assumes `alignment` is a non-zero power of two.
fn align_up(value: usize, alignment: usize) -> usize {
    let mask = alignment - 1;
    (value + mask) & !mask
}
122
123fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
124    let threads = usize::try_from(threads).ok()?;
125    let mut thread_state = Vec::with_capacity(threads);
126    for _ in 0..threads {
127        thread_state.push(ThreadState {
128            position: 0,
129            m: 0,
130            last_lms_suffix: 0,
131            count: 0,
132            buckets: vec![0; 4 * ALPHABET_SIZE],
133            cache: vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE],
134            cache_entries: PER_THREAD_CACHE_SIZE,
135        });
136    }
137    Some(thread_state)
138}
139
140fn create_ctx_main(threads: SaSint) -> Option<Context> {
141    let buckets = vec![0; 8 * ALPHABET_SIZE];
142    let thread_state = if threads > 1 {
143        Some(alloc_thread_state(threads)?)
144    } else {
145        None
146    };
147
148    Some(Context {
149        buckets,
150        thread_state,
151        threads,
152    })
153}
154
155fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
156    let bucket2 = vec![0; ALPHABET_SIZE];
157    let fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
158    let buckets = if threads > 1 {
159        Some(vec![0; usize::try_from(threads).ok()? * ALPHABET_SIZE])
160    } else {
161        None
162    };
163
164    Some(UnbwtContext {
165        bucket2,
166        fastbits,
167        buckets,
168        threads,
169    })
170}
171
172fn fill_freq(t: &[u16], freq: Option<&mut [SaSint]>) {
173    if let Some(freq) = freq {
174        freq[..ALPHABET_SIZE].fill(0);
175        for &symbol in t {
176            freq[symbol as usize] += 1;
177        }
178    }
179}
180
/// Index of slot `s` for symbol `c` in a bucket table that keeps four slots per symbol.
#[allow(dead_code)]
fn buckets_index4(c: usize, s: usize) -> usize {
    const SLOTS_LOG2: u32 = 2;
    let row = c << SLOTS_LOG2;
    row + s
}
185
/// Index of slot `s` for symbol `c` in a bucket table that keeps two slots per symbol.
#[allow(dead_code)]
fn buckets_index2(c: usize, s: usize) -> usize {
    const SLOTS_LOG2: u32 = 1;
    let row = c << SLOTS_LOG2;
    row + s
}
190
191#[allow(dead_code)]
192fn place_cached_suffixes(
193    sa: &mut [SaSint],
194    cache: &[ThreadCache],
195    block_start: SaSint,
196    block_size: SaSint,
197) {
198    let start = usize::try_from(block_start).expect("block_start must be non-negative");
199    let len = usize::try_from(block_size).expect("block_size must be non-negative");
200    let entries = if cache.len() >= start + len {
201        &cache[start..start + len]
202    } else {
203        &cache[..len]
204    };
205
206    for entry in entries {
207        sa[entry.symbol as usize] = entry.index;
208    }
209}
210
211#[allow(dead_code)]
212fn compact_and_place_cached_suffixes(
213    sa: &mut [SaSint],
214    cache: &mut [ThreadCache],
215    block_start: SaSint,
216    block_size: SaSint,
217) {
218    let start = usize::try_from(block_start).expect("block_start must be non-negative");
219    let len = usize::try_from(block_size).expect("block_size must be non-negative");
220    let read_start = if cache.len() >= start + len { start } else { 0 };
221    let read_end = read_start + len;
222
223    let mut write = read_start;
224    for read in read_start..read_end {
225        let entry = cache[read];
226        if entry.symbol >= 0 {
227            cache[write] = entry;
228            write += 1;
229        }
230    }
231    place_cached_suffixes(sa, cache, block_start, (write - read_start) as SaSint);
232}
233
234#[allow(dead_code)]
235fn count_negative_marked_suffixes(
236    sa: &[SaSint],
237    block_start: SaSint,
238    block_size: SaSint,
239) -> SaSint {
240    let start = block_start as usize;
241    let end = start + block_size as usize;
242    sa[start..end].iter().filter(|&&value| value < 0).count() as SaSint
243}
244
245#[allow(dead_code)]
246fn count_zero_marked_suffixes(sa: &[SaSint], block_start: SaSint, block_size: SaSint) -> SaSint {
247    let start = block_start as usize;
248    let end = start + block_size as usize;
249    sa[start..end].iter().filter(|&&value| value == 0).count() as SaSint
250}
251
252#[allow(dead_code)]
253fn accumulate_counts_s32_n(
254    buckets: &mut [SaSint],
255    bucket00: usize,
256    bucket_size: usize,
257    bucket_stride: usize,
258    num_buckets: usize,
259) {
260    for s in 0..bucket_size {
261        let mut sum = buckets[bucket00 + s];
262        for bucket in 1..num_buckets {
263            sum += buckets[bucket00 - bucket * bucket_stride + s];
264        }
265        buckets[bucket00 + s] = sum;
266    }
267}
268
269#[allow(dead_code)]
270fn accumulate_counts_s32_2(
271    buckets: &mut [SaSint],
272    bucket00: usize,
273    bucket_size: usize,
274    bucket_stride: usize,
275) {
276    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 2);
277}
278
279#[allow(dead_code)]
280fn accumulate_counts_s32_3(
281    buckets: &mut [SaSint],
282    bucket00: usize,
283    bucket_size: usize,
284    bucket_stride: usize,
285) {
286    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 3);
287}
288
289#[allow(dead_code)]
290fn accumulate_counts_s32_4(
291    buckets: &mut [SaSint],
292    bucket00: usize,
293    bucket_size: usize,
294    bucket_stride: usize,
295) {
296    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 4);
297}
298
299#[allow(dead_code)]
300fn accumulate_counts_s32_5(
301    buckets: &mut [SaSint],
302    bucket00: usize,
303    bucket_size: usize,
304    bucket_stride: usize,
305) {
306    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 5);
307}
308
309#[allow(dead_code)]
310fn accumulate_counts_s32_6(
311    buckets: &mut [SaSint],
312    bucket00: usize,
313    bucket_size: usize,
314    bucket_stride: usize,
315) {
316    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 6);
317}
318
319#[allow(dead_code)]
320fn accumulate_counts_s32_7(
321    buckets: &mut [SaSint],
322    bucket00: usize,
323    bucket_size: usize,
324    bucket_stride: usize,
325) {
326    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 7);
327}
328
329#[allow(dead_code)]
330fn accumulate_counts_s32_8(
331    buckets: &mut [SaSint],
332    bucket00: usize,
333    bucket_size: usize,
334    bucket_stride: usize,
335) {
336    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 8);
337}
338
339#[allow(dead_code)]
340fn accumulate_counts_s32_9(
341    buckets: &mut [SaSint],
342    bucket00: usize,
343    bucket_size: usize,
344    bucket_stride: usize,
345) {
346    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 9);
347}
348
349#[allow(dead_code)]
350fn accumulate_counts_s32(
351    buckets: &mut [SaSint],
352    bucket00: usize,
353    bucket_size: usize,
354    bucket_stride: usize,
355    mut num_buckets: usize,
356) {
357    while num_buckets >= 9 {
358        accumulate_counts_s32_9(
359            buckets,
360            bucket00 - (num_buckets - 9) * bucket_stride,
361            bucket_size,
362            bucket_stride,
363        );
364        num_buckets -= 8;
365    }
366
367    match num_buckets {
368        2 => accumulate_counts_s32_2(buckets, bucket00, bucket_size, bucket_stride),
369        3 => accumulate_counts_s32_3(buckets, bucket00, bucket_size, bucket_stride),
370        4 => accumulate_counts_s32_4(buckets, bucket00, bucket_size, bucket_stride),
371        5 => accumulate_counts_s32_5(buckets, bucket00, bucket_size, bucket_stride),
372        6 => accumulate_counts_s32_6(buckets, bucket00, bucket_size, bucket_stride),
373        7 => accumulate_counts_s32_7(buckets, bucket00, bucket_size, bucket_stride),
374        8 => accumulate_counts_s32_8(buckets, bucket00, bucket_size, bucket_stride),
375        _ => {}
376    }
377}
378
379#[allow(dead_code)]
380fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
381    let len = usize::try_from(l).expect("l must be non-negative");
382    let omp_num_threads = if threads > 1 && l >= 65_536 {
383        usize::try_from(threads).expect("threads must be non-negative")
384    } else {
385        1
386    };
387    let omp_block_stride = (len / omp_num_threads) & !15usize;
388    for omp_thread_num in 0..omp_num_threads {
389        let omp_block_start = omp_thread_num * omp_block_stride;
390        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
391            omp_block_stride
392        } else {
393            len - omp_block_start
394        };
395        for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
396            *value ^= SAINT_MIN;
397        }
398    }
399}
400
401#[allow(dead_code)]
402fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
403    let mut i = n - 2;
404    let mut m = n - 1;
405    let mut f0 = 1usize;
406    let mut f1: usize;
407    let mut c0 = t[(n - 1) as usize] as isize;
408    let mut c1: isize;
409
410    while i >= 3 {
411        c1 = t[i as usize] as isize;
412        f1 = usize::from(c1 > c0 - f0 as isize);
413        sa[m as usize] = i + 1;
414        m -= (f1 & !f0) as SaSint;
415
416        c0 = t[(i - 1) as usize] as isize;
417        f0 = usize::from(c0 > c1 - f1 as isize);
418        sa[m as usize] = i;
419        m -= (f0 & !f1) as SaSint;
420
421        c1 = t[(i - 2) as usize] as isize;
422        f1 = usize::from(c1 > c0 - f0 as isize);
423        sa[m as usize] = i - 1;
424        m -= (f1 & !f0) as SaSint;
425
426        c0 = t[(i - 3) as usize] as isize;
427        f0 = usize::from(c0 > c1 - f1 as isize);
428        sa[m as usize] = i - 2;
429        m -= (f0 & !f1) as SaSint;
430
431        i -= 4;
432    }
433
434    while i >= 0 {
435        c1 = c0;
436        c0 = t[i as usize] as isize;
437        f1 = f0;
438        f0 = usize::from(c0 > c1 - f1 as isize);
439        sa[m as usize] = i + 1;
440        m -= (f0 & !f1) as SaSint;
441        i -= 1;
442    }
443
444    n - 1 - m
445}
446
447#[allow(dead_code)]
448fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
449    let mut i = n - 2;
450    let mut m = n - 1;
451    let mut f0 = 1usize;
452    let mut f1: usize;
453    let mut c0 = t[(n - 1) as usize] as isize;
454    let mut c1: isize;
455
456    while i >= 3 {
457        c1 = t[i as usize] as isize;
458        f1 = usize::from(c1 > c0 - f0 as isize);
459        sa[m as usize] = i + 1;
460        m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
461
462        c0 = t[(i - 1) as usize] as isize;
463        f0 = usize::from(c0 > c1 - f1 as isize);
464        sa[m as usize] = i;
465        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
466
467        c1 = t[(i - 2) as usize] as isize;
468        f1 = usize::from(c1 > c0 - f0 as isize);
469        sa[m as usize] = i - 1;
470        m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
471
472        c0 = t[(i - 3) as usize] as isize;
473        f0 = usize::from(c0 > c1 - f1 as isize);
474        sa[m as usize] = i - 2;
475        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
476
477        i -= 4;
478    }
479
480    while i >= 0 {
481        c1 = c0;
482        c0 = t[i as usize] as isize;
483        f1 = f0;
484        f0 = usize::from(c0 > c1 - f1 as isize);
485        sa[m as usize] = i + 1;
486        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
487        i -= 1;
488    }
489
490    n - 1 - m
491}
492
493#[allow(dead_code)]
494fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
495    buckets[..4 * k as usize].fill(0);
496    let mut i = n - 2;
497    let mut f0 = 1usize;
498    let mut f1: usize;
499    let mut c0 = t[(n - 1) as usize] as isize;
500    let mut c1: isize;
501
502    while i >= 3 {
503        c1 = t[i as usize] as isize;
504        f1 = usize::from(c1 > c0 - f0 as isize);
505        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
506
507        c0 = t[(i - 1) as usize] as isize;
508        f0 = usize::from(c0 > c1 - f1 as isize);
509        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
510
511        c1 = t[(i - 2) as usize] as isize;
512        f1 = usize::from(c1 > c0 - f0 as isize);
513        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
514
515        c0 = t[(i - 3) as usize] as isize;
516        f0 = usize::from(c0 > c1 - f1 as isize);
517        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
518
519        i -= 4;
520    }
521
522    while i >= 0 {
523        c1 = c0;
524        c0 = t[i as usize] as isize;
525        f1 = f0;
526        f0 = usize::from(c0 > c1 - f1 as isize);
527        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
528        i -= 1;
529    }
530
531    buckets[buckets_index4(c0 as usize, f0 + f0)] += 1;
532}
533
534#[allow(dead_code)]
535fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
536    buckets[..2 * k as usize].fill(0);
537    let mut i = n - 2;
538    let mut f0 = 1usize;
539    let mut f1: usize;
540    let mut c0 = t[(n - 1) as usize] as isize;
541    let mut c1: isize;
542
543    while i >= 3 {
544        c1 = t[i as usize] as isize;
545        f1 = usize::from(c1 > c0 - f0 as isize);
546        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
547
548        c0 = t[(i - 1) as usize] as isize;
549        f0 = usize::from(c0 > c1 - f1 as isize);
550        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
551
552        c1 = t[(i - 2) as usize] as isize;
553        f1 = usize::from(c1 > c0 - f0 as isize);
554        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
555
556        c0 = t[(i - 3) as usize] as isize;
557        f0 = usize::from(c0 > c1 - f1 as isize);
558        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
559
560        i -= 4;
561    }
562
563    while i >= 0 {
564        c1 = c0;
565        c0 = t[i as usize] as isize;
566        f1 = f0;
567        f0 = usize::from(c0 > c1 - f1 as isize);
568        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
569        i -= 1;
570    }
571
572    buckets[buckets_index2(c0 as usize, 0)] += 1;
573}
574
575#[allow(dead_code)]
576fn count_compacted_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
577    buckets[..2 * k as usize].fill(0);
578    let mut i = n - 2;
579    let mut f0 = 1usize;
580    let mut f1: usize;
581    let mut c0 = t[(n - 1) as usize] as isize;
582    let mut c1: isize;
583
584    while i >= 3 {
585        c1 = t[i as usize] as isize;
586        f1 = usize::from(c1 > c0 - f0 as isize);
587        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
588
589        c0 = t[(i - 1) as usize] as isize;
590        f0 = usize::from(c0 > c1 - f1 as isize);
591        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
592
593        c1 = t[(i - 2) as usize] as isize;
594        f1 = usize::from(c1 > c0 - f0 as isize);
595        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
596
597        c0 = t[(i - 3) as usize] as isize;
598        f0 = usize::from(c0 > c1 - f1 as isize);
599        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
600
601        i -= 4;
602    }
603
604    while i >= 0 {
605        c1 = c0;
606        c0 = t[i as usize] as isize;
607        f1 = f0;
608        f0 = usize::from(c0 > c1 - f1 as isize);
609        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
610        i -= 1;
611    }
612
613    buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
614}
615
616#[allow(dead_code)]
617fn get_bucket_stride(free_space: SaSint, bucket_size: SaSint, num_buckets: SaSint) -> SaSint {
618    let bucket_size_1024 = (bucket_size + 1023) & !1023;
619    if free_space / (num_buckets - 1) >= bucket_size_1024 {
620        return bucket_size_1024;
621    }
622    let bucket_size_16 = (bucket_size + 15) & !15;
623    if free_space / (num_buckets - 1) >= bucket_size_16 {
624        return bucket_size_16;
625    }
626    bucket_size
627}
628
629#[allow(dead_code)]
630fn count_and_gather_lms_suffixes_32s_4k(
631    t: &[SaSint],
632    sa: &mut [SaSint],
633    n: SaSint,
634    k: SaSint,
635    buckets: &mut [SaSint],
636    omp_block_start: isize,
637    omp_block_size: isize,
638) -> SaSint {
639    buckets[..4 * k as usize].fill(0);
640    let mut m = omp_block_start + omp_block_size - 1;
641
642    if omp_block_size > 0 {
643        let mut j = m + 1;
644        let mut c0 = t[m as usize] as isize;
645        let mut c1 = -1isize;
646        while j < n as isize {
647            c1 = t[j as usize] as isize;
648            if c1 != c0 {
649                break;
650            }
651            j += 1;
652        }
653
654        let mut f0 = usize::from(c0 >= c1);
655        let mut f1: usize;
656        let mut i = m - 1;
657        j = omp_block_start + 64 + 3;
658        while i >= j {
659            c1 = t[i as usize] as isize;
660            f1 = usize::from(c1 > c0 - f0 as isize);
661            sa[m as usize] = (i + 1) as SaSint;
662            m -= (f1 & !f0) as isize;
663            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
664
665            c0 = t[(i - 1) as usize] as isize;
666            f0 = usize::from(c0 > c1 - f1 as isize);
667            sa[m as usize] = i as SaSint;
668            m -= (f0 & !f1) as isize;
669            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
670
671            c1 = t[(i - 2) as usize] as isize;
672            f1 = usize::from(c1 > c0 - f0 as isize);
673            sa[m as usize] = (i - 1) as SaSint;
674            m -= (f1 & !f0) as isize;
675            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
676
677            c0 = t[(i - 3) as usize] as isize;
678            f0 = usize::from(c0 > c1 - f1 as isize);
679            sa[m as usize] = (i - 2) as SaSint;
680            m -= (f0 & !f1) as isize;
681            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
682
683            i -= 4;
684        }
685
686        j -= 64 + 3;
687        while i >= j {
688            c1 = c0;
689            c0 = t[i as usize] as isize;
690            f1 = f0;
691            f0 = usize::from(c0 > c1 - f1 as isize);
692            sa[m as usize] = (i + 1) as SaSint;
693            m -= (f0 & !f1) as isize;
694            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
695            i -= 1;
696        }
697
698        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
699        f1 = usize::from(c1 > c0 - f0 as isize);
700        sa[m as usize] = (i + 1) as SaSint;
701        m -= (f1 & !f0) as isize;
702        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
703    }
704
705    (omp_block_start + omp_block_size - 1 - m) as SaSint
706}
707
708#[allow(dead_code)]
709fn count_and_gather_lms_suffixes_32s_2k(
710    t: &[SaSint],
711    sa: &mut [SaSint],
712    n: SaSint,
713    k: SaSint,
714    buckets: &mut [SaSint],
715    omp_block_start: isize,
716    omp_block_size: isize,
717) -> SaSint {
718    buckets[..2 * k as usize].fill(0);
719    let mut m = omp_block_start + omp_block_size - 1;
720
721    if omp_block_size > 0 {
722        let mut j = m + 1;
723        let mut c0 = t[m as usize] as isize;
724        let mut c1 = -1isize;
725        while j < n as isize {
726            c1 = t[j as usize] as isize;
727            if c1 != c0 {
728                break;
729            }
730            j += 1;
731        }
732
733        let mut f0 = usize::from(c0 >= c1);
734        let mut f1: usize;
735        let mut i = m - 1;
736        j = omp_block_start + 64 + 3;
737        while i >= j {
738            c1 = t[i as usize] as isize;
739            f1 = usize::from(c1 > c0 - f0 as isize);
740            sa[m as usize] = (i + 1) as SaSint;
741            m -= (f1 & !f0) as isize;
742            buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
743
744            c0 = t[(i - 1) as usize] as isize;
745            f0 = usize::from(c0 > c1 - f1 as isize);
746            sa[m as usize] = i as SaSint;
747            m -= (f0 & !f1) as isize;
748            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
749
750            c1 = t[(i - 2) as usize] as isize;
751            f1 = usize::from(c1 > c0 - f0 as isize);
752            sa[m as usize] = (i - 1) as SaSint;
753            m -= (f1 & !f0) as isize;
754            buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
755
756            c0 = t[(i - 3) as usize] as isize;
757            f0 = usize::from(c0 > c1 - f1 as isize);
758            sa[m as usize] = (i - 2) as SaSint;
759            m -= (f0 & !f1) as isize;
760            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
761
762            i -= 4;
763        }
764
765        j -= 64 + 3;
766        while i >= j {
767            c1 = c0;
768            c0 = t[i as usize] as isize;
769            f1 = f0;
770            f0 = usize::from(c0 > c1 - f1 as isize);
771            sa[m as usize] = (i + 1) as SaSint;
772            m -= (f0 & !f1) as isize;
773            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
774            i -= 1;
775        }
776
777        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
778        f1 = usize::from(c1 > c0 - f0 as isize);
779        sa[m as usize] = (i + 1) as SaSint;
780        m -= (f1 & !f0) as isize;
781        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
782    }
783
784    (omp_block_start + omp_block_size - 1 - m) as SaSint
785}
786
787#[allow(dead_code)]
788fn count_and_gather_compacted_lms_suffixes_32s_2k(
789    t: &[SaSint],
790    sa: &mut [SaSint],
791    n: SaSint,
792    k: SaSint,
793    buckets: &mut [SaSint],
794    omp_block_start: isize,
795    omp_block_size: isize,
796) -> SaSint {
797    buckets[..2 * k as usize].fill(0);
798    let mut m = omp_block_start + omp_block_size - 1;
799
800    if omp_block_size > 0 {
801        let mut j = m + 1;
802        let mut c0 = t[m as usize] as isize;
803        let mut c1 = -1isize;
804        while j < n as isize {
805            c1 = t[j as usize] as isize;
806            if c1 != c0 {
807                break;
808            }
809            j += 1;
810        }
811
812        let mut f0 = usize::from(c0 >= c1);
813        let mut f1: usize;
814        let mut i = m - 1;
815        j = omp_block_start + 64 + 3;
816        while i >= j {
817            c1 = t[i as usize] as isize;
818            f1 = usize::from(c1 > c0 - f0 as isize);
819            sa[m as usize] = (i + 1) as SaSint;
820            m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
821            buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
822
823            c0 = t[(i - 1) as usize] as isize;
824            f0 = usize::from(c0 > c1 - f1 as isize);
825            sa[m as usize] = i as SaSint;
826            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
827            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
828
829            c1 = t[(i - 2) as usize] as isize;
830            f1 = usize::from(c1 > c0 - f0 as isize);
831            sa[m as usize] = (i - 1) as SaSint;
832            m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
833            buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
834
835            c0 = t[(i - 3) as usize] as isize;
836            f0 = usize::from(c0 > c1 - f1 as isize);
837            sa[m as usize] = (i - 2) as SaSint;
838            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
839            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
840
841            i -= 4;
842        }
843
844        j -= 64 + 3;
845        while i >= j {
846            c1 = c0;
847            c0 = t[i as usize] as isize;
848            f1 = f0;
849            f0 = usize::from(c0 > c1 - f1 as isize);
850            sa[m as usize] = (i + 1) as SaSint;
851            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
852            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
853            i -= 1;
854        }
855
856        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
857        f1 = usize::from(c1 > c0 - f0 as isize);
858        sa[m as usize] = (i + 1) as SaSint;
859        m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
860        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
861    }
862
863    (omp_block_start + omp_block_size - 1 - m) as SaSint
864}
865
866#[allow(dead_code)]
867fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
868    t: &[SaSint],
869    sa: &mut [SaSint],
870    n: SaSint,
871    k: SaSint,
872    buckets: &mut [SaSint],
873    local_buckets: SaSint,
874    threads: SaSint,
875    thread_state: &mut [ThreadState],
876) -> SaSint {
877    if threads == 1 || n < 65_536 {
878        return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize);
879    }
880
881    let thread_count = threads as usize;
882    let n_usize = n as usize;
883    let bucket_size = 4 * k as usize;
884    let block_stride = (n / threads) & !15;
885    let free_space = if local_buckets != 0 {
886        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
887    } else {
888        buckets.len() as SaSint
889    };
890    let bucket_stride = get_bucket_stride(free_space, 4 * k, threads) as usize;
891    let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
892    let mut workspace = vec![0; workspace_len];
893
894    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
895        let block_start = thread as SaSint * block_stride;
896        let block_size = if thread + 1 < thread_count {
897            block_stride
898        } else {
899            n - block_start
900        };
901        let workspace_end = workspace_len - thread * bucket_stride;
902        let workspace_start = workspace_end - bucket_size;
903        state.count = count_and_gather_lms_suffixes_32s_4k(
904            t,
905            sa,
906            n,
907            k,
908            &mut workspace[workspace_start..workspace_end],
909            block_start as isize,
910            block_size as isize,
911        );
912        state.position = block_start + block_size;
913    }
914
915    let mut m = 0usize;
916    for thread in (0..thread_count).rev() {
917        let count =
918            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
919        m += count;
920        if thread + 1 != thread_count && count > 0 {
921            let src_end = usize::try_from(thread_state[thread].position)
922                .expect("position must be non-negative");
923            let src_start = src_end - count;
924            let dst_start = n_usize - m;
925            sa.copy_within(src_start..src_end, dst_start);
926        }
927    }
928
929    let accumulation_threads = thread_count - 1;
930    let block_stride = (bucket_size / accumulation_threads) & !15usize;
931    for thread in 0..accumulation_threads {
932        let block_start = thread * block_stride;
933        let block_size = if thread + 1 < accumulation_threads {
934            block_stride
935        } else {
936            bucket_size - block_start
937        };
938        accumulate_counts_s32(
939            &mut workspace,
940            block_start,
941            block_size,
942            bucket_stride,
943            accumulation_threads + 1,
944        );
945    }
946
947    buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
948    m as SaSint
949}
950
951#[allow(dead_code)]
952fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
953    t: &[SaSint],
954    sa: &mut [SaSint],
955    n: SaSint,
956    k: SaSint,
957    buckets: &mut [SaSint],
958    local_buckets: SaSint,
959    threads: SaSint,
960    thread_state: &mut [ThreadState],
961) -> SaSint {
962    if threads == 1 || n < 65_536 {
963        return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
964    }
965
966    let thread_count = threads as usize;
967    let n_usize = n as usize;
968    let bucket_size = 2 * k as usize;
969    let block_stride = (n / threads) & !15;
970    let free_space = if local_buckets != 0 {
971        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
972    } else {
973        buckets.len() as SaSint
974    };
975    let bucket_stride = get_bucket_stride(free_space, 2 * k, threads) as usize;
976    let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
977    let mut workspace = vec![0; workspace_len];
978
979    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
980        let block_start = thread as SaSint * block_stride;
981        let block_size = if thread + 1 < thread_count {
982            block_stride
983        } else {
984            n - block_start
985        };
986        let workspace_end = workspace_len - thread * bucket_stride;
987        let workspace_start = workspace_end - bucket_size;
988        state.count = count_and_gather_lms_suffixes_32s_2k(
989            t,
990            sa,
991            n,
992            k,
993            &mut workspace[workspace_start..workspace_end],
994            block_start as isize,
995            block_size as isize,
996        );
997        state.position = block_start + block_size;
998    }
999
1000    let mut m = 0usize;
1001    for thread in (0..thread_count).rev() {
1002        let count =
1003            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1004        m += count;
1005        if thread + 1 != thread_count && count > 0 {
1006            let src_end = usize::try_from(thread_state[thread].position)
1007                .expect("position must be non-negative");
1008            let src_start = src_end - count;
1009            let dst_start = n_usize - m;
1010            sa.copy_within(src_start..src_end, dst_start);
1011        }
1012    }
1013
1014    let accumulation_threads = thread_count - 1;
1015    let block_stride = (bucket_size / accumulation_threads) & !15usize;
1016    for thread in 0..accumulation_threads {
1017        let block_start = thread * block_stride;
1018        let block_size = if thread + 1 < accumulation_threads {
1019            block_stride
1020        } else {
1021            bucket_size - block_start
1022        };
1023        accumulate_counts_s32(
1024            &mut workspace,
1025            block_start,
1026            block_size,
1027            bucket_stride,
1028            accumulation_threads + 1,
1029        );
1030    }
1031
1032    buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
1033    m as SaSint
1034}
1035
1036#[allow(dead_code)]
1037fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1038    t: &[SaSint],
1039    sa: &mut [SaSint],
1040    n: SaSint,
1041    k: SaSint,
1042    buckets: &mut [SaSint],
1043    _local_buckets: SaSint,
1044    threads: SaSint,
1045    thread_state: &mut [ThreadState],
1046) {
1047    if threads == 1 || n < 65_536 {
1048        count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
1049        return;
1050    }
1051
1052    let thread_count = threads as usize;
1053    let n_usize = n as usize;
1054    let bucket_size = 2 * k as usize;
1055    let block_stride = (n / threads) & !15;
1056    let mut workspaces = vec![vec![0; bucket_size]; thread_count];
1057    let mut gathered_runs = vec![Vec::<SaSint>::new(); thread_count];
1058
1059    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
1060        let block_start = thread as SaSint * block_stride;
1061        let block_size = if thread + 1 < thread_count {
1062            block_stride
1063        } else {
1064            n - block_start
1065        };
1066        let mut temp_sa = vec![0; n_usize + block_size as usize];
1067        state.count = count_and_gather_compacted_lms_suffixes_32s_2k(
1068            t,
1069            &mut temp_sa,
1070            n,
1071            k,
1072            &mut workspaces[thread],
1073            block_start as isize,
1074            block_size as isize,
1075        );
1076        state.position = block_start + block_size;
1077        let count = usize::try_from(state.count).expect("count must be non-negative");
1078        let src_end =
1079            n_usize + usize::try_from(state.position).expect("position must be non-negative");
1080        let src_start = src_end - count;
1081        gathered_runs[thread].extend_from_slice(&temp_sa[src_start..src_end]);
1082    }
1083
1084    let mut suffixes_before = 0usize;
1085    for thread in (0..thread_count).rev() {
1086        let count =
1087            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1088        suffixes_before += count;
1089        if count > 0 {
1090            let dst_start = n_usize - suffixes_before;
1091            let dst_end = dst_start + count;
1092            sa[dst_start..dst_end].copy_from_slice(&gathered_runs[thread]);
1093        }
1094    }
1095
1096    buckets.fill(0);
1097    for workspace in &workspaces {
1098        for (dst, src) in buckets.iter_mut().zip(workspace.iter()) {
1099            *dst += *src;
1100        }
1101    }
1102}
1103
1104#[allow(dead_code)]
1105fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1106    t: &[SaSint],
1107    sa: &mut [SaSint],
1108    n: SaSint,
1109    k: SaSint,
1110    buckets: &mut [SaSint],
1111    threads: SaSint,
1112) -> SaSint {
1113    if threads > 1 && n >= 65_536 {
1114        count_lms_suffixes_32s_4k(t, n, k, buckets);
1115        gather_lms_suffixes_32s(t, sa, n)
1116    } else {
1117        count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize)
1118    }
1119}
1120
1121#[allow(dead_code)]
1122fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1123    t: &[SaSint],
1124    sa: &mut [SaSint],
1125    n: SaSint,
1126    k: SaSint,
1127    buckets: &mut [SaSint],
1128    threads: SaSint,
1129) -> SaSint {
1130    if threads > 1 && n >= 65_536 {
1131        count_lms_suffixes_32s_2k(t, n, k, buckets);
1132        gather_lms_suffixes_32s(t, sa, n)
1133    } else {
1134        count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1135    }
1136}
1137
1138#[allow(dead_code)]
1139fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1140    t: &[SaSint],
1141    sa: &mut [SaSint],
1142    n: SaSint,
1143    k: SaSint,
1144    buckets: &mut [SaSint],
1145    threads: SaSint,
1146) -> SaSint {
1147    if threads > 1 && n >= 65_536 {
1148        count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1149        gather_compacted_lms_suffixes_32s(t, sa, n)
1150    } else {
1151        count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1152    }
1153}
1154
1155#[allow(dead_code)]
1156fn count_and_gather_lms_suffixes_32s_4k_omp(
1157    t: &[SaSint],
1158    sa: &mut [SaSint],
1159    n: SaSint,
1160    k: SaSint,
1161    buckets: &mut [SaSint],
1162    local_buckets: SaSint,
1163    threads: SaSint,
1164    thread_state: &mut [ThreadState],
1165) -> SaSint {
1166    let free_space = if local_buckets != 0 {
1167        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1168    } else {
1169        buckets.len() as SaSint
1170    };
1171    let mut max_threads = (free_space / (((4 * k) + 15) & !15)).min(threads);
1172
1173    if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1174        let thread_cap = n / (16 * k);
1175        if max_threads > thread_cap {
1176            max_threads = thread_cap;
1177        }
1178        count_and_gather_lms_suffixes_32s_4k_fs_omp(
1179            t,
1180            sa,
1181            n,
1182            k,
1183            buckets,
1184            local_buckets,
1185            max_threads.max(2),
1186            thread_state,
1187        )
1188    } else if threads > 1 && n >= 65_536 {
1189        count_lms_suffixes_32s_4k(t, n, k, buckets);
1190        gather_lms_suffixes_32s(t, sa, n)
1191    } else {
1192        count_and_gather_lms_suffixes_32s_4k_nofs_omp(t, sa, n, k, buckets, threads)
1193    }
1194}
1195
1196#[allow(dead_code)]
1197fn count_and_gather_lms_suffixes_32s_2k_omp(
1198    t: &[SaSint],
1199    sa: &mut [SaSint],
1200    n: SaSint,
1201    k: SaSint,
1202    buckets: &mut [SaSint],
1203    local_buckets: SaSint,
1204    threads: SaSint,
1205    thread_state: &mut [ThreadState],
1206) -> SaSint {
1207    let free_space = if local_buckets != 0 {
1208        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1209    } else {
1210        buckets.len() as SaSint
1211    };
1212    let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1213
1214    if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1215        let thread_cap = n / (8 * k);
1216        if max_threads > thread_cap {
1217            max_threads = thread_cap;
1218        }
1219        count_and_gather_lms_suffixes_32s_2k_fs_omp(
1220            t,
1221            sa,
1222            n,
1223            k,
1224            buckets,
1225            local_buckets,
1226            max_threads.max(2),
1227            thread_state,
1228        )
1229    } else if threads > 1 && n >= 65_536 {
1230        count_lms_suffixes_32s_2k(t, n, k, buckets);
1231        gather_lms_suffixes_32s(t, sa, n)
1232    } else {
1233        count_and_gather_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads)
1234    }
1235}
1236
1237#[allow(dead_code)]
1238fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1239    buckets[..k as usize].fill(0);
1240
1241    let mut i = 0usize;
1242    let mut j = (n as usize).saturating_sub(7);
1243    while i < j {
1244        buckets[t[i] as usize] += 1;
1245        buckets[t[i + 1] as usize] += 1;
1246        buckets[t[i + 2] as usize] += 1;
1247        buckets[t[i + 3] as usize] += 1;
1248        buckets[t[i + 4] as usize] += 1;
1249        buckets[t[i + 5] as usize] += 1;
1250        buckets[t[i + 6] as usize] += 1;
1251        buckets[t[i + 7] as usize] += 1;
1252        i += 8;
1253    }
1254
1255    j += 7;
1256    while i < j {
1257        buckets[t[i] as usize] += 1;
1258        i += 1;
1259    }
1260}
1261
1262#[allow(dead_code)]
1263fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1264    let k = k as usize;
1265    let mut sum = 0;
1266    for j in 0..k {
1267        let i = buckets_index4(j, 0);
1268        buckets[4 * k + j] = sum;
1269        sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1270        buckets[5 * k + j] = sum;
1271    }
1272}
1273
1274#[allow(dead_code)]
1275fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1276    let k = k as usize;
1277    let mut sum = 0;
1278    for j in 0..k {
1279        let i = buckets_index2(j, 0);
1280        buckets[2 * k + j] = sum;
1281        sum += buckets[i] + buckets[i + 1];
1282        buckets[3 * k + j] = sum;
1283    }
1284}
1285
1286#[allow(dead_code)]
1287fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1288    let mut sum0 = 0;
1289    for j in 0..k as usize {
1290        let i = buckets_index2(j, 0);
1291        sum0 += buckets[i] + buckets[i + 1];
1292        buckets[i] = sum0;
1293    }
1294}
1295
1296#[allow(dead_code)]
1297fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1298    let k = k as usize;
1299    for j in 0..k {
1300        let i = buckets_index2(j, 0);
1301        buckets[j] = buckets[i];
1302    }
1303    buckets[k] = 0;
1304    buckets.copy_within(0..k - 1, k + 1);
1305}
1306
1307#[allow(dead_code)]
1308fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1309    let mut sum = 0;
1310    for bucket in buckets.iter_mut().take(k as usize) {
1311        let tmp = *bucket;
1312        *bucket = sum;
1313        sum += tmp;
1314    }
1315}
1316
1317#[allow(dead_code)]
1318fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1319    let mut sum = 0;
1320    for bucket in buckets.iter_mut().take(k as usize) {
1321        sum += *bucket;
1322        *bucket = sum;
1323    }
1324}
1325
1326#[allow(dead_code)]
1327fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1328    t: &[SaSint],
1329    k: SaSint,
1330    buckets: &mut [SaSint],
1331    first_lms_suffix: SaSint,
1332) {
1333    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1334    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1335
1336    let mut sum0 = 0;
1337    let mut sum1 = 0;
1338    for j in 0..k as usize {
1339        let i = buckets_index2(j, 0);
1340        sum0 += buckets[i] + buckets[i + 1];
1341        sum1 += buckets[i + 1];
1342        buckets[i] = sum0;
1343        buckets[i + 1] = sum1;
1344    }
1345}
1346
1347#[allow(dead_code)]
1348fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1349    t: &[SaSint],
1350    k: SaSint,
1351    buckets: &mut [SaSint],
1352    mut first_lms_suffix: SaSint,
1353) -> SaSint {
1354    let mut f0 = 0usize;
1355    let mut c0 = t[first_lms_suffix as usize] as isize;
1356
1357    loop {
1358        first_lms_suffix -= 1;
1359        if first_lms_suffix < 0 {
1360            break;
1361        }
1362        let c1 = c0;
1363        c0 = t[first_lms_suffix as usize] as isize;
1364        let f1 = f0;
1365        f0 = usize::from(c0 > c1 - f1 as isize);
1366        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1367    }
1368    buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1369
1370    let mut sum = 0;
1371    for j in 0..k as usize {
1372        let i = buckets_index4(j, 0);
1373        sum += buckets[i + 1] + buckets[i + 3];
1374        buckets[4 * k as usize + j] = sum;
1375    }
1376    sum
1377}
1378
1379#[allow(dead_code)]
1380fn initialize_buckets_for_partial_sorting_32s_6k(
1381    t: &[SaSint],
1382    k: SaSint,
1383    buckets: &mut [SaSint],
1384    first_lms_suffix: SaSint,
1385    left_suffixes_count: SaSint,
1386) {
1387    let k = k as usize;
1388    let temp_offset = 4 * k;
1389    let first_symbol = t[first_lms_suffix as usize] as usize;
1390    let mut sum0 = left_suffixes_count + 1;
1391    let mut sum1 = 0;
1392    let mut sum2 = 0;
1393
1394    for j in 0..first_symbol {
1395        let i = buckets_index4(j, 0);
1396        let tj = buckets_index2(j, 0);
1397        let ss = buckets[i];
1398        let ls = buckets[i + 1];
1399        let sl = buckets[i + 2];
1400        let ll = buckets[i + 3];
1401
1402        buckets[i] = sum0;
1403        buckets[i + 1] = sum2;
1404        buckets[i + 2] = 0;
1405        buckets[i + 3] = 0;
1406
1407        sum0 += ss + sl;
1408        sum1 += ls;
1409        sum2 += ls + ll;
1410
1411        buckets[temp_offset + tj] = sum0;
1412        buckets[temp_offset + tj + 1] = sum1;
1413    }
1414
1415    sum1 += 1;
1416    for j in first_symbol..k {
1417        let i = buckets_index4(j, 0);
1418        let tj = buckets_index2(j, 0);
1419        let ss = buckets[i];
1420        let ls = buckets[i + 1];
1421        let sl = buckets[i + 2];
1422        let ll = buckets[i + 3];
1423
1424        buckets[i] = sum0;
1425        buckets[i + 1] = sum2;
1426        buckets[i + 2] = 0;
1427        buckets[i + 3] = 0;
1428
1429        sum0 += ss + sl;
1430        sum1 += ls;
1431        sum2 += ls + ll;
1432
1433        buckets[temp_offset + tj] = sum0;
1434        buckets[temp_offset + tj + 1] = sum1;
1435    }
1436}
1437
1438#[allow(dead_code)]
1439fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1440    t: &[SaSint],
1441    k: SaSint,
1442    buckets: &mut [SaSint],
1443    first_lms_suffix: SaSint,
1444) {
1445    let k = k as usize;
1446    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1447    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1448
1449    let mut sum0 = 0;
1450    let mut sum1 = 0;
1451    for j in 0..k {
1452        let i = buckets_index2(j, 0);
1453        buckets[2 * k + j] = sum1;
1454        sum0 += buckets[i + 1];
1455        sum1 += buckets[i] + buckets[i + 1];
1456        buckets[i + 1] = sum0;
1457        buckets[3 * k + j] = sum1;
1458    }
1459}
1460
1461#[allow(dead_code)]
1462fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1463    t: &[SaSint],
1464    sa: &mut [SaSint],
1465    n: SaSint,
1466    k: SaSint,
1467    buckets: &mut [SaSint],
1468    local_buckets: SaSint,
1469    threads: SaSint,
1470    thread_state: &mut [ThreadState],
1471) {
1472    let free_space = if local_buckets != 0 {
1473        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1474    } else {
1475        buckets.len() as SaSint
1476    };
1477    let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1478
1479    if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1480        let thread_cap = n / (8 * k);
1481        if max_threads > thread_cap {
1482            max_threads = thread_cap;
1483        }
1484        count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1485            t,
1486            sa,
1487            n,
1488            k,
1489            buckets,
1490            local_buckets,
1491            max_threads.max(2),
1492            thread_state,
1493        );
1494    } else {
1495        count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1496    }
1497}
1498
1499#[allow(dead_code)]
1500fn gather_lms_suffixes_16u(
1501    t: &[u16],
1502    sa: &mut [SaSint],
1503    n: SaSint,
1504    mut m: SaSint,
1505    omp_block_start: SaSint,
1506    omp_block_size: SaSint,
1507) {
1508    if omp_block_size > 0 {
1509        let n = n as isize;
1510        let mut i: isize;
1511        let mut j = (omp_block_start + omp_block_size) as isize;
1512        let mut c0 = t[(omp_block_start + omp_block_size - 1) as usize] as isize;
1513        let mut c1 = -1isize;
1514
1515        while j < n {
1516            c1 = t[j as usize] as isize;
1517            if c1 != c0 {
1518                break;
1519            }
1520            j += 1;
1521        }
1522
1523        let mut f0 = usize::from(c0 >= c1);
1524        let mut f1: usize;
1525
1526        i = (omp_block_start + omp_block_size - 2) as isize;
1527        j = (omp_block_start + 3) as isize;
1528        while i >= j {
1529            c1 = t[i as usize] as isize;
1530            f1 = usize::from(c1 > c0 - f0 as isize);
1531            sa[m as usize] = (i + 1) as SaSint;
1532            m -= (f1 & (1 - f0)) as SaSint;
1533
1534            c0 = t[(i - 1) as usize] as isize;
1535            f0 = usize::from(c0 > c1 - f1 as isize);
1536            sa[m as usize] = i as SaSint;
1537            m -= (f0 & (1 - f1)) as SaSint;
1538
1539            c1 = t[(i - 2) as usize] as isize;
1540            f1 = usize::from(c1 > c0 - f0 as isize);
1541            sa[m as usize] = (i - 1) as SaSint;
1542            m -= (f1 & (1 - f0)) as SaSint;
1543
1544            c0 = t[(i - 3) as usize] as isize;
1545            f0 = usize::from(c0 > c1 - f1 as isize);
1546            sa[m as usize] = (i - 2) as SaSint;
1547            m -= (f0 & (1 - f1)) as SaSint;
1548
1549            i -= 4;
1550        }
1551
1552        j -= 3;
1553        while i >= j {
1554            c1 = c0;
1555            c0 = t[i as usize] as isize;
1556            f1 = f0;
1557            f0 = usize::from(c0 > c1 - f1 as isize);
1558            sa[m as usize] = (i + 1) as SaSint;
1559            m -= (f0 & (1 - f1)) as SaSint;
1560            i -= 1;
1561        }
1562
1563        sa[m as usize] = (i + 1) as SaSint;
1564    }
1565}
1566
1567#[allow(dead_code)]
1568fn count_and_gather_lms_suffixes_16u(
1569    t: &[u16],
1570    sa: &mut [SaSint],
1571    n: SaSint,
1572    buckets: &mut [SaSint],
1573    omp_block_start: SaSint,
1574    omp_block_size: SaSint,
1575) -> SaSint {
1576    buckets[..4 * ALPHABET_SIZE].fill(0);
1577
1578    let mut m = (omp_block_start + omp_block_size - 1) as isize;
1579
1580    if omp_block_size > 0 {
1581        let n = n as isize;
1582        let mut i: isize;
1583        let mut j = m + 1;
1584        let mut c0 = t[m as usize] as isize;
1585        let mut c1 = -1isize;
1586
1587        while j < n {
1588            c1 = t[j as usize] as isize;
1589            if c1 != c0 {
1590                break;
1591            }
1592            j += 1;
1593        }
1594
1595        let mut f0 = usize::from(c0 >= c1);
1596        let mut f1: usize;
1597
1598        i = m - 1;
1599        j = (omp_block_start + 3) as isize;
1600        while i >= j {
1601            c1 = t[i as usize] as isize;
1602            f1 = usize::from(c1 > c0 - f0 as isize);
1603            sa[m as usize] = (i + 1) as SaSint;
1604            m -= (f1 & (1 - f0)) as isize;
1605            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1606
1607            c0 = t[(i - 1) as usize] as isize;
1608            f0 = usize::from(c0 > c1 - f1 as isize);
1609            sa[m as usize] = i as SaSint;
1610            m -= (f0 & (1 - f1)) as isize;
1611            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1612
1613            c1 = t[(i - 2) as usize] as isize;
1614            f1 = usize::from(c1 > c0 - f0 as isize);
1615            sa[m as usize] = (i - 1) as SaSint;
1616            m -= (f1 & (1 - f0)) as isize;
1617            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1618
1619            c0 = t[(i - 3) as usize] as isize;
1620            f0 = usize::from(c0 > c1 - f1 as isize);
1621            sa[m as usize] = (i - 2) as SaSint;
1622            m -= (f0 & (1 - f1)) as isize;
1623            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1624
1625            i -= 4;
1626        }
1627
1628        j -= 3;
1629        while i >= j {
1630            c1 = c0;
1631            c0 = t[i as usize] as isize;
1632            f1 = f0;
1633            f0 = usize::from(c0 > c1 - f1 as isize);
1634            sa[m as usize] = (i + 1) as SaSint;
1635            m -= (f0 & (1 - f1)) as isize;
1636            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1637            i -= 1;
1638        }
1639
1640        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
1641        f1 = usize::from(c1 > c0 - f0 as isize);
1642        sa[m as usize] = (i + 1) as SaSint;
1643        m -= (f1 & (1 - f0)) as isize;
1644        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1645    }
1646
1647    omp_block_start + omp_block_size - 1 - m as SaSint
1648}
1649
1650#[allow(dead_code)]
1651fn gather_lms_suffixes_16u_omp(
1652    t: &[u16],
1653    sa: &mut [SaSint],
1654    n: SaSint,
1655    threads: SaSint,
1656    thread_state: &mut [ThreadState],
1657) {
1658    if threads == 1 || n < 65_536 || thread_state.is_empty() {
1659        gather_lms_suffixes_16u(t, sa, n, n - 1, 0, n);
1660        return;
1661    }
1662
1663    let thread_count = threads as usize;
1664    let block_stride = (n / threads) & !15;
1665    let mut suffix_counts_after = vec![0; thread_count];
1666    let mut m = 0;
1667    for thread in (0..thread_count).rev() {
1668        suffix_counts_after[thread] = m;
1669        m += thread_state[thread].m;
1670    }
1671
1672    for thread in 0..thread_count {
1673        let block_start = thread as SaSint * block_stride;
1674        let block_size = if thread + 1 < thread_count {
1675            block_stride
1676        } else {
1677            n - block_start
1678        };
1679        gather_lms_suffixes_16u(
1680            t,
1681            sa,
1682            n,
1683            n - 1 - suffix_counts_after[thread],
1684            block_start,
1685            block_size,
1686        );
1687    }
1688
1689    for thread in 0..thread_count {
1690        if thread_state[thread].m > 0 {
1691            sa[(n - 1 - suffix_counts_after[thread]) as usize] =
1692                thread_state[thread].last_lms_suffix;
1693        }
1694    }
1695}
1696
1697#[allow(dead_code)]
1698fn count_and_gather_lms_suffixes_16u_omp(
1699    t: &[u16],
1700    sa: &mut [SaSint],
1701    n: SaSint,
1702    buckets: &mut [SaSint],
1703    threads: SaSint,
1704    thread_state: &mut [ThreadState],
1705) -> SaSint {
1706    if threads == 1 || n < 65_536 || thread_state.is_empty() {
1707        return count_and_gather_lms_suffixes_16u(t, sa, n, buckets, 0, n);
1708    }
1709
1710    let thread_count = threads as usize;
1711    let block_stride = (n / threads) & !15;
1712
1713    for thread in 0..thread_count {
1714        let block_start = thread as SaSint * block_stride;
1715        let block_size = if thread + 1 < thread_count {
1716            block_stride
1717        } else {
1718            n - block_start
1719        };
1720        let count = count_and_gather_lms_suffixes_16u(
1721            t,
1722            sa,
1723            n,
1724            &mut thread_state[thread].buckets,
1725            block_start,
1726            block_size,
1727        );
1728        thread_state[thread].m = count;
1729        thread_state[thread].position = block_start + block_size;
1730        if count > 0 {
1731            thread_state[thread].last_lms_suffix = sa[(block_start + block_size - 1) as usize];
1732        }
1733    }
1734
1735    buckets[..4 * ALPHABET_SIZE].fill(0);
1736    let mut m = 0;
1737    for thread in (0..thread_count).rev() {
1738        let position = thread_state[thread].position;
1739        let count = thread_state[thread].m;
1740        m += count;
1741        if thread + 1 != thread_count && count > 0 {
1742            let src_end = position as usize;
1743            let src_start = src_end - count as usize;
1744            let dst_start = (n - m) as usize;
1745            sa.copy_within(src_start..src_end, dst_start);
1746        }
1747        for s in 0..4 * ALPHABET_SIZE {
1748            let a = buckets[s];
1749            let b = thread_state[thread].buckets[s];
1750            buckets[s] = a + b;
1751            thread_state[thread].buckets[s] = a;
1752        }
1753    }
1754
1755    m
1756}
1757
1758#[allow(dead_code)]
1759fn initialize_buckets_start_and_end_16u(
1760    buckets: &mut [SaSint],
1761    freq: Option<&mut [SaSint]>,
1762) -> SaSint {
1763    let (count_buckets, start_end) = buckets.split_at_mut(6 * ALPHABET_SIZE);
1764    let (bucket_start, bucket_end) = start_end.split_at_mut(ALPHABET_SIZE);
1765
1766    let mut k = -1;
1767    let mut sum = 0;
1768
1769    if let Some(freq) = freq {
1770        for j in 0..ALPHABET_SIZE {
1771            let i = buckets_index4(j, 0);
1772            let total = count_buckets[i]
1773                + count_buckets[i + buckets_index4(0, 1)]
1774                + count_buckets[i + buckets_index4(0, 2)]
1775                + count_buckets[i + buckets_index4(0, 3)];
1776
1777            bucket_start[j] = sum;
1778            sum += total;
1779            bucket_end[j] = sum;
1780            if total > 0 {
1781                k = j as SaSint;
1782            }
1783            freq[j] = total;
1784        }
1785    } else {
1786        for j in 0..ALPHABET_SIZE {
1787            let i = buckets_index4(j, 0);
1788            let total = count_buckets[i]
1789                + count_buckets[i + buckets_index4(0, 1)]
1790                + count_buckets[i + buckets_index4(0, 2)]
1791                + count_buckets[i + buckets_index4(0, 3)];
1792
1793            bucket_start[j] = sum;
1794            sum += total;
1795            bucket_end[j] = sum;
1796            if total > 0 {
1797                k = j as SaSint;
1798            }
1799        }
1800    }
1801
1802    k + 1
1803}
1804
1805#[allow(dead_code)]
1806fn initialize_buckets_for_lms_suffixes_radix_sort_16u(
1807    t: &[u16],
1808    buckets: &mut [SaSint],
1809    mut first_lms_suffix: SaSint,
1810) -> SaSint {
1811    let mut f0 = 0usize;
1812    let mut c0 = t[first_lms_suffix as usize] as isize;
1813
1814    loop {
1815        first_lms_suffix -= 1;
1816        if first_lms_suffix < 0 {
1817            break;
1818        }
1819
1820        let c1 = c0;
1821        c0 = t[first_lms_suffix as usize] as isize;
1822        let f1 = f0;
1823        f0 = usize::from(c0 > c1 - f1 as isize);
1824        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1825    }
1826
1827    buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1828
1829    let (count_buckets, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1830    let mut sum = 0;
1831    for c in 0..ALPHABET_SIZE {
1832        let i = buckets_index4(c, 0);
1833        let j = buckets_index2(c, 0);
1834        temp_bucket[j + buckets_index2(0, 1)] = sum;
1835        sum += count_buckets[i + buckets_index4(0, 1)] + count_buckets[i + buckets_index4(0, 3)];
1836        temp_bucket[j] = sum;
1837    }
1838
1839    sum
1840}
1841
1842#[allow(dead_code)]
1843fn radix_sort_lms_suffixes_16u(
1844    t: &[u16],
1845    sa: &mut [SaSint],
1846    induction_bucket: &mut [SaSint],
1847    omp_block_start: SaSint,
1848    omp_block_size: SaSint,
1849) {
1850    let mut i = omp_block_start + omp_block_size - 1;
1851    let mut j = omp_block_start + 64 + 3;
1852    while i >= j {
1853        let p0 = sa[i as usize];
1854        induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1855        sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1856
1857        let p1 = sa[(i - 1) as usize];
1858        induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1859        sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1860
1861        let p2 = sa[(i - 2) as usize];
1862        induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1863        sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1864
1865        let p3 = sa[(i - 3) as usize];
1866        induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1867        sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1868
1869        i -= 4;
1870    }
1871
1872    j -= 64 + 3;
1873    while i >= j {
1874        let p = sa[i as usize];
1875        induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1876        sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1877        i -= 1;
1878    }
1879}
1880
1881#[allow(dead_code)]
1882fn radix_sort_lms_suffixes_16u_omp(
1883    t: &[u16],
1884    sa: &mut [SaSint],
1885    n: SaSint,
1886    m: SaSint,
1887    flags: SaSint,
1888    buckets: &mut [SaSint],
1889    threads: SaSint,
1890    thread_state: &mut [ThreadState],
1891) {
1892    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1893        buckets[4 * ALPHABET_SIZE] -= 1;
1894    }
1895    if threads == 1 || n < 65_536 || m < 65_536 || thread_state.is_empty() {
1896        radix_sort_lms_suffixes_16u(t, sa, &mut buckets[4 * ALPHABET_SIZE..], n - m + 1, m - 1);
1897        return;
1898    }
1899
1900    let thread_count = threads as usize;
1901    for thread in 0..thread_count {
1902        let (src_buckets, state_buckets) = (
1903            &buckets[4 * ALPHABET_SIZE..],
1904            &mut thread_state[thread].buckets,
1905        );
1906        for c in 0..ALPHABET_SIZE {
1907            let i = buckets_index2(c, 0);
1908            let j = buckets_index4(c, 1);
1909            state_buckets[i] = src_buckets[i] - state_buckets[j];
1910        }
1911
1912        let mut block_start = 0;
1913        let mut block_size = thread_state[thread].m;
1914        for idx in (thread..thread_count).rev() {
1915            block_start += thread_state[idx].m;
1916        }
1917
1918        if block_start == m && block_size > 0 {
1919            block_start -= 1;
1920            block_size -= 1;
1921        }
1922
1923        radix_sort_lms_suffixes_16u(
1924            t,
1925            sa,
1926            &mut thread_state[thread].buckets,
1927            n - block_start,
1928            block_size,
1929        );
1930    }
1931}
1932
1933#[allow(dead_code)]
1934fn radix_sort_lms_suffixes_32s_6k(
1935    t: &[SaSint],
1936    sa: &mut [SaSint],
1937    induction_bucket: &mut [SaSint],
1938    omp_block_start: SaSint,
1939    omp_block_size: SaSint,
1940) {
1941    let mut i = omp_block_start + omp_block_size - 1;
1942    let mut j = omp_block_start + 64 + 3;
1943    while i >= j {
1944        let p0 = sa[i as usize];
1945        induction_bucket[t[p0 as usize] as usize] -= 1;
1946        sa[induction_bucket[t[p0 as usize] as usize] as usize] = p0;
1947        let p1 = sa[(i - 1) as usize];
1948        induction_bucket[t[p1 as usize] as usize] -= 1;
1949        sa[induction_bucket[t[p1 as usize] as usize] as usize] = p1;
1950        let p2 = sa[(i - 2) as usize];
1951        induction_bucket[t[p2 as usize] as usize] -= 1;
1952        sa[induction_bucket[t[p2 as usize] as usize] as usize] = p2;
1953        let p3 = sa[(i - 3) as usize];
1954        induction_bucket[t[p3 as usize] as usize] -= 1;
1955        sa[induction_bucket[t[p3 as usize] as usize] as usize] = p3;
1956        i -= 4;
1957    }
1958
1959    j -= 64 + 3;
1960    while i >= j {
1961        let p = sa[i as usize];
1962        induction_bucket[t[p as usize] as usize] -= 1;
1963        sa[induction_bucket[t[p as usize] as usize] as usize] = p;
1964        i -= 1;
1965    }
1966}
1967
1968#[allow(dead_code)]
1969fn radix_sort_lms_suffixes_32s_2k(
1970    t: &[SaSint],
1971    sa: &mut [SaSint],
1972    induction_bucket: &mut [SaSint],
1973    omp_block_start: SaSint,
1974    omp_block_size: SaSint,
1975) {
1976    let mut i = omp_block_start + omp_block_size - 1;
1977    let mut j = omp_block_start + 64 + 3;
1978    while i >= j {
1979        let p0 = sa[i as usize];
1980        induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1981        sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1982        let p1 = sa[(i - 1) as usize];
1983        induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1984        sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1985        let p2 = sa[(i - 2) as usize];
1986        induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1987        sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1988        let p3 = sa[(i - 3) as usize];
1989        induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1990        sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1991        i -= 4;
1992    }
1993
1994    j -= 64 + 3;
1995    while i >= j {
1996        let p = sa[i as usize];
1997        induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1998        sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1999        i -= 1;
2000    }
2001}
2002
2003#[allow(dead_code)]
2004fn radix_sort_lms_suffixes_32s_block_gather(
2005    t: &[SaSint],
2006    sa: &[SaSint],
2007    cache: &mut [ThreadCache],
2008    omp_block_start: SaSint,
2009    omp_block_size: SaSint,
2010) {
2011    if omp_block_size <= 0 {
2012        return;
2013    }
2014
2015    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2016    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2017    let cache_base = if cache.len() >= start + size {
2018        0
2019    } else {
2020        start
2021    };
2022    let mut i = start;
2023    let mut j = if size > 67 { start + size - 67 } else { start };
2024
2025    while i < j {
2026        for current in [i, i + 1, i + 2, i + 3] {
2027            let ci = current - cache_base;
2028            let index = sa[current];
2029            cache[ci].index = index;
2030            cache[ci].symbol = t[index as usize];
2031        }
2032        i += 4;
2033    }
2034
2035    j = if size > 67 { j + 67 } else { start + size };
2036    while i < j {
2037        let ci = i - cache_base;
2038        let index = sa[i];
2039        cache[ci].index = index;
2040        cache[ci].symbol = t[index as usize];
2041        i += 1;
2042    }
2043}
2044
2045#[allow(dead_code)]
2046fn radix_sort_lms_suffixes_32s_6k_block_sort(
2047    induction_bucket: &mut [SaSint],
2048    cache: &mut [ThreadCache],
2049    omp_block_start: SaSint,
2050    omp_block_size: SaSint,
2051) {
2052    if omp_block_size <= 0 {
2053        return;
2054    }
2055
2056    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2057    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2058    let cache_base = if cache.len() >= start + size {
2059        0
2060    } else {
2061        start
2062    };
2063    let mut i = start + size - 1;
2064    let mut j = start + 64 + 3;
2065
2066    while i >= j {
2067        for current in [i, i - 1, i - 2, i - 3] {
2068            let ci = current - cache_base;
2069            let v = cache[ci].symbol as usize;
2070            induction_bucket[v] -= 1;
2071            cache[ci].symbol = induction_bucket[v];
2072        }
2073        i -= 4;
2074    }
2075
2076    j -= 64 + 3;
2077    while i >= j {
2078        let ci = i - cache_base;
2079        let v = cache[ci].symbol as usize;
2080        induction_bucket[v] -= 1;
2081        cache[ci].symbol = induction_bucket[v];
2082        if i == 0 {
2083            break;
2084        }
2085        i -= 1;
2086    }
2087}
2088
2089#[allow(dead_code)]
2090fn radix_sort_lms_suffixes_32s_2k_block_sort(
2091    induction_bucket: &mut [SaSint],
2092    cache: &mut [ThreadCache],
2093    omp_block_start: SaSint,
2094    omp_block_size: SaSint,
2095) {
2096    if omp_block_size <= 0 {
2097        return;
2098    }
2099
2100    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2101    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2102    let cache_base = if cache.len() >= start + size {
2103        0
2104    } else {
2105        start
2106    };
2107    let mut i = start + size - 1;
2108    let mut j = start + 64 + 3;
2109
2110    while i >= j {
2111        for current in [i, i - 1, i - 2, i - 3] {
2112            let ci = current - cache_base;
2113            let v = buckets_index2(cache[ci].symbol as usize, 0);
2114            induction_bucket[v] -= 1;
2115            cache[ci].symbol = induction_bucket[v];
2116        }
2117        i -= 4;
2118    }
2119
2120    j -= 64 + 3;
2121    while i >= j {
2122        let ci = i - cache_base;
2123        let v = buckets_index2(cache[ci].symbol as usize, 0);
2124        induction_bucket[v] -= 1;
2125        cache[ci].symbol = induction_bucket[v];
2126        if i == 0 {
2127            break;
2128        }
2129        i -= 1;
2130    }
2131}
2132
2133#[allow(dead_code)]
2134fn radix_sort_lms_suffixes_32s_6k_block_omp(
2135    t: &[SaSint],
2136    sa: &mut [SaSint],
2137    induction_bucket: &mut [SaSint],
2138    cache: &mut [ThreadCache],
2139    block_start: SaSint,
2140    block_size: SaSint,
2141    threads: SaSint,
2142) {
2143    if threads <= 1 || block_size < 16_384 {
2144        radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2145        return;
2146    }
2147
2148    radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2149    radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2150    place_cached_suffixes(sa, cache, block_start, block_size);
2151}
2152
2153#[allow(dead_code)]
2154fn radix_sort_lms_suffixes_32s_2k_block_omp(
2155    t: &[SaSint],
2156    sa: &mut [SaSint],
2157    induction_bucket: &mut [SaSint],
2158    cache: &mut [ThreadCache],
2159    block_start: SaSint,
2160    block_size: SaSint,
2161    threads: SaSint,
2162) {
2163    if threads <= 1 || block_size < 16_384 {
2164        radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2165        return;
2166    }
2167
2168    radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2169    radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2170    place_cached_suffixes(sa, cache, block_start, block_size);
2171}
2172
2173#[allow(dead_code)]
2174fn radix_sort_lms_suffixes_32s_6k_omp(
2175    t: &[SaSint],
2176    sa: &mut [SaSint],
2177    n: SaSint,
2178    m: SaSint,
2179    induction_bucket: &mut [SaSint],
2180    threads: SaSint,
2181) {
2182    if threads <= 1 || m < 65_536 {
2183        radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, n - m + 1, m - 1);
2184        return;
2185    }
2186
2187    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2188    let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2189    let mut block_start = 0usize;
2190    let m_usize = usize::try_from(m).expect("m must be non-negative");
2191    let n_usize = usize::try_from(n).expect("n must be non-negative");
2192    let last = m_usize - 1;
2193
2194    while block_start < last {
2195        let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2196        radix_sort_lms_suffixes_32s_6k_block_omp(
2197            t,
2198            sa,
2199            induction_bucket,
2200            &mut cache,
2201            (n_usize - block_end) as SaSint,
2202            (block_end - block_start) as SaSint,
2203            threads,
2204        );
2205        block_start = block_end;
2206    }
2207}
2208
2209#[allow(dead_code)]
2210fn radix_sort_lms_suffixes_32s_2k_omp(
2211    t: &[SaSint],
2212    sa: &mut [SaSint],
2213    n: SaSint,
2214    m: SaSint,
2215    induction_bucket: &mut [SaSint],
2216    threads: SaSint,
2217) {
2218    if threads <= 1 || m < 65_536 {
2219        radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, n - m + 1, m - 1);
2220        return;
2221    }
2222
2223    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2224    let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2225    let mut block_start = 0usize;
2226    let m_usize = usize::try_from(m).expect("m must be non-negative");
2227    let n_usize = usize::try_from(n).expect("n must be non-negative");
2228    let last = m_usize - 1;
2229
2230    while block_start < last {
2231        let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2232        radix_sort_lms_suffixes_32s_2k_block_omp(
2233            t,
2234            sa,
2235            induction_bucket,
2236            &mut cache,
2237            (n_usize - block_end) as SaSint,
2238            (block_end - block_start) as SaSint,
2239            threads,
2240        );
2241        block_start = block_end;
2242    }
2243}
2244
2245#[allow(dead_code)]
2246fn radix_sort_lms_suffixes_32s_1k(
2247    t: &[SaSint],
2248    sa: &mut [SaSint],
2249    n: SaSint,
2250    buckets: &mut [SaSint],
2251) -> SaSint {
2252    let mut i = n - 2;
2253    let mut m = 0;
2254    let mut f0 = 1usize;
2255    let mut f1: usize;
2256    let mut c0 = t[(n - 1) as usize] as isize;
2257    let mut c1: isize;
2258    let mut c2 = 0isize;
2259
2260    while i >= 64 + 3 {
2261        c1 = t[i as usize] as isize;
2262        f1 = usize::from(c1 > c0 - f0 as isize);
2263        if (f1 & !f0) != 0 {
2264            c2 = c0;
2265            buckets[c2 as usize] -= 1;
2266            sa[buckets[c2 as usize] as usize] = i + 1;
2267            m += 1;
2268        }
2269        c0 = t[(i - 1) as usize] as isize;
2270        f0 = usize::from(c0 > c1 - f1 as isize);
2271        if (f0 & !f1) != 0 {
2272            c2 = c1;
2273            buckets[c2 as usize] -= 1;
2274            sa[buckets[c2 as usize] as usize] = i;
2275            m += 1;
2276        }
2277        c1 = t[(i - 2) as usize] as isize;
2278        f1 = usize::from(c1 > c0 - f0 as isize);
2279        if (f1 & !f0) != 0 {
2280            c2 = c0;
2281            buckets[c2 as usize] -= 1;
2282            sa[buckets[c2 as usize] as usize] = i - 1;
2283            m += 1;
2284        }
2285        c0 = t[(i - 3) as usize] as isize;
2286        f0 = usize::from(c0 > c1 - f1 as isize);
2287        if (f0 & !f1) != 0 {
2288            c2 = c1;
2289            buckets[c2 as usize] -= 1;
2290            sa[buckets[c2 as usize] as usize] = i - 2;
2291            m += 1;
2292        }
2293        i -= 4;
2294    }
2295
2296    while i >= 0 {
2297        c1 = c0;
2298        c0 = t[i as usize] as isize;
2299        f1 = f0;
2300        f0 = usize::from(c0 > c1 - f1 as isize);
2301        if (f0 & !f1) != 0 {
2302            c2 = c1;
2303            buckets[c2 as usize] -= 1;
2304            sa[buckets[c2 as usize] as usize] = i + 1;
2305            m += 1;
2306        }
2307        i -= 1;
2308    }
2309
2310    if m > 1 {
2311        sa[buckets[c2 as usize] as usize] = 0;
2312    }
2313
2314    m
2315}
2316
2317#[allow(dead_code)]
2318fn radix_sort_set_markers_32s_6k(
2319    sa: &mut [SaSint],
2320    induction_bucket: &[SaSint],
2321    omp_block_start: SaSint,
2322    omp_block_size: SaSint,
2323) {
2324    let mut i = omp_block_start;
2325    let mut j = omp_block_start + omp_block_size - 64 - 3;
2326
2327    while i < j {
2328        sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2329        sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2330        sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2331        sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2332        i += 4;
2333    }
2334
2335    j += 64 + 3;
2336    while i < j {
2337        sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2338        i += 1;
2339    }
2340}
2341
2342#[allow(dead_code)]
2343fn radix_sort_set_markers_32s_4k(
2344    sa: &mut [SaSint],
2345    induction_bucket: &[SaSint],
2346    omp_block_start: SaSint,
2347    omp_block_size: SaSint,
2348) {
2349    let mut i = omp_block_start;
2350    let mut j = omp_block_start + omp_block_size - 64 - 3;
2351
2352    while i < j {
2353        sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2354        sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2355        sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2356        sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2357        i += 4;
2358    }
2359
2360    j += 64 + 3;
2361    while i < j {
2362        sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2363        i += 1;
2364    }
2365}
2366
2367#[allow(dead_code)]
2368fn radix_sort_set_markers_32s_6k_omp(
2369    sa: &mut [SaSint],
2370    k: SaSint,
2371    induction_bucket: &[SaSint],
2372    threads: SaSint,
2373) {
2374    if k <= 1 {
2375        return;
2376    }
2377
2378    if threads <= 1 || k < 65_536 {
2379        radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k - 1);
2380        return;
2381    }
2382
2383    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2384    let last = usize::try_from(k - 1).expect("k must be positive");
2385    let stride = (last / threads_usize) & !15usize;
2386    let mut start = 0usize;
2387
2388    for thread in 0..threads_usize {
2389        let end = if thread + 1 == threads_usize {
2390            last
2391        } else {
2392            start + stride
2393        };
2394        if end > start {
2395            radix_sort_set_markers_32s_6k(
2396                sa,
2397                induction_bucket,
2398                start as SaSint,
2399                (end - start) as SaSint,
2400            );
2401        }
2402        start = end;
2403    }
2404}
2405
2406#[allow(dead_code)]
2407fn radix_sort_set_markers_32s_4k_omp(
2408    sa: &mut [SaSint],
2409    k: SaSint,
2410    induction_bucket: &[SaSint],
2411    threads: SaSint,
2412) {
2413    if k <= 1 {
2414        return;
2415    }
2416
2417    if threads <= 1 || k < 65_536 {
2418        radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k - 1);
2419        return;
2420    }
2421
2422    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2423    let last = usize::try_from(k - 1).expect("k must be positive");
2424    let stride = (last / threads_usize) & !15usize;
2425    let mut start = 0usize;
2426
2427    for thread in 0..threads_usize {
2428        let end = if thread + 1 == threads_usize {
2429            last
2430        } else {
2431            start + stride
2432        };
2433        if end > start {
2434            radix_sort_set_markers_32s_4k(
2435                sa,
2436                induction_bucket,
2437                start as SaSint,
2438                (end - start) as SaSint,
2439            );
2440        }
2441        start = end;
2442    }
2443}
2444
2445#[allow(dead_code)]
2446fn initialize_buckets_for_partial_sorting_16u(
2447    t: &[u16],
2448    buckets: &mut [SaSint],
2449    first_lms_suffix: SaSint,
2450    left_suffixes_count: SaSint,
2451) {
2452    buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2453
2454    let (front, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
2455    let mut sum0 = left_suffixes_count + 1;
2456    let mut sum1 = 0;
2457
2458    for c in 0..ALPHABET_SIZE {
2459        let i = buckets_index4(c, 0);
2460        let j = buckets_index2(c, 0);
2461
2462        temp_bucket[j + buckets_index2(0, 0)] = sum0;
2463
2464        sum0 += front[i + buckets_index4(0, 0)] + front[i + buckets_index4(0, 2)];
2465        sum1 += front[i + buckets_index4(0, 1)];
2466
2467        front[j + buckets_index2(0, 0)] = sum0;
2468        front[j + buckets_index2(0, 1)] = sum1;
2469    }
2470}
2471
2472#[allow(dead_code)]
2473fn partial_sorting_shift_markers_32s_6k_omp(
2474    sa: &mut [SaSint],
2475    k: SaSint,
2476    buckets: &[SaSint],
2477    threads: SaSint,
2478) {
2479    let k_usize = usize::try_from(k).expect("k must be non-negative");
2480    let temp_bucket = &buckets[4 * k_usize..];
2481    let thread_count = if threads > 1 && k >= 65536 {
2482        usize::try_from(threads).expect("threads must be positive")
2483    } else {
2484        1
2485    };
2486    for t in 0..thread_count {
2487        let mut c = k_usize as isize - 1 - t as isize;
2488        while c >= 1 {
2489            let c_usize = c as usize;
2490            let mut i = buckets[buckets_index4(c_usize, 0)] - 1;
2491            let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] + 3;
2492            let mut s = SAINT_MIN;
2493
2494            while i >= j {
2495                let p0 = sa[i as usize];
2496                let q0 = (p0 & SAINT_MIN) ^ s;
2497                s ^= q0;
2498                sa[i as usize] = p0 ^ q0;
2499
2500                let p1 = sa[(i - 1) as usize];
2501                let q1 = (p1 & SAINT_MIN) ^ s;
2502                s ^= q1;
2503                sa[(i - 1) as usize] = p1 ^ q1;
2504
2505                let p2 = sa[(i - 2) as usize];
2506                let q2 = (p2 & SAINT_MIN) ^ s;
2507                s ^= q2;
2508                sa[(i - 2) as usize] = p2 ^ q2;
2509
2510                let p3 = sa[(i - 3) as usize];
2511                let q3 = (p3 & SAINT_MIN) ^ s;
2512                s ^= q3;
2513                sa[(i - 3) as usize] = p3 ^ q3;
2514
2515                i -= 4;
2516            }
2517
2518            j -= 3;
2519            while i >= j {
2520                let p = sa[i as usize];
2521                let q = (p & SAINT_MIN) ^ s;
2522                s ^= q;
2523                sa[i as usize] = p ^ q;
2524                i -= 1;
2525            }
2526
2527            c -= thread_count as isize;
2528        }
2529    }
2530}
2531
2532#[allow(dead_code)]
2533fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
2534    let mut i = n - 1;
2535    let mut s = SUFFIX_GROUP_MARKER;
2536
2537    while i >= 3 {
2538        let p0 = sa[i as usize];
2539        let q0 =
2540            ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2541        s ^= q0;
2542        sa[i as usize] = p0 ^ q0;
2543
2544        let p1 = sa[(i - 1) as usize];
2545        let q1 =
2546            ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2547        s ^= q1;
2548        sa[(i - 1) as usize] = p1 ^ q1;
2549
2550        let p2 = sa[(i - 2) as usize];
2551        let q2 =
2552            ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2553        s ^= q2;
2554        sa[(i - 2) as usize] = p2 ^ q2;
2555
2556        let p3 = sa[(i - 3) as usize];
2557        let q3 =
2558            ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2559        s ^= q3;
2560        sa[(i - 3) as usize] = p3 ^ q3;
2561
2562        i -= 4;
2563    }
2564
2565    while i >= 0 {
2566        let p = sa[i as usize];
2567        let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2568        s ^= q;
2569        sa[i as usize] = p ^ q;
2570        i -= 1;
2571    }
2572}
2573
2574#[allow(dead_code)]
2575fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
2576    let temp_offset = 4 * k as usize;
2577    let mut i = buckets_index2(0, 0);
2578
2579    while i <= buckets_index2(k as usize - 1, 0) {
2580        buckets[2 * i + buckets_index4(0, 0)] = buckets[temp_offset + i + buckets_index2(0, 0)];
2581        buckets[2 * i + buckets_index4(0, 1)] = buckets[temp_offset + i + buckets_index2(0, 1)];
2582        i += buckets_index2(1, 0);
2583    }
2584}
2585
2586#[allow(dead_code)]
2587fn partial_sorting_scan_left_to_right_16u(
2588    t: &[u16],
2589    sa: &mut [SaSint],
2590    buckets: &mut [SaSint],
2591    mut d: SaSint,
2592    omp_block_start: SaSint,
2593    omp_block_size: SaSint,
2594) -> SaSint {
2595    let mut i = omp_block_start as isize;
2596    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
2597    while i < j {
2598        let mut p0 = sa[i as usize];
2599        d += SaSint::from(p0 < 0);
2600        p0 &= SAINT_MAX;
2601        let v0 = buckets_index2(
2602            t[(p0 - 1) as usize] as usize,
2603            usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2604        );
2605        let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2606            SAINT_MIN
2607        } else {
2608            0
2609        };
2610        let dst0 = buckets[4 * ALPHABET_SIZE + v0] as usize;
2611        sa[dst0] = (p0 - 1) | mark0;
2612        buckets[4 * ALPHABET_SIZE + v0] += 1;
2613        buckets[2 * ALPHABET_SIZE + v0] = d;
2614
2615        let mut p1 = sa[(i + 1) as usize];
2616        d += SaSint::from(p1 < 0);
2617        p1 &= SAINT_MAX;
2618        let v1 = buckets_index2(
2619            t[(p1 - 1) as usize] as usize,
2620            usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2621        );
2622        let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2623            SAINT_MIN
2624        } else {
2625            0
2626        };
2627        let dst1 = buckets[4 * ALPHABET_SIZE + v1] as usize;
2628        sa[dst1] = (p1 - 1) | mark1;
2629        buckets[4 * ALPHABET_SIZE + v1] += 1;
2630        buckets[2 * ALPHABET_SIZE + v1] = d;
2631
2632        i += 2;
2633    }
2634
2635    j += 64 + 1;
2636    while i < j {
2637        let mut p = sa[i as usize];
2638        d += SaSint::from(p < 0);
2639        p &= SAINT_MAX;
2640        let v = buckets_index2(
2641            t[(p - 1) as usize] as usize,
2642            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2643        );
2644        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2645            SAINT_MIN
2646        } else {
2647            0
2648        };
2649        let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2650        sa[dst] = (p - 1) | mark;
2651        buckets[4 * ALPHABET_SIZE + v] += 1;
2652        buckets[2 * ALPHABET_SIZE + v] = d;
2653        i += 1;
2654    }
2655
2656    d
2657}
2658
2659#[allow(dead_code)]
2660fn partial_sorting_scan_left_to_right_16u_block_prepare(
2661    t: &[u16],
2662    sa: &mut [SaSint],
2663    k: SaSint,
2664    buckets: &mut [SaSint],
2665    cache: &mut [ThreadCache],
2666    omp_block_start: SaSint,
2667    omp_block_size: SaSint,
2668    state: &mut ThreadState,
2669) -> SaSint {
2670    let width = 2 * k as usize;
2671    buckets[..width].fill(0);
2672    buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2673
2674    let mut count = 0usize;
2675    let mut d = 1;
2676    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
2677        let mut p = sa[i];
2678        cache[count].index = p;
2679        d += SaSint::from(p < 0);
2680        p &= SAINT_MAX;
2681        let v = buckets_index2(
2682            t[(p - 1) as usize] as usize,
2683            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2684        );
2685        cache[count].symbol = v as SaSint;
2686        buckets[v] += 1;
2687        buckets[2 * ALPHABET_SIZE + v] = d;
2688        count += 1;
2689    }
2690    state.cache_entries = count;
2691    d - 1
2692}
2693
2694#[allow(dead_code)]
2695fn partial_sorting_scan_left_to_right_16u_block_place(
2696    sa: &mut [SaSint],
2697    buckets: &mut [SaSint],
2698    cache: &[ThreadCache],
2699    count: SaSint,
2700    mut d: SaSint,
2701) {
2702    for entry in cache.iter().take(count as usize) {
2703        let mut p = entry.index;
2704        d += SaSint::from(p < 0);
2705        p &= SAINT_MAX;
2706        let v = entry.symbol as usize;
2707        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2708            SAINT_MIN
2709        } else {
2710            0
2711        };
2712        let dst = buckets[v] as usize;
2713        sa[dst] = (p - 1) | mark;
2714        buckets[v] += 1;
2715        buckets[2 * ALPHABET_SIZE + v] = d;
2716    }
2717}
2718
2719#[allow(dead_code)]
2720fn partial_sorting_scan_left_to_right_16u_block_omp(
2721    t: &[u16],
2722    sa: &mut [SaSint],
2723    k: SaSint,
2724    buckets: &mut [SaSint],
2725    d: SaSint,
2726    block_start: SaSint,
2727    block_size: SaSint,
2728    threads: SaSint,
2729    thread_state: &mut [ThreadState],
2730) -> SaSint {
2731    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
2732        usize::try_from(threads)
2733            .expect("threads must be non-negative")
2734            .min(thread_state.len())
2735    } else {
2736        1
2737    };
2738    if thread_count <= 1 {
2739        return partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, block_start, block_size);
2740    }
2741
2742    let bucket_width = 2 * k as usize;
2743    let block_stride = (block_size / thread_count as SaSint) & !15;
2744
2745    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
2746        let local_start = thread as SaSint * block_stride;
2747        let local_size = if thread + 1 < thread_count {
2748            block_stride
2749        } else {
2750            block_size - local_start
2751        };
2752        let mut local_state = ThreadState::default();
2753        state.position = partial_sorting_scan_left_to_right_16u_block_prepare(
2754            t,
2755            sa,
2756            k,
2757            &mut state.buckets,
2758            &mut state.cache,
2759            block_start + local_start,
2760            local_size,
2761            &mut local_state,
2762        );
2763        state.count = local_state.cache_entries as SaSint;
2764    }
2765
2766    let mut next_d = d;
2767    for state in thread_state.iter_mut().take(thread_count) {
2768        for c in 0..bucket_width {
2769            let a = buckets[4 * ALPHABET_SIZE + c];
2770            let b = state.buckets[c];
2771            buckets[4 * ALPHABET_SIZE + c] = a + b;
2772            state.buckets[c] = a;
2773        }
2774
2775        next_d -= 1;
2776        for c in 0..bucket_width {
2777            let a = buckets[2 * ALPHABET_SIZE + c];
2778            let b = state.buckets[2 * ALPHABET_SIZE + c];
2779            let shifted = b + next_d;
2780            buckets[2 * ALPHABET_SIZE + c] = if b > 0 { shifted } else { a };
2781            state.buckets[2 * ALPHABET_SIZE + c] = a;
2782        }
2783        next_d += 1 + state.position;
2784        state.position = next_d - state.position;
2785    }
2786
2787    for state in thread_state.iter_mut().take(thread_count) {
2788        partial_sorting_scan_left_to_right_16u_block_place(
2789            sa,
2790            &mut state.buckets,
2791            &state.cache,
2792            state.count,
2793            state.position,
2794        );
2795    }
2796
2797    next_d
2798}
2799
2800#[allow(dead_code)]
2801fn partial_sorting_scan_left_to_right_16u_omp(
2802    t: &[u16],
2803    sa: &mut [SaSint],
2804    n: SaSint,
2805    k: SaSint,
2806    buckets: &mut [SaSint],
2807    left_suffixes_count: SaSint,
2808    mut d: SaSint,
2809    threads: SaSint,
2810) -> SaSint {
2811    let v = buckets_index2(
2812        t[(n - 1) as usize] as usize,
2813        usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2814    );
2815    let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2816    buckets[4 * ALPHABET_SIZE + v] += 1;
2817    sa[dst] = (n - 1) | SAINT_MIN;
2818    d += 1;
2819    buckets[2 * ALPHABET_SIZE + v] = d;
2820
2821    if threads == 1 || left_suffixes_count < 65536 {
2822        d = partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, 0, left_suffixes_count);
2823    } else {
2824        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
2825        let mut block_start = 0;
2826        while block_start < left_suffixes_count {
2827            if sa[block_start as usize] == 0 {
2828                block_start += 1;
2829            } else {
2830                let mut block_end =
2831                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
2832                if block_end > left_suffixes_count {
2833                    block_end = left_suffixes_count;
2834                }
2835                let mut block_scan_end = block_start + 1;
2836                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
2837                    block_scan_end += 1;
2838                }
2839                let block_size = block_scan_end - block_start;
2840
2841                if block_size < 32 {
2842                    while block_start < block_scan_end {
2843                        let mut p = sa[block_start as usize];
2844                        d += SaSint::from(p < 0);
2845                        p &= SAINT_MAX;
2846                        let v = buckets_index2(
2847                            t[(p - 1) as usize] as usize,
2848                            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2849                        );
2850                        let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2851                        buckets[4 * ALPHABET_SIZE + v] += 1;
2852                        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2853                            SAINT_MIN
2854                        } else {
2855                            0
2856                        };
2857                        sa[dst] = (p - 1) | mark;
2858                        buckets[2 * ALPHABET_SIZE + v] = d;
2859                        block_start += 1;
2860                    }
2861                } else {
2862                    d = partial_sorting_scan_left_to_right_16u_block_omp(
2863                        t,
2864                        sa,
2865                        k,
2866                        buckets,
2867                        d,
2868                        block_start,
2869                        block_size,
2870                        threads,
2871                        &mut thread_state,
2872                    );
2873                    block_start = block_scan_end;
2874                }
2875            }
2876        }
2877    }
2878    d
2879}
2880
2881#[allow(dead_code)]
2882fn partial_sorting_scan_right_to_left_16u(
2883    t: &[u16],
2884    sa: &mut [SaSint],
2885    buckets: &mut [SaSint],
2886    mut d: SaSint,
2887    omp_block_start: SaSint,
2888    omp_block_size: SaSint,
2889) -> SaSint {
2890    let mut i = (omp_block_start + omp_block_size - 1) as isize;
2891    let mut j = (omp_block_start + 64 + 1) as isize;
2892    while i >= j {
2893        let mut p0 = sa[i as usize];
2894        d += SaSint::from(p0 < 0);
2895        p0 &= SAINT_MAX;
2896        let v0 = buckets_index2(
2897            t[(p0 - 1) as usize] as usize,
2898            usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
2899        );
2900        let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2901            SAINT_MIN
2902        } else {
2903            0
2904        };
2905        buckets[v0] -= 1;
2906        sa[buckets[v0] as usize] = (p0 - 1) | mark0;
2907        buckets[2 * ALPHABET_SIZE + v0] = d;
2908
2909        let mut p1 = sa[(i - 1) as usize];
2910        d += SaSint::from(p1 < 0);
2911        p1 &= SAINT_MAX;
2912        let v1 = buckets_index2(
2913            t[(p1 - 1) as usize] as usize,
2914            usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
2915        );
2916        let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2917            SAINT_MIN
2918        } else {
2919            0
2920        };
2921        buckets[v1] -= 1;
2922        sa[buckets[v1] as usize] = (p1 - 1) | mark1;
2923        buckets[2 * ALPHABET_SIZE + v1] = d;
2924
2925        i -= 2;
2926    }
2927
2928    j -= 64 + 1;
2929    while i >= j {
2930        let mut p = sa[i as usize];
2931        d += SaSint::from(p < 0);
2932        p &= SAINT_MAX;
2933        let v = buckets_index2(
2934            t[(p - 1) as usize] as usize,
2935            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2936        );
2937        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2938            SAINT_MIN
2939        } else {
2940            0
2941        };
2942        buckets[v] -= 1;
2943        sa[buckets[v] as usize] = (p - 1) | mark;
2944        buckets[2 * ALPHABET_SIZE + v] = d;
2945        i -= 1;
2946    }
2947
2948    d
2949}
2950
2951#[allow(dead_code)]
2952fn partial_sorting_scan_right_to_left_16u_block_prepare(
2953    t: &[u16],
2954    sa: &mut [SaSint],
2955    k: SaSint,
2956    buckets: &mut [SaSint],
2957    cache: &mut [ThreadCache],
2958    omp_block_start: SaSint,
2959    omp_block_size: SaSint,
2960    state: &mut ThreadState,
2961) -> SaSint {
2962    let width = 2 * k as usize;
2963    buckets[..width].fill(0);
2964    buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2965
2966    let mut count = 0usize;
2967    let mut d = 1;
2968    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
2969        let mut p = sa[i];
2970        cache[count].index = p;
2971        d += SaSint::from(p < 0);
2972        p &= SAINT_MAX;
2973        let v = buckets_index2(
2974            t[(p - 1) as usize] as usize,
2975            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2976        );
2977        cache[count].symbol = v as SaSint;
2978        buckets[v] += 1;
2979        buckets[2 * ALPHABET_SIZE + v] = d;
2980        count += 1;
2981    }
2982    state.cache_entries = count;
2983    d - 1
2984}
2985
2986#[allow(dead_code)]
2987fn partial_sorting_scan_right_to_left_16u_block_place(
2988    sa: &mut [SaSint],
2989    buckets: &mut [SaSint],
2990    cache: &[ThreadCache],
2991    count: SaSint,
2992    mut d: SaSint,
2993) {
2994    for entry in cache.iter().take(count as usize) {
2995        let mut p = entry.index;
2996        d += SaSint::from(p < 0);
2997        p &= SAINT_MAX;
2998        let v = entry.symbol as usize;
2999        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3000            SAINT_MIN
3001        } else {
3002            0
3003        };
3004        buckets[v] -= 1;
3005        sa[buckets[v] as usize] = (p - 1) | mark;
3006        buckets[2 * ALPHABET_SIZE + v] = d;
3007    }
3008}
3009
3010#[allow(dead_code)]
3011fn partial_gsa_scan_right_to_left_16u_block_place(
3012    sa: &mut [SaSint],
3013    buckets: &mut [SaSint],
3014    cache: &[ThreadCache],
3015    count: SaSint,
3016    mut d: SaSint,
3017) {
3018    for entry in cache.iter().take(count as usize) {
3019        let mut p = entry.index;
3020        d += SaSint::from(p < 0);
3021        p &= SAINT_MAX;
3022        let v = entry.symbol as usize;
3023        if v != 1 {
3024            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3025                SAINT_MIN
3026            } else {
3027                0
3028            };
3029            buckets[v] -= 1;
3030            sa[buckets[v] as usize] = (p - 1) | mark;
3031            buckets[2 * ALPHABET_SIZE + v] = d;
3032        }
3033    }
3034}
3035
3036#[allow(dead_code)]
3037fn partial_sorting_scan_right_to_left_16u_block_omp(
3038    t: &[u16],
3039    sa: &mut [SaSint],
3040    k: SaSint,
3041    buckets: &mut [SaSint],
3042    d: SaSint,
3043    block_start: SaSint,
3044    block_size: SaSint,
3045    threads: SaSint,
3046    thread_state: &mut [ThreadState],
3047) -> SaSint {
3048    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
3049        usize::try_from(threads)
3050            .expect("threads must be non-negative")
3051            .min(thread_state.len())
3052    } else {
3053        1
3054    };
3055    if thread_count <= 1 {
3056        return partial_sorting_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
3057    }
3058
3059    let width = 2 * k as usize;
3060    let distinct_offset = 2 * ALPHABET_SIZE;
3061    let block_stride = (block_size / thread_count as SaSint) & !15;
3062
3063    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
3064        let local_start = thread as SaSint * block_stride;
3065        let local_size = if thread + 1 < thread_count {
3066            block_stride
3067        } else {
3068            block_size - local_start
3069        };
3070        let mut local_state = ThreadState::default();
3071        state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
3072            t,
3073            sa,
3074            k,
3075            &mut state.buckets,
3076            &mut state.cache,
3077            block_start + local_start,
3078            local_size,
3079            &mut local_state,
3080        );
3081        state.count = local_state.cache_entries as SaSint;
3082    }
3083
3084    let mut next_d = d;
3085    for state in thread_state.iter_mut().take(thread_count).rev() {
3086        for c in 0..width {
3087            let a = buckets[c];
3088            let b = state.buckets[c];
3089            buckets[c] = a - b;
3090            state.buckets[c] = a;
3091        }
3092
3093        next_d -= 1;
3094        for c in 0..width {
3095            let offset = distinct_offset + c;
3096            let a = buckets[offset];
3097            let b = state.buckets[offset];
3098            let shifted = b + next_d;
3099            buckets[offset] = if b > 0 { shifted } else { a };
3100            state.buckets[offset] = a;
3101        }
3102        next_d += 1 + state.position;
3103        state.position = next_d - state.position;
3104    }
3105
3106    for state in thread_state.iter_mut().take(thread_count) {
3107        partial_sorting_scan_right_to_left_16u_block_place(
3108            sa,
3109            &mut state.buckets,
3110            &state.cache,
3111            state.count,
3112            state.position,
3113        );
3114    }
3115
3116    next_d
3117}
3118
3119#[allow(dead_code)]
3120fn partial_sorting_scan_right_to_left_16u_omp(
3121    t: &[u16],
3122    sa: &mut [SaSint],
3123    n: SaSint,
3124    k: SaSint,
3125    buckets: &mut [SaSint],
3126    first_lms_suffix: SaSint,
3127    left_suffixes_count: SaSint,
3128    d: SaSint,
3129    threads: SaSint,
3130) {
3131    let scan_start = left_suffixes_count + 1;
3132    let scan_end = n - first_lms_suffix;
3133
3134    if threads == 1 || scan_end - scan_start < 65536 {
3135        partial_sorting_scan_right_to_left_16u(
3136            t,
3137            sa,
3138            buckets,
3139            d,
3140            scan_start,
3141            scan_end - scan_start,
3142        );
3143    } else {
3144        let mut d = d;
3145        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
3146        let mut block_start = scan_end - 1;
3147        while block_start >= scan_start {
3148            if sa[block_start as usize] == 0 {
3149                block_start -= 1;
3150            } else {
3151                let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
3152                let mut block_max_end = block_start - block_limit;
3153                if block_max_end < scan_start {
3154                    block_max_end = scan_start - 1;
3155                }
3156                let mut block_end = block_start - 1;
3157                while block_end > block_max_end && sa[block_end as usize] != 0 {
3158                    block_end -= 1;
3159                }
3160                let block_size = block_start - block_end;
3161
3162                if block_size < 32 {
3163                    while block_start > block_end {
3164                        let mut p = sa[block_start as usize];
3165                        d += SaSint::from(p < 0);
3166                        p &= SAINT_MAX;
3167                        let v = buckets_index2(
3168                            t[(p - 1) as usize] as usize,
3169                            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3170                        );
3171                        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3172                            SAINT_MIN
3173                        } else {
3174                            0
3175                        };
3176                        buckets[v] -= 1;
3177                        sa[buckets[v] as usize] = (p - 1) | mark;
3178                        buckets[2 * ALPHABET_SIZE + v] = d;
3179                        block_start -= 1;
3180                    }
3181                } else {
3182                    d = partial_sorting_scan_right_to_left_16u_block_omp(
3183                        t,
3184                        sa,
3185                        k,
3186                        buckets,
3187                        d,
3188                        block_end + 1,
3189                        block_size,
3190                        threads,
3191                        &mut thread_state,
3192                    );
3193                    block_start = block_end;
3194                }
3195            }
3196        }
3197    }
3198}
3199
3200#[allow(dead_code)]
3201fn partial_sorting_scan_left_to_right_32s_6k(
3202    t: &[SaSint],
3203    sa: &mut [SaSint],
3204    buckets: &mut [SaSint],
3205    mut d: SaSint,
3206    omp_block_start: SaSint,
3207    omp_block_size: SaSint,
3208) -> SaSint {
3209    let mut i = omp_block_start;
3210    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3211
3212    while i < j {
3213        let mut p2 = sa[i as usize];
3214        d += SaSint::from(p2 < 0);
3215        p2 &= SAINT_MAX;
3216        let v2 = buckets_index4(
3217            t[(p2 - 1) as usize] as usize,
3218            usize::from(t[(p2 - 2) as usize] >= t[(p2 - 1) as usize]),
3219        );
3220        let pos2 = buckets[v2] as usize;
3221        buckets[v2] += 1;
3222        sa[pos2] = (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3223        buckets[2 + v2] = d;
3224
3225        let mut p3 = sa[(i + 1) as usize];
3226        d += SaSint::from(p3 < 0);
3227        p3 &= SAINT_MAX;
3228        let v3 = buckets_index4(
3229            t[(p3 - 1) as usize] as usize,
3230            usize::from(t[(p3 - 2) as usize] >= t[(p3 - 1) as usize]),
3231        );
3232        let pos3 = buckets[v3] as usize;
3233        buckets[v3] += 1;
3234        sa[pos3] = (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3235        buckets[2 + v3] = d;
3236
3237        i += 2;
3238    }
3239
3240    j += 2 * 64 + 1;
3241    while i < j {
3242        let mut p = sa[i as usize];
3243        d += SaSint::from(p < 0);
3244        p &= SAINT_MAX;
3245        let v = buckets_index4(
3246            t[(p - 1) as usize] as usize,
3247            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3248        );
3249        let pos = buckets[v] as usize;
3250        buckets[v] += 1;
3251        sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3252        buckets[2 + v] = d;
3253        i += 1;
3254    }
3255
3256    d
3257}
3258
3259#[allow(dead_code)]
3260fn partial_sorting_scan_left_to_right_32s_4k(
3261    t: &[SaSint],
3262    sa: &mut [SaSint],
3263    k: SaSint,
3264    buckets: &mut [SaSint],
3265    mut d: SaSint,
3266    omp_block_start: SaSint,
3267    omp_block_size: SaSint,
3268) -> SaSint {
3269    let k = k as usize;
3270    let mut i = omp_block_start;
3271    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3272
3273    while i < j {
3274        let mut p0 = sa[i as usize];
3275        sa[i as usize] = p0 & SAINT_MAX;
3276        if p0 > 0 {
3277            sa[i as usize] = 0;
3278            d += p0 >> (SUFFIX_GROUP_BIT - 1);
3279            p0 &= !SUFFIX_GROUP_MARKER;
3280            let v0 = buckets_index2(
3281                t[(p0 - 1) as usize] as usize,
3282                usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3283            );
3284            let c0 = t[(p0 - 1) as usize] as usize;
3285            let pos0 = buckets[2 * k + c0] as usize;
3286            buckets[2 * k + c0] += 1;
3287            sa[pos0] = (p0 - 1)
3288                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3289                    << (SAINT_BIT - 1))
3290                | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3291            buckets[v0] = d;
3292        }
3293
3294        let mut p1 = sa[(i + 1) as usize];
3295        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3296        if p1 > 0 {
3297            sa[(i + 1) as usize] = 0;
3298            d += p1 >> (SUFFIX_GROUP_BIT - 1);
3299            p1 &= !SUFFIX_GROUP_MARKER;
3300            let v1 = buckets_index2(
3301                t[(p1 - 1) as usize] as usize,
3302                usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3303            );
3304            let c1 = t[(p1 - 1) as usize] as usize;
3305            let pos1 = buckets[2 * k + c1] as usize;
3306            buckets[2 * k + c1] += 1;
3307            sa[pos1] = (p1 - 1)
3308                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3309                    << (SAINT_BIT - 1))
3310                | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3311            buckets[v1] = d;
3312        }
3313
3314        i += 2;
3315    }
3316
3317    j += 2 * 64 + 1;
3318    while i < j {
3319        let mut p = sa[i as usize];
3320        sa[i as usize] = p & SAINT_MAX;
3321        if p > 0 {
3322            sa[i as usize] = 0;
3323            d += p >> (SUFFIX_GROUP_BIT - 1);
3324            p &= !SUFFIX_GROUP_MARKER;
3325            let v = buckets_index2(
3326                t[(p - 1) as usize] as usize,
3327                usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3328            );
3329            let c = t[(p - 1) as usize] as usize;
3330            let pos = buckets[2 * k + c] as usize;
3331            buckets[2 * k + c] += 1;
3332            sa[pos] = (p - 1)
3333                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3334                    << (SAINT_BIT - 1))
3335                | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3336            buckets[v] = d;
3337        }
3338        i += 1;
3339    }
3340
3341    d
3342}
3343
3344#[allow(dead_code)]
3345fn partial_sorting_scan_left_to_right_32s_1k(
3346    t: &[SaSint],
3347    sa: &mut [SaSint],
3348    induction_bucket: &mut [SaSint],
3349    omp_block_start: SaSint,
3350    omp_block_size: SaSint,
3351) {
3352    let mut i = omp_block_start;
3353    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3354
3355    while i < j {
3356        let p0 = sa[i as usize];
3357        sa[i as usize] = p0 & SAINT_MAX;
3358        if p0 > 0 {
3359            sa[i as usize] = 0;
3360            let c0 = t[(p0 - 1) as usize] as usize;
3361            let pos0 = induction_bucket[c0] as usize;
3362            induction_bucket[c0] += 1;
3363            sa[pos0] = (p0 - 1)
3364                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3365                    << (SAINT_BIT - 1));
3366        }
3367
3368        let p1 = sa[(i + 1) as usize];
3369        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3370        if p1 > 0 {
3371            sa[(i + 1) as usize] = 0;
3372            let c1 = t[(p1 - 1) as usize] as usize;
3373            let pos1 = induction_bucket[c1] as usize;
3374            induction_bucket[c1] += 1;
3375            sa[pos1] = (p1 - 1)
3376                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3377                    << (SAINT_BIT - 1));
3378        }
3379
3380        i += 2;
3381    }
3382
3383    j += 2 * 64 + 1;
3384    while i < j {
3385        let p = sa[i as usize];
3386        sa[i as usize] = p & SAINT_MAX;
3387        if p > 0 {
3388            sa[i as usize] = 0;
3389            let c = t[(p - 1) as usize] as usize;
3390            let pos = induction_bucket[c] as usize;
3391            induction_bucket[c] += 1;
3392            sa[pos] = (p - 1)
3393                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3394                    << (SAINT_BIT - 1));
3395        }
3396        i += 1;
3397    }
3398}
3399
3400#[allow(dead_code)]
3401fn partial_sorting_scan_left_to_right_32s_6k_omp(
3402    t: &[SaSint],
3403    sa: &mut [SaSint],
3404    n: SaSint,
3405    buckets: &mut [SaSint],
3406    left_suffixes_count: SaSint,
3407    mut d: SaSint,
3408    threads: SaSint,
3409    _thread_state: &mut [ThreadState],
3410) -> SaSint {
3411    let v = buckets_index4(
3412        t[(n - 1) as usize] as usize,
3413        usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3414    );
3415    let pos = buckets[v] as usize;
3416    buckets[v] += 1;
3417    sa[pos] = (n - 1) | SAINT_MIN;
3418    d += 1;
3419    buckets[2 + v] = d;
3420
3421    if threads == 1 || left_suffixes_count < 65536 {
3422        d = partial_sorting_scan_left_to_right_32s_6k(t, sa, buckets, d, 0, left_suffixes_count);
3423    } else {
3424        let mut cache = vec![ThreadCache::default(); left_suffixes_count as usize];
3425        let mut block_start = 0;
3426        while block_start < left_suffixes_count {
3427            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3428            if block_end > left_suffixes_count {
3429                block_end = left_suffixes_count;
3430            }
3431            d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3432                t,
3433                sa,
3434                buckets,
3435                d,
3436                &mut cache,
3437                block_start,
3438                block_end - block_start,
3439                threads,
3440            );
3441            block_start = block_end;
3442        }
3443    }
3444
3445    d
3446}
3447
3448#[allow(dead_code)]
3449fn partial_sorting_scan_left_to_right_32s_4k_omp(
3450    t: &[SaSint],
3451    sa: &mut [SaSint],
3452    n: SaSint,
3453    k: SaSint,
3454    buckets: &mut [SaSint],
3455    mut d: SaSint,
3456    threads: SaSint,
3457    _thread_state: &mut [ThreadState],
3458) -> SaSint {
3459    let k_usize = k as usize;
3460    let pos = buckets[2 * k_usize + t[(n - 1) as usize] as usize] as usize;
3461    buckets[2 * k_usize + t[(n - 1) as usize] as usize] += 1;
3462    sa[pos] = (n - 1)
3463        | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1))
3464        | SUFFIX_GROUP_MARKER;
3465    d += 1;
3466    buckets[buckets_index2(
3467        t[(n - 1) as usize] as usize,
3468        usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]),
3469    )] = d;
3470
3471    if threads == 1 || n < 65536 {
3472        d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n);
3473    } else {
3474        let mut cache = vec![ThreadCache::default(); n as usize];
3475        let mut block_start = 0;
3476        while block_start < n {
3477            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3478            if block_end > n {
3479                block_end = n;
3480            }
3481            d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3482                t,
3483                sa,
3484                k,
3485                buckets,
3486                d,
3487                &mut cache,
3488                block_start,
3489                block_end - block_start,
3490                threads,
3491            );
3492            block_start = block_end;
3493        }
3494    }
3495
3496    d
3497}
3498
3499#[allow(dead_code)]
3500fn partial_sorting_scan_left_to_right_32s_1k_omp(
3501    t: &[SaSint],
3502    sa: &mut [SaSint],
3503    n: SaSint,
3504    buckets: &mut [SaSint],
3505    threads: SaSint,
3506    _thread_state: &mut [ThreadState],
3507) {
3508    let pos = buckets[t[(n - 1) as usize] as usize] as usize;
3509    buckets[t[(n - 1) as usize] as usize] += 1;
3510    sa[pos] = (n - 1)
3511        | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3512
3513    if threads == 1 || n < 65536 {
3514        partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n);
3515    } else {
3516        let mut cache = vec![ThreadCache::default(); n as usize];
3517        let mut block_start = 0;
3518        while block_start < n {
3519            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3520            if block_end > n {
3521                block_end = n;
3522            }
3523            partial_sorting_scan_left_to_right_32s_1k_block_omp(
3524                t,
3525                sa,
3526                buckets,
3527                &mut cache,
3528                block_start,
3529                block_end - block_start,
3530                threads,
3531            );
3532            block_start = block_end;
3533        }
3534    }
3535}
3536
3537#[allow(dead_code)]
3538fn partial_sorting_scan_right_to_left_32s_6k(
3539    t: &[SaSint],
3540    sa: &mut [SaSint],
3541    buckets: &mut [SaSint],
3542    mut d: SaSint,
3543    omp_block_start: SaSint,
3544    omp_block_size: SaSint,
3545) -> SaSint {
3546    if omp_block_size <= 0 {
3547        return d;
3548    }
3549
3550    let mut i = omp_block_start + omp_block_size - 1;
3551    let mut j = omp_block_start + 2 * 64 + 1;
3552
3553    while i >= j {
3554        let mut p2 = sa[i as usize];
3555        d += SaSint::from(p2 < 0);
3556        p2 &= SAINT_MAX;
3557        let v2 = buckets_index4(
3558            t[(p2 - 1) as usize] as usize,
3559            usize::from(t[(p2 - 2) as usize] > t[(p2 - 1) as usize]),
3560        );
3561        buckets[v2] -= 1;
3562        sa[buckets[v2] as usize] =
3563            (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3564        buckets[2 + v2] = d;
3565
3566        let mut p3 = sa[(i - 1) as usize];
3567        d += SaSint::from(p3 < 0);
3568        p3 &= SAINT_MAX;
3569        let v3 = buckets_index4(
3570            t[(p3 - 1) as usize] as usize,
3571            usize::from(t[(p3 - 2) as usize] > t[(p3 - 1) as usize]),
3572        );
3573        buckets[v3] -= 1;
3574        sa[buckets[v3] as usize] =
3575            (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3576        buckets[2 + v3] = d;
3577
3578        i -= 2;
3579    }
3580
3581    j -= 2 * 64 + 1;
3582    while i >= j {
3583        let mut p = sa[i as usize];
3584        d += SaSint::from(p < 0);
3585        p &= SAINT_MAX;
3586        let v = buckets_index4(
3587            t[(p - 1) as usize] as usize,
3588            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3589        );
3590        buckets[v] -= 1;
3591        sa[buckets[v] as usize] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3592        buckets[2 + v] = d;
3593        i -= 1;
3594    }
3595
3596    d
3597}
3598
3599#[allow(dead_code)]
3600fn partial_sorting_scan_right_to_left_32s_4k(
3601    t: &[SaSint],
3602    sa: &mut [SaSint],
3603    k: SaSint,
3604    buckets: &mut [SaSint],
3605    mut d: SaSint,
3606    omp_block_start: SaSint,
3607    omp_block_size: SaSint,
3608) -> SaSint {
3609    if omp_block_size <= 0 {
3610        return d;
3611    }
3612
3613    let k = k as usize;
3614    let mut i = omp_block_start + omp_block_size - 1;
3615    let mut j = omp_block_start + 2 * 64 + 1;
3616
3617    while i >= j {
3618        let mut p0 = sa[i as usize];
3619        if p0 > 0 {
3620            sa[i as usize] = 0;
3621            d += p0 >> (SUFFIX_GROUP_BIT - 1);
3622            p0 &= !SUFFIX_GROUP_MARKER;
3623            let v0 = buckets_index2(
3624                t[(p0 - 1) as usize] as usize,
3625                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
3626            );
3627            let c0 = t[(p0 - 1) as usize] as usize;
3628            buckets[3 * k + c0] -= 1;
3629            sa[buckets[3 * k + c0] as usize] = (p0 - 1)
3630                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3631                    << (SAINT_BIT - 1))
3632                | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3633            buckets[v0] = d;
3634        }
3635
3636        let mut p1 = sa[(i - 1) as usize];
3637        if p1 > 0 {
3638            sa[(i - 1) as usize] = 0;
3639            d += p1 >> (SUFFIX_GROUP_BIT - 1);
3640            p1 &= !SUFFIX_GROUP_MARKER;
3641            let v1 = buckets_index2(
3642                t[(p1 - 1) as usize] as usize,
3643                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
3644            );
3645            let c1 = t[(p1 - 1) as usize] as usize;
3646            buckets[3 * k + c1] -= 1;
3647            sa[buckets[3 * k + c1] as usize] = (p1 - 1)
3648                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3649                    << (SAINT_BIT - 1))
3650                | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3651            buckets[v1] = d;
3652        }
3653
3654        i -= 2;
3655    }
3656
3657    j -= 2 * 64 + 1;
3658    while i >= j {
3659        let mut p = sa[i as usize];
3660        if p > 0 {
3661            sa[i as usize] = 0;
3662            d += p >> (SUFFIX_GROUP_BIT - 1);
3663            p &= !SUFFIX_GROUP_MARKER;
3664            let v = buckets_index2(
3665                t[(p - 1) as usize] as usize,
3666                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3667            );
3668            let c = t[(p - 1) as usize] as usize;
3669            buckets[3 * k + c] -= 1;
3670            sa[buckets[3 * k + c] as usize] = (p - 1)
3671                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3672                    << (SAINT_BIT - 1))
3673                | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3674            buckets[v] = d;
3675        }
3676        i -= 1;
3677    }
3678
3679    d
3680}
3681
3682#[allow(dead_code)]
3683fn partial_sorting_scan_right_to_left_32s_1k(
3684    t: &[SaSint],
3685    sa: &mut [SaSint],
3686    induction_bucket: &mut [SaSint],
3687    omp_block_start: SaSint,
3688    omp_block_size: SaSint,
3689) {
3690    if omp_block_size <= 0 {
3691        return;
3692    }
3693
3694    let mut i = omp_block_start + omp_block_size - 1;
3695    let mut j = omp_block_start + 2 * 64 + 1;
3696
3697    while i >= j {
3698        let p0 = sa[i as usize];
3699        if p0 > 0 {
3700            sa[i as usize] = 0;
3701            let c0 = t[(p0 - 1) as usize] as usize;
3702            induction_bucket[c0] -= 1;
3703            sa[induction_bucket[c0] as usize] = (p0 - 1)
3704                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3705                    << (SAINT_BIT - 1));
3706        }
3707
3708        let p1 = sa[(i - 1) as usize];
3709        if p1 > 0 {
3710            sa[(i - 1) as usize] = 0;
3711            let c1 = t[(p1 - 1) as usize] as usize;
3712            induction_bucket[c1] -= 1;
3713            sa[induction_bucket[c1] as usize] = (p1 - 1)
3714                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3715                    << (SAINT_BIT - 1));
3716        }
3717
3718        i -= 2;
3719    }
3720
3721    j -= 2 * 64 + 1;
3722    while i >= j {
3723        let p = sa[i as usize];
3724        if p > 0 {
3725            sa[i as usize] = 0;
3726            let c = t[(p - 1) as usize] as usize;
3727            induction_bucket[c] -= 1;
3728            sa[induction_bucket[c] as usize] = (p - 1)
3729                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3730                    << (SAINT_BIT - 1));
3731        }
3732        i -= 1;
3733    }
3734}
3735
3736#[allow(dead_code)]
3737fn partial_sorting_scan_right_to_left_32s_6k_omp(
3738    t: &[SaSint],
3739    sa: &mut [SaSint],
3740    n: SaSint,
3741    buckets: &mut [SaSint],
3742    first_lms_suffix: SaSint,
3743    left_suffixes_count: SaSint,
3744    mut d: SaSint,
3745    threads: SaSint,
3746    _thread_state: &mut [ThreadState],
3747) -> SaSint {
3748    let scan_start = left_suffixes_count + 1;
3749    let scan_end = n - first_lms_suffix;
3750
3751    if threads == 1 || scan_end - scan_start < 65536 {
3752        d = partial_sorting_scan_right_to_left_32s_6k(
3753            t,
3754            sa,
3755            buckets,
3756            d,
3757            scan_start,
3758            scan_end - scan_start,
3759        );
3760    } else {
3761        let mut cache = vec![ThreadCache::default(); (scan_end - scan_start) as usize];
3762        let mut block_start = scan_end;
3763        while block_start > scan_start {
3764            let block_size =
3765                (block_start - scan_start).min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3766            block_start -= block_size;
3767            d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
3768                t,
3769                sa,
3770                buckets,
3771                d,
3772                &mut cache,
3773                block_start,
3774                block_size,
3775                threads,
3776            );
3777        }
3778    }
3779
3780    d
3781}
3782
3783#[allow(dead_code)]
3784fn partial_sorting_scan_right_to_left_32s_4k_omp(
3785    t: &[SaSint],
3786    sa: &mut [SaSint],
3787    n: SaSint,
3788    k: SaSint,
3789    buckets: &mut [SaSint],
3790    mut d: SaSint,
3791    threads: SaSint,
3792    _thread_state: &mut [ThreadState],
3793) -> SaSint {
3794    if threads == 1 || n < 65536 {
3795        d = partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n);
3796    } else {
3797        let mut cache = vec![ThreadCache::default(); n as usize];
3798        let mut block_start = n;
3799        while block_start > 0 {
3800            let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3801            block_start -= block_size;
3802            d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
3803                t,
3804                sa,
3805                k,
3806                buckets,
3807                d,
3808                &mut cache,
3809                block_start,
3810                block_size,
3811                threads,
3812            );
3813        }
3814    }
3815
3816    d
3817}
3818
3819#[allow(dead_code)]
3820fn partial_sorting_scan_right_to_left_32s_1k_omp(
3821    t: &[SaSint],
3822    sa: &mut [SaSint],
3823    n: SaSint,
3824    buckets: &mut [SaSint],
3825    threads: SaSint,
3826    _thread_state: &mut [ThreadState],
3827) {
3828    if threads == 1 || n < 65536 {
3829        partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n);
3830    } else {
3831        let mut cache = vec![ThreadCache::default(); n as usize];
3832        let mut block_start = n;
3833        while block_start > 0 {
3834            let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3835            block_start -= block_size;
3836            partial_sorting_scan_right_to_left_32s_1k_block_omp(
3837                t,
3838                sa,
3839                buckets,
3840                &mut cache,
3841                block_start,
3842                block_size,
3843                threads,
3844            );
3845        }
3846    }
3847}
3848
3849#[allow(dead_code)]
3850fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
3851    t: &[SaSint],
3852    sa: &mut [SaSint],
3853    cache: &mut [ThreadCache],
3854    omp_block_start: SaSint,
3855    omp_block_size: SaSint,
3856) {
3857    let mut i = omp_block_start;
3858    let mut j = omp_block_start + omp_block_size - 64 - 1;
3859
3860    while i < j {
3861        let p0 = sa[i as usize];
3862        cache[i as usize].index = p0;
3863        let p0 = p0 & SAINT_MAX;
3864        cache[i as usize].symbol = if p0 != 0 {
3865            buckets_index4(
3866                t[(p0 - 1) as usize] as usize,
3867                usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3868            ) as SaSint
3869        } else {
3870            0
3871        };
3872
3873        let p1 = sa[(i + 1) as usize];
3874        cache[(i + 1) as usize].index = p1;
3875        let p1 = p1 & SAINT_MAX;
3876        cache[(i + 1) as usize].symbol = if p1 != 0 {
3877            buckets_index4(
3878                t[(p1 - 1) as usize] as usize,
3879                usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3880            ) as SaSint
3881        } else {
3882            0
3883        };
3884
3885        i += 2;
3886    }
3887
3888    j += 64 + 1;
3889    while i < j {
3890        let p = sa[i as usize];
3891        cache[i as usize].index = p;
3892        let p = p & SAINT_MAX;
3893        cache[i as usize].symbol = if p != 0 {
3894            buckets_index4(
3895                t[(p - 1) as usize] as usize,
3896                usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3897            ) as SaSint
3898        } else {
3899            0
3900        };
3901        i += 1;
3902    }
3903}
3904
3905#[allow(dead_code)]
3906fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
3907    t: &[SaSint],
3908    sa: &mut [SaSint],
3909    cache: &mut [ThreadCache],
3910    omp_block_start: SaSint,
3911    omp_block_size: SaSint,
3912) {
3913    let mut i = omp_block_start;
3914    let mut j = omp_block_start + omp_block_size - 64 - 1;
3915
3916    while i < j {
3917        let mut symbol0 = SAINT_MIN;
3918        let mut p0 = sa[i as usize];
3919        if p0 > 0 {
3920            cache[i as usize].index = p0;
3921            p0 &= !SUFFIX_GROUP_MARKER;
3922            symbol0 = buckets_index2(
3923                t[(p0 - 1) as usize] as usize,
3924                usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3925            ) as SaSint;
3926            p0 = 0;
3927        }
3928        cache[i as usize].symbol = symbol0;
3929        sa[i as usize] = p0 & SAINT_MAX;
3930
3931        let mut symbol1 = SAINT_MIN;
3932        let mut p1 = sa[(i + 1) as usize];
3933        if p1 > 0 {
3934            cache[(i + 1) as usize].index = p1;
3935            p1 &= !SUFFIX_GROUP_MARKER;
3936            symbol1 = buckets_index2(
3937                t[(p1 - 1) as usize] as usize,
3938                usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3939            ) as SaSint;
3940            p1 = 0;
3941        }
3942        cache[(i + 1) as usize].symbol = symbol1;
3943        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3944
3945        i += 2;
3946    }
3947
3948    j += 64 + 1;
3949    while i < j {
3950        let mut symbol = SAINT_MIN;
3951        let mut p = sa[i as usize];
3952        if p > 0 {
3953            cache[i as usize].index = p;
3954            p &= !SUFFIX_GROUP_MARKER;
3955            symbol = buckets_index2(
3956                t[(p - 1) as usize] as usize,
3957                usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3958            ) as SaSint;
3959            p = 0;
3960        }
3961        cache[i as usize].symbol = symbol;
3962        sa[i as usize] = p & SAINT_MAX;
3963        i += 1;
3964    }
3965}
3966
3967#[allow(dead_code)]
3968fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
3969    t: &[SaSint],
3970    sa: &mut [SaSint],
3971    cache: &mut [ThreadCache],
3972    omp_block_start: SaSint,
3973    omp_block_size: SaSint,
3974) {
3975    let mut i = omp_block_start;
3976    let mut j = omp_block_start + omp_block_size - 64 - 1;
3977
3978    while i < j {
3979        let mut symbol0 = SAINT_MIN;
3980        let mut p0 = sa[i as usize];
3981        if p0 > 0 {
3982            cache[i as usize].index = (p0 - 1)
3983                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3984                    << (SAINT_BIT - 1));
3985            symbol0 = t[(p0 - 1) as usize];
3986            p0 = 0;
3987        }
3988        cache[i as usize].symbol = symbol0;
3989        sa[i as usize] = p0 & SAINT_MAX;
3990
3991        let mut symbol1 = SAINT_MIN;
3992        let mut p1 = sa[(i + 1) as usize];
3993        if p1 > 0 {
3994            cache[(i + 1) as usize].index = (p1 - 1)
3995                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3996                    << (SAINT_BIT - 1));
3997            symbol1 = t[(p1 - 1) as usize];
3998            p1 = 0;
3999        }
4000        cache[(i + 1) as usize].symbol = symbol1;
4001        sa[(i + 1) as usize] = p1 & SAINT_MAX;
4002
4003        i += 2;
4004    }
4005
4006    j += 64 + 1;
4007    while i < j {
4008        let mut symbol = SAINT_MIN;
4009        let mut p = sa[i as usize];
4010        if p > 0 {
4011            cache[i as usize].index = (p - 1)
4012                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
4013                    << (SAINT_BIT - 1));
4014            symbol = t[(p - 1) as usize];
4015            p = 0;
4016        }
4017        cache[i as usize].symbol = symbol;
4018        sa[i as usize] = p & SAINT_MAX;
4019        i += 1;
4020    }
4021}
4022
4023#[allow(dead_code)]
4024fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4025    t: &[SaSint],
4026    sa: &mut [SaSint],
4027    cache: &mut [ThreadCache],
4028    omp_block_start: SaSint,
4029    omp_block_size: SaSint,
4030) {
4031    let mut i = omp_block_start;
4032    let mut j = omp_block_start + omp_block_size - 64 - 1;
4033
4034    while i < j {
4035        let p0 = sa[i as usize];
4036        cache[i as usize].index = p0;
4037        let p0 = p0 & SAINT_MAX;
4038        cache[i as usize].symbol = if p0 != 0 {
4039            buckets_index4(
4040                t[(p0 - 1) as usize] as usize,
4041                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4042            ) as SaSint
4043        } else {
4044            0
4045        };
4046
4047        let p1 = sa[(i + 1) as usize];
4048        cache[(i + 1) as usize].index = p1;
4049        let p1 = p1 & SAINT_MAX;
4050        cache[(i + 1) as usize].symbol = if p1 != 0 {
4051            buckets_index4(
4052                t[(p1 - 1) as usize] as usize,
4053                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4054            ) as SaSint
4055        } else {
4056            0
4057        };
4058
4059        i += 2;
4060    }
4061
4062    j += 64 + 1;
4063    while i < j {
4064        let p = sa[i as usize];
4065        cache[i as usize].index = p;
4066        let p = p & SAINT_MAX;
4067        cache[i as usize].symbol = if p != 0 {
4068            buckets_index4(
4069                t[(p - 1) as usize] as usize,
4070                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4071            ) as SaSint
4072        } else {
4073            0
4074        };
4075        i += 1;
4076    }
4077}
4078
4079#[allow(dead_code)]
4080fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4081    t: &[SaSint],
4082    sa: &mut [SaSint],
4083    cache: &mut [ThreadCache],
4084    omp_block_start: SaSint,
4085    omp_block_size: SaSint,
4086) {
4087    let mut i = omp_block_start;
4088    let mut j = omp_block_start + omp_block_size - 64 - 1;
4089
4090    while i < j {
4091        let mut symbol0 = SAINT_MIN;
4092        let mut p0 = sa[i as usize];
4093        if p0 > 0 {
4094            sa[i as usize] = 0;
4095            cache[i as usize].index = p0;
4096            p0 &= !SUFFIX_GROUP_MARKER;
4097            symbol0 = buckets_index2(
4098                t[(p0 - 1) as usize] as usize,
4099                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4100            ) as SaSint;
4101        }
4102        cache[i as usize].symbol = symbol0;
4103
4104        let mut symbol1 = SAINT_MIN;
4105        let mut p1 = sa[(i + 1) as usize];
4106        if p1 > 0 {
4107            sa[(i + 1) as usize] = 0;
4108            cache[(i + 1) as usize].index = p1;
4109            p1 &= !SUFFIX_GROUP_MARKER;
4110            symbol1 = buckets_index2(
4111                t[(p1 - 1) as usize] as usize,
4112                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4113            ) as SaSint;
4114        }
4115        cache[(i + 1) as usize].symbol = symbol1;
4116
4117        i += 2;
4118    }
4119
4120    j += 64 + 1;
4121    while i < j {
4122        let mut symbol = SAINT_MIN;
4123        let mut p = sa[i as usize];
4124        if p > 0 {
4125            sa[i as usize] = 0;
4126            cache[i as usize].index = p;
4127            p &= !SUFFIX_GROUP_MARKER;
4128            symbol = buckets_index2(
4129                t[(p - 1) as usize] as usize,
4130                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4131            ) as SaSint;
4132        }
4133        cache[i as usize].symbol = symbol;
4134        i += 1;
4135    }
4136}
4137
4138#[allow(dead_code)]
4139fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4140    t: &[SaSint],
4141    sa: &mut [SaSint],
4142    cache: &mut [ThreadCache],
4143    omp_block_start: SaSint,
4144    omp_block_size: SaSint,
4145) {
4146    let mut i = omp_block_start;
4147    let mut j = omp_block_start + omp_block_size - 64 - 1;
4148
4149    while i < j {
4150        let mut symbol0 = SAINT_MIN;
4151        let p0 = sa[i as usize];
4152        if p0 > 0 {
4153            sa[i as usize] = 0;
4154            cache[i as usize].index = (p0 - 1)
4155                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
4156                    << (SAINT_BIT - 1));
4157            symbol0 = t[(p0 - 1) as usize];
4158        }
4159        cache[i as usize].symbol = symbol0;
4160
4161        let mut symbol1 = SAINT_MIN;
4162        let p1 = sa[(i + 1) as usize];
4163        if p1 > 0 {
4164            sa[(i + 1) as usize] = 0;
4165            cache[(i + 1) as usize].index = (p1 - 1)
4166                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
4167                    << (SAINT_BIT - 1));
4168            symbol1 = t[(p1 - 1) as usize];
4169        }
4170        cache[(i + 1) as usize].symbol = symbol1;
4171
4172        i += 2;
4173    }
4174
4175    j += 64 + 1;
4176    while i < j {
4177        let mut symbol = SAINT_MIN;
4178        let p = sa[i as usize];
4179        if p > 0 {
4180            sa[i as usize] = 0;
4181            cache[i as usize].index = (p - 1)
4182                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
4183                    << (SAINT_BIT - 1));
4184            symbol = t[(p - 1) as usize];
4185        }
4186        cache[i as usize].symbol = symbol;
4187        i += 1;
4188    }
4189}
4190
4191#[allow(dead_code)]
4192fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
4193    t: &[SaSint],
4194    buckets: &mut [SaSint],
4195    mut d: SaSint,
4196    cache: &mut [ThreadCache],
4197    omp_block_start: SaSint,
4198    omp_block_size: SaSint,
4199) -> SaSint {
4200    let mut i = omp_block_start;
4201    let omp_block_end = omp_block_start + omp_block_size;
4202    let mut j = omp_block_end - 64 - 1;
4203
4204    while i < j {
4205        let v0 = cache[i as usize].symbol as usize;
4206        let p0 = cache[i as usize].index;
4207        d += SaSint::from(p0 < 0);
4208        cache[i as usize].symbol = buckets[v0];
4209        buckets[v0] += 1;
4210        cache[i as usize].index =
4211            (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4212        buckets[2 + v0] = d;
4213        if cache[i as usize].symbol < omp_block_end {
4214            let s = cache[i as usize].symbol as usize;
4215            let q = cache[i as usize].index & SAINT_MAX;
4216            cache[s].index = cache[i as usize].index;
4217            cache[s].symbol = buckets_index4(
4218                t[(q - 1) as usize] as usize,
4219                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4220            ) as SaSint;
4221        }
4222
4223        let v1 = cache[(i + 1) as usize].symbol as usize;
4224        let p1 = cache[(i + 1) as usize].index;
4225        d += SaSint::from(p1 < 0);
4226        cache[(i + 1) as usize].symbol = buckets[v1];
4227        buckets[v1] += 1;
4228        cache[(i + 1) as usize].index =
4229            (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4230        buckets[2 + v1] = d;
4231        if cache[(i + 1) as usize].symbol < omp_block_end {
4232            let s = cache[(i + 1) as usize].symbol as usize;
4233            let q = cache[(i + 1) as usize].index & SAINT_MAX;
4234            cache[s].index = cache[(i + 1) as usize].index;
4235            cache[s].symbol = buckets_index4(
4236                t[(q - 1) as usize] as usize,
4237                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4238            ) as SaSint;
4239        }
4240
4241        i += 2;
4242    }
4243
4244    j += 64 + 1;
4245    while i < j {
4246        let v = cache[i as usize].symbol as usize;
4247        let p = cache[i as usize].index;
4248        d += SaSint::from(p < 0);
4249        cache[i as usize].symbol = buckets[v];
4250        buckets[v] += 1;
4251        cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4252        buckets[2 + v] = d;
4253        if cache[i as usize].symbol < omp_block_end {
4254            let s = cache[i as usize].symbol as usize;
4255            let q = cache[i as usize].index & SAINT_MAX;
4256            cache[s].index = cache[i as usize].index;
4257            cache[s].symbol = buckets_index4(
4258                t[(q - 1) as usize] as usize,
4259                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4260            ) as SaSint;
4261        }
4262        i += 1;
4263    }
4264
4265    d
4266}
4267
4268#[allow(dead_code)]
4269fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
4270    t: &[SaSint],
4271    k: SaSint,
4272    buckets: &mut [SaSint],
4273    mut d: SaSint,
4274    cache: &mut [ThreadCache],
4275    omp_block_start: SaSint,
4276    omp_block_size: SaSint,
4277) -> SaSint {
4278    let k = k as usize;
4279    let mut i = omp_block_start;
4280    let omp_block_end = omp_block_start + omp_block_size;
4281    let mut j = omp_block_end - 64 - 1;
4282
4283    while i < j {
4284        for current in [i, i + 1] {
4285            let v = cache[current as usize].symbol;
4286            if v >= 0 {
4287                let p = cache[current as usize].index;
4288                d += p >> (SUFFIX_GROUP_BIT - 1);
4289                let bucket_index = (v >> 1) as usize;
4290                let v_usize = v as usize;
4291                cache[current as usize].symbol = buckets[2 * k + bucket_index];
4292                buckets[2 * k + bucket_index] += 1;
4293                cache[current as usize].index = (p - 1)
4294                    | ((v & 1) << (SAINT_BIT - 1))
4295                    | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4296                buckets[v_usize] = d;
4297                if cache[current as usize].symbol < omp_block_end {
4298                    let ni = cache[current as usize].symbol as usize;
4299                    let mut np = cache[current as usize].index;
4300                    if np > 0 {
4301                        cache[ni].index = np;
4302                        np &= !SUFFIX_GROUP_MARKER;
4303                        cache[ni].symbol = buckets_index2(
4304                            t[(np - 1) as usize] as usize,
4305                            usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4306                        ) as SaSint;
4307                        np = 0;
4308                    }
4309                    cache[current as usize].index = np & SAINT_MAX;
4310                }
4311            }
4312        }
4313        i += 2;
4314    }
4315
4316    j += 64 + 1;
4317    while i < j {
4318        let v = cache[i as usize].symbol;
4319        if v >= 0 {
4320            let p = cache[i as usize].index;
4321            d += p >> (SUFFIX_GROUP_BIT - 1);
4322            let bucket_index = (v >> 1) as usize;
4323            let v_usize = v as usize;
4324            cache[i as usize].symbol = buckets[2 * k + bucket_index];
4325            buckets[2 * k + bucket_index] += 1;
4326            cache[i as usize].index = (p - 1)
4327                | ((v & 1) << (SAINT_BIT - 1))
4328                | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4329            buckets[v_usize] = d;
4330            if cache[i as usize].symbol < omp_block_end {
4331                let ni = cache[i as usize].symbol as usize;
4332                let mut np = cache[i as usize].index;
4333                if np > 0 {
4334                    cache[ni].index = np;
4335                    np &= !SUFFIX_GROUP_MARKER;
4336                    cache[ni].symbol = buckets_index2(
4337                        t[(np - 1) as usize] as usize,
4338                        usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4339                    ) as SaSint;
4340                    np = 0;
4341                }
4342                cache[i as usize].index = np & SAINT_MAX;
4343            }
4344        }
4345        i += 1;
4346    }
4347
4348    d
4349}
4350
4351#[allow(dead_code)]
4352fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
4353    t: &[SaSint],
4354    induction_bucket: &mut [SaSint],
4355    cache: &mut [ThreadCache],
4356    omp_block_start: SaSint,
4357    omp_block_size: SaSint,
4358) {
4359    let mut i = omp_block_start;
4360    let omp_block_end = omp_block_start + omp_block_size;
4361    let mut j = omp_block_end - 64 - 1;
4362
4363    while i < j {
4364        for current in [i, i + 1] {
4365            let v = cache[current as usize].symbol;
4366            if v >= 0 {
4367                cache[current as usize].symbol = induction_bucket[v as usize];
4368                induction_bucket[v as usize] += 1;
4369                if cache[current as usize].symbol < omp_block_end {
4370                    let ni = cache[current as usize].symbol as usize;
4371                    let mut np = cache[current as usize].index;
4372                    if np > 0 {
4373                        cache[ni].index = (np - 1)
4374                            | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize])
4375                                as SaSint)
4376                                << (SAINT_BIT - 1));
4377                        cache[ni].symbol = t[(np - 1) as usize];
4378                        np = 0;
4379                    }
4380                    cache[current as usize].index = np & SAINT_MAX;
4381                }
4382            }
4383        }
4384        i += 2;
4385    }
4386
4387    j = omp_block_end;
4388    while i < j {
4389        let v = cache[i as usize].symbol;
4390        if v >= 0 {
4391            cache[i as usize].symbol = induction_bucket[v as usize];
4392            induction_bucket[v as usize] += 1;
4393            if cache[i as usize].symbol < omp_block_end {
4394                let ni = cache[i as usize].symbol as usize;
4395                let mut np = cache[i as usize].index;
4396                if np > 0 {
4397                    cache[ni].index = (np - 1)
4398                        | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]) as SaSint)
4399                            << (SAINT_BIT - 1));
4400                    cache[ni].symbol = t[(np - 1) as usize];
4401                    np = 0;
4402                }
4403                cache[i as usize].index = np & SAINT_MAX;
4404            }
4405        }
4406        i += 1;
4407    }
4408}
4409
4410#[allow(dead_code)]
4411fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4412    t: &[SaSint],
4413    buckets: &mut [SaSint],
4414    mut d: SaSint,
4415    cache: &mut [ThreadCache],
4416    omp_block_start: SaSint,
4417    omp_block_size: SaSint,
4418) -> SaSint {
4419    let mut i = omp_block_start + omp_block_size - 1;
4420    let mut j = omp_block_start + 64 + 1;
4421
4422    while i >= j {
4423        let v0 = cache[i as usize].symbol as usize;
4424        let p0 = cache[i as usize].index;
4425        d += SaSint::from(p0 < 0);
4426        buckets[v0] -= 1;
4427        cache[i as usize].symbol = buckets[v0];
4428        cache[i as usize].index =
4429            (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4430        buckets[2 + v0] = d;
4431        if cache[i as usize].symbol >= omp_block_start {
4432            let s = cache[i as usize].symbol as usize;
4433            let q = cache[i as usize].index & SAINT_MAX;
4434            cache[s].index = cache[i as usize].index;
4435            cache[s].symbol = buckets_index4(
4436                t[(q - 1) as usize] as usize,
4437                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4438            ) as SaSint;
4439        }
4440
4441        let v1 = cache[(i - 1) as usize].symbol as usize;
4442        let p1 = cache[(i - 1) as usize].index;
4443        d += SaSint::from(p1 < 0);
4444        buckets[v1] -= 1;
4445        cache[(i - 1) as usize].symbol = buckets[v1];
4446        cache[(i - 1) as usize].index =
4447            (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4448        buckets[2 + v1] = d;
4449        if cache[(i - 1) as usize].symbol >= omp_block_start {
4450            let s = cache[(i - 1) as usize].symbol as usize;
4451            let q = cache[(i - 1) as usize].index & SAINT_MAX;
4452            cache[s].index = cache[(i - 1) as usize].index;
4453            cache[s].symbol = buckets_index4(
4454                t[(q - 1) as usize] as usize,
4455                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4456            ) as SaSint;
4457        }
4458
4459        i -= 2;
4460    }
4461
4462    j -= 64 + 1;
4463    while i >= j {
4464        let v = cache[i as usize].symbol as usize;
4465        let p = cache[i as usize].index;
4466        d += SaSint::from(p < 0);
4467        buckets[v] -= 1;
4468        cache[i as usize].symbol = buckets[v];
4469        cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4470        buckets[2 + v] = d;
4471        if cache[i as usize].symbol >= omp_block_start {
4472            let s = cache[i as usize].symbol as usize;
4473            let q = cache[i as usize].index & SAINT_MAX;
4474            cache[s].index = cache[i as usize].index;
4475            cache[s].symbol = buckets_index4(
4476                t[(q - 1) as usize] as usize,
4477                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4478            ) as SaSint;
4479        }
4480        i -= 1;
4481    }
4482
4483    d
4484}
4485
4486#[allow(dead_code)]
4487fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4488    t: &[SaSint],
4489    k: SaSint,
4490    buckets: &mut [SaSint],
4491    mut d: SaSint,
4492    cache: &mut [ThreadCache],
4493    omp_block_start: SaSint,
4494    omp_block_size: SaSint,
4495) -> SaSint {
4496    let k = k as usize;
4497    let mut i = omp_block_start + omp_block_size - 1;
4498    let mut j = omp_block_start + 64 + 1;
4499
4500    while i >= j {
4501        for current in [i, i - 1] {
4502            let v = cache[current as usize].symbol;
4503            if v >= 0 {
4504                let p = cache[current as usize].index;
4505                d += p >> (SUFFIX_GROUP_BIT - 1);
4506                let bucket_index = (v >> 1) as usize;
4507                let v_usize = v as usize;
4508                buckets[3 * k + bucket_index] -= 1;
4509                cache[current as usize].symbol = buckets[3 * k + bucket_index];
4510                cache[current as usize].index = (p - 1)
4511                    | ((v & 1) << (SAINT_BIT - 1))
4512                    | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4513                buckets[v_usize] = d;
4514                if cache[current as usize].symbol >= omp_block_start {
4515                    let ni = cache[current as usize].symbol as usize;
4516                    let mut np = cache[current as usize].index;
4517                    if np > 0 {
4518                        cache[current as usize].index = 0;
4519                        cache[ni].index = np;
4520                        np &= !SUFFIX_GROUP_MARKER;
4521                        cache[ni].symbol = buckets_index2(
4522                            t[(np - 1) as usize] as usize,
4523                            usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4524                        ) as SaSint;
4525                    }
4526                }
4527            }
4528        }
4529        i -= 2;
4530    }
4531
4532    j -= 64 + 1;
4533    while i >= j {
4534        let v = cache[i as usize].symbol;
4535        if v >= 0 {
4536            let p = cache[i as usize].index;
4537            d += p >> (SUFFIX_GROUP_BIT - 1);
4538            let bucket_index = (v >> 1) as usize;
4539            let v_usize = v as usize;
4540            buckets[3 * k + bucket_index] -= 1;
4541            cache[i as usize].symbol = buckets[3 * k + bucket_index];
4542            cache[i as usize].index = (p - 1)
4543                | ((v & 1) << (SAINT_BIT - 1))
4544                | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4545            buckets[v_usize] = d;
4546            if cache[i as usize].symbol >= omp_block_start {
4547                let ni = cache[i as usize].symbol as usize;
4548                let mut np = cache[i as usize].index;
4549                if np > 0 {
4550                    cache[i as usize].index = 0;
4551                    cache[ni].index = np;
4552                    np &= !SUFFIX_GROUP_MARKER;
4553                    cache[ni].symbol = buckets_index2(
4554                        t[(np - 1) as usize] as usize,
4555                        usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4556                    ) as SaSint;
4557                }
4558            }
4559        }
4560        i -= 1;
4561    }
4562
4563    d
4564}
4565
4566#[allow(dead_code)]
4567fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4568    t: &[SaSint],
4569    induction_bucket: &mut [SaSint],
4570    cache: &mut [ThreadCache],
4571    omp_block_start: SaSint,
4572    omp_block_size: SaSint,
4573) {
4574    let mut i = omp_block_start + omp_block_size - 1;
4575    let mut j = omp_block_start + 64 + 1;
4576
4577    while i >= j {
4578        for current in [i, i - 1] {
4579            let v = cache[current as usize].symbol;
4580            if v >= 0 {
4581                induction_bucket[v as usize] -= 1;
4582                cache[current as usize].symbol = induction_bucket[v as usize];
4583                if cache[current as usize].symbol >= omp_block_start {
4584                    let ni = cache[current as usize].symbol as usize;
4585                    let np = cache[current as usize].index;
4586                    if np > 0 {
4587                        cache[current as usize].index = 0;
4588                        cache[ni].index = (np - 1)
4589                            | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize])
4590                                as SaSint)
4591                                << (SAINT_BIT - 1));
4592                        cache[ni].symbol = t[(np - 1) as usize];
4593                    }
4594                }
4595            }
4596        }
4597        i -= 2;
4598    }
4599
4600    j -= 64 + 1;
4601    while i >= j {
4602        let v = cache[i as usize].symbol;
4603        if v >= 0 {
4604            induction_bucket[v as usize] -= 1;
4605            cache[i as usize].symbol = induction_bucket[v as usize];
4606            if cache[i as usize].symbol >= omp_block_start {
4607                let ni = cache[i as usize].symbol as usize;
4608                let np = cache[i as usize].index;
4609                if np > 0 {
4610                    cache[i as usize].index = 0;
4611                    cache[ni].index = (np - 1)
4612                        | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]) as SaSint)
4613                            << (SAINT_BIT - 1));
4614                    cache[ni].symbol = t[(np - 1) as usize];
4615                }
4616            }
4617        }
4618        i -= 1;
4619    }
4620}
4621
4622#[allow(dead_code)]
4623fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
4624    t: &[SaSint],
4625    sa: &mut [SaSint],
4626    buckets: &mut [SaSint],
4627    d: SaSint,
4628    cache: &mut [ThreadCache],
4629    block_start: SaSint,
4630    block_size: SaSint,
4631    threads: SaSint,
4632) -> SaSint {
4633    if block_size <= 0 {
4634        return d;
4635    }
4636    if threads == 1 || block_size < 16_384 {
4637        return partial_sorting_scan_left_to_right_32s_6k(
4638            t,
4639            sa,
4640            buckets,
4641            d,
4642            block_start,
4643            block_size,
4644        );
4645    }
4646
4647    let threads_usize = usize::try_from(threads)
4648        .expect("threads must be non-negative")
4649        .max(1);
4650    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4651    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4652    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4653    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4654
4655    for omp_thread_num in 0..omp_num_threads {
4656        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4657            omp_block_stride
4658        } else {
4659            block_size_usize - omp_thread_num * omp_block_stride
4660        };
4661        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4662        if omp_block_size == 0 {
4663            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4664        }
4665        partial_sorting_scan_left_to_right_32s_6k_block_gather(
4666            t,
4667            sa,
4668            &mut cache[omp_thread_num * omp_block_stride
4669                ..omp_thread_num * omp_block_stride + omp_block_size],
4670            omp_block_start as SaSint,
4671            omp_block_size as SaSint,
4672        );
4673    }
4674
4675    let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
4676        t,
4677        buckets,
4678        d,
4679        &mut cache[..block_size_usize],
4680        block_start,
4681        block_size,
4682    );
4683    place_cached_suffixes(sa, &cache[..block_size_usize], 0, block_size);
4684    d
4685}
4686
4687#[allow(dead_code)]
4688fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
4689    t: &[SaSint],
4690    sa: &mut [SaSint],
4691    k: SaSint,
4692    buckets: &mut [SaSint],
4693    d: SaSint,
4694    cache: &mut [ThreadCache],
4695    block_start: SaSint,
4696    block_size: SaSint,
4697    threads: SaSint,
4698) -> SaSint {
4699    if block_size <= 0 {
4700        return d;
4701    }
4702    if threads == 1 || block_size < 16_384 {
4703        return partial_sorting_scan_left_to_right_32s_4k(
4704            t,
4705            sa,
4706            k,
4707            buckets,
4708            d,
4709            block_start,
4710            block_size,
4711        );
4712    }
4713
4714    let threads_usize = usize::try_from(threads)
4715        .expect("threads must be non-negative")
4716        .max(1);
4717    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4718    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4719    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4720    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4721
4722    for omp_thread_num in 0..omp_num_threads {
4723        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4724            omp_block_stride
4725        } else {
4726            block_size_usize - omp_thread_num * omp_block_stride
4727        };
4728        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4729        if omp_block_size == 0 {
4730            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4731        }
4732        partial_sorting_scan_left_to_right_32s_4k_block_gather(
4733            t,
4734            sa,
4735            &mut cache[omp_thread_num * omp_block_stride
4736                ..omp_thread_num * omp_block_stride + omp_block_size],
4737            omp_block_start as SaSint,
4738            omp_block_size as SaSint,
4739        );
4740    }
4741
4742    let cache = &mut cache[..block_size_usize];
4743    let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
4744        t,
4745        k,
4746        buckets,
4747        d,
4748        cache,
4749        block_start,
4750        block_size,
4751    );
4752    for entry in cache.iter() {
4753        if entry.symbol >= 0 {
4754            sa[entry.symbol as usize] = entry.index;
4755        }
4756    }
4757    d
4758}
4759
4760#[allow(dead_code)]
4761fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
4762    t: &[SaSint],
4763    sa: &mut [SaSint],
4764    buckets: &mut [SaSint],
4765    cache: &mut [ThreadCache],
4766    block_start: SaSint,
4767    block_size: SaSint,
4768    threads: SaSint,
4769) {
4770    if block_size <= 0 {
4771        return;
4772    }
4773    if threads == 1 || block_size < 16_384 {
4774        partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
4775        return;
4776    }
4777
4778    let threads_usize = usize::try_from(threads)
4779        .expect("threads must be non-negative")
4780        .max(1);
4781    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4782    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4783    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4784    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4785
4786    for omp_thread_num in 0..omp_num_threads {
4787        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4788            omp_block_stride
4789        } else {
4790            block_size_usize - omp_thread_num * omp_block_stride
4791        };
4792        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4793        if omp_block_size == 0 {
4794            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4795        }
4796        partial_sorting_scan_left_to_right_32s_1k_block_gather(
4797            t,
4798            sa,
4799            &mut cache[omp_thread_num * omp_block_stride
4800                ..omp_thread_num * omp_block_stride + omp_block_size],
4801            omp_block_start as SaSint,
4802            omp_block_size as SaSint,
4803        );
4804    }
4805
4806    let cache = &mut cache[..block_size_usize];
4807    partial_sorting_scan_left_to_right_32s_1k_block_sort(
4808        t,
4809        buckets,
4810        cache,
4811        block_start,
4812        block_size,
4813    );
4814    compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
4815}
4816
4817#[allow(dead_code)]
4818fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4819    t: &[SaSint],
4820    sa: &mut [SaSint],
4821    buckets: &mut [SaSint],
4822    mut d: SaSint,
4823    cache: &mut [ThreadCache],
4824    block_start: SaSint,
4825    block_size: SaSint,
4826    threads: SaSint,
4827) -> SaSint {
4828    if block_size <= 0 {
4829        return d;
4830    }
4831    if threads == 1 || block_size < 16_384 {
4832        return partial_sorting_scan_right_to_left_32s_6k(
4833            t,
4834            sa,
4835            buckets,
4836            d,
4837            block_start,
4838            block_size,
4839        );
4840    }
4841
4842    let threads_usize = usize::try_from(threads)
4843        .expect("threads must be non-negative")
4844        .max(1);
4845    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4846    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4847    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4848    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4849
4850    for omp_thread_num in 0..omp_num_threads {
4851        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4852            omp_block_stride
4853        } else {
4854            block_size_usize - omp_thread_num * omp_block_stride
4855        };
4856        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4857        if omp_block_size == 0 {
4858            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4859        }
4860        partial_sorting_scan_right_to_left_32s_6k_block_gather(
4861            t,
4862            sa,
4863            &mut cache[omp_thread_num * omp_block_stride
4864                ..omp_thread_num * omp_block_stride + omp_block_size],
4865            omp_block_start as SaSint,
4866            omp_block_size as SaSint,
4867        );
4868    }
4869
4870    d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4871        t,
4872        buckets,
4873        d,
4874        &mut cache[..block_size_usize],
4875        block_start,
4876        block_size,
4877    );
4878    for omp_thread_num in 0..omp_num_threads {
4879        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4880            omp_block_stride
4881        } else {
4882            block_size_usize - omp_thread_num * omp_block_stride
4883        };
4884        let cache_start = omp_thread_num * omp_block_stride;
4885        if omp_block_size == 0 {
4886            omp_block_size = block_size_usize - cache_start;
4887        }
4888        for entry in &cache[cache_start..cache_start + omp_block_size] {
4889            sa[entry.symbol as usize] = entry.index;
4890        }
4891    }
4892    d
4893}
4894
4895#[allow(dead_code)]
4896fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4897    t: &[SaSint],
4898    sa: &mut [SaSint],
4899    k: SaSint,
4900    buckets: &mut [SaSint],
4901    mut d: SaSint,
4902    cache: &mut [ThreadCache],
4903    block_start: SaSint,
4904    block_size: SaSint,
4905    threads: SaSint,
4906) -> SaSint {
4907    if block_size <= 0 {
4908        return d;
4909    }
4910    if threads == 1 || block_size < 16_384 {
4911        return partial_sorting_scan_right_to_left_32s_4k(
4912            t,
4913            sa,
4914            k,
4915            buckets,
4916            d,
4917            block_start,
4918            block_size,
4919        );
4920    }
4921
4922    let threads_usize = usize::try_from(threads)
4923        .expect("threads must be non-negative")
4924        .max(1);
4925    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4926    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4927    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4928    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4929
4930    for omp_thread_num in 0..omp_num_threads {
4931        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4932            omp_block_stride
4933        } else {
4934            block_size_usize - omp_thread_num * omp_block_stride
4935        };
4936        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4937        if omp_block_size == 0 {
4938            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4939        }
4940        partial_sorting_scan_right_to_left_32s_4k_block_gather(
4941            t,
4942            sa,
4943            &mut cache[omp_thread_num * omp_block_stride
4944                ..omp_thread_num * omp_block_stride + omp_block_size],
4945            omp_block_start as SaSint,
4946            omp_block_size as SaSint,
4947        );
4948    }
4949
4950    d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4951        t,
4952        k,
4953        buckets,
4954        d,
4955        &mut cache[..block_size_usize],
4956        block_start,
4957        block_size,
4958    );
4959    let mut write = 0usize;
4960    for read in 0..block_size_usize {
4961        let entry = cache[read];
4962        if entry.symbol >= 0 {
4963            cache[write] = entry;
4964            write += 1;
4965        }
4966    }
4967    for entry in &cache[..write] {
4968        sa[entry.symbol as usize] = entry.index;
4969    }
4970    d
4971}
4972
4973#[allow(dead_code)]
4974fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4975    t: &[SaSint],
4976    sa: &mut [SaSint],
4977    buckets: &mut [SaSint],
4978    cache: &mut [ThreadCache],
4979    block_start: SaSint,
4980    block_size: SaSint,
4981    threads: SaSint,
4982) {
4983    if block_size <= 0 {
4984        return;
4985    }
4986    if threads == 1 || block_size < 16_384 {
4987        partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4988        return;
4989    }
4990
4991    let threads_usize = usize::try_from(threads)
4992        .expect("threads must be non-negative")
4993        .max(1);
4994    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4995    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4996    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4997    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4998
4999    for omp_thread_num in 0..omp_num_threads {
5000        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5001            omp_block_stride
5002        } else {
5003            block_size_usize - omp_thread_num * omp_block_stride
5004        };
5005        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5006        if omp_block_size == 0 {
5007            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
5008        }
5009        partial_sorting_scan_right_to_left_32s_1k_block_gather(
5010            t,
5011            sa,
5012            &mut cache[omp_thread_num * omp_block_stride
5013                ..omp_thread_num * omp_block_stride + omp_block_size],
5014            omp_block_start as SaSint,
5015            omp_block_size as SaSint,
5016        );
5017    }
5018
5019    let cache = &mut cache[..block_size_usize];
5020    partial_sorting_scan_right_to_left_32s_1k_block_sort(
5021        t,
5022        buckets,
5023        cache,
5024        block_start,
5025        block_size,
5026    );
5027    compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
5028}
5029
5030#[allow(dead_code)]
5031fn partial_sorting_gather_lms_suffixes_32s_4k(
5032    sa: &mut [SaSint],
5033    omp_block_start: SaSint,
5034    omp_block_size: SaSint,
5035) -> SaSint {
5036    let mut i = omp_block_start;
5037    let mut j = omp_block_start + omp_block_size - 3;
5038    let mut l = omp_block_start;
5039
5040    while i < j {
5041        let s0 = sa[i as usize] as SaUint;
5042        sa[l as usize] = (s0.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5043            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5044        l += SaSint::from((s0 as SaSint) < 0);
5045
5046        let s1 = sa[(i + 1) as usize] as SaUint;
5047        sa[l as usize] = (s1.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5048            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5049        l += SaSint::from((s1 as SaSint) < 0);
5050
5051        let s2 = sa[(i + 2) as usize] as SaUint;
5052        sa[l as usize] = (s2.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5053            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5054        l += SaSint::from((s2 as SaSint) < 0);
5055
5056        let s3 = sa[(i + 3) as usize] as SaUint;
5057        sa[l as usize] = (s3.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5058            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5059        l += SaSint::from((s3 as SaSint) < 0);
5060
5061        i += 4;
5062    }
5063
5064    j += 3;
5065    while i < j {
5066        let s = sa[i as usize] as SaUint;
5067        sa[l as usize] = (s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5068            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5069        l += SaSint::from((s as SaSint) < 0);
5070        i += 1;
5071    }
5072
5073    l
5074}
5075
5076#[allow(dead_code)]
5077fn partial_sorting_gather_lms_suffixes_32s_1k(
5078    sa: &mut [SaSint],
5079    omp_block_start: SaSint,
5080    omp_block_size: SaSint,
5081) -> SaSint {
5082    let mut i = omp_block_start;
5083    let mut j = omp_block_start + omp_block_size - 3;
5084    let mut l = omp_block_start;
5085
5086    while i < j {
5087        let s0 = sa[i as usize];
5088        sa[l as usize] = s0 & SAINT_MAX;
5089        l += SaSint::from(s0 < 0);
5090
5091        let s1 = sa[(i + 1) as usize];
5092        sa[l as usize] = s1 & SAINT_MAX;
5093        l += SaSint::from(s1 < 0);
5094
5095        let s2 = sa[(i + 2) as usize];
5096        sa[l as usize] = s2 & SAINT_MAX;
5097        l += SaSint::from(s2 < 0);
5098
5099        let s3 = sa[(i + 3) as usize];
5100        sa[l as usize] = s3 & SAINT_MAX;
5101        l += SaSint::from(s3 < 0);
5102
5103        i += 4;
5104    }
5105
5106    j += 3;
5107    while i < j {
5108        let s = sa[i as usize];
5109        sa[l as usize] = s & SAINT_MAX;
5110        l += SaSint::from(s < 0);
5111        i += 1;
5112    }
5113
5114    l
5115}
5116
5117#[allow(dead_code)]
5118fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5119    sa: &mut [SaSint],
5120    n: SaSint,
5121    threads: SaSint,
5122    thread_state: &mut [ThreadState],
5123) {
5124    let n_usize = usize::try_from(n).expect("n must be non-negative");
5125    let thread_count = if threads > 1 && n >= 65_536 {
5126        usize::try_from(threads)
5127            .expect("threads must be non-negative")
5128            .min(thread_state.len())
5129            .max(1)
5130    } else {
5131        1
5132    };
5133
5134    if thread_count == 1 {
5135        let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n);
5136        return;
5137    }
5138
5139    let block_stride = (n_usize / thread_count) & !15usize;
5140    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5141        let block_start = thread * block_stride;
5142        let block_size = if thread + 1 < thread_count {
5143            block_stride
5144        } else {
5145            n_usize - block_start
5146        };
5147        state.position = block_start as SaSint;
5148        state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5149            sa,
5150            block_start as SaSint,
5151            block_size as SaSint,
5152        ) - block_start as SaSint;
5153    }
5154
5155    let mut position = 0usize;
5156    for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5157        let count = usize::try_from(state.count).expect("count must be non-negative");
5158        let src = usize::try_from(state.position).expect("position must be non-negative");
5159        if thread > 0 && count > 0 {
5160            sa.copy_within(src..src + count, position);
5161        }
5162        position += count;
5163    }
5164}
5165
5166#[allow(dead_code)]
5167fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5168    sa: &mut [SaSint],
5169    n: SaSint,
5170    threads: SaSint,
5171    thread_state: &mut [ThreadState],
5172) {
5173    let n_usize = usize::try_from(n).expect("n must be non-negative");
5174    let thread_count = if threads > 1 && n >= 65_536 {
5175        usize::try_from(threads)
5176            .expect("threads must be non-negative")
5177            .min(thread_state.len())
5178            .max(1)
5179    } else {
5180        1
5181    };
5182
5183    if thread_count == 1 {
5184        let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n);
5185        return;
5186    }
5187
5188    let block_stride = (n_usize / thread_count) & !15usize;
5189    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5190        let block_start = thread * block_stride;
5191        let block_size = if thread + 1 < thread_count {
5192            block_stride
5193        } else {
5194            n_usize - block_start
5195        };
5196        state.position = block_start as SaSint;
5197        state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5198            sa,
5199            block_start as SaSint,
5200            block_size as SaSint,
5201        ) - block_start as SaSint;
5202    }
5203
5204    let mut position = 0usize;
5205    for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5206        let count = usize::try_from(state.count).expect("count must be non-negative");
5207        let src = usize::try_from(state.position).expect("position must be non-negative");
5208        if thread > 0 && count > 0 {
5209            sa.copy_within(src..src + count, position);
5210        }
5211        position += count;
5212    }
5213}
5214
5215#[allow(dead_code)]
5216fn partial_gsa_scan_right_to_left_16u(
5217    t: &[u16],
5218    sa: &mut [SaSint],
5219    buckets: &mut [SaSint],
5220    mut d: SaSint,
5221    omp_block_start: SaSint,
5222    omp_block_size: SaSint,
5223) -> SaSint {
5224    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5225    let mut j = (omp_block_start + 64 + 1) as isize;
5226    while i >= j {
5227        let mut p0 = sa[i as usize];
5228        d += SaSint::from(p0 < 0);
5229        p0 &= SAINT_MAX;
5230        let v0 = buckets_index2(
5231            t[(p0 - 1) as usize] as usize,
5232            usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
5233        );
5234        if v0 != 1 {
5235            let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
5236                SAINT_MIN
5237            } else {
5238                0
5239            };
5240            buckets[v0] -= 1;
5241            sa[buckets[v0] as usize] = (p0 - 1) | mark0;
5242            buckets[2 * ALPHABET_SIZE + v0] = d;
5243        }
5244
5245        let mut p1 = sa[(i - 1) as usize];
5246        d += SaSint::from(p1 < 0);
5247        p1 &= SAINT_MAX;
5248        let v1 = buckets_index2(
5249            t[(p1 - 1) as usize] as usize,
5250            usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
5251        );
5252        if v1 != 1 {
5253            let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
5254                SAINT_MIN
5255            } else {
5256                0
5257            };
5258            buckets[v1] -= 1;
5259            sa[buckets[v1] as usize] = (p1 - 1) | mark1;
5260            buckets[2 * ALPHABET_SIZE + v1] = d;
5261        }
5262
5263        i -= 2;
5264    }
5265
5266    j -= 64 + 1;
5267    while i >= j {
5268        let mut p = sa[i as usize];
5269        d += SaSint::from(p < 0);
5270        p &= SAINT_MAX;
5271        let v = buckets_index2(
5272            t[(p - 1) as usize] as usize,
5273            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5274        );
5275        if v != 1 {
5276            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5277                SAINT_MIN
5278            } else {
5279                0
5280            };
5281            buckets[v] -= 1;
5282            sa[buckets[v] as usize] = (p - 1) | mark;
5283            buckets[2 * ALPHABET_SIZE + v] = d;
5284        }
5285        i -= 1;
5286    }
5287
5288    d
5289}
5290
/// Blocked right-to-left GSA scan over
/// `sa[block_start..block_start + block_size]`.
///
/// When `threads <= 1`, the block is smaller than `64 * max(k, 256)`, or fewer
/// than two `thread_state` entries are available, the work is delegated to
/// `partial_gsa_scan_right_to_left_16u`. Otherwise the block is split into one
/// lane per thread state and processed in three phases: per-lane prepare,
/// a merge of the per-lane bucket tables into `buckets` (last lane first), and
/// per-lane placement into `sa`.
///
/// Returns the updated `d`.
#[allow(dead_code)]
fn partial_gsa_scan_right_to_left_16u_block_omp(
    t: &[u16],
    sa: &mut [SaSint],
    k: SaSint,
    buckets: &mut [SaSint],
    d: SaSint,
    block_start: SaSint,
    block_size: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    // Lane count: bounded by both `threads` and the number of thread states.
    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
        usize::try_from(threads)
            .expect("threads must be non-negative")
            .min(thread_state.len())
    } else {
        1
    };
    if thread_count <= 1 {
        return partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
    }

    // Only the first `2 * k` slots of each bucket table are merged below.
    let width = 2 * k as usize;
    let distinct_offset = 2 * ALPHABET_SIZE;
    // Lane stride rounded down to a multiple of 16; the last lane takes the rest.
    let block_stride = (block_size / thread_count as SaSint) & !15;

    // Phase 1: run the prepare step for every lane, one lane after another.
    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
        let local_start = thread as SaSint * block_stride;
        let local_size = if thread + 1 < thread_count {
            block_stride
        } else {
            block_size - local_start
        };
        // The prepare helper reports its cache entry count through a scratch
        // `ThreadState`; it is copied into `state.count` afterwards.
        let mut local_state = ThreadState::default();
        state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
            t,
            sa,
            k,
            &mut state.buckets,
            &mut state.cache,
            block_start + local_start,
            local_size,
            &mut local_state,
        );
        state.count = local_state.cache_entries as SaSint;
    }

    // Phase 2: fold the per-lane tables into the shared ones, last lane first.
    let mut next_d = d;
    for state in thread_state.iter_mut().take(thread_count).rev() {
        // Shared slot drops by the lane's value; the lane keeps the shared
        // value from before the drop.
        for c in 0..width {
            let a = buckets[c];
            let b = state.buckets[c];
            buckets[c] = a - b;
            state.buckets[c] = a;
        }

        next_d -= 1;
        // Positive lane values are rebased by `next_d` into the shared table;
        // the lane keeps the previous shared value.
        for c in 0..width {
            let offset = distinct_offset + c;
            let a = buckets[offset];
            let b = state.buckets[offset];
            let shifted = b + next_d;
            buckets[offset] = if b > 0 { shifted } else { a };
            state.buckets[offset] = a;
        }
        // Net effect: `next_d` grows by the value prepare returned for this
        // lane, and `state.position` ends up holding the `next_d` value from
        // before this lane was merged.
        next_d += 1 + state.position;
        state.position = next_d - state.position;
    }

    // Phase 3: place every lane's cached entries into `sa`.
    for state in thread_state.iter_mut().take(thread_count) {
        partial_gsa_scan_right_to_left_16u_block_place(
            sa,
            &mut state.buckets,
            &state.cache,
            state.count,
            state.position,
        );
    }

    next_d
}
5373
5374#[allow(dead_code)]
5375fn partial_gsa_scan_right_to_left_16u_omp(
5376    t: &[u16],
5377    sa: &mut [SaSint],
5378    n: SaSint,
5379    k: SaSint,
5380    buckets: &mut [SaSint],
5381    first_lms_suffix: SaSint,
5382    left_suffixes_count: SaSint,
5383    d: SaSint,
5384    threads: SaSint,
5385) {
5386    let scan_start = left_suffixes_count + 1;
5387    let scan_end = n - first_lms_suffix;
5388
5389    if threads == 1 || scan_end - scan_start < 65536 {
5390        partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, scan_start, scan_end - scan_start);
5391    } else {
5392        let mut d = d;
5393        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
5394        let mut block_start = scan_end - 1;
5395        while block_start >= scan_start {
5396            if sa[block_start as usize] == 0 {
5397                block_start -= 1;
5398            } else {
5399                let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
5400                let mut block_max_end = block_start - block_limit;
5401                if block_max_end < scan_start {
5402                    block_max_end = scan_start - 1;
5403                }
5404                let mut block_end = block_start - 1;
5405                while block_end > block_max_end && sa[block_end as usize] != 0 {
5406                    block_end -= 1;
5407                }
5408                let block_size = block_start - block_end;
5409
5410                if block_size < 32 {
5411                    while block_start > block_end {
5412                        let mut p = sa[block_start as usize];
5413                        d += SaSint::from(p < 0);
5414                        p &= SAINT_MAX;
5415                        let v = buckets_index2(
5416                            t[(p - 1) as usize] as usize,
5417                            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5418                        );
5419                        if v != 1 {
5420                            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5421                                SAINT_MIN
5422                            } else {
5423                                0
5424                            };
5425                            buckets[v] -= 1;
5426                            sa[buckets[v] as usize] = (p - 1) | mark;
5427                            buckets[2 * ALPHABET_SIZE + v] = d;
5428                        }
5429                        block_start -= 1;
5430                    }
5431                } else {
5432                    d = partial_gsa_scan_right_to_left_16u_block_omp(
5433                        t,
5434                        sa,
5435                        k,
5436                        buckets,
5437                        d,
5438                        block_end + 1,
5439                        block_size,
5440                        threads,
5441                        &mut thread_state,
5442                    );
5443                    block_start = block_end;
5444                }
5445            }
5446        }
5447    }
5448}
5449
5450#[allow(dead_code)]
5451fn partial_sorting_shift_markers_16u_omp(
5452    sa: &mut [SaSint],
5453    n: SaSint,
5454    buckets: &[SaSint],
5455    threads: SaSint,
5456) {
5457    let thread_count = if threads > 1 && n >= 65536 {
5458        usize::try_from(threads).expect("threads must be positive")
5459    } else {
5460        1
5461    };
5462    let c_step = buckets_index2(1, 0) as isize;
5463    let c_min = buckets_index2(1, 0) as isize;
5464    let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
5465    for t in 0..thread_count {
5466        let mut c = c_max - (t as isize * c_step);
5467        while c >= c_min {
5468            let c_usize = c as usize;
5469            let mut s = SAINT_MIN;
5470            let mut i = buckets[4 * ALPHABET_SIZE + c_usize] as isize - 1;
5471            let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
5472            while i >= j {
5473                let p0 = sa[i as usize];
5474                let q0 = (p0 & SAINT_MIN) ^ s;
5475                s ^= q0;
5476                sa[i as usize] = p0 ^ q0;
5477
5478                let p1 = sa[(i - 1) as usize];
5479                let q1 = (p1 & SAINT_MIN) ^ s;
5480                s ^= q1;
5481                sa[(i - 1) as usize] = p1 ^ q1;
5482
5483                let p2 = sa[(i - 2) as usize];
5484                let q2 = (p2 & SAINT_MIN) ^ s;
5485                s ^= q2;
5486                sa[(i - 2) as usize] = p2 ^ q2;
5487
5488                let p3 = sa[(i - 3) as usize];
5489                let q3 = (p3 & SAINT_MIN) ^ s;
5490                s ^= q3;
5491                sa[(i - 3) as usize] = p3 ^ q3;
5492
5493                i -= 4;
5494            }
5495
5496            j -= 3;
5497            while i >= j {
5498                let p = sa[i as usize];
5499                let q = (p & SAINT_MIN) ^ s;
5500                s ^= q;
5501                sa[i as usize] = p ^ q;
5502                i -= 1;
5503            }
5504
5505            c -= c_step * thread_count as isize;
5506        }
5507    }
5508}
5509
5510#[allow(dead_code)]
5511fn induce_partial_order_16u_omp(
5512    t: &[u16],
5513    sa: &mut [SaSint],
5514    n: SaSint,
5515    k: SaSint,
5516    flags: SaSint,
5517    buckets: &mut [SaSint],
5518    first_lms_suffix: SaSint,
5519    left_suffixes_count: SaSint,
5520    threads: SaSint,
5521) {
5522    buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5523
5524    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5525        let marker = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5526        buckets[marker] = buckets[4 * ALPHABET_SIZE + buckets_index2(1, 1)] - 1;
5527        flip_suffix_markers_omp(sa, buckets[marker], threads);
5528    }
5529
5530    let d = partial_sorting_scan_left_to_right_16u_omp(
5531        t,
5532        sa,
5533        n,
5534        k,
5535        buckets,
5536        left_suffixes_count,
5537        0,
5538        threads,
5539    );
5540    partial_sorting_shift_markers_16u_omp(sa, n, buckets, threads);
5541
5542    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5543        partial_gsa_scan_right_to_left_16u_omp(
5544            t,
5545            sa,
5546            n,
5547            k,
5548            buckets,
5549            first_lms_suffix,
5550            left_suffixes_count,
5551            d,
5552            threads,
5553        );
5554
5555        if t[first_lms_suffix as usize] == 0 {
5556            let count = (buckets[buckets_index2(1, 1)] - 1) as usize;
5557            sa.copy_within(0..count, 1);
5558            sa[0] = first_lms_suffix | SAINT_MIN;
5559        }
5560
5561        buckets[buckets_index2(0, 1)] = 0;
5562    } else {
5563        partial_sorting_scan_right_to_left_16u_omp(
5564            t,
5565            sa,
5566            n,
5567            k,
5568            buckets,
5569            first_lms_suffix,
5570            left_suffixes_count,
5571            d,
5572            threads,
5573        );
5574    }
5575}
5576
5577#[allow(dead_code)]
5578fn induce_partial_order_32s_6k_omp(
5579    t: &[SaSint],
5580    sa: &mut [SaSint],
5581    n: SaSint,
5582    k: SaSint,
5583    buckets: &mut [SaSint],
5584    first_lms_suffix: SaSint,
5585    left_suffixes_count: SaSint,
5586    threads: SaSint,
5587    thread_state: &mut [ThreadState],
5588) {
5589    let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5590        t,
5591        sa,
5592        n,
5593        buckets,
5594        left_suffixes_count,
5595        0,
5596        threads,
5597        thread_state,
5598    );
5599    partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5600    partial_sorting_shift_buckets_32s_6k(k, buckets);
5601    partial_sorting_scan_right_to_left_32s_6k_omp(
5602        t,
5603        sa,
5604        n,
5605        buckets,
5606        first_lms_suffix,
5607        left_suffixes_count,
5608        d,
5609        threads,
5610        thread_state,
5611    );
5612}
5613
5614#[allow(dead_code)]
5615fn induce_partial_order_32s_4k_omp(
5616    t: &[SaSint],
5617    sa: &mut [SaSint],
5618    n: SaSint,
5619    k: SaSint,
5620    buckets: &mut [SaSint],
5621    threads: SaSint,
5622    thread_state: &mut [ThreadState],
5623) {
5624    buckets[..2 * k as usize].fill(0);
5625    let d = partial_sorting_scan_left_to_right_32s_4k_omp(
5626        t,
5627        sa,
5628        n,
5629        k,
5630        buckets,
5631        0,
5632        threads,
5633        thread_state,
5634    );
5635    partial_sorting_shift_markers_32s_4k(sa, n);
5636    partial_sorting_scan_right_to_left_32s_4k_omp(t, sa, n, k, buckets, d, threads, thread_state);
5637    partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
5638}
5639
5640#[allow(dead_code)]
5641fn induce_partial_order_32s_2k_omp(
5642    t: &[SaSint],
5643    sa: &mut [SaSint],
5644    n: SaSint,
5645    k: SaSint,
5646    buckets: &mut [SaSint],
5647    threads: SaSint,
5648    thread_state: &mut [ThreadState],
5649) {
5650    let k = k as usize;
5651    let (left, right) = buckets.split_at_mut(k);
5652    partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
5653    partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
5654    partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5655}
5656
5657#[allow(dead_code)]
5658fn induce_partial_order_32s_1k_omp(
5659    t: &[SaSint],
5660    sa: &mut [SaSint],
5661    n: SaSint,
5662    k: SaSint,
5663    buckets: &mut [SaSint],
5664    threads: SaSint,
5665    thread_state: &mut [ThreadState],
5666) {
5667    count_suffixes_32s(t, n, k, buckets);
5668    initialize_buckets_start_32s_1k(k, buckets);
5669    partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5670
5671    count_suffixes_32s(t, n, k, buckets);
5672    initialize_buckets_end_32s_1k(k, buckets);
5673    partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5674
5675    partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5676}
5677
5678#[allow(dead_code)]
5679fn final_sorting_scan_left_to_right_16u(
5680    t: &[u16],
5681    sa: &mut [SaSint],
5682    induction_bucket: &mut [SaSint],
5683    omp_block_start: SaSint,
5684    omp_block_size: SaSint,
5685) {
5686    let mut i = omp_block_start as isize;
5687    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
5688    while i < j {
5689        final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5690        final_sorting_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
5691        i += 2;
5692    }
5693    j += 64 + 1;
5694    while i < j {
5695        final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5696        i += 1;
5697    }
5698}
5699
5700#[allow(dead_code)]
5701fn final_sorting_scan_right_to_left_16u(
5702    t: &[u16],
5703    sa: &mut [SaSint],
5704    induction_bucket: &mut [SaSint],
5705    omp_block_start: SaSint,
5706    omp_block_size: SaSint,
5707) {
5708    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5709    let mut j = (omp_block_start + 64 + 1) as isize;
5710    while i >= j {
5711        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5712        final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, false);
5713        i -= 2;
5714    }
5715    j -= 64 + 1;
5716    while i >= j {
5717        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5718        i -= 1;
5719    }
5720}
5721
5722#[allow(dead_code)]
5723fn final_sorting_scan_left_to_right_32s(
5724    t: &[SaSint],
5725    sa: &mut [SaSint],
5726    induction_bucket: &mut [SaSint],
5727    omp_block_start: SaSint,
5728    omp_block_size: SaSint,
5729) {
5730    let mut i = omp_block_start as isize;
5731    let mut j = (omp_block_start + omp_block_size - 2 * 64 - 1) as isize;
5732    while i < j {
5733        for current in [i, i + 1] {
5734            let current = current as usize;
5735            let mut p = sa[current];
5736            sa[current] = p ^ SAINT_MIN;
5737            if p > 0 {
5738                p -= 1;
5739                let p_usize = p as usize;
5740                let bucket = t[p_usize] as usize;
5741                let slot = induction_bucket[bucket] as usize;
5742                sa[slot] = p
5743                    | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5744                        << (SAINT_BIT - 1));
5745                induction_bucket[bucket] += 1;
5746            }
5747        }
5748        i += 2;
5749    }
5750
5751    j += 2 * 64 + 1;
5752    while i < j {
5753        let current = i as usize;
5754        let mut p = sa[current];
5755        sa[current] = p ^ SAINT_MIN;
5756        if p > 0 {
5757            p -= 1;
5758            let p_usize = p as usize;
5759            let bucket = t[p_usize] as usize;
5760            let slot = induction_bucket[bucket] as usize;
5761            sa[slot] = p
5762                | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5763                    << (SAINT_BIT - 1));
5764            induction_bucket[bucket] += 1;
5765        }
5766        i += 1;
5767    }
5768}
5769
5770#[allow(dead_code)]
5771fn final_sorting_scan_left_to_right_32s_block_gather(
5772    t: &[SaSint],
5773    sa: &mut [SaSint],
5774    cache: &mut [ThreadCache],
5775    omp_block_start: SaSint,
5776    omp_block_size: SaSint,
5777) {
5778    if omp_block_size <= 0 {
5779        return;
5780    }
5781
5782    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5783    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5784    for offset in 0..size {
5785        let current = start + offset;
5786        let mut symbol = SAINT_MIN;
5787        let mut p = sa[current];
5788        sa[current] = p ^ SAINT_MIN;
5789        if p > 0 {
5790            p -= 1;
5791            let p_usize = p as usize;
5792            cache[offset].index = p
5793                | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5794                    << (SAINT_BIT - 1));
5795            symbol = t[p_usize];
5796        }
5797        cache[offset].symbol = symbol;
5798    }
5799}
5800
5801#[allow(dead_code)]
5802fn final_sorting_scan_left_to_right_32s_block_sort(
5803    t: &[SaSint],
5804    induction_bucket: &mut [SaSint],
5805    cache: &mut [ThreadCache],
5806    omp_block_start: SaSint,
5807    omp_block_size: SaSint,
5808) {
5809    if omp_block_size <= 0 {
5810        return;
5811    }
5812
5813    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5814    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5815    let block_end = start + size;
5816
5817    for offset in 0..size {
5818        let v = cache[offset].symbol;
5819        if v >= 0 {
5820            let bucket_index = v as usize;
5821            let target = induction_bucket[bucket_index];
5822            cache[offset].symbol = target;
5823            induction_bucket[bucket_index] += 1;
5824            if target >= omp_block_start && target < block_end as SaSint {
5825                let ni = usize::try_from(target - omp_block_start)
5826                    .expect("cache slot must be non-negative");
5827                let mut np = cache[offset].index;
5828                cache[offset].index = np ^ SAINT_MIN;
5829                if np > 0 {
5830                    np -= 1;
5831                    let np_usize = np as usize;
5832                    cache[ni].index = np
5833                        | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
5834                            as SaSint)
5835                            << (SAINT_BIT - 1));
5836                    cache[ni].symbol = t[np_usize];
5837                }
5838            }
5839        }
5840    }
5841}
5842
5843#[allow(dead_code)]
5844fn final_sorting_scan_left_to_right_32s_block_omp(
5845    t: &[SaSint],
5846    sa: &mut [SaSint],
5847    buckets: &mut [SaSint],
5848    cache: &mut [ThreadCache],
5849    block_start: SaSint,
5850    block_size: SaSint,
5851    threads: SaSint,
5852) {
5853    if threads <= 1 || block_size < 16_384 {
5854        final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
5855        return;
5856    }
5857
5858    final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
5859    final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
5860
5861    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5862    let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
5863    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5864    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5865    for omp_thread_num in 0..omp_num_threads {
5866        let omp_block_start = omp_thread_num * omp_block_stride;
5867        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5868            omp_block_stride
5869        } else {
5870            block_size_usize - omp_block_start
5871        };
5872        compact_and_place_cached_suffixes(
5873            sa,
5874            cache,
5875            omp_block_start as SaSint,
5876            omp_block_size as SaSint,
5877        );
5878    }
5879}
5880
5881#[allow(dead_code)]
5882fn final_sorting_scan_left_to_right_32s_omp(
5883    t: &[SaSint],
5884    sa: &mut [SaSint],
5885    n: SaSint,
5886    induction_bucket: &mut [SaSint],
5887    threads: SaSint,
5888    thread_state: &mut [ThreadState],
5889) {
5890    let last = (n - 1) as usize;
5891    let bucket = t[last] as usize;
5892    let slot = induction_bucket[bucket] as usize;
5893    sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
5894    induction_bucket[bucket] += 1;
5895
5896    if threads == 1 || n < 65536 || thread_state.is_empty() {
5897        final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n);
5898        return;
5899    }
5900
5901    let threads_usize = usize::try_from(threads)
5902        .expect("threads must be non-negative")
5903        .max(1);
5904    let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
5905    let mut cache = vec![ThreadCache::default(); block_span];
5906    let mut block_start = 0;
5907    while block_start < n {
5908        let block_end = (block_start + block_span as SaSint).min(n);
5909        final_sorting_scan_left_to_right_32s_block_omp(
5910            t,
5911            sa,
5912            induction_bucket,
5913            &mut cache,
5914            block_start,
5915            block_end - block_start,
5916            threads,
5917        );
5918        block_start = block_end;
5919    }
5920}
5921
5922#[allow(dead_code)]
5923fn final_sorting_scan_right_to_left_32s(
5924    t: &[SaSint],
5925    sa: &mut [SaSint],
5926    induction_bucket: &mut [SaSint],
5927    omp_block_start: SaSint,
5928    omp_block_size: SaSint,
5929) {
5930    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5931    let mut j = (omp_block_start + 2 * 64 + 1) as isize;
5932    while i >= j {
5933        for current in [i, i - 1] {
5934            let current = current as usize;
5935            let mut p = sa[current];
5936            sa[current] = p & SAINT_MAX;
5937            if p > 0 {
5938                p -= 1;
5939                let p_usize = p as usize;
5940                let bucket = t[p_usize] as usize;
5941                induction_bucket[bucket] -= 1;
5942                let slot = induction_bucket[bucket] as usize;
5943                sa[slot] = p
5944                    | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5945                        << (SAINT_BIT - 1));
5946            }
5947        }
5948        i -= 2;
5949    }
5950
5951    j -= 2 * 64 + 1;
5952    while i >= j {
5953        let current = i as usize;
5954        let mut p = sa[current];
5955        sa[current] = p & SAINT_MAX;
5956        if p > 0 {
5957            p -= 1;
5958            let p_usize = p as usize;
5959            let bucket = t[p_usize] as usize;
5960            induction_bucket[bucket] -= 1;
5961            let slot = induction_bucket[bucket] as usize;
5962            sa[slot] = p
5963                | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5964                    << (SAINT_BIT - 1));
5965        }
5966        i -= 1;
5967    }
5968}
5969
5970#[allow(dead_code)]
5971fn final_sorting_scan_right_to_left_32s_block_gather(
5972    t: &[SaSint],
5973    sa: &mut [SaSint],
5974    cache: &mut [ThreadCache],
5975    omp_block_start: SaSint,
5976    omp_block_size: SaSint,
5977) {
5978    if omp_block_size <= 0 {
5979        return;
5980    }
5981
5982    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5983    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5984    for offset in 0..size {
5985        let current = start + offset;
5986        let mut symbol = SAINT_MIN;
5987        let mut p = sa[current];
5988        sa[current] = p & SAINT_MAX;
5989        if p > 0 {
5990            p -= 1;
5991            let p_usize = p as usize;
5992            cache[offset].index = p
5993                | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5994                    << (SAINT_BIT - 1));
5995            symbol = t[p_usize];
5996        }
5997        cache[offset].symbol = symbol;
5998    }
5999}
6000
6001#[allow(dead_code)]
6002fn final_sorting_scan_right_to_left_32s_block_sort(
6003    t: &[SaSint],
6004    induction_bucket: &mut [SaSint],
6005    cache: &mut [ThreadCache],
6006    omp_block_start: SaSint,
6007    omp_block_size: SaSint,
6008) {
6009    if omp_block_size <= 0 {
6010        return;
6011    }
6012
6013    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6014    let block_end = omp_block_start + omp_block_size;
6015    let mut offset = size;
6016
6017    while offset > 0 {
6018        offset -= 1;
6019        let v = cache[offset].symbol;
6020        if v >= 0 {
6021            let bucket_index = v as usize;
6022            induction_bucket[bucket_index] -= 1;
6023            let target = induction_bucket[bucket_index];
6024            cache[offset].symbol = target;
6025            if target >= omp_block_start && target < block_end {
6026                let ni = usize::try_from(target - omp_block_start)
6027                    .expect("cache slot must be non-negative");
6028                let mut np = cache[offset].index;
6029                cache[offset].index = np & SAINT_MAX;
6030                if np > 0 {
6031                    np -= 1;
6032                    let np_usize = np as usize;
6033                    cache[ni].index = np
6034                        | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
6035                            as SaSint)
6036                            << (SAINT_BIT - 1));
6037                    cache[ni].symbol = t[np_usize];
6038                }
6039            }
6040        }
6041    }
6042}
6043
6044#[allow(dead_code)]
6045fn final_sorting_scan_right_to_left_32s_block_omp(
6046    t: &[SaSint],
6047    sa: &mut [SaSint],
6048    buckets: &mut [SaSint],
6049    cache: &mut [ThreadCache],
6050    block_start: SaSint,
6051    block_size: SaSint,
6052    threads: SaSint,
6053) {
6054    if threads <= 1 || block_size < 16_384 {
6055        final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
6056        return;
6057    }
6058
6059    final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
6060    final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
6061
6062    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
6063    let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
6064    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
6065    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
6066    for omp_thread_num in 0..omp_num_threads {
6067        let omp_block_start = omp_thread_num * omp_block_stride;
6068        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6069            omp_block_stride
6070        } else {
6071            block_size_usize - omp_block_start
6072        };
6073        compact_and_place_cached_suffixes(
6074            sa,
6075            cache,
6076            omp_block_start as SaSint,
6077            omp_block_size as SaSint,
6078        );
6079    }
6080}
6081
6082#[allow(dead_code)]
6083fn final_sorting_scan_right_to_left_32s_omp(
6084    t: &[SaSint],
6085    sa: &mut [SaSint],
6086    n: SaSint,
6087    induction_bucket: &mut [SaSint],
6088    threads: SaSint,
6089    thread_state: &mut [ThreadState],
6090) {
6091    if threads == 1 || n < 65536 || thread_state.is_empty() {
6092        final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n);
6093        return;
6094    }
6095
6096    let threads_usize = usize::try_from(threads)
6097        .expect("threads must be non-negative")
6098        .max(1);
6099    let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
6100    let mut cache = vec![ThreadCache::default(); block_span];
6101    let mut block_start = n - 1;
6102    while block_start >= 0 {
6103        let block_end = (block_start - block_span as SaSint).max(-1);
6104        final_sorting_scan_right_to_left_32s_block_omp(
6105            t,
6106            sa,
6107            induction_bucket,
6108            &mut cache,
6109            block_end + 1,
6110            block_start - block_end,
6111            threads,
6112        );
6113        block_start = block_end;
6114    }
6115}
6116
6117#[allow(dead_code)]
6118fn induce_final_order_32s_6k(
6119    t: &[SaSint],
6120    sa: &mut [SaSint],
6121    n: SaSint,
6122    k: SaSint,
6123    buckets: &mut [SaSint],
6124    threads: SaSint,
6125    thread_state: &mut [ThreadState],
6126) {
6127    let k = k as usize;
6128    final_sorting_scan_left_to_right_32s_omp(
6129        t,
6130        sa,
6131        n,
6132        &mut buckets[4 * k..5 * k],
6133        threads,
6134        thread_state,
6135    );
6136    final_sorting_scan_right_to_left_32s_omp(
6137        t,
6138        sa,
6139        n,
6140        &mut buckets[5 * k..6 * k],
6141        threads,
6142        thread_state,
6143    );
6144}
6145
6146#[allow(dead_code)]
6147fn induce_final_order_32s_4k(
6148    t: &[SaSint],
6149    sa: &mut [SaSint],
6150    n: SaSint,
6151    k: SaSint,
6152    buckets: &mut [SaSint],
6153    threads: SaSint,
6154    thread_state: &mut [ThreadState],
6155) {
6156    let k = k as usize;
6157    final_sorting_scan_left_to_right_32s_omp(
6158        t,
6159        sa,
6160        n,
6161        &mut buckets[2 * k..3 * k],
6162        threads,
6163        thread_state,
6164    );
6165    final_sorting_scan_right_to_left_32s_omp(
6166        t,
6167        sa,
6168        n,
6169        &mut buckets[3 * k..4 * k],
6170        threads,
6171        thread_state,
6172    );
6173}
6174
6175#[allow(dead_code)]
6176fn induce_final_order_32s_2k(
6177    t: &[SaSint],
6178    sa: &mut [SaSint],
6179    n: SaSint,
6180    k: SaSint,
6181    buckets: &mut [SaSint],
6182    threads: SaSint,
6183    thread_state: &mut [ThreadState],
6184) {
6185    let k = k as usize;
6186    final_sorting_scan_left_to_right_32s_omp(
6187        t,
6188        sa,
6189        n,
6190        &mut buckets[k..2 * k],
6191        threads,
6192        thread_state,
6193    );
6194    final_sorting_scan_right_to_left_32s_omp(t, sa, n, &mut buckets[..k], threads, thread_state);
6195}
6196
6197#[allow(dead_code)]
6198fn induce_final_order_32s_1k(
6199    t: &[SaSint],
6200    sa: &mut [SaSint],
6201    n: SaSint,
6202    k: SaSint,
6203    buckets: &mut [SaSint],
6204    threads: SaSint,
6205    thread_state: &mut [ThreadState],
6206) {
6207    count_suffixes_32s(t, n, k, buckets);
6208    initialize_buckets_start_32s_1k(k, buckets);
6209    final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
6210
6211    count_suffixes_32s(t, n, k, buckets);
6212    initialize_buckets_end_32s_1k(k, buckets);
6213    final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
6214}
6215
6216#[allow(dead_code)]
6217fn clear_lms_suffixes_omp(
6218    sa: &mut [SaSint],
6219    n: SaSint,
6220    k: SaSint,
6221    bucket_start: &[SaSint],
6222    bucket_end: &[SaSint],
6223    threads: SaSint,
6224) {
6225    let k_usize = usize::try_from(k).expect("k must be non-negative");
6226    let thread_count = if threads > 1 && n >= 65536 {
6227        usize::try_from(threads).expect("threads must be positive")
6228    } else {
6229        1
6230    };
6231    for t in 0..thread_count {
6232        let mut c = t;
6233        while c < k_usize {
6234            if bucket_end[c] > bucket_start[c] {
6235                let start = bucket_start[c] as usize;
6236                let end = bucket_end[c] as usize;
6237                sa[start..end].fill(0);
6238            }
6239            c += thread_count;
6240        }
6241    }
6242}
6243
6244#[allow(dead_code)]
6245fn final_gsa_scan_right_to_left_16u(
6246    t: &[u16],
6247    sa: &mut [SaSint],
6248    induction_bucket: &mut [SaSint],
6249    omp_block_start: SaSint,
6250    omp_block_size: SaSint,
6251) {
6252    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6253    let mut j = (omp_block_start + 64 + 1) as isize;
6254    while i >= j {
6255        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6256        final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, true);
6257        i -= 2;
6258    }
6259    j -= 64 + 1;
6260    while i >= j {
6261        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6262        i -= 1;
6263    }
6264}
6265
6266#[allow(dead_code)]
6267fn final_sorting_ltr_step(
6268    t: &[u16],
6269    sa: &mut [SaSint],
6270    induction_bucket: &mut [SaSint],
6271    index: usize,
6272) {
6273    let mut p = sa[index];
6274    sa[index] = p ^ SAINT_MIN;
6275    if p > 0 {
6276        p -= 1;
6277        let c = t[p as usize] as usize;
6278        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
6279            SAINT_MIN
6280        } else {
6281            0
6282        };
6283        let dst = induction_bucket[c] as usize;
6284        sa[dst] = p | mark;
6285        induction_bucket[c] += 1;
6286    }
6287}
6288
6289#[allow(dead_code)]
6290fn final_sorting_rtl_step(
6291    t: &[u16],
6292    sa: &mut [SaSint],
6293    induction_bucket: &mut [SaSint],
6294    index: usize,
6295    gsa: bool,
6296) {
6297    let mut p = sa[index];
6298    sa[index] = p & SAINT_MAX;
6299    if p > 0 && (!gsa || t[(p - 1) as usize] > 0) {
6300        p -= 1;
6301        let c = t[p as usize] as usize;
6302        let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
6303            SAINT_MIN
6304        } else {
6305            0
6306        };
6307        induction_bucket[c] -= 1;
6308        sa[induction_bucket[c] as usize] = p | mark;
6309    }
6310}
6311
6312#[allow(dead_code)]
6313fn final_bwt_scan_left_to_right_16u(
6314    t: &[u16],
6315    sa: &mut [SaSint],
6316    induction_bucket: &mut [SaSint],
6317    omp_block_start: SaSint,
6318    omp_block_size: SaSint,
6319) {
6320    let mut i = omp_block_start as isize;
6321    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6322    while i < j {
6323        final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6324        final_bwt_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
6325        i += 2;
6326    }
6327    j += 64 + 1;
6328    while i < j {
6329        final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6330        i += 1;
6331    }
6332}
6333
6334#[allow(dead_code)]
6335fn final_bwt_scan_right_to_left_16u(
6336    t: &[u16],
6337    sa: &mut [SaSint],
6338    induction_bucket: &mut [SaSint],
6339    omp_block_start: SaSint,
6340    omp_block_size: SaSint,
6341) -> SaSint {
6342    let mut index = -1;
6343    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6344    let mut j = (omp_block_start + 64 + 1) as isize;
6345    while i >= j {
6346        final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6347        final_bwt_rtl_step(t, sa, induction_bucket, (i - 1) as usize, &mut index);
6348        i -= 2;
6349    }
6350    j -= 64 + 1;
6351    while i >= j {
6352        final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6353        i -= 1;
6354    }
6355    index
6356}
6357
6358#[allow(dead_code)]
6359fn final_bwt_aux_scan_left_to_right_16u(
6360    t: &[u16],
6361    sa: &mut [SaSint],
6362    rm: SaSint,
6363    i_sample: &mut [SaSint],
6364    induction_bucket: &mut [SaSint],
6365    omp_block_start: SaSint,
6366    omp_block_size: SaSint,
6367) {
6368    let mut i = omp_block_start as isize;
6369    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6370    while i < j {
6371        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6372        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, (i + 1) as usize);
6373        i += 2;
6374    }
6375    j += 64 + 1;
6376    while i < j {
6377        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6378        i += 1;
6379    }
6380}
6381
6382#[allow(dead_code)]
6383fn final_bwt_aux_scan_right_to_left_16u(
6384    t: &[u16],
6385    sa: &mut [SaSint],
6386    rm: SaSint,
6387    i_sample: &mut [SaSint],
6388    induction_bucket: &mut [SaSint],
6389    omp_block_start: SaSint,
6390    omp_block_size: SaSint,
6391) {
6392    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6393    let mut j = (omp_block_start + 64 + 1) as isize;
6394    while i >= j {
6395        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6396        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, (i - 1) as usize);
6397        i -= 2;
6398    }
6399    j -= 64 + 1;
6400    while i >= j {
6401        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6402        i -= 1;
6403    }
6404}
6405
6406#[allow(dead_code)]
6407fn renumber_lms_suffixes_16u(
6408    sa: &mut [SaSint],
6409    m: SaSint,
6410    mut name: SaSint,
6411    omp_block_start: SaSint,
6412    omp_block_size: SaSint,
6413) -> SaSint {
6414    let mut i = omp_block_start as isize;
6415    let mut j = (omp_block_start + omp_block_size - 64 - 3) as isize;
6416    while i < j {
6417        let p0 = sa[i as usize];
6418        sa[m as usize + ((p0 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6419        name += SaSint::from(p0 < 0);
6420
6421        let p1 = sa[(i + 1) as usize];
6422        sa[m as usize + ((p1 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6423        name += SaSint::from(p1 < 0);
6424
6425        let p2 = sa[(i + 2) as usize];
6426        sa[m as usize + ((p2 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6427        name += SaSint::from(p2 < 0);
6428
6429        let p3 = sa[(i + 3) as usize];
6430        sa[m as usize + ((p3 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6431        name += SaSint::from(p3 < 0);
6432
6433        i += 4;
6434    }
6435
6436    j += 64 + 3;
6437    while i < j {
6438        let p = sa[i as usize];
6439        sa[m as usize + ((p & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6440        name += SaSint::from(p < 0);
6441        i += 1;
6442    }
6443
6444    name
6445}
6446
6447#[allow(dead_code)]
6448fn renumber_lms_suffixes_16u_omp(
6449    sa: &mut [SaSint],
6450    m: SaSint,
6451    threads: SaSint,
6452    thread_state: &mut [ThreadState],
6453) -> SaSint {
6454    if threads == 1 || m < 65_536 || thread_state.is_empty() {
6455        return renumber_lms_suffixes_16u(sa, m, 0, 0, m);
6456    }
6457
6458    let thread_count = usize::try_from(threads)
6459        .expect("threads must be non-negative")
6460        .min(thread_state.len());
6461    let block_stride = (m / thread_count as SaSint) & !15;
6462
6463    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6464        let block_start = thread as SaSint * block_stride;
6465        let block_size = if thread + 1 < thread_count {
6466            block_stride
6467        } else {
6468            m - block_start
6469        };
6470        state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6471    }
6472
6473    let mut name = 0;
6474    for thread in 0..thread_count {
6475        let block_start = thread as SaSint * block_stride;
6476        let block_size = if thread + 1 < thread_count {
6477            block_stride
6478        } else {
6479            m - block_start
6480        };
6481        renumber_lms_suffixes_16u(sa, m, name, block_start, block_size);
6482        name += thread_state[thread].count;
6483    }
6484
6485    name
6486}
6487
6488#[allow(dead_code)]
6489fn gather_marked_lms_suffixes(
6490    sa: &mut [SaSint],
6491    m: SaSint,
6492    mut l: isize,
6493    omp_block_start: isize,
6494    omp_block_size: isize,
6495) -> isize {
6496    if omp_block_size <= 0 {
6497        return l;
6498    }
6499
6500    l -= 1;
6501    let mut i = m as isize + omp_block_start + omp_block_size - 1;
6502    let mut j = m as isize + omp_block_start + 3;
6503    while i >= j {
6504        let s0 = sa[i as usize];
6505        sa[l as usize] = s0 & SAINT_MAX;
6506        l -= isize::from(s0 < 0);
6507
6508        let s1 = sa[(i - 1) as usize];
6509        sa[l as usize] = s1 & SAINT_MAX;
6510        l -= isize::from(s1 < 0);
6511
6512        let s2 = sa[(i - 2) as usize];
6513        sa[l as usize] = s2 & SAINT_MAX;
6514        l -= isize::from(s2 < 0);
6515
6516        let s3 = sa[(i - 3) as usize];
6517        sa[l as usize] = s3 & SAINT_MAX;
6518        l -= isize::from(s3 < 0);
6519
6520        i -= 4;
6521    }
6522
6523    j -= 3;
6524    while i >= j {
6525        let s = sa[i as usize];
6526        sa[l as usize] = s & SAINT_MAX;
6527        l -= isize::from(s < 0);
6528        i -= 1;
6529    }
6530
6531    l + 1
6532}
6533
6534#[allow(dead_code)]
6535fn gather_marked_lms_suffixes_omp(
6536    sa: &mut [SaSint],
6537    n: SaSint,
6538    m: SaSint,
6539    fs: SaSint,
6540    threads: SaSint,
6541    thread_state: &mut [ThreadState],
6542) {
6543    let half_n = n >> 1;
6544    if threads == 1 || n < 131_072 || thread_state.is_empty() {
6545        let _ = gather_marked_lms_suffixes(sa, m, (n + fs) as isize, 0, half_n as isize);
6546        return;
6547    }
6548
6549    let thread_count = usize::try_from(threads)
6550        .expect("threads must be non-negative")
6551        .min(thread_state.len());
6552    let block_stride = (half_n / thread_count as SaSint) & !15;
6553
6554    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6555        let block_start = thread as SaSint * block_stride;
6556        let block_size = if thread + 1 < thread_count {
6557            block_stride
6558        } else {
6559            half_n - block_start
6560        };
6561        let local_end = if thread + 1 < thread_count {
6562            m + block_start + block_size
6563        } else {
6564            n + fs
6565        } as isize;
6566        let gathered_position =
6567            gather_marked_lms_suffixes(sa, m, local_end, block_start as isize, block_size as isize);
6568        state.position = gathered_position as SaSint;
6569        state.count = (local_end - gathered_position) as SaSint;
6570    }
6571
6572    let mut position = (n + fs) as isize;
6573    for thread in (0..thread_count).rev() {
6574        let count =
6575            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
6576        position -= thread_state[thread].count as isize;
6577        if thread + 1 != thread_count && count > 0 {
6578            let src = usize::try_from(thread_state[thread].position)
6579                .expect("position must be non-negative");
6580            let dst = position as usize;
6581            sa.copy_within(src..src + count, dst);
6582        }
6583    }
6584}
6585
6586#[allow(dead_code)]
6587fn renumber_and_gather_lms_suffixes_omp(
6588    sa: &mut [SaSint],
6589    n: SaSint,
6590    m: SaSint,
6591    fs: SaSint,
6592    threads: SaSint,
6593    thread_state: &mut [ThreadState],
6594) -> SaSint {
6595    let m_usize = m as usize;
6596    let half_n = (n >> 1) as usize;
6597    sa[m_usize..m_usize + half_n].fill(0);
6598
6599    let name = renumber_lms_suffixes_16u_omp(sa, m, threads, thread_state);
6600    if name < m {
6601        gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6602    } else {
6603        for item in &mut sa[..m_usize] {
6604            *item &= SAINT_MAX;
6605        }
6606    }
6607
6608    name
6609}
6610
6611#[allow(dead_code)]
6612fn reconstruct_lms_suffixes(
6613    sa: &mut [SaSint],
6614    n: SaSint,
6615    m: SaSint,
6616    omp_block_start: isize,
6617    omp_block_size: isize,
6618) {
6619    if omp_block_size <= 0 {
6620        return;
6621    }
6622
6623    let base = (n - m) as usize;
6624    let mut i = omp_block_start;
6625    let mut j = omp_block_start + omp_block_size - 64 - 3;
6626    while i < j {
6627        let iu = i as usize;
6628        let s0 = sa[iu] as usize;
6629        let s1 = sa[iu + 1] as usize;
6630        let s2 = sa[iu + 2] as usize;
6631        let s3 = sa[iu + 3] as usize;
6632        sa[iu] = sa[base + s0];
6633        sa[iu + 1] = sa[base + s1];
6634        sa[iu + 2] = sa[base + s2];
6635        sa[iu + 3] = sa[base + s3];
6636        i += 4;
6637    }
6638
6639    j += 64 + 3;
6640    while i < j {
6641        let iu = i as usize;
6642        let s = sa[iu] as usize;
6643        sa[iu] = sa[base + s];
6644        i += 1;
6645    }
6646}
6647
6648#[allow(dead_code)]
6649fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6650    if threads == 1 || m < 65_536 {
6651        reconstruct_lms_suffixes(sa, n, m, 0, m as isize);
6652        return;
6653    }
6654
6655    let thread_count = threads as usize;
6656    let block_stride = (m / threads) & !15;
6657    for thread in 0..thread_count {
6658        let block_start = thread as SaSint * block_stride;
6659        let block_size = if thread + 1 < thread_count {
6660            block_stride
6661        } else {
6662            m - block_start
6663        };
6664        reconstruct_lms_suffixes(sa, n, m, block_start as isize, block_size as isize);
6665    }
6666}
6667
6668#[allow(dead_code)]
6669fn renumber_distinct_lms_suffixes_32s_4k(
6670    sa: &mut [SaSint],
6671    m: SaSint,
6672    mut name: SaSint,
6673    omp_block_start: isize,
6674    omp_block_size: isize,
6675) -> SaSint {
6676    if omp_block_size <= 0 {
6677        return name;
6678    }
6679
6680    let m_usize = m as usize;
6681    let start = omp_block_start as usize;
6682    let size = omp_block_size as usize;
6683    let (sa_head, sam) = sa.split_at_mut(m_usize);
6684    let mut i = start;
6685    let mut j = start + size.saturating_sub(64 + 3);
6686    let mut p3 = 0;
6687
6688    while i < j {
6689        let p0 = sa_head[i];
6690        sa_head[i] = p0 & SAINT_MAX;
6691        sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6692        name += SaSint::from(p0 < 0);
6693
6694        let p1 = sa_head[i + 1];
6695        sa_head[i + 1] = p1 & SAINT_MAX;
6696        sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6697        name += SaSint::from(p1 < 0);
6698
6699        let p2 = sa_head[i + 2];
6700        sa_head[i + 2] = p2 & SAINT_MAX;
6701        sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6702        name += SaSint::from(p2 < 0);
6703
6704        p3 = sa_head[i + 3];
6705        sa_head[i + 3] = p3 & SAINT_MAX;
6706        sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6707        name += SaSint::from(p3 < 0);
6708
6709        i += 4;
6710    }
6711
6712    j = start + size;
6713    while i < j {
6714        let p2 = p3;
6715        p3 = sa_head[i];
6716        sa_head[i] = p3 & SAINT_MAX;
6717        sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6718        name += SaSint::from(p3 < 0);
6719        i += 1;
6720    }
6721
6722    name
6723}
6724
6725#[allow(dead_code)]
6726fn mark_distinct_lms_suffixes_32s(
6727    sa: &mut [SaSint],
6728    m: SaSint,
6729    omp_block_start: isize,
6730    omp_block_size: isize,
6731) {
6732    if omp_block_size <= 0 {
6733        return;
6734    }
6735
6736    let mut i = m as usize + omp_block_start as usize;
6737    let mut j = i + (omp_block_size as usize).saturating_sub(3);
6738    let mut p3 = 0;
6739    while i < j {
6740        let mut p0 = sa[i];
6741        sa[i] = p0 & (p3 | SAINT_MAX);
6742        p0 = if p0 == 0 { p3 } else { p0 };
6743
6744        let mut p1 = sa[i + 1];
6745        sa[i + 1] = p1 & (p0 | SAINT_MAX);
6746        p1 = if p1 == 0 { p0 } else { p1 };
6747
6748        let mut p2 = sa[i + 2];
6749        sa[i + 2] = p2 & (p1 | SAINT_MAX);
6750        p2 = if p2 == 0 { p1 } else { p2 };
6751
6752        p3 = sa[i + 3];
6753        sa[i + 3] = p3 & (p2 | SAINT_MAX);
6754        p3 = if p3 == 0 { p2 } else { p3 };
6755        i += 4;
6756    }
6757
6758    j = m as usize + omp_block_start as usize + omp_block_size as usize;
6759    while i < j {
6760        let p2 = p3;
6761        p3 = sa[i];
6762        sa[i] = p3 & (p2 | SAINT_MAX);
6763        p3 = if p3 == 0 { p2 } else { p3 };
6764        i += 1;
6765    }
6766}
6767
6768#[allow(dead_code)]
6769fn clamp_lms_suffixes_length_32s(
6770    sa: &mut [SaSint],
6771    m: SaSint,
6772    omp_block_start: isize,
6773    omp_block_size: isize,
6774) {
6775    if omp_block_size <= 0 {
6776        return;
6777    }
6778
6779    let mut i = m as usize + omp_block_start as usize;
6780    let mut j = i + (omp_block_size as usize).saturating_sub(3);
6781    while i < j {
6782        let s0 = sa[i];
6783        sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6784
6785        let s1 = sa[i + 1];
6786        sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6787
6788        let s2 = sa[i + 2];
6789        sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6790
6791        let s3 = sa[i + 3];
6792        sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6793
6794        i += 4;
6795    }
6796
6797    j = m as usize + omp_block_start as usize + omp_block_size as usize;
6798    while i < j {
6799        let s = sa[i];
6800        sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6801        i += 1;
6802    }
6803}
6804
6805#[allow(dead_code)]
6806fn renumber_distinct_lms_suffixes_32s_4k_omp(
6807    sa: &mut [SaSint],
6808    m: SaSint,
6809    threads: SaSint,
6810    thread_state: &mut [ThreadState],
6811) -> SaSint {
6812    if threads == 1 || m < 65_536 || thread_state.is_empty() {
6813        return renumber_distinct_lms_suffixes_32s_4k(sa, m, 1, 0, m as isize) - 1;
6814    }
6815
6816    let thread_count = usize::try_from(threads)
6817        .expect("threads must be non-negative")
6818        .min(thread_state.len());
6819    let block_stride = (m / thread_count as SaSint) & !15;
6820
6821    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6822        let block_start = thread as SaSint * block_stride;
6823        let block_size = if thread + 1 < thread_count {
6824            block_stride
6825        } else {
6826            m - block_start
6827        };
6828        state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6829    }
6830
6831    let mut count = 1;
6832    for thread in 0..thread_count {
6833        let block_start = thread as SaSint * block_stride;
6834        let block_size = if thread + 1 < thread_count {
6835            block_stride
6836        } else {
6837            m - block_start
6838        };
6839        renumber_distinct_lms_suffixes_32s_4k(
6840            sa,
6841            m,
6842            count,
6843            block_start as isize,
6844            block_size as isize,
6845        );
6846        count += thread_state[thread].count;
6847    }
6848
6849    count - 1
6850}
6851
6852#[allow(dead_code)]
6853fn mark_distinct_lms_suffixes_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6854    let half_n = n >> 1;
6855    if threads == 1 || n < 131_072 {
6856        mark_distinct_lms_suffixes_32s(sa, m, 0, half_n as isize);
6857        return;
6858    }
6859
6860    let thread_count = threads as usize;
6861    let block_stride = (half_n / threads) & !15;
6862    for thread in 0..thread_count {
6863        let block_start = thread as SaSint * block_stride;
6864        let block_size = if thread + 1 < thread_count {
6865            block_stride
6866        } else {
6867            half_n - block_start
6868        };
6869        mark_distinct_lms_suffixes_32s(sa, m, block_start as isize, block_size as isize);
6870    }
6871}
6872
6873#[allow(dead_code)]
6874fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6875    let half_n = n >> 1;
6876    if threads == 1 || n < 131_072 {
6877        clamp_lms_suffixes_length_32s(sa, m, 0, half_n as isize);
6878        return;
6879    }
6880
6881    let thread_count = threads as usize;
6882    let block_stride = (half_n / threads) & !15;
6883    for thread in 0..thread_count {
6884        let block_start = thread as SaSint * block_stride;
6885        let block_size = if thread + 1 < thread_count {
6886            block_stride
6887        } else {
6888            half_n - block_start
6889        };
6890        clamp_lms_suffixes_length_32s(sa, m, block_start as isize, block_size as isize);
6891    }
6892}
6893
6894#[allow(dead_code)]
6895fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6896    sa: &mut [SaSint],
6897    n: SaSint,
6898    m: SaSint,
6899    threads: SaSint,
6900    thread_state: &mut [ThreadState],
6901) -> SaSint {
6902    let m_usize = m as usize;
6903    let half_n = (n >> 1) as usize;
6904    sa[m_usize..m_usize + half_n].fill(0);
6905
6906    let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6907    if name < m {
6908        mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6909    }
6910
6911    name
6912}
6913
6914#[allow(dead_code)]
6915fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
6916    t: &[SaSint],
6917    sa: &mut [SaSint],
6918    n: SaSint,
6919    m: SaSint,
6920    threads: SaSint,
6921) -> SaSint {
6922    let m_usize = m as usize;
6923    let n_usize = n as usize;
6924
6925    gather_lms_suffixes_32s(t, sa, n);
6926    sa[m_usize..n_usize - m_usize].fill(0);
6927
6928    let mut i = n - m;
6929    let mut j = n - 1 - 64 - 3;
6930    while i < j {
6931        let s0 = (sa[i as usize] as SaUint >> 1) as usize;
6932        let s1 = (sa[(i + 1) as usize] as SaUint >> 1) as usize;
6933        let s2 = (sa[(i + 2) as usize] as SaUint >> 1) as usize;
6934        let s3 = (sa[(i + 3) as usize] as SaUint >> 1) as usize;
6935        sa[m_usize + s0] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
6936        sa[m_usize + s1] = sa[(i + 2) as usize] - sa[(i + 1) as usize] + 1 + SAINT_MIN;
6937        sa[m_usize + s2] = sa[(i + 3) as usize] - sa[(i + 2) as usize] + 1 + SAINT_MIN;
6938        sa[m_usize + s3] = sa[(i + 4) as usize] - sa[(i + 3) as usize] + 1 + SAINT_MIN;
6939        i += 4;
6940    }
6941
6942    j += 64 + 3;
6943    while i < j {
6944        let s = (sa[i as usize] as SaUint >> 1) as usize;
6945        sa[m_usize + s] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
6946        i += 1;
6947    }
6948
6949    let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
6950    sa[m_usize + tail] = 1 + SAINT_MIN;
6951
6952    clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);
6953
6954    let mut name = 1;
6955    if m_usize > 0 {
6956        let mut i = 1usize;
6957        let mut j = m_usize.saturating_sub(64 + 1);
6958        let mut p = sa[0] as usize;
6959        let mut plen = sa[m_usize + (p >> 1)];
6960        let mut pdiff = SAINT_MIN;
6961
6962        while i < j {
6963            let q = sa[i] as usize;
6964            let qlen = sa[m_usize + (q >> 1)];
6965            let mut qdiff = SAINT_MIN;
6966            if plen == qlen {
6967                let mut l = 0;
6968                while l < qlen as usize {
6969                    if t[p + l] != t[q + l] {
6970                        break;
6971                    }
6972                    l += 1;
6973                }
6974                qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
6975            }
6976            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
6977            name += SaSint::from(qdiff < 0);
6978
6979            p = sa[i + 1] as usize;
6980            plen = sa[m_usize + (p >> 1)];
6981            pdiff = SAINT_MIN;
6982            if qlen == plen {
6983                let mut l = 0;
6984                while l < plen as usize {
6985                    if t[q + l] != t[p + l] {
6986                        break;
6987                    }
6988                    l += 1;
6989                }
6990                pdiff = ((l as SaSint) - plen) & SAINT_MIN;
6991            }
6992            sa[m_usize + (q >> 1)] = name | (qdiff & pdiff);
6993            name += SaSint::from(pdiff < 0);
6994            i += 2;
6995        }
6996
6997        j = m_usize;
6998        while i < j {
6999            let q = sa[i] as usize;
7000            let qlen = sa[m_usize + (q >> 1)];
7001            let mut qdiff = SAINT_MIN;
7002            if plen == qlen {
7003                let mut l = 0;
7004                while l < plen as usize {
7005                    if t[p + l] != t[q + l] {
7006                        break;
7007                    }
7008                    l += 1;
7009                }
7010                qdiff = ((l as SaSint) - plen) & SAINT_MIN;
7011            }
7012            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
7013            name += SaSint::from(qdiff < 0);
7014            p = q;
7015            plen = qlen;
7016            pdiff = qdiff;
7017            i += 1;
7018        }
7019
7020        sa[m_usize + (p >> 1)] = name | pdiff;
7021        name += 1;
7022    }
7023
7024    if name <= m {
7025        mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
7026    }
7027
7028    name - 1
7029}
7030
7031#[allow(dead_code)]
7032fn renumber_unique_and_nonunique_lms_suffixes_32s(
7033    t: &mut [SaSint],
7034    sa: &mut [SaSint],
7035    m: SaSint,
7036    mut f: SaSint,
7037    omp_block_start: isize,
7038    omp_block_size: isize,
7039) -> SaSint {
7040    if omp_block_size <= 0 {
7041        return f;
7042    }
7043
7044    let m_usize = m as usize;
7045    let (sa_head, sam) = sa.split_at_mut(m_usize);
7046    let mut i = omp_block_start;
7047    let mut j = omp_block_start + omp_block_size - 128 - 3;
7048    while i < j {
7049        for offset in 0..4 {
7050            let idx = (i + offset) as usize;
7051            let p = sa_head[idx] as SaUint;
7052            let mut s = sam[(p >> 1) as usize];
7053            if s < 0 {
7054                t[p as usize] |= SAINT_MIN;
7055                f += 1;
7056                s = i as SaSint + offset as SaSint + SAINT_MIN + f;
7057            }
7058            sam[(p >> 1) as usize] = s - f;
7059        }
7060        i += 4;
7061    }
7062
7063    j += 128 + 3;
7064    while i < j {
7065        let p = sa_head[i as usize] as SaUint;
7066        let mut s = sam[(p >> 1) as usize];
7067        if s < 0 {
7068            t[p as usize] |= SAINT_MIN;
7069            f += 1;
7070            s = i as SaSint + SAINT_MIN + f;
7071        }
7072        sam[(p >> 1) as usize] = s - f;
7073        i += 1;
7074    }
7075
7076    f
7077}
7078
7079#[allow(dead_code)]
7080fn compact_unique_and_nonunique_lms_suffixes_32s(
7081    sa: &mut [SaSint],
7082    m: SaSint,
7083    pl: &mut isize,
7084    pr: &mut isize,
7085    omp_block_start: isize,
7086    omp_block_size: isize,
7087) {
7088    if omp_block_size <= 0 {
7089        return;
7090    }
7091
7092    let m_usize = m as usize;
7093    let source: Vec<SaSint> = sa
7094        [m_usize + omp_block_start as usize..m_usize + (omp_block_start + omp_block_size) as usize]
7095        .to_vec();
7096    let mut l = *pl - 1;
7097    let mut r = *pr - 1;
7098
7099    for &p in source.iter().rev() {
7100        sa[l as usize] = p & SAINT_MAX;
7101        l -= isize::from(p < 0);
7102
7103        sa[r as usize] = p.wrapping_sub(1);
7104        r -= isize::from(p > 0);
7105    }
7106
7107    *pl = l + 1;
7108    *pr = r + 1;
7109}
7110
7111#[allow(dead_code)]
7112fn count_unique_suffixes(
7113    sa: &[SaSint],
7114    m: SaSint,
7115    omp_block_start: isize,
7116    omp_block_size: isize,
7117) -> SaSint {
7118    let base = m as usize;
7119    let start = omp_block_start as usize;
7120    let end = start + omp_block_size as usize;
7121    let mut count = 0;
7122    for i in start..end {
7123        count += SaSint::from(sa[base + ((sa[i] as SaUint) >> 1) as usize] < 0);
7124    }
7125    count
7126}
7127
7128#[allow(dead_code)]
7129fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
7130    t: &mut [SaSint],
7131    sa: &mut [SaSint],
7132    m: SaSint,
7133    threads: SaSint,
7134) -> SaSint {
7135    if threads == 1 || m < 65_536 {
7136        return renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as isize);
7137    }
7138
7139    let thread_count = threads as usize;
7140    let block_stride = (m / threads) & !15;
7141    let mut counts = vec![0; thread_count];
7142
7143    for thread in 0..thread_count {
7144        let block_start = thread as SaSint * block_stride;
7145        let block_size = if thread + 1 < thread_count {
7146            block_stride
7147        } else {
7148            m - block_start
7149        };
7150        counts[thread] = count_unique_suffixes(sa, m, block_start as isize, block_size as isize);
7151    }
7152
7153    let mut f = 0;
7154    for thread in 0..thread_count {
7155        let block_start = thread as SaSint * block_stride;
7156        let block_size = if thread + 1 < thread_count {
7157            block_stride
7158        } else {
7159            m - block_start
7160        };
7161        renumber_unique_and_nonunique_lms_suffixes_32s(
7162            t,
7163            sa,
7164            m,
7165            f,
7166            block_start as isize,
7167            block_size as isize,
7168        );
7169        f += counts[thread];
7170    }
7171
7172    f
7173}
7174
7175#[allow(dead_code)]
7176fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
7177    sa: &mut [SaSint],
7178    n: SaSint,
7179    m: SaSint,
7180    fs: SaSint,
7181    f: SaSint,
7182    threads: SaSint,
7183) {
7184    let half_n = n >> 1;
7185    if threads == 1 || n < 131_072 || m >= fs {
7186        let mut l = m as isize;
7187        let mut r = (n + fs) as isize;
7188        compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n as isize);
7189    } else {
7190        let thread_count = threads as usize;
7191        let block_stride = (half_n / threads) & !15;
7192        let mut positions = vec![0isize; thread_count];
7193        let mut counts = vec![0isize; thread_count];
7194
7195        for thread in 0..thread_count {
7196            let block_start = thread as SaSint * block_stride;
7197            let block_size = if thread + 1 < thread_count {
7198                block_stride
7199            } else {
7200                half_n - block_start
7201            };
7202            let mut position = (m + half_n + block_start + block_size) as isize;
7203            let mut count = (m + block_start + block_size) as isize;
7204            compact_unique_and_nonunique_lms_suffixes_32s(
7205                sa,
7206                m,
7207                &mut position,
7208                &mut count,
7209                block_start as isize,
7210                block_size as isize,
7211            );
7212            positions[thread] = position;
7213            counts[thread] = count;
7214        }
7215
7216        let mut position = m as isize;
7217        for thread in (0..thread_count).rev() {
7218            let block_end = if thread + 1 < thread_count {
7219                block_stride * (thread as SaSint + 1)
7220            } else {
7221                half_n
7222            };
7223            let count = (m + half_n + block_end) as isize - positions[thread];
7224            if count > 0 {
7225                position -= count;
7226                let src = positions[thread] as usize;
7227                let dst = position as usize;
7228                sa.copy_within(src..src + count as usize, dst);
7229            }
7230        }
7231
7232        let mut position = (n + fs) as isize;
7233        for thread in (0..thread_count).rev() {
7234            let block_end = if thread + 1 < thread_count {
7235                block_stride * (thread as SaSint + 1)
7236            } else {
7237                half_n
7238            };
7239            let count = (m + block_end) as isize - counts[thread];
7240            if count > 0 {
7241                position -= count;
7242                let src = counts[thread] as usize;
7243                let dst = position as usize;
7244                sa.copy_within(src..src + count as usize, dst);
7245            }
7246        }
7247    }
7248
7249    let dst = (n + fs - m) as usize;
7250    let src = (m - f) as usize;
7251    sa.copy_within(src..src + f as usize, dst);
7252}
7253
7254#[allow(dead_code)]
7255fn compact_lms_suffixes_32s_omp(
7256    t: &mut [SaSint],
7257    sa: &mut [SaSint],
7258    n: SaSint,
7259    m: SaSint,
7260    fs: SaSint,
7261    threads: SaSint,
7262) -> SaSint {
7263    let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads);
7264    compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads);
7265    f
7266}
7267
7268#[allow(dead_code)]
7269fn merge_unique_lms_suffixes_32s(
7270    t: &mut [SaSint],
7271    sa: &mut [SaSint],
7272    n: SaSint,
7273    m: SaSint,
7274    l: isize,
7275    omp_block_start: isize,
7276    omp_block_size: isize,
7277) {
7278    let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7279    let mut tmp = sa[src_index] as isize;
7280    src_index += 1;
7281
7282    let mut i = omp_block_start;
7283    let mut j = omp_block_start + omp_block_size - 6;
7284    while i < j {
7285        let iu = i as usize;
7286
7287        let c0 = t[iu];
7288        if c0 < 0 {
7289            t[iu] = c0 & SAINT_MAX;
7290            sa[tmp as usize] = i as SaSint;
7291            i += 1;
7292            tmp = sa[src_index] as isize;
7293            src_index += 1;
7294        }
7295
7296        let c1 = t[(i + 1) as usize];
7297        if c1 < 0 {
7298            t[(i + 1) as usize] = c1 & SAINT_MAX;
7299            sa[tmp as usize] = i as SaSint + 1;
7300            i += 1;
7301            tmp = sa[src_index] as isize;
7302            src_index += 1;
7303        }
7304
7305        let c2 = t[(i + 2) as usize];
7306        if c2 < 0 {
7307            t[(i + 2) as usize] = c2 & SAINT_MAX;
7308            sa[tmp as usize] = i as SaSint + 2;
7309            i += 1;
7310            tmp = sa[src_index] as isize;
7311            src_index += 1;
7312        }
7313
7314        let c3 = t[(i + 3) as usize];
7315        if c3 < 0 {
7316            t[(i + 3) as usize] = c3 & SAINT_MAX;
7317            sa[tmp as usize] = i as SaSint + 3;
7318            i += 1;
7319            tmp = sa[src_index] as isize;
7320            src_index += 1;
7321        }
7322
7323        i += 4;
7324    }
7325
7326    j += 6;
7327    while i < j {
7328        let c = t[i as usize];
7329        if c < 0 {
7330            t[i as usize] = c & SAINT_MAX;
7331            sa[tmp as usize] = i as SaSint;
7332            i += 1;
7333            tmp = sa[src_index] as isize;
7334            src_index += 1;
7335        }
7336        i += 1;
7337    }
7338}
7339
7340#[allow(dead_code)]
7341fn merge_nonunique_lms_suffixes_32s(
7342    sa: &mut [SaSint],
7343    n: SaSint,
7344    m: SaSint,
7345    l: isize,
7346    omp_block_start: isize,
7347    omp_block_size: isize,
7348) {
7349    let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7350    let mut tmp = sa[src_index];
7351    src_index += 1;
7352
7353    let mut i = omp_block_start;
7354    let mut j = omp_block_start + omp_block_size - 3;
7355    while i < j {
7356        if sa[i as usize] == 0 {
7357            sa[i as usize] = tmp;
7358            tmp = sa[src_index];
7359            src_index += 1;
7360        }
7361        if sa[(i + 1) as usize] == 0 {
7362            sa[(i + 1) as usize] = tmp;
7363            tmp = sa[src_index];
7364            src_index += 1;
7365        }
7366        if sa[(i + 2) as usize] == 0 {
7367            sa[(i + 2) as usize] = tmp;
7368            tmp = sa[src_index];
7369            src_index += 1;
7370        }
7371        if sa[(i + 3) as usize] == 0 {
7372            sa[(i + 3) as usize] = tmp;
7373            tmp = sa[src_index];
7374            src_index += 1;
7375        }
7376        i += 4;
7377    }
7378
7379    j += 3;
7380    while i < j {
7381        if sa[i as usize] == 0 {
7382            sa[i as usize] = tmp;
7383            tmp = sa[src_index];
7384            src_index += 1;
7385        }
7386        i += 1;
7387    }
7388}
7389
7390#[allow(dead_code)]
7391fn merge_unique_lms_suffixes_32s_omp(
7392    t: &mut [SaSint],
7393    sa: &mut [SaSint],
7394    n: SaSint,
7395    m: SaSint,
7396    threads: SaSint,
7397) {
7398    if threads == 1 || n < 65_536 {
7399        merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as isize);
7400        return;
7401    }
7402
7403    let thread_count = threads as usize;
7404    let block_stride = (n / threads) & !15;
7405    let mut counts = vec![0; thread_count];
7406
7407    for thread in 0..thread_count {
7408        let block_start = thread as SaSint * block_stride;
7409        let block_size = if thread + 1 < thread_count {
7410            block_stride
7411        } else {
7412            n - block_start
7413        };
7414        counts[thread] = count_negative_marked_suffixes(t, block_start, block_size);
7415    }
7416
7417    let mut count = 0;
7418    for thread in 0..thread_count {
7419        let block_start = thread as SaSint * block_stride;
7420        let block_size = if thread + 1 < thread_count {
7421            block_stride
7422        } else {
7423            n - block_start
7424        };
7425        merge_unique_lms_suffixes_32s(
7426            t,
7427            sa,
7428            n,
7429            m,
7430            count as isize,
7431            block_start as isize,
7432            block_size as isize,
7433        );
7434        count += counts[thread];
7435    }
7436}
7437
7438#[allow(dead_code)]
7439fn merge_nonunique_lms_suffixes_32s_omp(
7440    sa: &mut [SaSint],
7441    n: SaSint,
7442    m: SaSint,
7443    f: SaSint,
7444    threads: SaSint,
7445) {
7446    if threads == 1 || m < 65_536 {
7447        merge_nonunique_lms_suffixes_32s(sa, n, m, f as isize, 0, m as isize);
7448        return;
7449    }
7450
7451    let thread_count = threads as usize;
7452    let block_stride = (m / threads) & !15;
7453    let mut counts = vec![0; thread_count];
7454
7455    for thread in 0..thread_count {
7456        let block_start = thread as SaSint * block_stride;
7457        let block_size = if thread + 1 < thread_count {
7458            block_stride
7459        } else {
7460            m - block_start
7461        };
7462        counts[thread] = count_zero_marked_suffixes(sa, block_start, block_size);
7463    }
7464
7465    let mut count = f;
7466    for thread in 0..thread_count {
7467        let block_start = thread as SaSint * block_stride;
7468        let block_size = if thread + 1 < thread_count {
7469            block_stride
7470        } else {
7471            m - block_start
7472        };
7473        merge_nonunique_lms_suffixes_32s(
7474            sa,
7475            n,
7476            m,
7477            count as isize,
7478            block_start as isize,
7479            block_size as isize,
7480        );
7481        count += counts[thread];
7482    }
7483}
7484
7485#[allow(dead_code)]
7486fn merge_compacted_lms_suffixes_32s_omp(
7487    t: &mut [SaSint],
7488    sa: &mut [SaSint],
7489    n: SaSint,
7490    m: SaSint,
7491    f: SaSint,
7492    threads: SaSint,
7493) {
7494    merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads);
7495    merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads);
7496}
7497
7498#[allow(dead_code)]
7499fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
7500    t: &mut [SaSint],
7501    sa: &mut [SaSint],
7502    n: SaSint,
7503    k: SaSint,
7504    m: SaSint,
7505    fs: SaSint,
7506    f: SaSint,
7507    buckets: &mut [SaSint],
7508    local_buckets: SaSint,
7509    threads: SaSint,
7510    thread_state: &mut [ThreadState],
7511) {
7512    if f > 0 {
7513        let dst = (n - m - 1) as usize;
7514        let src = (n + fs - m) as usize;
7515        sa.copy_within(src..src + f as usize, dst);
7516
7517        count_and_gather_compacted_lms_suffixes_32s_2k_omp(
7518            t,
7519            sa,
7520            n,
7521            k,
7522            buckets,
7523            local_buckets,
7524            threads,
7525            thread_state,
7526        );
7527        reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7528
7529        let dst = (n - m - 1 + f) as usize;
7530        sa.copy_within(0..(m - f) as usize, dst);
7531        sa[..m as usize].fill(0);
7532
7533        merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7534    } else {
7535        count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
7536        reconstruct_lms_suffixes_omp(sa, n, m, threads);
7537    }
7538}
7539
7540#[allow(dead_code)]
7541fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
7542    t: &mut [SaSint],
7543    sa: &mut [SaSint],
7544    n: SaSint,
7545    m: SaSint,
7546    fs: SaSint,
7547    f: SaSint,
7548    threads: SaSint,
7549) {
7550    if f > 0 {
7551        let dst = (n - m - 1) as usize;
7552        let src = (n + fs - m) as usize;
7553        sa.copy_within(src..src + f as usize, dst);
7554
7555        gather_compacted_lms_suffixes_32s(t, sa, n);
7556        reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7557
7558        let dst = (n - m - 1 + f) as usize;
7559        sa.copy_within(0..(m - f) as usize, dst);
7560        sa[..m as usize].fill(0);
7561
7562        merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7563    } else {
7564        gather_lms_suffixes_32s(t, sa, n);
7565        reconstruct_lms_suffixes_omp(sa, n, m, threads);
7566    }
7567}
7568
7569#[allow(dead_code)]
7570fn place_lms_suffixes_interval_16u(
7571    sa: &mut [SaSint],
7572    n: SaSint,
7573    mut m: SaSint,
7574    flags: SaSint,
7575    buckets: &mut [SaSint],
7576) {
7577    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7578        buckets[7 * ALPHABET_SIZE] -= 1;
7579    }
7580
7581    let mut j = n as isize;
7582    let mut c = ALPHABET_SIZE as isize - 2;
7583    while c >= 0 {
7584        let ci = c as usize;
7585        let l =
7586            buckets[buckets_index2(ci, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(ci, 1)];
7587        if l > 0 {
7588            let i = buckets[7 * ALPHABET_SIZE + ci] as isize;
7589            if j - i > 0 {
7590                sa[i as usize..j as usize].fill(0);
7591            }
7592
7593            m -= l;
7594            j = i - l as isize;
7595            let src = m as usize;
7596            let dst = j as usize;
7597            sa.copy_within(src..src + l as usize, dst);
7598        }
7599        c -= 1;
7600    }
7601
7602    sa[..j as usize].fill(0);
7603
7604    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7605        buckets[7 * ALPHABET_SIZE] += 1;
7606    }
7607}
7608
7609#[allow(dead_code)]
7610fn place_lms_suffixes_interval_32s_4k(
7611    sa: &mut [SaSint],
7612    n: SaSint,
7613    k: SaSint,
7614    mut m: SaSint,
7615    buckets: &[SaSint],
7616) {
7617    let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7618    let mut j = n as usize;
7619    let mut c = k - 2;
7620    while c >= 0 {
7621        let cu = c as usize;
7622        let l =
7623            buckets[buckets_index2(cu, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(cu, 1)];
7624        if l > 0 {
7625            let i = bucket_end[cu] as usize;
7626            if j > i {
7627                sa[i..j].fill(0);
7628            }
7629
7630            m -= l;
7631            let dst = i - l as usize;
7632            sa.copy_within(m as usize..m as usize + l as usize, dst);
7633            j = dst;
7634        }
7635        c -= 1;
7636    }
7637
7638    sa[..j].fill(0);
7639}
7640
7641#[allow(dead_code)]
7642fn place_lms_suffixes_interval_32s_2k(
7643    sa: &mut [SaSint],
7644    n: SaSint,
7645    k: SaSint,
7646    mut m: SaSint,
7647    buckets: &[SaSint],
7648) {
7649    let mut j = n as usize;
7650    if k > 1 {
7651        let mut c = buckets_index2(k as usize - 2, 0) as isize;
7652        while c >= buckets_index2(0, 0) as isize {
7653            let cu = c as usize;
7654            let l = buckets[cu + buckets_index2(1, 1)] - buckets[cu + buckets_index2(0, 1)];
7655            if l > 0 {
7656                let i = buckets[cu] as usize;
7657                if j > i {
7658                    sa[i..j].fill(0);
7659                }
7660
7661                m -= l;
7662                let dst = i - l as usize;
7663                sa.copy_within(m as usize..m as usize + l as usize, dst);
7664                j = dst;
7665            }
7666            c -= buckets_index2(1, 0) as isize;
7667        }
7668    }
7669
7670    sa[..j].fill(0);
7671}
7672
7673#[allow(dead_code)]
7674fn place_lms_suffixes_interval_32s_1k(
7675    t: &[SaSint],
7676    sa: &mut [SaSint],
7677    k: SaSint,
7678    m: SaSint,
7679    buckets: &[SaSint],
7680) {
7681    let mut c = k - 1;
7682    let mut l = buckets[c as usize] as usize;
7683
7684    let mut i = m - 1;
7685    while i >= 0 {
7686        let p = sa[i as usize] as usize;
7687        if t[p] != c {
7688            c = t[p];
7689            let bucket_pos = buckets[c as usize] as usize;
7690            if l > bucket_pos {
7691                sa[bucket_pos..l].fill(0);
7692            }
7693            l = bucket_pos;
7694        }
7695        l -= 1;
7696        sa[l] = p as SaSint;
7697        i -= 1;
7698    }
7699
7700    sa[..l].fill(0);
7701}
7702
7703#[allow(dead_code)]
7704fn place_lms_suffixes_histogram_32s_6k(
7705    sa: &mut [SaSint],
7706    n: SaSint,
7707    k: SaSint,
7708    mut m: SaSint,
7709    buckets: &[SaSint],
7710) {
7711    let bucket_end = &buckets[5 * k as usize..6 * k as usize];
7712    let mut j = n as usize;
7713    let mut c = k - 2;
7714    while c >= 0 {
7715        let l = buckets[buckets_index4(c as usize, 1)] as usize;
7716        if l > 0 {
7717            let i = bucket_end[c as usize] as usize;
7718            if j > i {
7719                sa[i..j].fill(0);
7720            }
7721            let dst = i - l;
7722            m -= l as SaSint;
7723            sa.copy_within(m as usize..m as usize + l, dst);
7724            j = dst;
7725        }
7726        c -= 1;
7727    }
7728    sa[..j].fill(0);
7729}
7730
7731#[allow(dead_code)]
7732fn place_lms_suffixes_histogram_32s_4k(
7733    sa: &mut [SaSint],
7734    n: SaSint,
7735    k: SaSint,
7736    mut m: SaSint,
7737    buckets: &[SaSint],
7738) {
7739    let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7740    let mut j = n as usize;
7741    let mut c = k - 2;
7742    while c >= 0 {
7743        let l = buckets[buckets_index2(c as usize, 1)] as usize;
7744        if l > 0 {
7745            let i = bucket_end[c as usize] as usize;
7746            if j > i {
7747                sa[i..j].fill(0);
7748            }
7749            let dst = i - l;
7750            m -= l as SaSint;
7751            sa.copy_within(m as usize..m as usize + l, dst);
7752            j = dst;
7753        }
7754        c -= 1;
7755    }
7756    sa[..j].fill(0);
7757}
7758
7759#[allow(dead_code)]
7760fn place_lms_suffixes_histogram_32s_2k(
7761    sa: &mut [SaSint],
7762    n: SaSint,
7763    k: SaSint,
7764    mut m: SaSint,
7765    buckets: &[SaSint],
7766) {
7767    let mut j = n as usize;
7768    if k > 1 {
7769        let mut c = buckets_index2(k as usize - 2, 0) as isize;
7770        while c >= buckets_index2(0, 0) as isize {
7771            let cu = c as usize;
7772            let l = buckets[cu + buckets_index2(0, 1)] as usize;
7773            if l > 0 {
7774                let i = buckets[cu] as usize;
7775                if j > i {
7776                    sa[i..j].fill(0);
7777                }
7778                let dst = i - l;
7779                m -= l as SaSint;
7780                sa.copy_within(m as usize..m as usize + l, dst);
7781                j = dst;
7782            }
7783            c -= buckets_index2(1, 0) as isize;
7784        }
7785    }
7786    sa[..j].fill(0);
7787}
7788
7789#[allow(dead_code)]
7790fn final_bwt_scan_left_to_right_16u_block_prepare(
7791    t: &[u16],
7792    sa: &mut [SaSint],
7793    k: SaSint,
7794    buckets: &mut [SaSint],
7795    cache: &mut [ThreadCache],
7796    omp_block_start: SaSint,
7797    omp_block_size: SaSint,
7798) -> SaSint {
7799    buckets[..k as usize].fill(0);
7800    let mut count = 0usize;
7801    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7802        let mut p = sa[i];
7803        sa[i] = p & SAINT_MAX;
7804        if p > 0 {
7805            p -= 1;
7806            let c = t[p as usize] as usize;
7807            sa[i] = c as SaSint | SAINT_MIN;
7808            buckets[c] += 1;
7809            cache[count].symbol = c as SaSint;
7810            cache[count].index = p
7811                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7812                    << (SAINT_BIT - 1));
7813            count += 1;
7814        }
7815    }
7816    count as SaSint
7817}
7818
7819#[allow(dead_code)]
7820fn final_sorting_scan_left_to_right_16u_block_prepare(
7821    t: &[u16],
7822    sa: &mut [SaSint],
7823    k: SaSint,
7824    buckets: &mut [SaSint],
7825    cache: &mut [ThreadCache],
7826    omp_block_start: SaSint,
7827    omp_block_size: SaSint,
7828) -> SaSint {
7829    buckets[..k as usize].fill(0);
7830    let mut count = 0usize;
7831    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7832        let mut p = sa[i];
7833        sa[i] = p ^ SAINT_MIN;
7834        if p > 0 {
7835            p -= 1;
7836            let c = t[p as usize] as usize;
7837            buckets[c] += 1;
7838            cache[count].symbol = c as SaSint;
7839            cache[count].index = p
7840                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7841                    << (SAINT_BIT - 1));
7842            count += 1;
7843        }
7844    }
7845    count as SaSint
7846}
7847
7848#[allow(dead_code)]
7849fn final_order_scan_left_to_right_16u_block_place(
7850    sa: &mut [SaSint],
7851    buckets: &mut [SaSint],
7852    cache: &[ThreadCache],
7853    count: SaSint,
7854) {
7855    for entry in cache.iter().take(count as usize) {
7856        let c = entry.symbol as usize;
7857        let dst = buckets[c] as usize;
7858        sa[dst] = entry.index;
7859        buckets[c] += 1;
7860    }
7861}
7862
7863#[allow(dead_code)]
7864fn final_bwt_aux_scan_left_to_right_16u_block_place(
7865    sa: &mut [SaSint],
7866    rm: SaSint,
7867    i_sample: &mut [SaSint],
7868    buckets: &mut [SaSint],
7869    cache: &[ThreadCache],
7870    count: SaSint,
7871) {
7872    for entry in cache.iter().take(count as usize) {
7873        let c = entry.symbol as usize;
7874        let dst = buckets[c] as usize;
7875        sa[dst] = entry.index;
7876        buckets[c] += 1;
7877        let p = entry.index & SAINT_MAX;
7878        if (p & rm) == 0 {
7879            i_sample[(p / (rm + 1)) as usize] = buckets[c];
7880        }
7881    }
7882}
7883
7884#[allow(dead_code)]
7885fn final_bwt_scan_left_to_right_16u_block_omp(
7886    t: &[u16],
7887    sa: &mut [SaSint],
7888    k: SaSint,
7889    induction_bucket: &mut [SaSint],
7890    block_start: SaSint,
7891    block_size: SaSint,
7892    threads: SaSint,
7893    thread_state: &mut [ThreadState],
7894) {
7895    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7896        usize::try_from(threads)
7897            .expect("threads must be non-negative")
7898            .min(thread_state.len())
7899    } else {
7900        1
7901    };
7902    if thread_count <= 1 {
7903        final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
7904        return;
7905    }
7906
7907    let k_usize = usize::try_from(k).expect("k must be non-negative");
7908    let block_stride = (block_size / thread_count as SaSint) & !15;
7909
7910    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7911        let local_start = thread as SaSint * block_stride;
7912        let local_size = if thread + 1 < thread_count {
7913            block_stride
7914        } else {
7915            block_size - local_start
7916        };
7917        state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7918            t,
7919            sa,
7920            k,
7921            &mut state.buckets[..k_usize],
7922            &mut state.cache,
7923            block_start + local_start,
7924            local_size,
7925        );
7926    }
7927
7928    for state in thread_state.iter_mut().take(thread_count) {
7929        for c in 0..k_usize {
7930            let a = induction_bucket[c];
7931            let b = state.buckets[c];
7932            induction_bucket[c] = a + b;
7933            state.buckets[c] = a;
7934        }
7935    }
7936
7937    for state in thread_state.iter_mut().take(thread_count) {
7938        final_order_scan_left_to_right_16u_block_place(
7939            sa,
7940            &mut state.buckets[..k_usize],
7941            &state.cache,
7942            state.count,
7943        );
7944    }
7945}
7946
7947#[allow(dead_code)]
7948fn final_bwt_aux_scan_left_to_right_16u_block_omp(
7949    t: &[u16],
7950    sa: &mut [SaSint],
7951    k: SaSint,
7952    rm: SaSint,
7953    i_sample: &mut [SaSint],
7954    induction_bucket: &mut [SaSint],
7955    block_start: SaSint,
7956    block_size: SaSint,
7957    threads: SaSint,
7958    thread_state: &mut [ThreadState],
7959) {
7960    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7961        usize::try_from(threads)
7962            .expect("threads must be non-negative")
7963            .min(thread_state.len())
7964    } else {
7965        1
7966    };
7967    if thread_count <= 1 {
7968        final_bwt_aux_scan_left_to_right_16u(
7969            t,
7970            sa,
7971            rm,
7972            i_sample,
7973            induction_bucket,
7974            block_start,
7975            block_size,
7976        );
7977        return;
7978    }
7979
7980    let k_usize = usize::try_from(k).expect("k must be non-negative");
7981    let block_stride = (block_size / thread_count as SaSint) & !15;
7982
7983    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7984        let local_start = thread as SaSint * block_stride;
7985        let local_size = if thread + 1 < thread_count {
7986            block_stride
7987        } else {
7988            block_size - local_start
7989        };
7990        state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7991            t,
7992            sa,
7993            k,
7994            &mut state.buckets[..k_usize],
7995            &mut state.cache,
7996            block_start + local_start,
7997            local_size,
7998        );
7999    }
8000
8001    for state in thread_state.iter_mut().take(thread_count) {
8002        for c in 0..k_usize {
8003            let a = induction_bucket[c];
8004            let b = state.buckets[c];
8005            induction_bucket[c] = a + b;
8006            state.buckets[c] = a;
8007        }
8008    }
8009
8010    for state in thread_state.iter_mut().take(thread_count) {
8011        final_bwt_aux_scan_left_to_right_16u_block_place(
8012            sa,
8013            rm,
8014            i_sample,
8015            &mut state.buckets[..k_usize],
8016            &state.cache,
8017            state.count,
8018        );
8019    }
8020}
8021
8022#[allow(dead_code)]
8023fn final_sorting_scan_left_to_right_16u_block_omp(
8024    t: &[u16],
8025    sa: &mut [SaSint],
8026    k: SaSint,
8027    induction_bucket: &mut [SaSint],
8028    block_start: SaSint,
8029    block_size: SaSint,
8030    threads: SaSint,
8031    thread_state: &mut [ThreadState],
8032) {
8033    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8034        usize::try_from(threads)
8035            .expect("threads must be non-negative")
8036            .min(thread_state.len())
8037    } else {
8038        1
8039    };
8040    if thread_count <= 1 {
8041        final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
8042        return;
8043    }
8044
8045    let k_usize = usize::try_from(k).expect("k must be non-negative");
8046    let block_stride = (block_size / thread_count as SaSint) & !15;
8047
8048    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8049        let local_start = thread as SaSint * block_stride;
8050        let local_size = if thread + 1 < thread_count {
8051            block_stride
8052        } else {
8053            block_size - local_start
8054        };
8055        state.count = final_sorting_scan_left_to_right_16u_block_prepare(
8056            t,
8057            sa,
8058            k,
8059            &mut state.buckets[..k_usize],
8060            &mut state.cache,
8061            block_start + local_start,
8062            local_size,
8063        );
8064    }
8065
8066    for state in thread_state.iter_mut().take(thread_count) {
8067        for c in 0..k_usize {
8068            let a = induction_bucket[c];
8069            let b = state.buckets[c];
8070            induction_bucket[c] = a + b;
8071            state.buckets[c] = a;
8072        }
8073    }
8074
8075    for state in thread_state.iter_mut().take(thread_count) {
8076        final_order_scan_left_to_right_16u_block_place(
8077            sa,
8078            &mut state.buckets[..k_usize],
8079            &state.cache,
8080            state.count,
8081        );
8082    }
8083}
8084
8085#[allow(dead_code)]
8086fn final_bwt_scan_left_to_right_16u_omp(
8087    t: &[u16],
8088    sa: &mut [SaSint],
8089    n: SaSint,
8090    k: SaSint,
8091    induction_bucket: &mut [SaSint],
8092    threads: SaSint,
8093) {
8094    let c = t[(n - 1) as usize] as usize;
8095    let dst = induction_bucket[c] as usize;
8096    induction_bucket[c] += 1;
8097    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8098        SAINT_MIN
8099    } else {
8100        0
8101    };
8102    sa[dst] = (n - 1) | mark;
8103
8104    if threads == 1 || n < 65536 {
8105        final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8106    } else {
8107        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8108        let mut block_start = 0;
8109        while block_start < n {
8110            if sa[block_start as usize] == 0 {
8111                block_start += 1;
8112            } else {
8113                let mut block_end =
8114                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8115                if block_end > n {
8116                    block_end = n;
8117                }
8118                let mut block_scan_end = block_start + 1;
8119                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8120                    block_scan_end += 1;
8121                }
8122                let block_size = block_scan_end - block_start;
8123                if block_size < 32 {
8124                    while block_start < block_scan_end {
8125                        let mut p = sa[block_start as usize];
8126                        sa[block_start as usize] = p & SAINT_MAX;
8127                        if p > 0 {
8128                            p -= 1;
8129                            let c = t[p as usize] as usize;
8130                            sa[block_start as usize] = c as SaSint | SAINT_MIN;
8131                            let dst = induction_bucket[c] as usize;
8132                            induction_bucket[c] += 1;
8133                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8134                                SAINT_MIN
8135                            } else {
8136                                0
8137                            };
8138                            sa[dst] = p | mark;
8139                        }
8140                        block_start += 1;
8141                    }
8142                } else {
8143                    final_bwt_scan_left_to_right_16u_block_omp(
8144                        t,
8145                        sa,
8146                        k,
8147                        induction_bucket,
8148                        block_start,
8149                        block_size,
8150                        threads,
8151                        &mut thread_state,
8152                    );
8153                    block_start = block_scan_end;
8154                }
8155            }
8156        }
8157    }
8158}
8159
8160#[allow(dead_code)]
8161fn final_bwt_aux_scan_left_to_right_16u_omp(
8162    t: &[u16],
8163    sa: &mut [SaSint],
8164    n: SaSint,
8165    k: SaSint,
8166    rm: SaSint,
8167    i_sample: &mut [SaSint],
8168    induction_bucket: &mut [SaSint],
8169    threads: SaSint,
8170) {
8171    let c = t[(n - 1) as usize] as usize;
8172    let dst = induction_bucket[c] as usize;
8173    induction_bucket[c] += 1;
8174    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8175        SAINT_MIN
8176    } else {
8177        0
8178    };
8179    sa[dst] = (n - 1) | mark;
8180
8181    if ((n - 1) & rm) == 0 {
8182        i_sample[((n - 1) / (rm + 1)) as usize] = induction_bucket[c];
8183    }
8184
8185    if threads == 1 || n < 65536 {
8186        final_bwt_aux_scan_left_to_right_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8187    } else {
8188        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8189        let mut block_start = 0;
8190        while block_start < n {
8191            if sa[block_start as usize] == 0 {
8192                block_start += 1;
8193            } else {
8194                let mut block_end =
8195                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8196                if block_end > n {
8197                    block_end = n;
8198                }
8199                let mut block_scan_end = block_start + 1;
8200                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8201                    block_scan_end += 1;
8202                }
8203                let block_size = block_scan_end - block_start;
8204                if block_size < 32 {
8205                    while block_start < block_scan_end {
8206                        let mut p = sa[block_start as usize];
8207                        sa[block_start as usize] = p & SAINT_MAX;
8208                        if p > 0 {
8209                            p -= 1;
8210                            let c = t[p as usize] as usize;
8211                            sa[block_start as usize] = c as SaSint | SAINT_MIN;
8212                            let dst = induction_bucket[c] as usize;
8213                            induction_bucket[c] += 1;
8214                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8215                                SAINT_MIN
8216                            } else {
8217                                0
8218                            };
8219                            sa[dst] = p | mark;
8220                            if (p & rm) == 0 {
8221                                i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
8222                            }
8223                        }
8224                        block_start += 1;
8225                    }
8226                } else {
8227                    final_bwt_aux_scan_left_to_right_16u_block_omp(
8228                        t,
8229                        sa,
8230                        k,
8231                        rm,
8232                        i_sample,
8233                        induction_bucket,
8234                        block_start,
8235                        block_size,
8236                        threads,
8237                        &mut thread_state,
8238                    );
8239                    block_start = block_scan_end;
8240                }
8241            }
8242        }
8243    }
8244}
8245
8246#[allow(dead_code)]
8247fn final_sorting_scan_left_to_right_16u_omp(
8248    t: &[u16],
8249    sa: &mut [SaSint],
8250    n: SaSint,
8251    k: SaSint,
8252    induction_bucket: &mut [SaSint],
8253    threads: SaSint,
8254) {
8255    let c = t[(n - 1) as usize] as usize;
8256    let dst = induction_bucket[c] as usize;
8257    induction_bucket[c] += 1;
8258    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8259        SAINT_MIN
8260    } else {
8261        0
8262    };
8263    sa[dst] = (n - 1) | mark;
8264
8265    if threads == 1 || n < 65536 {
8266        final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8267    } else {
8268        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8269        let mut block_start = 0;
8270        while block_start < n {
8271            if sa[block_start as usize] == 0 {
8272                block_start += 1;
8273            } else {
8274                let mut block_end =
8275                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8276                if block_end > n {
8277                    block_end = n;
8278                }
8279                let mut block_scan_end = block_start + 1;
8280                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8281                    block_scan_end += 1;
8282                }
8283                let block_size = block_scan_end - block_start;
8284                if block_size < 32 {
8285                    while block_start < block_scan_end {
8286                        let mut p = sa[block_start as usize];
8287                        sa[block_start as usize] = p ^ SAINT_MIN;
8288                        if p > 0 {
8289                            p -= 1;
8290                            let c = t[p as usize] as usize;
8291                            let dst = induction_bucket[c] as usize;
8292                            induction_bucket[c] += 1;
8293                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8294                                SAINT_MIN
8295                            } else {
8296                                0
8297                            };
8298                            sa[dst] = p | mark;
8299                        }
8300                        block_start += 1;
8301                    }
8302                } else {
8303                    final_sorting_scan_left_to_right_16u_block_omp(
8304                        t,
8305                        sa,
8306                        k,
8307                        induction_bucket,
8308                        block_start,
8309                        block_size,
8310                        threads,
8311                        &mut thread_state,
8312                    );
8313                    block_start = block_scan_end;
8314                }
8315            }
8316        }
8317    }
8318}
8319
8320#[allow(dead_code)]
8321fn final_bwt_scan_right_to_left_16u_block_prepare(
8322    t: &[u16],
8323    sa: &mut [SaSint],
8324    k: SaSint,
8325    buckets: &mut [SaSint],
8326    cache: &mut [ThreadCache],
8327    omp_block_start: SaSint,
8328    omp_block_size: SaSint,
8329) -> SaSint {
8330    buckets[..k as usize].fill(0);
8331    let mut count = 0usize;
8332    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8333        let mut p = sa[i];
8334        sa[i] = p & SAINT_MAX;
8335        if p > 0 {
8336            p -= 1;
8337            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8338            let c1 = t[p as usize];
8339            sa[i] = c1 as SaSint;
8340            buckets[c1 as usize] += 1;
8341            cache[count].symbol = c1 as SaSint;
8342            cache[count].index = if c0 <= c1 {
8343                p
8344            } else {
8345                c0 as SaSint | SAINT_MIN
8346            };
8347            count += 1;
8348        }
8349    }
8350    count as SaSint
8351}
8352
8353#[allow(dead_code)]
8354fn final_bwt_aux_scan_right_to_left_16u_block_prepare(
8355    t: &[u16],
8356    sa: &mut [SaSint],
8357    k: SaSint,
8358    buckets: &mut [SaSint],
8359    cache: &mut [ThreadCache],
8360    omp_block_start: SaSint,
8361    omp_block_size: SaSint,
8362) -> SaSint {
8363    buckets[..k as usize].fill(0);
8364    let mut count = 0usize;
8365    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8366        let mut p = sa[i];
8367        sa[i] = p & SAINT_MAX;
8368        if p > 0 {
8369            p -= 1;
8370            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8371            let c1 = t[p as usize];
8372            sa[i] = c1 as SaSint;
8373            buckets[c1 as usize] += 1;
8374            cache[count].symbol = c1 as SaSint;
8375            cache[count].index = if c0 <= c1 {
8376                p
8377            } else {
8378                c0 as SaSint | SAINT_MIN
8379            };
8380            cache[count + 1].index = p;
8381            count += 2;
8382        }
8383    }
8384    count as SaSint
8385}
8386
8387#[allow(dead_code)]
8388fn final_sorting_scan_right_to_left_16u_block_prepare(
8389    t: &[u16],
8390    sa: &mut [SaSint],
8391    k: SaSint,
8392    buckets: &mut [SaSint],
8393    cache: &mut [ThreadCache],
8394    omp_block_start: SaSint,
8395    omp_block_size: SaSint,
8396) -> SaSint {
8397    buckets[..k as usize].fill(0);
8398    let mut count = 0usize;
8399    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8400        let mut p = sa[i];
8401        sa[i] = p & SAINT_MAX;
8402        if p > 0 {
8403            p -= 1;
8404            let c = t[p as usize] as usize;
8405            buckets[c] += 1;
8406            cache[count].symbol = c as SaSint;
8407            cache[count].index = p
8408                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] > t[p as usize]) as SaSint)
8409                    << (SAINT_BIT - 1));
8410            count += 1;
8411        }
8412    }
8413    count as SaSint
8414}
8415
8416#[allow(dead_code)]
8417fn final_order_scan_right_to_left_16u_block_place(
8418    sa: &mut [SaSint],
8419    buckets: &mut [SaSint],
8420    cache: &[ThreadCache],
8421    count: SaSint,
8422) {
8423    for entry in cache.iter().take(count as usize) {
8424        let c = entry.symbol as usize;
8425        buckets[c] -= 1;
8426        sa[buckets[c] as usize] = entry.index;
8427    }
8428}
8429
8430#[allow(dead_code)]
8431fn final_gsa_scan_right_to_left_16u_block_place(
8432    sa: &mut [SaSint],
8433    buckets: &mut [SaSint],
8434    cache: &[ThreadCache],
8435    count: SaSint,
8436) {
8437    for entry in cache.iter().take(count as usize) {
8438        let c = entry.symbol as usize;
8439        if c > 0 {
8440            buckets[c] -= 1;
8441            sa[buckets[c] as usize] = entry.index;
8442        }
8443    }
8444}
8445
8446#[allow(dead_code)]
8447fn final_bwt_aux_scan_right_to_left_16u_block_place(
8448    sa: &mut [SaSint],
8449    rm: SaSint,
8450    i_sample: &mut [SaSint],
8451    buckets: &mut [SaSint],
8452    cache: &[ThreadCache],
8453    count: SaSint,
8454) {
8455    let mut i = 0usize;
8456    while i < count as usize {
8457        let c = cache[i].symbol as usize;
8458        buckets[c] -= 1;
8459        sa[buckets[c] as usize] = cache[i].index;
8460        let p = cache[i + 1].index;
8461        if (p & rm) == 0 {
8462            i_sample[(p / (rm + 1)) as usize] = buckets[c] + 1;
8463        }
8464        i += 2;
8465    }
8466}
8467
8468#[allow(dead_code)]
8469fn final_bwt_scan_right_to_left_16u_block_omp(
8470    t: &[u16],
8471    sa: &mut [SaSint],
8472    k: SaSint,
8473    induction_bucket: &mut [SaSint],
8474    block_start: SaSint,
8475    block_size: SaSint,
8476    threads: SaSint,
8477    thread_state: &mut [ThreadState],
8478) -> SaSint {
8479    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8480        usize::try_from(threads)
8481            .expect("threads must be non-negative")
8482            .min(thread_state.len())
8483    } else {
8484        1
8485    };
8486    if thread_count <= 1 {
8487        return final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8488    }
8489
8490    let k_usize = usize::try_from(k).expect("k must be non-negative");
8491    let block_stride = (block_size / thread_count as SaSint) & !15;
8492
8493    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8494        let local_start = thread as SaSint * block_stride;
8495        let local_size = if thread + 1 < thread_count {
8496            block_stride
8497        } else {
8498            block_size - local_start
8499        };
8500        state.count = final_bwt_scan_right_to_left_16u_block_prepare(
8501            t,
8502            sa,
8503            k,
8504            &mut state.buckets[..k_usize],
8505            &mut state.cache,
8506            block_start + local_start,
8507            local_size,
8508        );
8509    }
8510
8511    for state in thread_state.iter_mut().take(thread_count).rev() {
8512        for c in 0..k_usize {
8513            let a = induction_bucket[c];
8514            let b = state.buckets[c];
8515            induction_bucket[c] = a - b;
8516            state.buckets[c] = a;
8517        }
8518    }
8519
8520    for state in thread_state.iter_mut().take(thread_count) {
8521        final_order_scan_right_to_left_16u_block_place(
8522            sa,
8523            &mut state.buckets[..k_usize],
8524            &state.cache,
8525            state.count,
8526        );
8527    }
8528
8529    -1
8530}
8531
8532#[allow(dead_code)]
8533fn final_bwt_aux_scan_right_to_left_16u_block_omp(
8534    t: &[u16],
8535    sa: &mut [SaSint],
8536    k: SaSint,
8537    rm: SaSint,
8538    i_sample: &mut [SaSint],
8539    induction_bucket: &mut [SaSint],
8540    block_start: SaSint,
8541    block_size: SaSint,
8542    threads: SaSint,
8543    thread_state: &mut [ThreadState],
8544) {
8545    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8546        usize::try_from(threads)
8547            .expect("threads must be non-negative")
8548            .min(thread_state.len())
8549    } else {
8550        1
8551    };
8552    if thread_count <= 1 {
8553        final_bwt_aux_scan_right_to_left_16u(
8554            t,
8555            sa,
8556            rm,
8557            i_sample,
8558            induction_bucket,
8559            block_start,
8560            block_size,
8561        );
8562        return;
8563    }
8564
8565    let k_usize = usize::try_from(k).expect("k must be non-negative");
8566    let block_stride = (block_size / thread_count as SaSint) & !15;
8567
8568    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8569        let local_start = thread as SaSint * block_stride;
8570        let local_size = if thread + 1 < thread_count {
8571            block_stride
8572        } else {
8573            block_size - local_start
8574        };
8575        state.count = final_bwt_aux_scan_right_to_left_16u_block_prepare(
8576            t,
8577            sa,
8578            k,
8579            &mut state.buckets[..k_usize],
8580            &mut state.cache,
8581            block_start + local_start,
8582            local_size,
8583        );
8584    }
8585
8586    for state in thread_state.iter_mut().take(thread_count).rev() {
8587        for c in 0..k_usize {
8588            let a = induction_bucket[c];
8589            let b = state.buckets[c];
8590            induction_bucket[c] = a - b;
8591            state.buckets[c] = a;
8592        }
8593    }
8594
8595    for state in thread_state.iter_mut().take(thread_count) {
8596        final_bwt_aux_scan_right_to_left_16u_block_place(
8597            sa,
8598            rm,
8599            i_sample,
8600            &mut state.buckets[..k_usize],
8601            &state.cache,
8602            state.count,
8603        );
8604    }
8605}
8606
8607#[allow(dead_code)]
8608fn final_sorting_scan_right_to_left_16u_block_omp(
8609    t: &[u16],
8610    sa: &mut [SaSint],
8611    k: SaSint,
8612    induction_bucket: &mut [SaSint],
8613    block_start: SaSint,
8614    block_size: SaSint,
8615    threads: SaSint,
8616    thread_state: &mut [ThreadState],
8617) {
8618    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8619        usize::try_from(threads)
8620            .expect("threads must be non-negative")
8621            .min(thread_state.len())
8622    } else {
8623        1
8624    };
8625    if thread_count <= 1 {
8626        final_sorting_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8627        return;
8628    }
8629
8630    let k_usize = usize::try_from(k).expect("k must be non-negative");
8631    let block_stride = (block_size / thread_count as SaSint) & !15;
8632
8633    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8634        let local_start = thread as SaSint * block_stride;
8635        let local_size = if thread + 1 < thread_count {
8636            block_stride
8637        } else {
8638            block_size - local_start
8639        };
8640        state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8641            t,
8642            sa,
8643            k,
8644            &mut state.buckets[..k_usize],
8645            &mut state.cache,
8646            block_start + local_start,
8647            local_size,
8648        );
8649    }
8650
8651    for state in thread_state.iter_mut().take(thread_count).rev() {
8652        for c in 0..k_usize {
8653            let a = induction_bucket[c];
8654            let b = state.buckets[c];
8655            induction_bucket[c] = a - b;
8656            state.buckets[c] = a;
8657        }
8658    }
8659
8660    for state in thread_state.iter_mut().take(thread_count) {
8661        final_order_scan_right_to_left_16u_block_place(
8662            sa,
8663            &mut state.buckets[..k_usize],
8664            &state.cache,
8665            state.count,
8666        );
8667    }
8668}
8669
8670#[allow(dead_code)]
8671fn final_gsa_scan_right_to_left_16u_block_omp(
8672    t: &[u16],
8673    sa: &mut [SaSint],
8674    k: SaSint,
8675    induction_bucket: &mut [SaSint],
8676    block_start: SaSint,
8677    block_size: SaSint,
8678    threads: SaSint,
8679    thread_state: &mut [ThreadState],
8680) {
8681    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8682        usize::try_from(threads)
8683            .expect("threads must be non-negative")
8684            .min(thread_state.len())
8685    } else {
8686        1
8687    };
8688    if thread_count <= 1 {
8689        final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8690        return;
8691    }
8692
8693    let k_usize = usize::try_from(k).expect("k must be non-negative");
8694    let block_stride = (block_size / thread_count as SaSint) & !15;
8695
8696    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8697        let local_start = thread as SaSint * block_stride;
8698        let local_size = if thread + 1 < thread_count {
8699            block_stride
8700        } else {
8701            block_size - local_start
8702        };
8703        state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8704            t,
8705            sa,
8706            k,
8707            &mut state.buckets[..k_usize],
8708            &mut state.cache,
8709            block_start + local_start,
8710            local_size,
8711        );
8712    }
8713
8714    for state in thread_state.iter_mut().take(thread_count).rev() {
8715        for c in 0..k_usize {
8716            let a = induction_bucket[c];
8717            let b = state.buckets[c];
8718            induction_bucket[c] = a - b;
8719            state.buckets[c] = a;
8720        }
8721    }
8722
8723    for state in thread_state.iter_mut().take(thread_count) {
8724        final_gsa_scan_right_to_left_16u_block_place(
8725            sa,
8726            &mut state.buckets[..k_usize],
8727            &state.cache,
8728            state.count,
8729        );
8730    }
8731}
8732
8733#[allow(dead_code)]
8734fn final_bwt_scan_right_to_left_16u_omp(
8735    t: &[u16],
8736    sa: &mut [SaSint],
8737    n: SaSint,
8738    k: SaSint,
8739    induction_bucket: &mut [SaSint],
8740    threads: SaSint,
8741) -> SaSint {
8742    let mut index = -1;
8743
8744    if threads == 1 || n < 65536 {
8745        index = final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, 0, n);
8746    } else {
8747        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8748        let mut block_start = n - 1;
8749        while block_start >= 0 {
8750            if sa[block_start as usize] == 0 {
8751                index = block_start;
8752                block_start -= 1;
8753            } else {
8754                let mut block_max_end =
8755                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8756                if block_max_end < 0 {
8757                    block_max_end = -1;
8758                }
8759                let mut block_end = block_start - 1;
8760                while block_end > block_max_end && sa[block_end as usize] != 0 {
8761                    block_end -= 1;
8762                }
8763                let block_size = block_start - block_end;
8764                if block_size < 32 {
8765                    while block_start > block_end {
8766                        let mut p = sa[block_start as usize];
8767                        sa[block_start as usize] = p & SAINT_MAX;
8768                        if p > 0 {
8769                            p -= 1;
8770                            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8771                            let c1 = t[p as usize] as usize;
8772                            sa[block_start as usize] = c1 as SaSint;
8773                            induction_bucket[c1] -= 1;
8774                            sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8775                                p
8776                            } else {
8777                                c0 as SaSint | SAINT_MIN
8778                            };
8779                        }
8780                        block_start -= 1;
8781                    }
8782                } else {
8783                    final_bwt_scan_right_to_left_16u_block_omp(
8784                        t,
8785                        sa,
8786                        k,
8787                        induction_bucket,
8788                        block_end + 1,
8789                        block_size,
8790                        threads,
8791                        &mut thread_state,
8792                    );
8793                    block_start = block_end;
8794                }
8795            }
8796        }
8797    }
8798    index
8799}
8800
8801#[allow(dead_code)]
8802fn final_bwt_aux_scan_right_to_left_16u_omp(
8803    t: &[u16],
8804    sa: &mut [SaSint],
8805    n: SaSint,
8806    k: SaSint,
8807    rm: SaSint,
8808    i_sample: &mut [SaSint],
8809    induction_bucket: &mut [SaSint],
8810    threads: SaSint,
8811) {
8812    if threads == 1 || n < 65536 {
8813        final_bwt_aux_scan_right_to_left_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8814    } else {
8815        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8816        let mut block_start = n - 1;
8817        while block_start >= 0 {
8818            if sa[block_start as usize] == 0 {
8819                block_start -= 1;
8820            } else {
8821                let mut block_max_end =
8822                    block_start - threads * ((PER_THREAD_CACHE_SIZE as SaSint - 16 * threads) / 2);
8823                if block_max_end < 0 {
8824                    block_max_end = -1;
8825                }
8826                let mut block_end = block_start - 1;
8827                while block_end > block_max_end && sa[block_end as usize] != 0 {
8828                    block_end -= 1;
8829                }
8830                let block_size = block_start - block_end;
8831                if block_size < 32 {
8832                    while block_start > block_end {
8833                        let mut p = sa[block_start as usize];
8834                        sa[block_start as usize] = p & SAINT_MAX;
8835                        if p > 0 {
8836                            p -= 1;
8837                            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8838                            let c1 = t[p as usize] as usize;
8839                            sa[block_start as usize] = c1 as SaSint;
8840                            induction_bucket[c1] -= 1;
8841                            sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8842                                p
8843                            } else {
8844                                c0 as SaSint | SAINT_MIN
8845                            };
8846                            if (p & rm) == 0 {
8847                                i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1] + 1;
8848                            }
8849                        }
8850                        block_start -= 1;
8851                    }
8852                } else {
8853                    final_bwt_aux_scan_right_to_left_16u_block_omp(
8854                        t,
8855                        sa,
8856                        k,
8857                        rm,
8858                        i_sample,
8859                        induction_bucket,
8860                        block_end + 1,
8861                        block_size,
8862                        threads,
8863                        &mut thread_state,
8864                    );
8865                    block_start = block_end;
8866                }
8867            }
8868        }
8869    }
8870}
8871
8872#[allow(dead_code)]
8873fn final_sorting_scan_right_to_left_16u_omp(
8874    t: &[u16],
8875    sa: &mut [SaSint],
8876    omp_block_start: SaSint,
8877    omp_block_size: SaSint,
8878    k: SaSint,
8879    induction_bucket: &mut [SaSint],
8880    threads: SaSint,
8881) {
8882    if threads == 1 || omp_block_size < 65536 {
8883        final_sorting_scan_right_to_left_16u(
8884            t,
8885            sa,
8886            induction_bucket,
8887            omp_block_start,
8888            omp_block_size,
8889        );
8890    } else {
8891        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8892        let mut block_start = omp_block_start + omp_block_size - 1;
8893        while block_start >= omp_block_start {
8894            if sa[block_start as usize] == 0 {
8895                block_start -= 1;
8896            } else {
8897                let mut block_max_end =
8898                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8899                if block_max_end < omp_block_start {
8900                    block_max_end = omp_block_start - 1;
8901                }
8902                let mut block_end = block_start - 1;
8903                while block_end > block_max_end && sa[block_end as usize] != 0 {
8904                    block_end -= 1;
8905                }
8906                let block_size = block_start - block_end;
8907                if block_size < 32 {
8908                    while block_start > block_end {
8909                        let mut p = sa[block_start as usize];
8910                        sa[block_start as usize] = p & SAINT_MAX;
8911                        if p > 0 {
8912                            p -= 1;
8913                            let c = t[p as usize] as usize;
8914                            induction_bucket[c] -= 1;
8915                            let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8916                                SAINT_MIN
8917                            } else {
8918                                0
8919                            };
8920                            sa[induction_bucket[c] as usize] = p | mark;
8921                        }
8922                        block_start -= 1;
8923                    }
8924                } else {
8925                    final_sorting_scan_right_to_left_16u_block_omp(
8926                        t,
8927                        sa,
8928                        k,
8929                        induction_bucket,
8930                        block_end + 1,
8931                        block_size,
8932                        threads,
8933                        &mut thread_state,
8934                    );
8935                    block_start = block_end;
8936                }
8937            }
8938        }
8939    }
8940}
8941
8942#[allow(dead_code)]
8943fn final_gsa_scan_right_to_left_16u_omp(
8944    t: &[u16],
8945    sa: &mut [SaSint],
8946    omp_block_start: SaSint,
8947    omp_block_size: SaSint,
8948    k: SaSint,
8949    induction_bucket: &mut [SaSint],
8950    threads: SaSint,
8951) {
8952    if threads == 1 || omp_block_size < 65536 {
8953        final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, omp_block_start, omp_block_size);
8954    } else {
8955        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8956        let mut block_start = omp_block_start + omp_block_size - 1;
8957        while block_start >= omp_block_start {
8958            if sa[block_start as usize] == 0 {
8959                block_start -= 1;
8960            } else {
8961                let mut block_max_end =
8962                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8963                if block_max_end < omp_block_start {
8964                    block_max_end = omp_block_start - 1;
8965                }
8966                let mut block_end = block_start - 1;
8967                while block_end > block_max_end && sa[block_end as usize] != 0 {
8968                    block_end -= 1;
8969                }
8970                let block_size = block_start - block_end;
8971                if block_size < 32 {
8972                    while block_start > block_end {
8973                        let mut p = sa[block_start as usize];
8974                        sa[block_start as usize] = p & SAINT_MAX;
8975                        if p > 0 && t[(p - 1) as usize] > 0 {
8976                            p -= 1;
8977                            let c = t[p as usize] as usize;
8978                            induction_bucket[c] -= 1;
8979                            let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8980                                SAINT_MIN
8981                            } else {
8982                                0
8983                            };
8984                            sa[induction_bucket[c] as usize] = p | mark;
8985                        }
8986                        block_start -= 1;
8987                    }
8988                } else {
8989                    final_gsa_scan_right_to_left_16u_block_omp(
8990                        t,
8991                        sa,
8992                        k,
8993                        induction_bucket,
8994                        block_end + 1,
8995                        block_size,
8996                        threads,
8997                        &mut thread_state,
8998                    );
8999                    block_start = block_end;
9000                }
9001            }
9002        }
9003    }
9004}
9005
9006#[allow(dead_code)]
9007fn induce_final_order_16u_omp(
9008    t: &[u16],
9009    sa: &mut [SaSint],
9010    n: SaSint,
9011    k: SaSint,
9012    flags: SaSint,
9013    r: SaSint,
9014    i_out: Option<&mut [SaSint]>,
9015    buckets: &mut [SaSint],
9016    threads: SaSint,
9017    _thread_state: &mut [ThreadState],
9018) -> SaSint {
9019    if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9020        if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9021            buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9022        }
9023
9024        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9025        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9026        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9027
9028        final_sorting_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9029        if threads > 1 && n >= 65_536 {
9030            clear_lms_suffixes_omp(
9031                sa,
9032                n,
9033                ALPHABET_SIZE as SaSint,
9034                bucket_start,
9035                bucket_end,
9036                threads,
9037            );
9038        }
9039
9040        if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9041            flip_suffix_markers_omp(sa, bucket_end[0], threads);
9042            final_gsa_scan_right_to_left_16u_omp(
9043                t,
9044                sa,
9045                bucket_end[0],
9046                n - bucket_end[0],
9047                k,
9048                bucket_end,
9049                threads,
9050            );
9051        } else {
9052            final_sorting_scan_right_to_left_16u_omp(t, sa, 0, n, k, bucket_end, threads);
9053        }
9054
9055        0
9056    } else if let Some(i_out) = i_out {
9057        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9058        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9059        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9060
9061        final_bwt_aux_scan_left_to_right_16u_omp(t, sa, n, k, r - 1, i_out, bucket_start, threads);
9062        if threads > 1 && n >= 65_536 {
9063            clear_lms_suffixes_omp(
9064                sa,
9065                n,
9066                ALPHABET_SIZE as SaSint,
9067                bucket_start,
9068                bucket_end,
9069                threads,
9070            );
9071        }
9072        final_bwt_aux_scan_right_to_left_16u_omp(t, sa, n, k, r - 1, i_out, bucket_end, threads);
9073        0
9074    } else {
9075        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9076        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9077        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9078
9079        final_bwt_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9080        if threads > 1 && n >= 65_536 {
9081            clear_lms_suffixes_omp(
9082                sa,
9083                n,
9084                ALPHABET_SIZE as SaSint,
9085                bucket_start,
9086                bucket_end,
9087                threads,
9088            );
9089        }
9090        final_bwt_scan_right_to_left_16u_omp(t, sa, n, k, bucket_end, threads)
9091    }
9092}
9093
9094#[allow(dead_code)]
9095fn bwt_copy_16u(u: &mut [u16], a: &[SaSint], n: SaSint) {
9096    let mut i = 0isize;
9097    let mut j = n as isize - 7;
9098    while i < j {
9099        u[i as usize] = a[i as usize] as u16;
9100        u[(i + 1) as usize] = a[(i + 1) as usize] as u16;
9101        u[(i + 2) as usize] = a[(i + 2) as usize] as u16;
9102        u[(i + 3) as usize] = a[(i + 3) as usize] as u16;
9103        u[(i + 4) as usize] = a[(i + 4) as usize] as u16;
9104        u[(i + 5) as usize] = a[(i + 5) as usize] as u16;
9105        u[(i + 6) as usize] = a[(i + 6) as usize] as u16;
9106        u[(i + 7) as usize] = a[(i + 7) as usize] as u16;
9107        i += 8;
9108    }
9109
9110    j += 7;
9111    while i < j {
9112        u[i as usize] = a[i as usize] as u16;
9113        i += 1;
9114    }
9115}
9116
9117#[allow(dead_code)]
9118fn bwt_copy_16u_omp(u: &mut [u16], a: &[SaSint], n: SaSint, threads: SaSint) {
9119    if threads == 1 || n < 65_536 {
9120        bwt_copy_16u(u, a, n);
9121        return;
9122    }
9123
9124    let block_stride = (n / threads) & !15;
9125    for thread in 0..threads {
9126        let block_start = thread * block_stride;
9127        let block_size = if thread < threads - 1 {
9128            block_stride
9129        } else {
9130            n - block_start
9131        };
9132        let start = block_start as usize;
9133        bwt_copy_16u(&mut u[start..], &a[start..], block_size);
9134    }
9135}
9136
/// Widens `s[block_start .. block_start + block_size]` into the same index range of
/// `d`, zero-extending each 32-bit value to 64 bits.
///
/// # Panics
/// Panics if the range reaches past the end of `s` or `d`.
#[allow(dead_code)]
fn convert_32u_to_64u(s: &[u32], d: &mut [u64], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    (block_start..block_end).for_each(|idx| d[idx] = u64::from(s[idx]));
}
9143
/// Expands the 32-bit values at indices `block_start .. block_start + block_size` of
/// `v` in place: the value at index `i` moves to index `2 * i` and index `2 * i + 1`
/// is zeroed.
///
/// Indices are processed from highest to lowest, so a value is always read before its
/// slot can be overwritten by the expansion of a lower index.
///
/// NOTE(review): presumably the resulting pairs are meant to be read as little-endian
/// 64-bit values; confirm against the callers.
///
/// # Panics
/// Panics if `2 * i + 1` is out of bounds for any index `i` in the range.
#[allow(dead_code)]
fn convert_inplace_32u_to_64u(v: &mut [u32], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    (block_start..block_end).rev().for_each(|src| {
        let dst = src + src;
        v[dst] = v[src];
        v[dst + 1] = 0;
    });
}
9151
/// Compacts `v` in place: for every index `i` in
/// `block_start .. block_start + block_size`, `v[i]` receives the value stored at
/// `v[2 * i]`.
///
/// Indices are processed in ascending order, so a source slot `2 * i` is read before
/// any later iteration can write to it.
///
/// # Panics
/// Panics if `2 * i` is out of bounds for any index `i` in the range.
#[allow(dead_code)]
fn convert_inplace_64u_to_32u(v: &mut [u32], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    (block_start..block_end).for_each(|dst| {
        let src = dst + dst;
        v[dst] = v[src];
    });
}
9158
9159#[allow(dead_code)]
9160fn convert_inplace_32u_to_64u_omp(v: &mut [u32], n: SaSint, threads: SaSint) {
9161    let mut n = usize::try_from(n).expect("n must be non-negative");
9162    let threads = usize::try_from(threads.max(1)).expect("threads must be non-negative");
9163
9164    while n >= 65_536 {
9165        let block_size = n >> 1;
9166        n -= block_size;
9167
9168        let omp_block_stride = (block_size / threads) & !15usize;
9169        for thread in 0..threads {
9170            let block_start = thread * omp_block_stride;
9171            let size = if thread + 1 < threads {
9172                omp_block_stride
9173            } else {
9174                block_size - block_start
9175            };
9176            convert_inplace_32u_to_64u(v, n + block_start, size);
9177        }
9178    }
9179
9180    convert_inplace_32u_to_64u(v, 0, n);
9181}
9182
9183#[allow(dead_code)]
9184fn final_bwt_ltr_step(t: &[u16], sa: &mut [SaSint], induction_bucket: &mut [SaSint], index: usize) {
9185    let mut p = sa[index];
9186    sa[index] = p & SAINT_MAX;
9187    if p > 0 {
9188        p -= 1;
9189        let c = t[p as usize] as usize;
9190        sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9191        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9192            SAINT_MIN
9193        } else {
9194            0
9195        };
9196        let dst = induction_bucket[c] as usize;
9197        sa[dst] = p | mark;
9198        induction_bucket[c] += 1;
9199    }
9200}
9201
9202#[allow(dead_code)]
9203fn final_bwt_rtl_step(
9204    t: &[u16],
9205    sa: &mut [SaSint],
9206    induction_bucket: &mut [SaSint],
9207    index: usize,
9208    primary_index: &mut SaSint,
9209) {
9210    let mut p = sa[index];
9211    if p == 0 {
9212        *primary_index = index as SaSint;
9213    }
9214    sa[index] = p & SAINT_MAX;
9215    if p > 0 {
9216        p -= 1;
9217        let c0 = t[(p - SaSint::from(p > 0)) as usize];
9218        let c1 = t[p as usize];
9219        sa[index] = c1 as SaSint;
9220        let induced = if c0 <= c1 {
9221            p
9222        } else {
9223            c0 as SaSint | SAINT_MIN
9224        };
9225        induction_bucket[c1 as usize] -= 1;
9226        sa[induction_bucket[c1 as usize] as usize] = induced;
9227    }
9228}
9229
9230#[allow(dead_code)]
9231fn final_bwt_aux_ltr_step(
9232    t: &[u16],
9233    sa: &mut [SaSint],
9234    rm: SaSint,
9235    i_sample: &mut [SaSint],
9236    induction_bucket: &mut [SaSint],
9237    index: usize,
9238) {
9239    let mut p = sa[index];
9240    sa[index] = p & SAINT_MAX;
9241    if p > 0 {
9242        p -= 1;
9243        let c = t[p as usize] as usize;
9244        sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9245        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9246            SAINT_MIN
9247        } else {
9248            0
9249        };
9250        let dst = induction_bucket[c] as usize;
9251        sa[dst] = p | mark;
9252        induction_bucket[c] += 1;
9253        if (p & rm) == 0 {
9254            i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
9255        }
9256    }
9257}
9258
9259#[allow(dead_code)]
9260fn final_bwt_aux_rtl_step(
9261    t: &[u16],
9262    sa: &mut [SaSint],
9263    rm: SaSint,
9264    i_sample: &mut [SaSint],
9265    induction_bucket: &mut [SaSint],
9266    index: usize,
9267) {
9268    let mut p = sa[index];
9269    sa[index] = p & SAINT_MAX;
9270    if p > 0 {
9271        p -= 1;
9272        let c0 = t[(p - SaSint::from(p > 0)) as usize];
9273        let c1 = t[p as usize];
9274        sa[index] = c1 as SaSint;
9275        let induced = if c0 <= c1 {
9276            p
9277        } else {
9278            c0 as SaSint | SAINT_MIN
9279        };
9280        induction_bucket[c1 as usize] -= 1;
9281        sa[induction_bucket[c1 as usize] as usize] = induced;
9282        if (p & rm) == 0 {
9283            i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1 as usize] + 1;
9284        }
9285    }
9286}
9287
9288#[allow(dead_code)]
9289fn main_32s_recursion(
9290    t_ptr: *mut SaSint,
9291    sa_ptr: *mut SaSint,
9292    sa_capacity: usize,
9293    n: SaSint,
9294    k: SaSint,
9295    fs: SaSint,
9296    threads: SaSint,
9297    thread_state: &mut [ThreadState],
9298    local_buffer: &mut [SaSint],
9299) -> SaSint {
9300    let fs = fs.min(SAINT_MAX - n);
9301    let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
9302    let n_usize = usize::try_from(n).expect("n must be non-negative");
9303    let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
9304    let total_len = n_usize + fs_usize;
9305    assert!(total_len <= sa_capacity);
9306
9307    if n <= i32::MAX as SaSint && k > 0 {
9308        let doubled_space = i128::from(fs) + i128::from(fs) + i128::from(n) + i128::from(n);
9309        let new_fs = if doubled_space <= i128::from(i32::MAX) {
9310            fs + fs + n
9311        } else {
9312            i32::MAX as SaSint - n
9313        };
9314
9315        if (new_fs / k >= 6)
9316            || (new_fs / k >= 4 && n <= (i32::MAX as SaSint) / 2)
9317            || (new_fs / k < 4 && new_fs >= fs)
9318        {
9319            let t = unsafe { std::slice::from_raw_parts_mut(t_ptr, n_usize) };
9320            let mut t32 = Vec::with_capacity(n_usize);
9321            for &value in t.iter() {
9322                let Ok(value) = i32::try_from(value) else {
9323                    break;
9324                };
9325                t32.push(value);
9326            }
9327
9328            if t32.len() == n_usize {
9329                let mut sa32 = vec![0_i32; n_usize + usize::try_from(new_fs).expect("fits")];
9330                let index = crate::libsais16::libsais16_int_omp(
9331                    &mut t32,
9332                    &mut sa32,
9333                    k as i32,
9334                    new_fs as i32,
9335                    threads as i32,
9336                ) as SaSint;
9337
9338                if index >= 0 {
9339                    let sa = unsafe { std::slice::from_raw_parts_mut(sa_ptr, n_usize) };
9340                    for (dst, src) in sa.iter_mut().zip(sa32.iter()) {
9341                        *dst = SaSint::from(*src);
9342                    }
9343                }
9344
9345                return index;
9346            }
9347        }
9348    }
9349
9350    if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
9351        let k_usize = usize::try_from(k).expect("k must be non-negative");
9352        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
9353            1024usize
9354        } else {
9355            16usize
9356        };
9357        let need = 6 * k_usize;
9358        let use_local_buffer = local_buffer_size > fs;
9359        let buckets_ptr = if use_local_buffer {
9360            local_buffer.as_mut_ptr()
9361        } else {
9362            unsafe {
9363                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9364                let start =
9365                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
9366                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9367                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9368                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9369                    } else {
9370                        total_len - need
9371                    };
9372                sa[start..].as_mut_ptr()
9373            }
9374        };
9375
9376        let m = unsafe {
9377            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9378            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9379            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9380            count_and_gather_lms_suffixes_32s_4k_omp(
9381                t,
9382                sa,
9383                n,
9384                k,
9385                buckets,
9386                SaSint::from(use_local_buffer),
9387                threads,
9388                thread_state,
9389            )
9390        };
9391        if m > 1 {
9392            let m_usize = usize::try_from(m).expect("m must be non-negative");
9393            unsafe {
9394                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9395                sa[..n_usize - m_usize].fill(0);
9396            }
9397
9398            let first_lms_suffix = unsafe {
9399                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9400                sa[n_usize - m_usize]
9401            };
9402            let left_suffixes_count = unsafe {
9403                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9404                initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
9405                    std::slice::from_raw_parts(t_ptr, n_usize),
9406                    k,
9407                    buckets,
9408                    first_lms_suffix,
9409                )
9410            };
9411
9412            unsafe {
9413                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9414                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9415                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9416                let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
9417                radix_sort_lms_suffixes_32s_6k_omp(t, sa, n, m, induction_bucket, threads);
9418                if (n / 8192) < k {
9419                    radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
9420                }
9421                if threads > 1 && n >= 65_536 {
9422                    sa[n_usize - m_usize..n_usize].fill(0);
9423                }
9424                initialize_buckets_for_partial_sorting_32s_6k(
9425                    t,
9426                    k,
9427                    buckets,
9428                    first_lms_suffix,
9429                    left_suffixes_count,
9430                );
9431                induce_partial_order_32s_6k_omp(
9432                    t,
9433                    sa,
9434                    n,
9435                    k,
9436                    buckets,
9437                    first_lms_suffix,
9438                    left_suffixes_count,
9439                    threads,
9440                    thread_state,
9441                );
9442            }
9443
9444            let names = unsafe {
9445                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9446                if (n / 8192) < k {
9447                    renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
9448                        sa,
9449                        n,
9450                        m,
9451                        threads,
9452                        thread_state,
9453                    )
9454                } else {
9455                    renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
9456                }
9457            };
9458
9459            if names < m {
9460                let f = if (n / 8192) < k {
9461                    unsafe {
9462                        let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9463                        let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9464                        compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9465                    }
9466                } else {
9467                    0
9468                };
9469
9470                let new_t_start =
9471                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9472                if main_32s_recursion(
9473                    unsafe {
9474                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9475                            .as_mut_ptr()
9476                    },
9477                    sa_ptr,
9478                    sa_capacity,
9479                    m - f,
9480                    names - f,
9481                    fs + n - 2 * m + f,
9482                    threads,
9483                    thread_state,
9484                    local_buffer,
9485                ) != 0
9486                {
9487                    return -2;
9488                }
9489
9490                unsafe {
9491                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9492                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9493                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9494                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9495                        t,
9496                        sa,
9497                        n,
9498                        k,
9499                        m,
9500                        fs,
9501                        f,
9502                        buckets,
9503                        SaSint::from(use_local_buffer),
9504                        threads,
9505                        thread_state,
9506                    );
9507                }
9508            } else {
9509                unsafe {
9510                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9511                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9512                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9513                }
9514            }
9515
9516            unsafe {
9517                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9518                initialize_buckets_start_and_end_32s_4k(k, buckets);
9519                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9520                place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
9521                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9522                induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
9523            }
9524        } else {
9525            unsafe {
9526                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9527                sa[0] = sa[n_usize - 1];
9528            }
9529
9530            unsafe {
9531                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9532                initialize_buckets_start_and_end_32s_6k(k, buckets);
9533                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9534                place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
9535                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9536                induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
9537            }
9538        }
9539
9540        return 0;
9541    } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
9542        let k_usize = usize::try_from(k).expect("k must be non-negative");
9543        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
9544            1024usize
9545        } else {
9546            16usize
9547        };
9548        let need = 4 * k_usize;
9549        let use_local_buffer = local_buffer_size > fs;
9550        let buckets_ptr = if use_local_buffer {
9551            local_buffer.as_mut_ptr()
9552        } else {
9553            unsafe {
9554                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9555                let start =
9556                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
9557                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9558                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9559                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9560                    } else {
9561                        total_len - need
9562                    };
9563                sa[start..].as_mut_ptr()
9564            }
9565        };
9566
9567        let m = unsafe {
9568            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9569            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9570            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9571            count_and_gather_lms_suffixes_32s_2k_omp(
9572                t,
9573                sa,
9574                n,
9575                k,
9576                buckets,
9577                SaSint::from(use_local_buffer),
9578                threads,
9579                thread_state,
9580            )
9581        };
9582        if m > 1 {
9583            let m_usize = usize::try_from(m).expect("m must be non-negative");
9584            unsafe {
9585                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9586                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9587                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9588                initialize_buckets_for_radix_and_partial_sorting_32s_4k(
9589                    t,
9590                    k,
9591                    buckets,
9592                    sa[n_usize - m_usize],
9593                );
9594                let (_, induction_bucket) = buckets.split_at_mut(1);
9595                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
9596                radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
9597                place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
9598                induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
9599            }
9600
9601            let names = unsafe {
9602                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9603                renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
9604            };
9605            if names < m {
9606                let f = unsafe {
9607                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9608                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9609                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9610                };
9611
9612                let new_t_start =
9613                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9614                if main_32s_recursion(
9615                    unsafe {
9616                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9617                            .as_mut_ptr()
9618                    },
9619                    sa_ptr,
9620                    sa_capacity,
9621                    m - f,
9622                    names - f,
9623                    fs + n - 2 * m + f,
9624                    threads,
9625                    thread_state,
9626                    local_buffer,
9627                ) != 0
9628                {
9629                    return -2;
9630                }
9631
9632                unsafe {
9633                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9634                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9635                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9636                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9637                        t,
9638                        sa,
9639                        n,
9640                        k,
9641                        m,
9642                        fs,
9643                        f,
9644                        buckets,
9645                        SaSint::from(use_local_buffer),
9646                        threads,
9647                        thread_state,
9648                    );
9649                }
9650            } else {
9651                unsafe {
9652                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9653                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9654                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9655                }
9656            }
9657        } else {
9658            unsafe {
9659                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9660                sa[0] = sa[n_usize - 1];
9661            }
9662        }
9663
9664        unsafe {
9665            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9666            initialize_buckets_start_and_end_32s_4k(k, buckets);
9667            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9668            place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
9669            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9670            induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
9671        }
9672
9673        return 0;
9674    } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
9675        let k_usize = usize::try_from(k).expect("k must be non-negative");
9676        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
9677            1024usize
9678        } else {
9679            16usize
9680        };
9681        let need = 2 * k_usize;
9682        let use_local_buffer = local_buffer_size > fs;
9683        let buckets_ptr = if use_local_buffer {
9684            local_buffer.as_mut_ptr()
9685        } else {
9686            unsafe {
9687                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9688                let start =
9689                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
9690                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9691                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9692                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9693                    } else {
9694                        total_len - need
9695                    };
9696                sa[start..].as_mut_ptr()
9697            }
9698        };
9699
9700        let m = unsafe {
9701            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9702            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9703            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9704            count_and_gather_lms_suffixes_32s_2k_omp(
9705                t,
9706                sa,
9707                n,
9708                k,
9709                buckets,
9710                SaSint::from(use_local_buffer),
9711                threads,
9712                thread_state,
9713            )
9714        };
9715        if m > 1 {
9716            let m_usize = usize::try_from(m).expect("m must be non-negative");
9717            unsafe {
9718                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9719                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9720                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9721                initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
9722                    t,
9723                    k,
9724                    buckets,
9725                    sa[n_usize - m_usize],
9726                );
9727                let (_, induction_bucket) = buckets.split_at_mut(1);
9728                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
9729                place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
9730            }
9731
9732            unsafe {
9733                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9734                initialize_buckets_start_and_end_32s_2k(k, buckets);
9735                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9736                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9737                induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
9738            }
9739
9740            let names = unsafe {
9741                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9742                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9743                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
9744            };
9745            if names < m {
9746                let f = unsafe {
9747                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9748                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9749                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9750                };
9751
9752                let new_t_start =
9753                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9754                if main_32s_recursion(
9755                    unsafe {
9756                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9757                            .as_mut_ptr()
9758                    },
9759                    sa_ptr,
9760                    sa_capacity,
9761                    m - f,
9762                    names - f,
9763                    fs + n - 2 * m + f,
9764                    threads,
9765                    thread_state,
9766                    local_buffer,
9767                ) != 0
9768                {
9769                    return -2;
9770                }
9771
9772                unsafe {
9773                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9774                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9775                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9776                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9777                        t,
9778                        sa,
9779                        n,
9780                        k,
9781                        m,
9782                        fs,
9783                        f,
9784                        buckets,
9785                        SaSint::from(use_local_buffer),
9786                        threads,
9787                        thread_state,
9788                    );
9789                }
9790            } else {
9791                unsafe {
9792                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9793                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9794                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9795                }
9796            }
9797        } else {
9798            unsafe {
9799                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9800                sa[0] = sa[n_usize - 1];
9801            }
9802        }
9803
9804        unsafe {
9805            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9806            initialize_buckets_end_32s_2k(k, buckets);
9807            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9808            place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
9809        }
9810
9811        unsafe {
9812            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9813            initialize_buckets_start_and_end_32s_2k(k, buckets);
9814            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9815            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9816            induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
9817        }
9818
9819        0
9820    } else {
9821        let k_usize = usize::try_from(k).expect("k must be non-negative");
9822        let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
9823        let alignment = if fs >= 1024 && (fs - 1024) >= k {
9824            1024usize
9825        } else {
9826            16usize
9827        };
9828        let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
9829            heap.as_mut_ptr()
9830        } else {
9831            unsafe {
9832                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9833                let start = if fs_usize >= k_usize + alignment {
9834                    let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
9835                    let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9836                    (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9837                } else {
9838                    total_len - k_usize
9839                };
9840                sa[start..].as_mut_ptr()
9841            }
9842        };
9843
9844        if buckets_ptr.is_null() {
9845            return -2;
9846        }
9847
9848        unsafe {
9849            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9850            sa[..n_usize].fill(0);
9851        }
9852
9853        unsafe {
9854            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9855            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9856            count_suffixes_32s(t, n, k, buckets);
9857            initialize_buckets_end_32s_1k(k, buckets);
9858        }
9859
9860        let m = unsafe {
9861            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9862            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9863            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9864            radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
9865        };
9866        if m > 1 {
9867            unsafe {
9868                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9869                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9870                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9871                induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
9872            }
9873
9874            let names = unsafe {
9875                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9876                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9877                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
9878            };
9879            if names < m {
9880                if heap_buckets.is_some() {
9881                    let _ = heap_buckets.take();
9882                    buckets_ptr = std::ptr::null_mut();
9883                }
9884
9885                let f = unsafe {
9886                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9887                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9888                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9889                };
9890
9891                let new_t_start =
9892                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9893                if main_32s_recursion(
9894                    unsafe {
9895                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9896                            .as_mut_ptr()
9897                    },
9898                    sa_ptr,
9899                    sa_capacity,
9900                    m - f,
9901                    names - f,
9902                    fs + n - 2 * m + f,
9903                    threads,
9904                    thread_state,
9905                    local_buffer,
9906                ) != 0
9907                {
9908                    return -2;
9909                }
9910
9911                unsafe {
9912                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9913                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9914                    reconstruct_compacted_lms_suffixes_32s_1k_omp(t, sa, n, m, fs, f, threads);
9915                }
9916
9917                if buckets_ptr.is_null() {
9918                    heap_buckets = Some(vec![0; k_usize]);
9919                    buckets_ptr = heap_buckets.as_mut().unwrap().as_mut_ptr();
9920                    if buckets_ptr.is_null() {
9921                        return -2;
9922                    }
9923                }
9924            }
9925
9926            unsafe {
9927                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9928                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9929                count_suffixes_32s(t, n, k, buckets);
9930                initialize_buckets_end_32s_1k(k, buckets);
9931            }
9932            unsafe {
9933                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9934                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9935                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9936                place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
9937            }
9938        }
9939
9940        unsafe {
9941            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9942            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9943            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9944            induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
9945        }
9946
9947        0
9948    }
9949}
9950
9951#[allow(dead_code)]
9952fn main_32s_entry(
9953    t_ptr: *mut SaSint,
9954    sa: &mut [SaSint],
9955    n: SaSint,
9956    k: SaSint,
9957    fs: SaSint,
9958    threads: SaSint,
9959    thread_state: &mut [ThreadState],
9960) -> SaSint {
9961    let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
9962    main_32s_recursion(
9963        t_ptr,
9964        sa.as_mut_ptr(),
9965        sa.len(),
9966        n,
9967        k,
9968        fs,
9969        threads,
9970        thread_state,
9971        &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
9972    )
9973}
9974
9975#[allow(dead_code)]
9976fn main_16u(
9977    t: &[u16],
9978    sa: &mut [SaSint],
9979    n: SaSint,
9980    buckets: &mut [SaSint],
9981    flags: SaSint,
9982    r: SaSint,
9983    i_out: Option<&mut [SaSint]>,
9984    fs: SaSint,
9985    freq: Option<&mut [SaSint]>,
9986    threads: SaSint,
9987    thread_state: &mut [ThreadState],
9988) -> SaSint {
9989    let fs = fs.min(SAINT_MAX - n);
9990
9991    let m = count_and_gather_lms_suffixes_16u_omp(t, sa, n, buckets, threads, thread_state);
9992    let k = initialize_buckets_start_and_end_16u(buckets, freq);
9993
9994    if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
9995        return -1;
9996    }
9997
9998    if m > 0 {
9999        let first_lms_suffix = sa[(n - m) as usize];
10000        let left_suffixes_count =
10001            initialize_buckets_for_lms_suffixes_radix_sort_16u(t, buckets, first_lms_suffix);
10002
10003        if threads > 1 && n >= 65_536 {
10004            sa[..(n - m) as usize].fill(0);
10005        }
10006        radix_sort_lms_suffixes_16u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
10007        if threads > 1 && n >= 65_536 {
10008            sa[(n - m) as usize..n as usize].fill(0);
10009        }
10010
10011        initialize_buckets_for_partial_sorting_16u(
10012            t,
10013            buckets,
10014            first_lms_suffix,
10015            left_suffixes_count,
10016        );
10017        induce_partial_order_16u_omp(
10018            t,
10019            sa,
10020            n,
10021            k,
10022            flags,
10023            buckets,
10024            first_lms_suffix,
10025            left_suffixes_count,
10026            threads,
10027        );
10028
10029        let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
10030        if names < m {
10031            let recursive_t_start = (n + fs - m) as usize;
10032            let recursive_t_ptr = sa[recursive_t_start..].as_mut_ptr();
10033            if main_32s_entry(
10034                recursive_t_ptr,
10035                sa,
10036                m,
10037                names,
10038                fs + n - 2 * m,
10039                threads,
10040                thread_state,
10041            ) != 0
10042            {
10043                return -2;
10044            }
10045
10046            gather_lms_suffixes_16u_omp(t, sa, n, threads, thread_state);
10047            reconstruct_lms_suffixes_omp(sa, n, m, threads);
10048        }
10049
10050        place_lms_suffixes_interval_16u(sa, n, m, flags, buckets);
10051    } else {
10052        sa[..n as usize].fill(0);
10053    }
10054
10055    induce_final_order_16u_omp(t, sa, n, k, flags, r, i_out, buckets, threads, thread_state)
10056}
10057
10058#[allow(dead_code)]
10059fn main_16u_alloc(
10060    t: &[u16],
10061    sa: &mut [SaSint],
10062    flags: SaSint,
10063    r: SaSint,
10064    i_out: Option<&mut [SaSint]>,
10065    fs: SaSint,
10066    freq: Option<&mut [SaSint]>,
10067    threads: SaSint,
10068) -> SaSint {
10069    if fs < 0
10070        || threads < 0
10071        || sa.len()
10072            < t.len()
10073                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10074        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10075    {
10076        return -1;
10077    }
10078
10079    fill_freq(t, freq);
10080    if t.len() <= 1 {
10081        if t.len() == 1 {
10082            sa[0] = 0;
10083        }
10084        return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10085            t.len() as SaSint
10086        } else {
10087            0
10088        };
10089    }
10090
10091    let mut buckets = vec![0; 8 * ALPHABET_SIZE];
10092    let threads = normalize_threads(threads);
10093    let mut thread_state = if threads > 1 {
10094        match alloc_thread_state(threads) {
10095            Some(thread_state) => thread_state,
10096            None => return -2,
10097        }
10098    } else {
10099        Vec::new()
10100    };
10101
10102    main_16u(
10103        t,
10104        sa,
10105        t.len() as SaSint,
10106        &mut buckets,
10107        flags,
10108        r,
10109        i_out,
10110        fs,
10111        None,
10112        threads,
10113        &mut thread_state,
10114    )
10115}
10116
10117fn main_16u_ctx(
10118    ctx: &mut Context,
10119    t: &[u16],
10120    sa: &mut [SaSint],
10121    flags: SaSint,
10122    r: SaSint,
10123    i_out: Option<&mut [SaSint]>,
10124    fs: SaSint,
10125    freq: Option<&mut [SaSint]>,
10126) -> SaSint {
10127    if fs < 0
10128        || sa.len()
10129            < t.len()
10130                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10131        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10132    {
10133        return -1;
10134    }
10135
10136    if ctx.threads <= 0 || ctx.buckets.len() < 8 * ALPHABET_SIZE {
10137        return -2;
10138    }
10139
10140    fill_freq(t, freq);
10141    if t.len() <= 1 {
10142        if t.len() == 1 {
10143            sa[0] = 0;
10144        }
10145        return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10146            t.len() as SaSint
10147        } else {
10148            0
10149        };
10150    }
10151
10152    let mut empty_thread_state = [];
10153    let thread_state = if ctx.threads > 1 {
10154        match ctx.thread_state.as_deref_mut() {
10155            Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
10156            None => return -2,
10157            Some(_) => return -2,
10158        }
10159    } else {
10160        &mut empty_thread_state
10161    };
10162
10163    main_16u(
10164        t,
10165        sa,
10166        t.len() as SaSint,
10167        &mut ctx.buckets,
10168        flags,
10169        r,
10170        i_out,
10171        fs,
10172        None,
10173        ctx.threads,
10174        thread_state,
10175    )
10176}
10177
10178fn main_long(
10179    t: &mut [SaSint],
10180    sa: &mut [SaSint],
10181    k: SaSint,
10182    fs: SaSint,
10183    threads: SaSint,
10184) -> SaSint {
10185    let threads = normalize_threads(threads);
10186    let mut thread_state = if threads > 1 {
10187        match alloc_thread_state(threads) {
10188            Some(thread_state) => thread_state,
10189            None => return -2,
10190        }
10191    } else {
10192        Vec::new()
10193    };
10194
10195    main_32s_entry(
10196        t.as_mut_ptr(),
10197        sa,
10198        t.len() as SaSint,
10199        k,
10200        fs,
10201        threads,
10202        &mut thread_state,
10203    )
10204}
10205
10206/// Constructs the suffix array of a given 16-bit string.
10207///
10208/// - `t` (`[0..n-1]`): the input 16-bit string.
10209/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10210/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10211/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10212///
10213/// Returns 0 on success, -1 or -2 on error.
10214pub fn libsais16x64(
10215    t: &[u16],
10216    sa: &mut [SaSint],
10217    fs: SaSint,
10218    freq: Option<&mut [SaSint]>,
10219) -> SaSint {
10220    main_16u_alloc(t, sa, 0, 0, None, fs, freq, 1)
10221}
10222
10223/// Constructs the generalized suffix array (GSA) of a given 16-bit string set.
10224///
10225/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10226/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10227/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10228/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10229///
10230/// Returns 0 on success, -1 or -2 on error.
10231pub fn libsais16x64_gsa(
10232    t: &[u16],
10233    sa: &mut [SaSint],
10234    fs: SaSint,
10235    freq: Option<&mut [SaSint]>,
10236) -> SaSint {
10237    main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
10238}
10239
10240/// Alias for `libsais16x64_long`. See its documentation.
10241pub fn libsais16x64_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10242    if fs < 0
10243        || sa.len()
10244            < t.len()
10245                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10246    {
10247        return -1;
10248    }
10249
10250    if t.len() <= 1 {
10251        if t.len() == 1 {
10252            sa[0] = 0;
10253        }
10254        return 0;
10255    }
10256
10257    main_long(t, sa, k, fs, 1)
10258}
10259
10260/// Constructs the suffix array of a given integer array.
10261///
10262/// During construction the input array is modified, but restored at the end if no error occurred.
10263///
10264/// - `t` (`[0..n-1]`): the input integer array.
10265/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10266/// - `k`: the alphabet size of the input integer array.
10267/// - `fs`: extra space available at the end of `sa` (can be 0, but 4k or better 6k is recommended for optimal performance).
10268///
10269/// Returns 0 on success, -1 or -2 on error.
10270pub fn libsais16x64_long(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10271    libsais16x64_int(t, sa, k, fs)
10272}
10273
10274/// Constructs the suffix array of a given 16-bit string using a libsais16x64 context.
10275///
10276/// - `ctx`: the libsais16x64 context.
10277/// - `t` (`[0..n-1]`): the input 16-bit string.
10278/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10279/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10280/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10281///
10282/// Returns 0 on success, -1 or -2 on error.
10283pub fn libsais16x64_ctx(
10284    ctx: &mut Context,
10285    t: &[u16],
10286    sa: &mut [SaSint],
10287    fs: SaSint,
10288    freq: Option<&mut [SaSint]>,
10289) -> SaSint {
10290    main_16u_ctx(ctx, t, sa, 0, 0, None, fs, freq)
10291}
10292
10293/// Constructs the generalized suffix array (GSA) of a given 16-bit string set using a libsais16x64 context.
10294///
10295/// - `ctx`: the libsais16x64 context.
10296/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10297/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10298/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10299/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10300///
10301/// Returns 0 on success, -1 or -2 on error.
10302pub fn libsais16x64_gsa_ctx(
10303    ctx: &mut Context,
10304    t: &[u16],
10305    sa: &mut [SaSint],
10306    fs: SaSint,
10307    freq: Option<&mut [SaSint]>,
10308) -> SaSint {
10309    main_16u_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
10310}
10311
10312/// Constructs the suffix array of a given 16-bit string in parallel using OpenMP-style threading.
10313///
10314/// - `t` (`[0..n-1]`): the input 16-bit string.
10315/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10316/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10317/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10318/// - `threads`: number of worker threads (can be 0 for the implementation default).
10319///
10320/// Returns 0 on success, -1 or -2 on error.
10321pub fn libsais16x64_omp(
10322    t: &[u16],
10323    sa: &mut [SaSint],
10324    fs: SaSint,
10325    freq: Option<&mut [SaSint]>,
10326    threads: SaSint,
10327) -> SaSint {
10328    if threads < 0 {
10329        -1
10330    } else {
10331        main_16u_alloc(t, sa, 0, 0, None, fs, freq, threads)
10332    }
10333}
10334
10335/// Constructs the generalized suffix array (GSA) of a given 16-bit string set in parallel using OpenMP-style threading.
10336///
10337/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10338/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10339/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10340/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10341/// - `threads`: number of worker threads (can be 0 for the implementation default).
10342///
10343/// Returns 0 on success, -1 or -2 on error.
10344pub fn libsais16x64_gsa_omp(
10345    t: &[u16],
10346    sa: &mut [SaSint],
10347    fs: SaSint,
10348    freq: Option<&mut [SaSint]>,
10349    threads: SaSint,
10350) -> SaSint {
10351    if threads < 0 {
10352        -1
10353    } else {
10354        main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, threads)
10355    }
10356}
10357
10358/// Alias for `libsais16x64_long_omp`. See its documentation.
10359pub fn libsais16x64_int_omp(
10360    t: &mut [SaSint],
10361    sa: &mut [SaSint],
10362    k: SaSint,
10363    fs: SaSint,
10364    threads: SaSint,
10365) -> SaSint {
10366    if threads < 0
10367        || fs < 0
10368        || sa.len()
10369            < t.len()
10370                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10371    {
10372        return -1;
10373    }
10374
10375    if t.len() <= 1 {
10376        if t.len() == 1 {
10377            sa[0] = 0;
10378        }
10379        return 0;
10380    }
10381
10382    main_long(t, sa, k, fs, threads)
10383}
10384
10385/// Constructs the suffix array of a given integer array in parallel using OpenMP-style threading.
10386///
10387/// During construction the input array is modified, but restored at the end if no error occurred.
10388///
10389/// - `t` (`[0..n-1]`): the input integer array.
10390/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10391/// - `k`: the alphabet size of the input integer array.
10392/// - `fs`: extra space available at the end of `sa` (can be 0, but 4k or better 6k is recommended for optimal performance).
10393/// - `threads`: number of worker threads (can be 0 for the implementation default).
10394///
10395/// Returns 0 on success, -1 or -2 on error.
10396pub fn libsais16x64_long_omp(
10397    t: &mut [SaSint],
10398    sa: &mut [SaSint],
10399    k: SaSint,
10400    fs: SaSint,
10401    threads: SaSint,
10402) -> SaSint {
10403    libsais16x64_int_omp(t, sa, k, fs, threads)
10404}
10405
10406fn build_bwt(
10407    t: &[u16],
10408    u: &mut [u16],
10409    a: &mut [SaSint],
10410    fs: SaSint,
10411    freq: Option<&mut [SaSint]>,
10412    threads: SaSint,
10413) -> SaSint {
10414    if fs < 0
10415        || threads < 0
10416        || u.len() < t.len()
10417        || a.len()
10418            < t.len()
10419                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10420        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10421    {
10422        return -1;
10423    }
10424    if t.len() <= 1 {
10425        fill_freq(t, freq);
10426        if t.len() == 1 {
10427            u[0] = t[0];
10428        }
10429        return t.len() as SaSint;
10430    }
10431
10432    let n = t.len();
10433    let mut index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
10434    if index >= 0 {
10435        index += 1;
10436        u[0] = t[n - 1];
10437        bwt_copy_16u(&mut u[1..], a, index - 1);
10438        bwt_copy_16u(
10439            &mut u[index as usize..],
10440            &a[index as usize..],
10441            n as SaSint - index,
10442        );
10443    }
10444    index
10445}
10446
10447/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string.
10448///
10449/// - `t` (`[0..n-1]`): the input 16-bit string.
10450/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10451/// - `a` (`[0..n-1+fs]`): the temporary array.
10452/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10453/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10454///
10455/// Returns the primary index on success, -1 or -2 on error.
10456pub fn libsais16x64_bwt(
10457    t: &[u16],
10458    u: &mut [u16],
10459    a: &mut [SaSint],
10460    fs: SaSint,
10461    freq: Option<&mut [SaSint]>,
10462) -> SaSint {
10463    build_bwt(t, u, a, fs, freq, 1)
10464}
10465
10466fn build_bwt_aux(
10467    t: &[u16],
10468    u: &mut [u16],
10469    a: &mut [SaSint],
10470    fs: SaSint,
10471    freq: Option<&mut [SaSint]>,
10472    r: SaSint,
10473    i: &mut [SaSint],
10474    threads: SaSint,
10475) -> SaSint {
10476    if threads < 0 || r < 2 || (r & (r - 1)) != 0 {
10477        return -1;
10478    }
10479    let samples = if t.is_empty() {
10480        1
10481    } else {
10482        (t.len() - 1) / r as usize + 1
10483    };
10484    if i.len() < samples {
10485        return -1;
10486    }
10487    let n = t.len();
10488    if n <= 1 {
10489        fill_freq(t, freq);
10490        if n == 1 {
10491            u[0] = t[0];
10492        }
10493        i[0] = n as SaSint;
10494        return 0;
10495    }
10496
10497    let index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
10498    if index == 0 {
10499        u[0] = t[n - 1];
10500        bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10501        bwt_copy_16u(
10502            &mut u[i[0] as usize..],
10503            &a[i[0] as usize..],
10504            n as SaSint - i[0],
10505        );
10506    }
10507    index
10508}
10509
10510/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string with auxiliary indexes.
10511///
10512/// - `t` (`[0..n-1]`): the input 16-bit string.
10513/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10514/// - `a` (`[0..n-1+fs]`): the temporary array.
10515/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10516/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10517/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10518/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10519///
10520/// Returns 0 on success, -1 or -2 on error.
10521pub fn libsais16x64_bwt_aux(
10522    t: &[u16],
10523    u: &mut [u16],
10524    a: &mut [SaSint],
10525    fs: SaSint,
10526    freq: Option<&mut [SaSint]>,
10527    r: SaSint,
10528    i: &mut [SaSint],
10529) -> SaSint {
10530    build_bwt_aux(t, u, a, fs, freq, r, i, 1)
10531}
10532
10533/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string using a libsais16x64 context.
10534///
10535/// - `ctx`: the libsais16x64 context.
10536/// - `t` (`[0..n-1]`): the input 16-bit string.
10537/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10538/// - `a` (`[0..n-1+fs]`): the temporary array.
10539/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10540/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10541///
10542/// Returns the primary index on success, -1 or -2 on error.
10543pub fn libsais16x64_bwt_ctx(
10544    ctx: &mut Context,
10545    t: &[u16],
10546    u: &mut [u16],
10547    a: &mut [SaSint],
10548    fs: SaSint,
10549    freq: Option<&mut [SaSint]>,
10550) -> SaSint {
10551    if fs < 0
10552        || u.len() < t.len()
10553        || a.len()
10554            < t.len()
10555                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10556        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10557    {
10558        return -1;
10559    }
10560    if t.len() <= 1 {
10561        fill_freq(t, freq);
10562        if t.len() == 1 {
10563            u[0] = t[0];
10564        }
10565        return t.len() as SaSint;
10566    }
10567
10568    let n = t.len();
10569    let mut index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
10570    if index >= 0 {
10571        index += 1;
10572        u[0] = t[n - 1];
10573        bwt_copy_16u(&mut u[1..], a, index - 1);
10574        bwt_copy_16u(
10575            &mut u[index as usize..],
10576            &a[index as usize..],
10577            n as SaSint - index,
10578        );
10579    }
10580    index
10581}
10582
10583/// Constructs the BWT of a given 16-bit string with auxiliary indexes using a libsais16x64 context.
10584///
10585/// - `ctx`: the libsais16x64 context.
10586/// - `t` (`[0..n-1]`): the input 16-bit string.
10587/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10588/// - `a` (`[0..n-1+fs]`): the temporary array.
10589/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10590/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10591/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10592/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10593///
10594/// Returns 0 on success, -1 or -2 on error.
10595pub fn libsais16x64_bwt_aux_ctx(
10596    ctx: &mut Context,
10597    t: &[u16],
10598    u: &mut [u16],
10599    a: &mut [SaSint],
10600    fs: SaSint,
10601    freq: Option<&mut [SaSint]>,
10602    r: SaSint,
10603    i: &mut [SaSint],
10604) -> SaSint {
10605    if fs < 0 || r < 2 || (r & (r - 1)) != 0 {
10606        return -1;
10607    }
10608    let samples = if t.is_empty() {
10609        1
10610    } else {
10611        (t.len() - 1) / r as usize + 1
10612    };
10613    if u.len() < t.len()
10614        || a.len()
10615            < t.len()
10616                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10617        || i.len() < samples
10618        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10619    {
10620        return -1;
10621    }
10622    if t.len() <= 1 {
10623        fill_freq(t, freq);
10624        if t.len() == 1 {
10625            u[0] = t[0];
10626        }
10627        i[0] = t.len() as SaSint;
10628        return 0;
10629    }
10630
10631    let n = t.len();
10632    let index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
10633    if index == 0 {
10634        u[0] = t[n - 1];
10635        bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10636        bwt_copy_16u(
10637            &mut u[i[0] as usize..],
10638            &a[i[0] as usize..],
10639            n as SaSint - i[0],
10640        );
10641    }
10642    index
10643}
10644
10645/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string in parallel using OpenMP-style threading.
10646///
10647/// - `t` (`[0..n-1]`): the input 16-bit string.
10648/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10649/// - `a` (`[0..n-1+fs]`): the temporary array.
10650/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10651/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10652/// - `threads`: number of worker threads (can be 0 for the implementation default).
10653///
10654/// Returns the primary index on success, -1 or -2 on error.
10655pub fn libsais16x64_bwt_omp(
10656    t: &[u16],
10657    u: &mut [u16],
10658    a: &mut [SaSint],
10659    fs: SaSint,
10660    freq: Option<&mut [SaSint]>,
10661    threads: SaSint,
10662) -> SaSint {
10663    if threads < 0 {
10664        -1
10665    } else {
10666        build_bwt(t, u, a, fs, freq, threads)
10667    }
10668}
10669
10670/// Constructs the BWT of a given 16-bit string with auxiliary indexes in parallel using OpenMP-style threading.
10671///
10672/// - `t` (`[0..n-1]`): the input 16-bit string.
10673/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10674/// - `a` (`[0..n-1+fs]`): the temporary array.
10675/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10676/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10677/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10678/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10679/// - `threads`: number of worker threads (can be 0 for the implementation default).
10680///
10681/// Returns 0 on success, -1 or -2 on error.
10682pub fn libsais16x64_bwt_aux_omp(
10683    t: &[u16],
10684    u: &mut [u16],
10685    a: &mut [SaSint],
10686    fs: SaSint,
10687    freq: Option<&mut [SaSint]>,
10688    r: SaSint,
10689    i: &mut [SaSint],
10690    threads: SaSint,
10691) -> SaSint {
10692    if threads < 0 {
10693        -1
10694    } else {
10695        build_bwt_aux(t, u, a, fs, freq, r, i, threads)
10696    }
10697}
10698
10699fn validate_unbwt_aux(
10700    t: &[u16],
10701    u: &[u16],
10702    a: &[SaSint],
10703    freq: Option<&[SaSint]>,
10704    r: SaSint,
10705    i: &[SaSint],
10706) -> SaSint {
10707    let n = t.len();
10708    if u.len() < n
10709        || a.len() < n
10710        || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10711        || ((r != n as SaSint) && (r < 2 || (r & (r - 1)) != 0))
10712        || i.is_empty()
10713    {
10714        return -1;
10715    }
10716    if n <= 1 {
10717        return if i[0] == n as SaSint { 0 } else { -1 };
10718    }
10719
10720    let samples = (n - 1) / r as usize + 1;
10721    if i.len() < samples {
10722        return -1;
10723    }
10724
10725    for &index in &i[..samples] {
10726        if index <= 0 || index as usize > n {
10727            return -1;
10728        }
10729    }
10730    0
10731}
10732
/// Adds one to `count[s]` for every symbol `s` in `t`.
///
/// Existing values in `count` are kept, so callers that want a fresh histogram must zero
/// it first. Panics if a symbol indexes past the end of `count`.
fn unbwt_compute_histogram(t: &[u16], count: &mut [usize]) {
    t.iter()
        .for_each(|&symbol| count[usize::from(symbol)] += 1);
}
10738
10739fn unbwt_shift(n: usize) -> usize {
10740    let mut shift = 0usize;
10741    while (n >> shift) > (1usize << UNBWT_FASTBITS) {
10742        shift += 1;
10743    }
10744    shift
10745}
10746
10747fn unbwt_calculate_fastbits(bucket2: &mut [usize], fastbits: &mut [u16], shift: usize) {
10748    let mut v = 0usize;
10749    let mut sum = 1usize;
10750    for (w, bucket) in bucket2.iter_mut().enumerate().take(ALPHABET_SIZE) {
10751        let prev = sum;
10752        sum += *bucket;
10753        *bucket = prev;
10754        if prev != sum {
10755            while v <= ((sum - 1) >> shift) {
10756                fastbits[v] = w as u16;
10757                v += 1;
10758            }
10759        }
10760    }
10761}
10762
/// Scatters row numbers into `p`, bucketed by symbol.
///
/// Each symbol `s` of `t` is visited in order. Its row number is written to
/// `p[bucket2[s]]` and `bucket2[s]` is advanced by one. Symbols before position `index`
/// use their own position as the row number; symbols from `index` on use position + 1,
/// so row `index` itself is never written.
fn unbwt_calculate_p(t: &[u16], p: &mut [usize], bucket2: &mut [usize], index: usize) {
    let mut place = |row: usize, symbol: u16| {
        let cursor = &mut bucket2[usize::from(symbol)];
        p[*cursor] = row;
        *cursor += 1;
    };

    // Rows before the primary index map to the symbol at the same position.
    for (row, &symbol) in t[..index].iter().enumerate() {
        place(row, symbol);
    }

    // Rows after the primary index map to the symbol one position earlier.
    for (position, &symbol) in t.iter().enumerate().skip(index) {
        place(position + 1, symbol);
    }
}
10776
/// Block-restricted variant of `unbwt_calculate_p`.
///
/// Only rows belonging to the block `block_start..block_end` are scattered. Rows
/// `block_start..min(index, block_end)` use the symbol at the same position, and rows
/// `max(block_start, index) + 1..=block_end` use the symbol one position earlier. Each
/// row number is written to `p[bucket2[s]]` and `bucket2[s]` is advanced by one.
// NOTE(review): the upper-case `P` presumably mirrors the upstream C function name,
// hence the `non_snake_case` allowance. Confirm.
#[allow(dead_code, non_snake_case)]
fn unbwt_calculate_P(
    t: &[u16],
    p: &mut [usize],
    bucket2: &mut [usize],
    index: usize,
    block_start: usize,
    block_end: usize,
) {
    let mut place = |row: usize, symbol: u16| {
        let cursor = &mut bucket2[usize::from(symbol)];
        p[*cursor] = row;
        *cursor += 1;
    };

    // Part of the block that lies before the primary index.
    for row in block_start..index.min(block_end) {
        place(row, t[row]);
    }

    // Part of the block that lies after the primary index.
    for row in (block_start.max(index) + 1)..=block_end {
        place(row, t[row - 1]);
    }
}
10800
10801fn unbwt_init_single(
10802    t: &[u16],
10803    p: &mut [usize],
10804    freq: Option<&[SaSint]>,
10805    i: &[SaSint],
10806    bucket2: &mut [usize],
10807    fastbits: &mut [u16],
10808) {
10809    let shift = unbwt_shift(t.len());
10810    if let Some(freq) = freq {
10811        for c in 0..ALPHABET_SIZE {
10812            bucket2[c] = freq[c] as usize;
10813        }
10814    } else {
10815        bucket2.fill(0);
10816        unbwt_compute_histogram(t, bucket2);
10817    }
10818
10819    unbwt_calculate_fastbits(bucket2, fastbits, shift);
10820    unbwt_calculate_p(t, p, bucket2, i[0] as usize);
10821}
10822
10823#[allow(dead_code)]
10824fn unbwt_init_parallel(
10825    t: &[u16],
10826    p: &mut [usize],
10827    freq: Option<&[SaSint]>,
10828    i: &[SaSint],
10829    bucket2: &mut [usize],
10830    fastbits: &mut [u16],
10831    buckets: &mut [usize],
10832    threads: SaSint,
10833) {
10834    let n = t.len();
10835    let available_threads = buckets.len() / ALPHABET_SIZE;
10836    let num_threads = if threads > 1 && n >= 65_536 && available_threads > 1 {
10837        usize::try_from(threads)
10838            .expect("threads must be non-negative")
10839            .min(available_threads)
10840            .max(1)
10841    } else {
10842        1
10843    };
10844
10845    if num_threads == 1 {
10846        unbwt_init_single(t, p, freq, i, bucket2, fastbits);
10847        return;
10848    }
10849
10850    let index = usize::try_from(i[0]).expect("primary index must be non-negative");
10851    let shift = unbwt_shift(n);
10852    let block_stride = (n / num_threads) & !15usize;
10853
10854    for thread in 0..num_threads {
10855        let block_start = thread * block_stride;
10856        let block_size = if thread + 1 < num_threads {
10857            block_stride
10858        } else {
10859            n - block_start
10860        };
10861        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10862        local.fill(0);
10863        unbwt_compute_histogram(&t[block_start..block_start + block_size], local);
10864    }
10865
10866    bucket2.fill(0);
10867    for thread in 0..num_threads {
10868        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10869        for c in 0..ALPHABET_SIZE {
10870            let a = bucket2[c];
10871            let b = local[c];
10872            bucket2[c] = a + b;
10873            local[c] = a;
10874        }
10875    }
10876
10877    unbwt_calculate_fastbits(bucket2, fastbits, shift);
10878
10879    for thread in 0..num_threads {
10880        let block_start = thread * block_stride;
10881        let block_size = if thread + 1 < num_threads {
10882            block_stride
10883        } else {
10884            n - block_start
10885        };
10886        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10887        for c in 0..ALPHABET_SIZE {
10888            local[c] += bucket2[c];
10889        }
10890        unbwt_calculate_P(t, p, local, index, block_start, block_start + block_size);
10891    }
10892
10893    let last_local = &buckets[(num_threads - 1) * ALPHABET_SIZE..num_threads * ALPHABET_SIZE];
10894    bucket2.copy_from_slice(last_local);
10895}
10896
/// Resolves the symbol that owns position `p0` and looks up `p[p0]`.
///
/// The search starts at the symbol given by `fastbits[p0 >> shift]` and moves to the
/// next symbol while `bucket2[symbol] <= p0`. Returns the symbol found together with
/// `p[p0]`.
fn unbwt_decode_symbol(
    p0: usize,
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
) -> (u16, usize) {
    let mut symbol = usize::from(fastbits[p0 >> shift]);
    while bucket2[symbol] <= p0 {
        symbol += 1;
    }
    (symbol as u16, p[p0])
}
10912
10913#[allow(dead_code)]
10914fn unbwt_decode_1(
10915    u: &mut [u16],
10916    p: &[usize],
10917    bucket2: &[usize],
10918    fastbits: &[u16],
10919    shift: usize,
10920    i0: &mut usize,
10921    k: usize,
10922) {
10923    let mut cursors = [*i0];
10924    unbwt_decode_lanes::<1>(u, p, bucket2, fastbits, shift, k, &mut cursors, k);
10925    *i0 = cursors[0];
10926}
10927
10928#[allow(dead_code)]
10929fn unbwt_decode_2(
10930    u: &mut [u16],
10931    p: &[usize],
10932    bucket2: &[usize],
10933    fastbits: &[u16],
10934    shift: usize,
10935    r: usize,
10936    i0: &mut usize,
10937    i1: &mut usize,
10938    k: usize,
10939) {
10940    let mut cursors = [*i0, *i1];
10941    unbwt_decode_lanes::<2>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10942    *i0 = cursors[0];
10943    *i1 = cursors[1];
10944}
10945
10946#[allow(dead_code)]
10947fn unbwt_decode_3(
10948    u: &mut [u16],
10949    p: &[usize],
10950    bucket2: &[usize],
10951    fastbits: &[u16],
10952    shift: usize,
10953    r: usize,
10954    i0: &mut usize,
10955    i1: &mut usize,
10956    i2: &mut usize,
10957    k: usize,
10958) {
10959    let mut cursors = [*i0, *i1, *i2];
10960    unbwt_decode_lanes::<3>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10961    *i0 = cursors[0];
10962    *i1 = cursors[1];
10963    *i2 = cursors[2];
10964}
10965
10966#[allow(dead_code)]
10967fn unbwt_decode_4(
10968    u: &mut [u16],
10969    p: &[usize],
10970    bucket2: &[usize],
10971    fastbits: &[u16],
10972    shift: usize,
10973    r: usize,
10974    i0: &mut usize,
10975    i1: &mut usize,
10976    i2: &mut usize,
10977    i3: &mut usize,
10978    k: usize,
10979) {
10980    let mut cursors = [*i0, *i1, *i2, *i3];
10981    unbwt_decode_lanes::<4>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10982    *i0 = cursors[0];
10983    *i1 = cursors[1];
10984    *i2 = cursors[2];
10985    *i3 = cursors[3];
10986}
10987
10988#[allow(dead_code)]
10989fn unbwt_decode_5(
10990    u: &mut [u16],
10991    p: &[usize],
10992    bucket2: &[usize],
10993    fastbits: &[u16],
10994    shift: usize,
10995    r: usize,
10996    cursors: &mut [usize; 5],
10997    k: usize,
10998) {
10999    unbwt_decode_lanes::<5>(u, p, bucket2, fastbits, shift, r, cursors, k);
11000}
11001
11002#[allow(dead_code)]
11003fn unbwt_decode_6(
11004    u: &mut [u16],
11005    p: &[usize],
11006    bucket2: &[usize],
11007    fastbits: &[u16],
11008    shift: usize,
11009    r: usize,
11010    cursors: &mut [usize; 6],
11011    k: usize,
11012) {
11013    unbwt_decode_lanes::<6>(u, p, bucket2, fastbits, shift, r, cursors, k);
11014}
11015
11016#[allow(dead_code)]
11017fn unbwt_decode_7(
11018    u: &mut [u16],
11019    p: &[usize],
11020    bucket2: &[usize],
11021    fastbits: &[u16],
11022    shift: usize,
11023    r: usize,
11024    cursors: &mut [usize; 7],
11025    k: usize,
11026) {
11027    unbwt_decode_lanes::<7>(u, p, bucket2, fastbits, shift, r, cursors, k);
11028}
11029
11030#[allow(dead_code)]
11031fn unbwt_decode_8(
11032    u: &mut [u16],
11033    p: &[usize],
11034    bucket2: &[usize],
11035    fastbits: &[u16],
11036    shift: usize,
11037    r: usize,
11038    cursors: &mut [usize; 8],
11039    k: usize,
11040) {
11041    unbwt_decode_lanes::<8>(u, p, bucket2, fastbits, shift, r, cursors, k);
11042}
11043
11044fn unbwt_decode(
11045    u: &mut [u16],
11046    p: &[usize],
11047    n: usize,
11048    r: usize,
11049    i: &[SaSint],
11050    bucket2: &[usize],
11051    fastbits: &[u16],
11052) {
11053    let shift = unbwt_shift(n);
11054    let blocks = 1 + (n - 1) / r;
11055    let remainder = n - r * (blocks - 1);
11056    unbwt_decode_blocks(u, p, r, i, bucket2, fastbits, shift, blocks, remainder);
11057}
11058
11059fn unbwt_decode_blocks(
11060    u: &mut [u16],
11061    p: &[usize],
11062    r: usize,
11063    i: &[SaSint],
11064    bucket2: &[usize],
11065    fastbits: &[u16],
11066    shift: usize,
11067    blocks: usize,
11068    remainder: usize,
11069) {
11070    let mut blocks_left = blocks;
11071    let mut i_offset = 0usize;
11072    let mut u_offset = 0usize;
11073
11074    while blocks_left > 8 {
11075        let mut cursors = [
11076            i[i_offset] as usize,
11077            i[i_offset + 1] as usize,
11078            i[i_offset + 2] as usize,
11079            i[i_offset + 3] as usize,
11080            i[i_offset + 4] as usize,
11081            i[i_offset + 5] as usize,
11082            i[i_offset + 6] as usize,
11083            i[i_offset + 7] as usize,
11084        ];
11085        unbwt_decode_lanes::<8>(
11086            &mut u[u_offset..],
11087            p,
11088            bucket2,
11089            fastbits,
11090            shift,
11091            r,
11092            &mut cursors,
11093            r,
11094        );
11095        i_offset += 8;
11096        blocks_left -= 8;
11097        u_offset += 8 * r;
11098    }
11099
11100    match blocks_left {
11101        1 => {
11102            let mut cursors = [i[i_offset] as usize];
11103            unbwt_decode_lanes::<1>(
11104                &mut u[u_offset..],
11105                p,
11106                bucket2,
11107                fastbits,
11108                shift,
11109                r,
11110                &mut cursors,
11111                remainder,
11112            );
11113        }
11114        2 => {
11115            let mut cursors = [i[i_offset] as usize, i[i_offset + 1] as usize];
11116            unbwt_decode_lanes::<2>(
11117                &mut u[u_offset..],
11118                p,
11119                bucket2,
11120                fastbits,
11121                shift,
11122                r,
11123                &mut cursors,
11124                remainder,
11125            );
11126            let mut first = [cursors[0]];
11127            unbwt_decode_lanes::<1>(
11128                &mut u[u_offset + remainder..],
11129                p,
11130                bucket2,
11131                fastbits,
11132                shift,
11133                r,
11134                &mut first,
11135                r - remainder,
11136            );
11137        }
11138        3 => {
11139            let mut cursors = [
11140                i[i_offset] as usize,
11141                i[i_offset + 1] as usize,
11142                i[i_offset + 2] as usize,
11143            ];
11144            unbwt_decode_lanes::<3>(
11145                &mut u[u_offset..],
11146                p,
11147                bucket2,
11148                fastbits,
11149                shift,
11150                r,
11151                &mut cursors,
11152                remainder,
11153            );
11154            let mut first = [cursors[0], cursors[1]];
11155            unbwt_decode_lanes::<2>(
11156                &mut u[u_offset + remainder..],
11157                p,
11158                bucket2,
11159                fastbits,
11160                shift,
11161                r,
11162                &mut first,
11163                r - remainder,
11164            );
11165        }
11166        4 => {
11167            let mut cursors = [
11168                i[i_offset] as usize,
11169                i[i_offset + 1] as usize,
11170                i[i_offset + 2] as usize,
11171                i[i_offset + 3] as usize,
11172            ];
11173            unbwt_decode_lanes::<4>(
11174                &mut u[u_offset..],
11175                p,
11176                bucket2,
11177                fastbits,
11178                shift,
11179                r,
11180                &mut cursors,
11181                remainder,
11182            );
11183            let mut first = [cursors[0], cursors[1], cursors[2]];
11184            unbwt_decode_lanes::<3>(
11185                &mut u[u_offset + remainder..],
11186                p,
11187                bucket2,
11188                fastbits,
11189                shift,
11190                r,
11191                &mut first,
11192                r - remainder,
11193            );
11194        }
11195        5 => {
11196            let mut cursors = [
11197                i[i_offset] as usize,
11198                i[i_offset + 1] as usize,
11199                i[i_offset + 2] as usize,
11200                i[i_offset + 3] as usize,
11201                i[i_offset + 4] as usize,
11202            ];
11203            unbwt_decode_lanes::<5>(
11204                &mut u[u_offset..],
11205                p,
11206                bucket2,
11207                fastbits,
11208                shift,
11209                r,
11210                &mut cursors,
11211                remainder,
11212            );
11213            let mut first = [cursors[0], cursors[1], cursors[2], cursors[3]];
11214            unbwt_decode_lanes::<4>(
11215                &mut u[u_offset + remainder..],
11216                p,
11217                bucket2,
11218                fastbits,
11219                shift,
11220                r,
11221                &mut first,
11222                r - remainder,
11223            );
11224        }
11225        6 => {
11226            let mut cursors = [
11227                i[i_offset] as usize,
11228                i[i_offset + 1] as usize,
11229                i[i_offset + 2] as usize,
11230                i[i_offset + 3] as usize,
11231                i[i_offset + 4] as usize,
11232                i[i_offset + 5] as usize,
11233            ];
11234            unbwt_decode_lanes::<6>(
11235                &mut u[u_offset..],
11236                p,
11237                bucket2,
11238                fastbits,
11239                shift,
11240                r,
11241                &mut cursors,
11242                remainder,
11243            );
11244            let mut first = [cursors[0], cursors[1], cursors[2], cursors[3], cursors[4]];
11245            unbwt_decode_lanes::<5>(
11246                &mut u[u_offset + remainder..],
11247                p,
11248                bucket2,
11249                fastbits,
11250                shift,
11251                r,
11252                &mut first,
11253                r - remainder,
11254            );
11255        }
11256        7 => {
11257            let mut cursors = [
11258                i[i_offset] as usize,
11259                i[i_offset + 1] as usize,
11260                i[i_offset + 2] as usize,
11261                i[i_offset + 3] as usize,
11262                i[i_offset + 4] as usize,
11263                i[i_offset + 5] as usize,
11264                i[i_offset + 6] as usize,
11265            ];
11266            unbwt_decode_lanes::<7>(
11267                &mut u[u_offset..],
11268                p,
11269                bucket2,
11270                fastbits,
11271                shift,
11272                r,
11273                &mut cursors,
11274                remainder,
11275            );
11276            let mut first = [
11277                cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5],
11278            ];
11279            unbwt_decode_lanes::<6>(
11280                &mut u[u_offset + remainder..],
11281                p,
11282                bucket2,
11283                fastbits,
11284                shift,
11285                r,
11286                &mut first,
11287                r - remainder,
11288            );
11289        }
11290        _ => {
11291            let mut cursors = [
11292                i[i_offset] as usize,
11293                i[i_offset + 1] as usize,
11294                i[i_offset + 2] as usize,
11295                i[i_offset + 3] as usize,
11296                i[i_offset + 4] as usize,
11297                i[i_offset + 5] as usize,
11298                i[i_offset + 6] as usize,
11299                i[i_offset + 7] as usize,
11300            ];
11301            unbwt_decode_lanes::<8>(
11302                &mut u[u_offset..],
11303                p,
11304                bucket2,
11305                fastbits,
11306                shift,
11307                r,
11308                &mut cursors,
11309                remainder,
11310            );
11311            let mut first = [
11312                cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5], cursors[6],
11313            ];
11314            unbwt_decode_lanes::<7>(
11315                &mut u[u_offset + remainder..],
11316                p,
11317                bucket2,
11318                fastbits,
11319                shift,
11320                r,
11321                &mut first,
11322                r - remainder,
11323            );
11324        }
11325    }
11326}
11327
11328#[allow(dead_code)]
11329fn unbwt_decode_omp(
11330    u: &mut [u16],
11331    p: &[usize],
11332    n: usize,
11333    r: usize,
11334    i: &[SaSint],
11335    bucket2: &[usize],
11336    fastbits: &[u16],
11337    threads: SaSint,
11338) {
11339    let blocks = 1 + (n - 1) / r;
11340    let remainder = n - r * (blocks - 1);
11341    let num_threads = if threads > 1 && n >= 65_536 {
11342        usize::try_from(threads)
11343            .expect("threads must be non-negative")
11344            .min(blocks)
11345            .max(1)
11346    } else {
11347        1
11348    };
11349
11350    if num_threads == 1 {
11351        unbwt_decode(u, p, n, r, i, bucket2, fastbits);
11352        return;
11353    }
11354
11355    let shift = unbwt_shift(n);
11356    let block_stride = blocks / num_threads;
11357    let block_remainder = blocks % num_threads;
11358    for thread in 0..num_threads {
11359        let block_count = block_stride + usize::from(thread < block_remainder);
11360        let block_start = block_stride * thread + thread.min(block_remainder);
11361        let tail = if thread + 1 < num_threads {
11362            r
11363        } else {
11364            remainder
11365        };
11366        unbwt_decode_blocks(
11367            &mut u[r * block_start..],
11368            p,
11369            r,
11370            &i[block_start..],
11371            bucket2,
11372            fastbits,
11373            shift,
11374            block_count,
11375            tail,
11376        );
11377    }
11378}
11379
11380fn unbwt_decode_lanes<const LANES: usize>(
11381    u: &mut [u16],
11382    p: &[usize],
11383    bucket2: &[usize],
11384    fastbits: &[u16],
11385    shift: usize,
11386    r: usize,
11387    cursors: &mut [usize; LANES],
11388    k: usize,
11389) {
11390    for pos in 0..k {
11391        for lane in 0..LANES {
11392            let (symbol, next) = unbwt_decode_symbol(cursors[lane], p, bucket2, fastbits, shift);
11393            cursors[lane] = next;
11394            u[lane * r + pos] = symbol;
11395        }
11396    }
11397}
11398
11399fn unbwt_core(
11400    t: &[u16],
11401    u: &mut [u16],
11402    a: &mut [SaSint],
11403    freq: Option<&[SaSint]>,
11404    r: SaSint,
11405    i: &[SaSint],
11406) -> SaSint {
11407    let n = t.len();
11408    let shift = unbwt_shift(n);
11409    let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11410    let mut fastbits = vec![0u16; 1 + (n >> shift)];
11411
11412    unbwt_core_with_buffers(t, u, a, freq, r, i, &mut bucket2, &mut fastbits, 1)
11413}
11414
11415fn unbwt_core_with_buffers(
11416    t: &[u16],
11417    u: &mut [u16],
11418    a: &mut [SaSint],
11419    freq: Option<&[SaSint]>,
11420    r: SaSint,
11421    i: &[SaSint],
11422    bucket2: &mut [usize],
11423    fastbits: &mut [u16],
11424    threads: SaSint,
11425) -> SaSint {
11426    let n = t.len();
11427    let shift = unbwt_shift(n);
11428    if bucket2.len() < ALPHABET_SIZE || fastbits.len() < 1 + (n >> shift) {
11429        return -2;
11430    }
11431
11432    let mut p = vec![0usize; n + 1];
11433    unbwt_init_single(
11434        t,
11435        &mut p,
11436        freq,
11437        i,
11438        &mut bucket2[..ALPHABET_SIZE],
11439        &mut fastbits[..1 + (n >> shift)],
11440    );
11441    unbwt_decode_omp(
11442        u,
11443        &p,
11444        n,
11445        r as usize,
11446        i,
11447        &bucket2[..ALPHABET_SIZE],
11448        &fastbits[..1 + (n >> shift)],
11449        threads,
11450    );
11451
11452    for (dst, &src) in a.iter_mut().zip(p.iter().skip(1)) {
11453        *dst = src as SaSint;
11454    }
11455    0
11456}
11457
11458fn inverse_bwt(
11459    t: &[u16],
11460    u: &mut [u16],
11461    a: &mut [SaSint],
11462    freq: Option<&[SaSint]>,
11463    primary: SaSint,
11464) -> SaSint {
11465    let n = t.len();
11466    let i = [primary];
11467    let rc = validate_unbwt_aux(t, u, a, freq, n as SaSint, &i);
11468    if rc != 0 {
11469        return rc;
11470    }
11471    if n <= 1 {
11472        if n == 1 {
11473            u[0] = t[0];
11474        }
11475        return 0;
11476    }
11477    unbwt_core(t, u, a, freq, n as SaSint, &i)
11478}
11479
11480/// Reconstructs the original 16-bit string from a given BWT and primary index.
11481///
11482/// - `t` (`[0..n-1]`): the input 16-bit string.
11483/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11484/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11485/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11486/// - `i`: the primary index.
11487///
11488/// Returns 0 on success, -1 or -2 on error.
11489pub fn libsais16x64_unbwt(
11490    t: &[u16],
11491    u: &mut [u16],
11492    a: &mut [SaSint],
11493    freq: Option<&[SaSint]>,
11494    i: SaSint,
11495) -> SaSint {
11496    inverse_bwt(t, u, a, freq, i)
11497}
11498
11499/// Reconstructs the original 16-bit string from a given BWT and primary index using a libsais16x64 reverse-BWT context.
11500///
11501/// - `ctx`: the libsais16x64 reverse-BWT context.
11502/// - `t` (`[0..n-1]`): the input 16-bit string.
11503/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11504/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11505/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11506/// - `i`: the primary index.
11507///
11508/// Returns 0 on success, -1 or -2 on error.
11509pub fn libsais16x64_unbwt_ctx(
11510    ctx: &mut UnbwtContext,
11511    t: &[u16],
11512    u: &mut [u16],
11513    a: &mut [SaSint],
11514    freq: Option<&[SaSint]>,
11515    i: SaSint,
11516) -> SaSint {
11517    libsais16x64_unbwt_aux_ctx(ctx, t, u, a, freq, t.len() as SaSint, &[i])
11518}
11519
11520/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes.
11521///
11522/// - `t` (`[0..n-1]`): the input 16-bit string.
11523/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11524/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11525/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11526/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11527/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11528///
11529/// Returns 0 on success, -1 or -2 on error.
11530pub fn libsais16x64_unbwt_aux(
11531    t: &[u16],
11532    u: &mut [u16],
11533    a: &mut [SaSint],
11534    freq: Option<&[SaSint]>,
11535    r: SaSint,
11536    i: &[SaSint],
11537) -> SaSint {
11538    let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11539    if rc != 0 {
11540        return rc;
11541    }
11542    if t.len() <= 1 {
11543        if t.len() == 1 {
11544            u[0] = t[0];
11545        }
11546        return 0;
11547    }
11548    unbwt_core(t, u, a, freq, r, i)
11549}
11550
11551/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes using a libsais16x64 reverse-BWT context.
11552///
11553/// - `ctx`: the libsais16x64 reverse-BWT context.
11554/// - `t` (`[0..n-1]`): the input 16-bit string.
11555/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11556/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11557/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11558/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11559/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11560///
11561/// Returns 0 on success, -1 or -2 on error.
11562pub fn libsais16x64_unbwt_aux_ctx(
11563    ctx: &mut UnbwtContext,
11564    t: &[u16],
11565    u: &mut [u16],
11566    a: &mut [SaSint],
11567    freq: Option<&[SaSint]>,
11568    r: SaSint,
11569    i: &[SaSint],
11570) -> SaSint {
11571    let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11572    if rc != 0 {
11573        return rc;
11574    }
11575    if t.len() <= 1 {
11576        if t.len() == 1 {
11577            u[0] = t[0];
11578        }
11579        return 0;
11580    }
11581    unbwt_core_with_buffers(
11582        t,
11583        u,
11584        a,
11585        freq,
11586        r,
11587        i,
11588        &mut ctx.bucket2,
11589        &mut ctx.fastbits,
11590        ctx.threads,
11591    )
11592}
11593
11594/// Reconstructs the original 16-bit string from a given BWT and primary index in parallel using OpenMP-style threading.
11595///
11596/// - `t` (`[0..n-1]`): the input 16-bit string.
11597/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11598/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11599/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11600/// - `i`: the primary index.
11601/// - `threads`: number of worker threads (can be 0 for the implementation default).
11602///
11603/// Returns 0 on success, -1 or -2 on error.
11604pub fn libsais16x64_unbwt_omp(
11605    t: &[u16],
11606    u: &mut [u16],
11607    a: &mut [SaSint],
11608    freq: Option<&[SaSint]>,
11609    i: SaSint,
11610    threads: SaSint,
11611) -> SaSint {
11612    if threads < 0 {
11613        -1
11614    } else {
11615        let primary = [i];
11616        libsais16x64_unbwt_aux_omp(t, u, a, freq, t.len() as SaSint, &primary, threads)
11617    }
11618}
11619
11620/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes in parallel using OpenMP-style threading.
11621///
11622/// - `t` (`[0..n-1]`): the input 16-bit string.
11623/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11624/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11625/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11626/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11627/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11628/// - `threads`: number of worker threads (can be 0 for the implementation default).
11629///
11630/// Returns 0 on success, -1 or -2 on error.
11631pub fn libsais16x64_unbwt_aux_omp(
11632    t: &[u16],
11633    u: &mut [u16],
11634    a: &mut [SaSint],
11635    freq: Option<&[SaSint]>,
11636    r: SaSint,
11637    i: &[SaSint],
11638    threads: SaSint,
11639) -> SaSint {
11640    if threads < 0 {
11641        -1
11642    } else {
11643        let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11644        if rc != 0 {
11645            return rc;
11646        }
11647        if t.len() <= 1 {
11648            if t.len() == 1 {
11649                u[0] = t[0];
11650            }
11651            return 0;
11652        }
11653        let n = t.len();
11654        let shift = unbwt_shift(n);
11655        let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11656        let mut fastbits = vec![0u16; 1 + (n >> shift)];
11657        unbwt_core_with_buffers(
11658            t,
11659            u,
11660            a,
11661            freq,
11662            r,
11663            i,
11664            &mut bucket2,
11665            &mut fastbits,
11666            normalize_threads(threads),
11667        )
11668    }
11669}
11670
11671/// Constructs the permuted longest common prefix array (PLCP) of a given 16-bit string and suffix array.
11672///
11673/// - `t` (`[0..n-1]`): the input 16-bit string.
11674/// - `sa` (`[0..n-1]`): the input suffix array.
11675/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11676///
11677/// Returns 0 on success, -1 on error.
11678pub fn libsais16x64_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11679    compute_plcp(t, sa, plcp, false)
11680}
11681
11682/// Constructs the PLCP of a given 16-bit string set and generalized suffix array (GSA).
11683///
11684/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
11685/// - `sa` (`[0..n-1]`): the input generalized suffix array.
11686/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11687///
11688/// Returns 0 on success, -1 on error.
11689pub fn libsais16x64_plcp_gsa(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11690    if t.last().copied().unwrap_or(0) != 0 {
11691        -1
11692    } else {
11693        compute_plcp(t, sa, plcp, true)
11694    }
11695}
11696
11697fn compute_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11698    if sa.len() != t.len() || plcp.len() != t.len() {
11699        return -1;
11700    }
11701    if t.len() <= 1 {
11702        if t.len() == 1 {
11703            plcp[0] = 0;
11704        }
11705        return 0;
11706    }
11707
11708    if compute_phi(sa, plcp) != 0 {
11709        return -1;
11710    }
11711
11712    compute_plcp_from_phi(t, plcp, gsa)
11713}
11714
11715fn compute_phi(sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11716    let n = sa.len();
11717    let mut previous = n as SaSint;
11718    for &suffix_value in sa {
11719        let Some(suffix) = suffix_index(suffix_value, n) else {
11720            return -1;
11721        };
11722        plcp[suffix] = previous;
11723        previous = suffix_value;
11724    }
11725    0
11726}
11727
11728fn compute_plcp_from_phi(t: &[u16], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11729    let n = t.len();
11730    let mut l = 0usize;
11731    for i in 0..t.len() {
11732        let previous = plcp[i];
11733        if previous == n as SaSint {
11734            plcp[i] = 0;
11735            l = 0;
11736            continue;
11737        }
11738
11739        let Some(prev) = suffix_index(previous, n) else {
11740            return -1;
11741        };
11742
11743        while i + l < t.len()
11744            && prev + l < t.len()
11745            && t[i + l] == t[prev + l]
11746            && (!gsa || t[i + l] != 0)
11747        {
11748            l += 1;
11749        }
11750        plcp[i] = l as SaSint;
11751        l = l.saturating_sub(1);
11752    }
11753    0
11754}
11755
11756#[allow(dead_code)]
11757fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11758    let n_usize = n as usize;
11759    if threads == 1 || n < 65_536 {
11760        return compute_phi(&sa[..n_usize], &mut plcp[..n_usize]);
11761    }
11762
11763    let block_stride = (n / threads) & !15;
11764    for thread in 0..threads {
11765        let block_start = thread * block_stride;
11766        let block_size = if thread < threads - 1 {
11767            block_stride
11768        } else {
11769            n - block_start
11770        };
11771        let start = block_start as usize;
11772        let end = (block_start + block_size) as usize;
11773        let mut previous = if start > 0 { sa[start - 1] } else { n };
11774        for &suffix_value in &sa[start..end] {
11775            let Some(suffix) = suffix_index(suffix_value, n_usize) else {
11776                return -1;
11777            };
11778            plcp[suffix] = previous;
11779            previous = suffix_value;
11780        }
11781    }
11782    0
11783}
11784
11785#[allow(dead_code)]
11786fn compute_plcp_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11787    if threads == 1 || n < 65_536 {
11788        let n = n as usize;
11789        return compute_plcp_from_phi(&t[..n], &mut plcp[..n], false);
11790    }
11791
11792    let block_stride = (n / threads) & !15;
11793    for thread in 0..threads {
11794        let block_start = thread * block_stride;
11795        let block_size = if thread < threads - 1 {
11796            block_stride
11797        } else {
11798            n - block_start
11799        };
11800        let rc = compute_plcp_range(
11801            t,
11802            plcp,
11803            n as usize,
11804            block_start as isize,
11805            block_size as isize,
11806            false,
11807        );
11808        if rc != 0 {
11809            return rc;
11810        }
11811    }
11812    0
11813}
11814
11815fn compute_plcp_range(
11816    t: &[u16],
11817    plcp: &mut [SaSint],
11818    n: usize,
11819    omp_block_start: isize,
11820    omp_block_size: isize,
11821    gsa: bool,
11822) -> SaSint {
11823    let mut l = 0usize;
11824    let end = (omp_block_start + omp_block_size) as usize;
11825    for i in omp_block_start as usize..end {
11826        let previous = plcp[i];
11827        if previous == n as SaSint {
11828            plcp[i] = 0;
11829            l = 0;
11830            continue;
11831        }
11832
11833        let Some(prev) = suffix_index(previous, n) else {
11834            return -1;
11835        };
11836
11837        while i + l < t.len()
11838            && prev + l < t.len()
11839            && t[i + l] == t[prev + l]
11840            && (!gsa || t[i + l] != 0)
11841        {
11842            l += 1;
11843        }
11844        plcp[i] = l as SaSint;
11845        l = l.saturating_sub(1);
11846    }
11847    0
11848}
11849
11850#[allow(dead_code)]
11851fn compute_plcp_gsa(
11852    t: &[u16],
11853    plcp: &mut [SaSint],
11854    omp_block_start: isize,
11855    omp_block_size: isize,
11856) -> SaSint {
11857    let n = t.len();
11858    let mut l = 0usize;
11859    let end = (omp_block_start + omp_block_size) as usize;
11860    for i in omp_block_start as usize..end {
11861        let previous = plcp[i];
11862        if previous == n as SaSint {
11863            plcp[i] = 0;
11864            l = 0;
11865            continue;
11866        }
11867
11868        let Some(prev) = suffix_index(previous, n) else {
11869            return -1;
11870        };
11871
11872        while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] && t[i + l] != 0 {
11873            l += 1;
11874        }
11875        plcp[i] = l as SaSint;
11876        l = l.saturating_sub(1);
11877    }
11878    0
11879}
11880
11881#[allow(dead_code)]
11882fn compute_plcp_gsa_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11883    if threads == 1 || n < 65_536 {
11884        return compute_plcp_gsa(t, plcp, 0, n as isize);
11885    }
11886
11887    let block_stride = (n / threads) & !15;
11888    for thread in 0..threads {
11889        let block_start = thread * block_stride;
11890        let block_size = if thread < threads - 1 {
11891            block_stride
11892        } else {
11893            n - block_start
11894        };
11895        let rc = compute_plcp_gsa(t, plcp, block_start as isize, block_size as isize);
11896        if rc != 0 {
11897            return rc;
11898        }
11899    }
11900    0
11901}
11902
11903#[allow(dead_code)]
11904fn compute_lcp(
11905    plcp: &[SaSint],
11906    sa: &[SaSint],
11907    lcp: &mut [SaSint],
11908    omp_block_start: isize,
11909    omp_block_size: isize,
11910) -> SaSint {
11911    let end = (omp_block_start + omp_block_size) as usize;
11912    for row in omp_block_start as usize..end {
11913        let Some(suffix) = suffix_index(sa[row], plcp.len()) else {
11914            return -1;
11915        };
11916        lcp[row] = plcp[suffix];
11917    }
11918    0
11919}
11920
11921#[allow(dead_code)]
11922fn compute_lcp_omp(
11923    plcp: &[SaSint],
11924    sa: &[SaSint],
11925    lcp: &mut [SaSint],
11926    n: SaSint,
11927    threads: SaSint,
11928) -> SaSint {
11929    if threads == 1 || n < 65_536 {
11930        return compute_lcp(plcp, sa, lcp, 0, n as isize);
11931    }
11932
11933    let block_stride = (n / threads) & !15;
11934    for thread in 0..threads {
11935        let block_start = thread * block_stride;
11936        let block_size = if thread < threads - 1 {
11937            block_stride
11938        } else {
11939            n - block_start
11940        };
11941        let rc = compute_lcp(plcp, sa, lcp, block_start as isize, block_size as isize);
11942        if rc != 0 {
11943            return rc;
11944        }
11945    }
11946    0
11947}
11948
11949/// Constructs the longest common prefix array (LCP) from a PLCP and suffix array.
11950///
11951/// - `plcp` (`[0..n-1]`): the input permuted longest common prefix array.
11952/// - `sa` (`[0..n-1]`): the input suffix array or generalized suffix array (GSA).
11953/// - `lcp` (`[0..n-1]`): the output longest common prefix array (can alias `sa`).
11954///
11955/// Returns 0 on success, -1 on error.
11956pub fn libsais16x64_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
11957    if plcp.len() != sa.len() || lcp.len() != sa.len() {
11958        return -1;
11959    }
11960    for (row, &suffix) in sa.iter().enumerate() {
11961        let Some(suffix) = suffix_index(suffix, plcp.len()) else {
11962            return -1;
11963        };
11964        lcp[row] = plcp[suffix];
11965    }
11966    0
11967}
11968
11969fn suffix_index(value: SaSint, len: usize) -> Option<usize> {
11970    usize::try_from(value).ok().filter(|&index| index < len)
11971}
11972
11973/// Constructs the PLCP of a given 16-bit string and suffix array in parallel using OpenMP-style threading.
11974///
11975/// - `t` (`[0..n-1]`): the input 16-bit string.
11976/// - `sa` (`[0..n-1]`): the input suffix array.
11977/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11978/// - `threads`: number of worker threads (can be 0 for the implementation default).
11979///
11980/// Returns 0 on success, -1 on error.
11981pub fn libsais16x64_plcp_omp(
11982    t: &[u16],
11983    sa: &[SaSint],
11984    plcp: &mut [SaSint],
11985    threads: SaSint,
11986) -> SaSint {
11987    if threads < 0 {
11988        return -1;
11989    }
11990    if sa.len() != t.len() || plcp.len() != t.len() {
11991        return -1;
11992    }
11993    if t.len() <= 1 {
11994        if t.len() == 1 {
11995            plcp[0] = 0;
11996        }
11997        return 0;
11998    }
11999
12000    let n = t.len() as SaSint;
12001    let threads = normalize_threads(threads);
12002    if compute_phi_omp(sa, plcp, n, threads) != 0 {
12003        return -1;
12004    }
12005    compute_plcp_omp(t, plcp, n, threads)
12006}
12007
12008/// Constructs the PLCP of a given 16-bit string set and GSA in parallel using OpenMP-style threading.
12009///
12010/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
12011/// - `sa` (`[0..n-1]`): the input generalized suffix array.
12012/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
12013/// - `threads`: number of worker threads (can be 0 for the implementation default).
12014///
12015/// Returns 0 on success, -1 on error.
12016pub fn libsais16x64_plcp_gsa_omp(
12017    t: &[u16],
12018    sa: &[SaSint],
12019    plcp: &mut [SaSint],
12020    threads: SaSint,
12021) -> SaSint {
12022    if threads < 0 {
12023        return -1;
12024    }
12025    if t.last().copied().unwrap_or(0) != 0 {
12026        return -1;
12027    }
12028    if sa.len() != t.len() || plcp.len() != t.len() {
12029        return -1;
12030    }
12031    if t.len() <= 1 {
12032        if t.len() == 1 {
12033            plcp[0] = 0;
12034        }
12035        return 0;
12036    }
12037
12038    let n = t.len() as SaSint;
12039    let threads = normalize_threads(threads);
12040    if compute_phi_omp(sa, plcp, n, threads) != 0 {
12041        return -1;
12042    }
12043    compute_plcp_gsa_omp(t, plcp, n, threads)
12044}
12045
12046/// Constructs the LCP from a PLCP and suffix array in parallel using OpenMP-style threading.
12047///
12048/// - `plcp` (`[0..n-1]`): the input permuted longest common prefix array.
12049/// - `sa` (`[0..n-1]`): the input suffix array or generalized suffix array (GSA).
12050/// - `lcp` (`[0..n-1]`): the output longest common prefix array (can alias `sa`).
12051/// - `threads`: number of worker threads (can be 0 for the implementation default).
12052///
12053/// Returns 0 on success, -1 on error.
12054pub fn libsais16x64_lcp_omp(
12055    plcp: &[SaSint],
12056    sa: &[SaSint],
12057    lcp: &mut [SaSint],
12058    threads: SaSint,
12059) -> SaSint {
12060    if threads < 0 {
12061        return -1;
12062    }
12063    if plcp.len() != sa.len() || lcp.len() != sa.len() {
12064        return -1;
12065    }
12066
12067    compute_lcp_omp(
12068        plcp,
12069        sa,
12070        lcp,
12071        sa.len() as SaSint,
12072        normalize_threads(threads),
12073    )
12074}
12075
12076#[cfg(all(test, feature = "upstream-c"))]
12077mod tests {
12078    use super::*;
12079
12080    unsafe extern "C" {
12081        fn probe_public_libsais16x64(
12082            t: *const u16,
12083            sa: *mut SaSint,
12084            n: SaSint,
12085            fs: SaSint,
12086        ) -> SaSint;
12087        fn probe_public_libsais16x64_freq(
12088            t: *const u16,
12089            sa: *mut SaSint,
12090            n: SaSint,
12091            fs: SaSint,
12092            freq: *mut SaSint,
12093        ) -> SaSint;
12094        fn probe_public_libsais16x64_gsa(
12095            t: *const u16,
12096            sa: *mut SaSint,
12097            n: SaSint,
12098            fs: SaSint,
12099        ) -> SaSint;
12100        fn probe_public_libsais16x64_gsa_freq(
12101            t: *const u16,
12102            sa: *mut SaSint,
12103            n: SaSint,
12104            fs: SaSint,
12105            freq: *mut SaSint,
12106        ) -> SaSint;
12107        fn probe_public_libsais16x64_long(
12108            t: *mut SaSint,
12109            sa: *mut SaSint,
12110            n: SaSint,
12111            k: SaSint,
12112            fs: SaSint,
12113        ) -> SaSint;
12114        fn probe_libsais16x64_main_32s_entry(
12115            t: *mut SaSint,
12116            sa: *mut SaSint,
12117            n: SaSint,
12118            k: SaSint,
12119            fs: SaSint,
12120            threads: SaSint,
12121        ) -> SaSint;
12122        fn probe_libsais16x64_final_sorting_scan_left_to_right_32s(
12123            t: *const SaSint,
12124            sa: *mut SaSint,
12125            induction_bucket: *mut SaSint,
12126            omp_block_start: SaSint,
12127            omp_block_size: SaSint,
12128        );
12129        fn probe_libsais16x64_final_sorting_scan_right_to_left_32s(
12130            t: *const SaSint,
12131            sa: *mut SaSint,
12132            induction_bucket: *mut SaSint,
12133            omp_block_start: SaSint,
12134            omp_block_size: SaSint,
12135        );
12136        fn probe_libsais16x64_clear_lms_suffixes_omp(
12137            sa: *mut SaSint,
12138            n: SaSint,
12139            k: SaSint,
12140            bucket_start: *mut SaSint,
12141            bucket_end: *mut SaSint,
12142            threads: SaSint,
12143        );
12144        fn probe_libsais16x64_flip_suffix_markers_omp(sa: *mut SaSint, l: SaSint, threads: SaSint);
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_final_order_32s_6k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12145        fn probe_libsais16x64_induce_final_order_32s_6k(
12146            t: *const SaSint,
12147            sa: *mut SaSint,
12148            n: SaSint,
12149            k: SaSint,
12150            buckets: *mut SaSint,
12151            threads: SaSint,
12152        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_final_order_32s_4k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12153        fn probe_libsais16x64_induce_final_order_32s_4k(
12154            t: *const SaSint,
12155            sa: *mut SaSint,
12156            n: SaSint,
12157            k: SaSint,
12158            buckets: *mut SaSint,
12159            threads: SaSint,
12160        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_final_order_32s_2k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12161        fn probe_libsais16x64_induce_final_order_32s_2k(
12162            t: *const SaSint,
12163            sa: *mut SaSint,
12164            n: SaSint,
12165            k: SaSint,
12166            buckets: *mut SaSint,
12167            threads: SaSint,
12168        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_final_order_32s_1k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12169        fn probe_libsais16x64_induce_final_order_32s_1k(
12170            t: *const SaSint,
12171            sa: *mut SaSint,
12172            n: SaSint,
12173            k: SaSint,
12174            buckets: *mut SaSint,
12175            threads: SaSint,
12176        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_partial_order_32s_6k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Also takes the scalars
/// `first_lms_suffix` and `left_suffixes_count`, which the 4k/2k/1k siblings do not. No return value.
12177        fn probe_libsais16x64_induce_partial_order_32s_6k_omp(
12178            t: *const SaSint,
12179            sa: *mut SaSint,
12180            n: SaSint,
12181            k: SaSint,
12182            buckets: *mut SaSint,
12183            first_lms_suffix: SaSint,
12184            left_suffixes_count: SaSint,
12185            threads: SaSint,
12186        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_partial_order_32s_4k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12187        fn probe_libsais16x64_induce_partial_order_32s_4k_omp(
12188            t: *const SaSint,
12189            sa: *mut SaSint,
12190            n: SaSint,
12191            k: SaSint,
12192            buckets: *mut SaSint,
12193            threads: SaSint,
12194        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_partial_order_32s_2k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12195        fn probe_libsais16x64_induce_partial_order_32s_2k_omp(
12196            t: *const SaSint,
12197            sa: *mut SaSint,
12198            n: SaSint,
12199            k: SaSint,
12200            buckets: *mut SaSint,
12201            threads: SaSint,
12202        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_partial_order_32s_1k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12203        fn probe_libsais16x64_induce_partial_order_32s_1k_omp(
12204            t: *const SaSint,
12205            sa: *mut SaSint,
12206            n: SaSint,
12207            k: SaSint,
12208            buckets: *mut SaSint,
12209            threads: SaSint,
12210        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_partial_order_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Remaining parameters,
/// including `flags`, are `SaSint` scalars. No return value.
12211        fn probe_libsais16x64_induce_partial_order_16u_omp(
12212            t: *const u16,
12213            sa: *mut SaSint,
12214            n: SaSint,
12215            k: SaSint,
12216            flags: SaSint,
12217            buckets: *mut SaSint,
12218            first_lms_suffix: SaSint,
12219            left_suffixes_count: SaSint,
12220            threads: SaSint,
12221        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_induce_final_order_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa`, `i` and `buckets` are `*mut SaSint`. `r` is a scalar,
/// while `i` is a pointer. Returns `SaSint`.
12222        fn probe_libsais16x64_induce_final_order_16u_omp(
12223            t: *const u16,
12224            sa: *mut SaSint,
12225            n: SaSint,
12226            k: SaSint,
12227            flags: SaSint,
12228            r: SaSint,
12229            i: *mut SaSint,
12230            buckets: *mut SaSint,
12231            threads: SaSint,
12232        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt` (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`, `a` is `*mut SaSint`. Returns `SaSint`.
12233        fn probe_public_libsais16x64_bwt(
12234            t: *const u16,
12235            u: *mut u16,
12236            a: *mut SaSint,
12237            n: SaSint,
12238            fs: SaSint,
12239        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt` with a frequency table (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`; `a` and `freq` are `*mut SaSint`. Returns `SaSint`.
12240        fn probe_public_libsais16x64_bwt_freq(
12241            t: *const u16,
12242            u: *mut u16,
12243            a: *mut SaSint,
12244            n: SaSint,
12245            fs: SaSint,
12246            freq: *mut SaSint,
12247        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt_aux` (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`; `a` and `i` are `*mut SaSint`. `r` is a scalar.
/// Returns `SaSint`.
12248        fn probe_public_libsais16x64_bwt_aux(
12249            t: *const u16,
12250            u: *mut u16,
12251            a: *mut SaSint,
12252            n: SaSint,
12253            fs: SaSint,
12254            r: SaSint,
12255            i: *mut SaSint,
12256        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt_aux` with a frequency table (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`; `a`, `freq` and `i` are `*mut SaSint`. Returns `SaSint`.
12257        fn probe_public_libsais16x64_bwt_aux_freq(
12258            t: *const u16,
12259            u: *mut u16,
12260            a: *mut SaSint,
12261            n: SaSint,
12262            fs: SaSint,
12263            freq: *mut SaSint,
12264            r: SaSint,
12265            i: *mut SaSint,
12266        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_unbwt` (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`, `a` is `*mut SaSint`. `i` is a scalar here.
/// Returns `SaSint`.
12267        fn probe_public_libsais16x64_unbwt(
12268            t: *const u16,
12269            u: *mut u16,
12270            a: *mut SaSint,
12271            n: SaSint,
12272            i: SaSint,
12273        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_unbwt` with a frequency table (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`, `a` is `*mut SaSint`; `freq` is `*const SaSint`
/// (read-only, unlike the `*mut` `freq` of the forward BWT probes). Returns `SaSint`.
12274        fn probe_public_libsais16x64_unbwt_freq(
12275            t: *const u16,
12276            u: *mut u16,
12277            a: *mut SaSint,
12278            n: SaSint,
12279            freq: *const SaSint,
12280            i: SaSint,
12281        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_unbwt_aux` (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`, `a` is `*mut SaSint`; `i` is `*const SaSint`.
/// `r` is a scalar. Returns `SaSint`.
12282        fn probe_public_libsais16x64_unbwt_aux(
12283            t: *const u16,
12284            u: *mut u16,
12285            a: *mut SaSint,
12286            n: SaSint,
12287            r: SaSint,
12288            i: *const SaSint,
12289        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_unbwt_aux` with a frequency table (TODO confirm).
/// Pointers: `t` is `*const u16`, `u` is `*mut u16`, `a` is `*mut SaSint`; `freq` and `i` are
/// `*const SaSint`. Returns `SaSint`.
12290        fn probe_public_libsais16x64_unbwt_aux_freq(
12291            t: *const u16,
12292            u: *mut u16,
12293            a: *mut SaSint,
12294            n: SaSint,
12295            freq: *const SaSint,
12296            r: SaSint,
12297            i: *const SaSint,
12298        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_plcp` (TODO confirm).
/// Pointers: `t` is `*const u16`, `sa` is `*const SaSint`, `plcp` is `*mut SaSint`. Returns `SaSint`.
12299        fn probe_public_libsais16x64_plcp(
12300            t: *const u16,
12301            sa: *const SaSint,
12302            plcp: *mut SaSint,
12303            n: SaSint,
12304        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_plcp_gsa` (TODO confirm).
/// Pointers: `t` is `*const u16`, `sa` is `*const SaSint`, `plcp` is `*mut SaSint`. Returns `SaSint`.
12305        fn probe_public_libsais16x64_plcp_gsa(
12306            t: *const u16,
12307            sa: *const SaSint,
12308            plcp: *mut SaSint,
12309            n: SaSint,
12310        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_lcp` (TODO confirm).
/// Pointers: `plcp` and `sa` are `*const SaSint`; `lcp` is `*mut SaSint`. Takes no text pointer.
/// Returns `SaSint`.
12311        fn probe_public_libsais16x64_lcp(
12312            plcp: *const SaSint,
12313            sa: *const SaSint,
12314            lcp: *mut SaSint,
12315            n: SaSint,
12316        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_gather_lms_suffixes_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`, `sa` is `*mut SaSint`. No return value.
12317        fn probe_libsais16x64_gather_lms_suffixes_16u(
12318            t: *const u16,
12319            sa: *mut SaSint,
12320            n: SaSint,
12321            m: SaSint,
12322            omp_block_start: SaSint,
12323            omp_block_size: SaSint,
12324        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_count_and_gather_lms_suffixes_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12325        fn probe_libsais16x64_count_and_gather_lms_suffixes_16u(
12326            t: *const u16,
12327            sa: *mut SaSint,
12328            n: SaSint,
12329            buckets: *mut SaSint,
12330            omp_block_start: SaSint,
12331            omp_block_size: SaSint,
12332        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_initialize_buckets_start_and_end_16u` (TODO confirm).
/// Both `buckets` and `freq` are `*mut SaSint`. Returns `SaSint`.
12333        fn probe_libsais16x64_initialize_buckets_start_and_end_16u(
12334            buckets: *mut SaSint,
12335            freq: *mut SaSint,
12336        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`, `buckets` is `*mut SaSint`. Returns `SaSint`.
12337        fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_16u(
12338            t: *const u16,
12339            buckets: *mut SaSint,
12340            first_lms_suffix: SaSint,
12341        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_lms_suffixes_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12342        fn probe_libsais16x64_radix_sort_lms_suffixes_16u(
12343            t: *const u16,
12344            sa: *mut SaSint,
12345            induction_bucket: *mut SaSint,
12346            omp_block_start: SaSint,
12347            omp_block_size: SaSint,
12348        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_for_partial_sorting_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`, `buckets` is `*mut SaSint`. No return value.
12349        fn probe_libsais16x64_initialize_buckets_for_partial_sorting_16u(
12350            t: *const u16,
12351            buckets: *mut SaSint,
12352            first_lms_suffix: SaSint,
12353            left_suffixes_count: SaSint,
12354        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Takes scalar `d`; returns `SaSint`.
12355        fn probe_libsais16x64_partial_sorting_scan_left_to_right_16u(
12356            t: *const u16,
12357            sa: *mut SaSint,
12358            buckets: *mut SaSint,
12359            d: SaSint,
12360            omp_block_start: SaSint,
12361            omp_block_size: SaSint,
12362        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Takes scalar `d`; returns `SaSint`.
12363        fn probe_libsais16x64_partial_sorting_scan_right_to_left_16u(
12364            t: *const u16,
12365            sa: *mut SaSint,
12366            buckets: *mut SaSint,
12367            d: SaSint,
12368            omp_block_start: SaSint,
12369            omp_block_size: SaSint,
12370        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_gsa_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Takes scalar `d`; returns `SaSint`.
12371        fn probe_libsais16x64_partial_gsa_scan_right_to_left_16u(
12372            t: *const u16,
12373            sa: *mut SaSint,
12374            buckets: *mut SaSint,
12375            d: SaSint,
12376            omp_block_start: SaSint,
12377            omp_block_size: SaSint,
12378        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_shift_markers_16u_omp` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only here). No return value.
12379        fn probe_libsais16x64_partial_sorting_shift_markers_16u_omp(
12380            sa: *mut SaSint,
12381            n: SaSint,
12382            buckets: *const SaSint,
12383            threads: SaSint,
12384        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_sorting_scan_left_to_right_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12385        fn probe_libsais16x64_final_sorting_scan_left_to_right_16u(
12386            t: *const u16,
12387            sa: *mut SaSint,
12388            induction_bucket: *mut SaSint,
12389            omp_block_start: SaSint,
12390            omp_block_size: SaSint,
12391        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_sorting_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12392        fn probe_libsais16x64_final_sorting_scan_right_to_left_16u(
12393            t: *const u16,
12394            sa: *mut SaSint,
12395            induction_bucket: *mut SaSint,
12396            omp_block_start: SaSint,
12397            omp_block_size: SaSint,
12398        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_gsa_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12399        fn probe_libsais16x64_final_gsa_scan_right_to_left_16u(
12400            t: *const u16,
12401            sa: *mut SaSint,
12402            induction_bucket: *mut SaSint,
12403            omp_block_start: SaSint,
12404            omp_block_size: SaSint,
12405        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_scan_left_to_right_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12406        fn probe_libsais16x64_final_bwt_scan_left_to_right_16u(
12407            t: *const u16,
12408            sa: *mut SaSint,
12409            induction_bucket: *mut SaSint,
12410            omp_block_start: SaSint,
12411            omp_block_size: SaSint,
12412        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`.
/// Unlike the left-to-right BWT scan declared alongside it, this one returns `SaSint`.
12413        fn probe_libsais16x64_final_bwt_scan_right_to_left_16u(
12414            t: *const u16,
12415            sa: *mut SaSint,
12416            induction_bucket: *mut SaSint,
12417            omp_block_start: SaSint,
12418            omp_block_size: SaSint,
12419        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_aux_scan_left_to_right_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa`, `i_sample` and `induction_bucket` are `*mut SaSint`.
/// `rm` is a scalar. No return value.
12420        fn probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u(
12421            t: *const u16,
12422            sa: *mut SaSint,
12423            rm: SaSint,
12424            i_sample: *mut SaSint,
12425            induction_bucket: *mut SaSint,
12426            omp_block_start: SaSint,
12427            omp_block_size: SaSint,
12428        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_aux_scan_right_to_left_16u` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa`, `i_sample` and `induction_bucket` are `*mut SaSint`.
/// `rm` is a scalar. No return value.
12429        fn probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u(
12430            t: *const u16,
12431            sa: *mut SaSint,
12432            rm: SaSint,
12433            i_sample: *mut SaSint,
12434            induction_bucket: *mut SaSint,
12435            omp_block_start: SaSint,
12436            omp_block_size: SaSint,
12437        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_renumber_lms_suffixes_16u` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer; takes scalar `name`; returns `SaSint`.
12438        fn probe_libsais16x64_renumber_lms_suffixes_16u(
12439            sa: *mut SaSint,
12440            m: SaSint,
12441            name: SaSint,
12442            omp_block_start: SaSint,
12443            omp_block_size: SaSint,
12444        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_place_lms_suffixes_interval_16u` (TODO confirm).
/// Pointers: `sa` and `buckets` are `*mut SaSint`. `n`, `m`, `flags` are scalars. No return value.
12445        fn probe_libsais16x64_place_lms_suffixes_interval_16u(
12446            sa: *mut SaSint,
12447            n: SaSint,
12448            m: SaSint,
12449            flags: SaSint,
12450            buckets: *mut SaSint,
12451        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt_copy_16u` (TODO confirm).
/// `u` is `*mut u16`, `a` is `*mut SaSint`, `n` is a scalar. No return value.
12452        fn probe_libsais16x64_bwt_copy_16u(u: *mut u16, a: *mut SaSint, n: SaSint);
/// Body-less declaration; presumably a shim over upstream `libsais16x64_gather_lms_suffixes_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`, `sa` is `*mut SaSint`. No return value.
12453        fn probe_libsais16x64_gather_lms_suffixes_16u_omp(
12454            t: *const u16,
12455            sa: *mut SaSint,
12456            n: SaSint,
12457            threads: SaSint,
12458        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_count_and_gather_lms_suffixes_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12459        fn probe_libsais16x64_count_and_gather_lms_suffixes_16u_omp(
12460            t: *const u16,
12461            sa: *mut SaSint,
12462            n: SaSint,
12463            buckets: *mut SaSint,
12464            threads: SaSint,
12465        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_lms_suffixes_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. No return value.
12466        fn probe_libsais16x64_radix_sort_lms_suffixes_16u_omp(
12467            t: *const u16,
12468            sa: *mut SaSint,
12469            n: SaSint,
12470            m: SaSint,
12471            flags: SaSint,
12472            buckets: *mut SaSint,
12473            threads: SaSint,
12474        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12475        fn probe_libsais16x64_partial_sorting_scan_left_to_right_16u_omp(
12476            t: *const u16,
12477            sa: *mut SaSint,
12478            n: SaSint,
12479            k: SaSint,
12480            buckets: *mut SaSint,
12481            left_suffixes_count: SaSint,
12482            d: SaSint,
12483            threads: SaSint,
12484        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`.
/// Declared with no return value, whereas the left-to-right `_16u_omp` counterpart returns `SaSint`.
12485        fn probe_libsais16x64_partial_sorting_scan_right_to_left_16u_omp(
12486            t: *const u16,
12487            sa: *mut SaSint,
12488            n: SaSint,
12489            k: SaSint,
12490            buckets: *mut SaSint,
12491            first_lms_suffix: SaSint,
12492            left_suffixes_count: SaSint,
12493            d: SaSint,
12494            threads: SaSint,
12495        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_gsa_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `buckets` are `*mut SaSint`. No return value.
12496        fn probe_libsais16x64_partial_gsa_scan_right_to_left_16u_omp(
12497            t: *const u16,
12498            sa: *mut SaSint,
12499            n: SaSint,
12500            k: SaSint,
12501            buckets: *mut SaSint,
12502            first_lms_suffix: SaSint,
12503            left_suffixes_count: SaSint,
12504            d: SaSint,
12505            threads: SaSint,
12506        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_renumber_lms_suffixes_16u_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `m` and `threads` are scalars. Returns `SaSint`.
12507        fn probe_libsais16x64_renumber_lms_suffixes_16u_omp(
12508            sa: *mut SaSint,
12509            m: SaSint,
12510            threads: SaSint,
12511        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_scan_left_to_right_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12512        fn probe_libsais16x64_final_bwt_scan_left_to_right_16u_omp(
12513            t: *const u16,
12514            sa: *mut SaSint,
12515            n: SaSint,
12516            k: SaSint,
12517            induction_bucket: *mut SaSint,
12518            threads: SaSint,
12519        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_aux_scan_left_to_right_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa`, `i_sample` and `induction_bucket` are `*mut SaSint`. No return value.
12520        fn probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u_omp(
12521            t: *const u16,
12522            sa: *mut SaSint,
12523            n: SaSint,
12524            k: SaSint,
12525            rm: SaSint,
12526            i_sample: *mut SaSint,
12527            induction_bucket: *mut SaSint,
12528            threads: SaSint,
12529        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_sorting_scan_left_to_right_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12530        fn probe_libsais16x64_final_sorting_scan_left_to_right_16u_omp(
12531            t: *const u16,
12532            sa: *mut SaSint,
12533            n: SaSint,
12534            k: SaSint,
12535            induction_bucket: *mut SaSint,
12536            threads: SaSint,
12537        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. Returns `SaSint`.
12538        fn probe_libsais16x64_final_bwt_scan_right_to_left_16u_omp(
12539            t: *const u16,
12540            sa: *mut SaSint,
12541            n: SaSint,
12542            k: SaSint,
12543            induction_bucket: *mut SaSint,
12544            threads: SaSint,
12545        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_bwt_aux_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa`, `i_sample` and `induction_bucket` are `*mut SaSint`. No return value.
12546        fn probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u_omp(
12547            t: *const u16,
12548            sa: *mut SaSint,
12549            n: SaSint,
12550            k: SaSint,
12551            rm: SaSint,
12552            i_sample: *mut SaSint,
12553            induction_bucket: *mut SaSint,
12554            threads: SaSint,
12555        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_sorting_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
/// Takes `omp_block_start`/`omp_block_size` where the left-to-right `_omp` counterpart takes `n`.
12556        fn probe_libsais16x64_final_sorting_scan_right_to_left_16u_omp(
12557            t: *const u16,
12558            sa: *mut SaSint,
12559            omp_block_start: SaSint,
12560            omp_block_size: SaSint,
12561            k: SaSint,
12562            induction_bucket: *mut SaSint,
12563            threads: SaSint,
12564        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_final_gsa_scan_right_to_left_16u_omp` (TODO confirm).
/// Pointers: `t` is `*const u16`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
/// Takes an explicit `omp_block_start`/`omp_block_size` pair rather than `n`.
12565        fn probe_libsais16x64_final_gsa_scan_right_to_left_16u_omp(
12566            t: *const u16,
12567            sa: *mut SaSint,
12568            omp_block_start: SaSint,
12569            omp_block_size: SaSint,
12570            k: SaSint,
12571            induction_bucket: *mut SaSint,
12572            threads: SaSint,
12573        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_bwt_copy_16u_omp` (TODO confirm).
/// `u` is `*mut u16`, `a` is `*mut SaSint`; `n` and `threads` are scalars. No return value.
12574        fn probe_libsais16x64_bwt_copy_16u_omp(
12575            u: *mut u16,
12576            a: *mut SaSint,
12577            n: SaSint,
12578            threads: SaSint,
12579        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_gather_marked_lms_suffixes` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer. Returns `SaSint`.
12580        fn probe_libsais16x64_gather_marked_lms_suffixes(
12581            sa: *mut SaSint,
12582            m: SaSint,
12583            l: SaSint,
12584            omp_block_start: SaSint,
12585            omp_block_size: SaSint,
12586        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_gather_marked_lms_suffixes_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `fs`, `threads` are scalars. No return value.
12587        fn probe_libsais16x64_gather_marked_lms_suffixes_omp(
12588            sa: *mut SaSint,
12589            n: SaSint,
12590            m: SaSint,
12591            fs: SaSint,
12592            threads: SaSint,
12593        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_and_gather_lms_suffixes_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `fs`, `threads` are scalars. Returns `SaSint`.
12594        fn probe_libsais16x64_renumber_and_gather_lms_suffixes_omp(
12595            sa: *mut SaSint,
12596            n: SaSint,
12597            m: SaSint,
12598            fs: SaSint,
12599            threads: SaSint,
12600        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_reconstruct_lms_suffixes` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer. No return value.
12601        fn probe_libsais16x64_reconstruct_lms_suffixes(
12602            sa: *mut SaSint,
12603            n: SaSint,
12604            m: SaSint,
12605            omp_block_start: SaSint,
12606            omp_block_size: SaSint,
12607        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_reconstruct_lms_suffixes_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `threads` are scalars. No return value.
12608        fn probe_libsais16x64_reconstruct_lms_suffixes_omp(
12609            sa: *mut SaSint,
12610            n: SaSint,
12611            m: SaSint,
12612            threads: SaSint,
12613        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_distinct_lms_suffixes_32s_4k` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer; takes scalar `name`; returns `SaSint`.
12614        fn probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k(
12615            sa: *mut SaSint,
12616            m: SaSint,
12617            name: SaSint,
12618            omp_block_start: SaSint,
12619            omp_block_size: SaSint,
12620        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_mark_distinct_lms_suffixes_32s` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer. No return value.
12621        fn probe_libsais16x64_mark_distinct_lms_suffixes_32s(
12622            sa: *mut SaSint,
12623            m: SaSint,
12624            omp_block_start: SaSint,
12625            omp_block_size: SaSint,
12626        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_clamp_lms_suffixes_length_32s` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer. No return value.
12627        fn probe_libsais16x64_clamp_lms_suffixes_length_32s(
12628            sa: *mut SaSint,
12629            m: SaSint,
12630            omp_block_start: SaSint,
12631            omp_block_size: SaSint,
12632        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_distinct_lms_suffixes_32s_4k_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `m` and `threads` are scalars. Returns `SaSint`.
12633        fn probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k_omp(
12634            sa: *mut SaSint,
12635            m: SaSint,
12636            threads: SaSint,
12637        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_mark_distinct_lms_suffixes_32s_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `threads` are scalars. No return value.
12638        fn probe_libsais16x64_mark_distinct_lms_suffixes_32s_omp(
12639            sa: *mut SaSint,
12640            n: SaSint,
12641            m: SaSint,
12642            threads: SaSint,
12643        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_clamp_lms_suffixes_length_32s_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `threads` are scalars. No return value.
12644        fn probe_libsais16x64_clamp_lms_suffixes_length_32s_omp(
12645            sa: *mut SaSint,
12646            n: SaSint,
12647            m: SaSint,
12648            threads: SaSint,
12649        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `threads` are scalars. Returns `SaSint`.
12650        fn probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
12651            sa: *mut SaSint,
12652            n: SaSint,
12653            m: SaSint,
12654            threads: SaSint,
12655        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint` (the text pointer is mutable here). Returns `SaSint`.
12656        fn probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s(
12657            t: *mut SaSint,
12658            sa: *mut SaSint,
12659            m: SaSint,
12660            f: SaSint,
12661            omp_block_start: SaSint,
12662            omp_block_size: SaSint,
12663        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s` (TODO confirm).
/// `sa`, `pl` and `pr` are all `*mut SaSint` (`pl`/`pr` are pointers, not scalars). No return value.
12664        fn probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s(
12665            sa: *mut SaSint,
12666            m: SaSint,
12667            pl: *mut SaSint,
12668            pr: *mut SaSint,
12669            omp_block_start: SaSint,
12670            omp_block_size: SaSint,
12671        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s_omp` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; `m` and `threads` are scalars. Returns `SaSint`.
12672        fn probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
12673            t: *mut SaSint,
12674            sa: *mut SaSint,
12675            m: SaSint,
12676            threads: SaSint,
12677        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `fs`, `f`, `threads` are scalars. No return value.
12678        fn probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s_omp(
12679            sa: *mut SaSint,
12680            n: SaSint,
12681            m: SaSint,
12682            fs: SaSint,
12683            f: SaSint,
12684            threads: SaSint,
12685        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_compact_lms_suffixes_32s_omp` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; `n`, `m`, `fs`, `threads` are scalars. Returns `SaSint`.
12686        fn probe_libsais16x64_compact_lms_suffixes_32s_omp(
12687            t: *mut SaSint,
12688            sa: *mut SaSint,
12689            n: SaSint,
12690            m: SaSint,
12691            fs: SaSint,
12692            threads: SaSint,
12693        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_merge_unique_lms_suffixes_32s` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; the rest are scalars. No return value.
12694        fn probe_libsais16x64_merge_unique_lms_suffixes_32s(
12695            t: *mut SaSint,
12696            sa: *mut SaSint,
12697            n: SaSint,
12698            m: SaSint,
12699            l: SaSint,
12700            omp_block_start: SaSint,
12701            omp_block_size: SaSint,
12702        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_merge_nonunique_lms_suffixes_32s` (TODO confirm).
/// `sa` is `*mut SaSint` and is the only pointer (no `t`, unlike the `merge_unique` probe). No return value.
12703        fn probe_libsais16x64_merge_nonunique_lms_suffixes_32s(
12704            sa: *mut SaSint,
12705            n: SaSint,
12706            m: SaSint,
12707            l: SaSint,
12708            omp_block_start: SaSint,
12709            omp_block_size: SaSint,
12710        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_merge_unique_lms_suffixes_32s_omp` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; `n`, `m`, `threads` are scalars. No return value.
12711        fn probe_libsais16x64_merge_unique_lms_suffixes_32s_omp(
12712            t: *mut SaSint,
12713            sa: *mut SaSint,
12714            n: SaSint,
12715            m: SaSint,
12716            threads: SaSint,
12717        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_merge_nonunique_lms_suffixes_32s_omp` (TODO confirm).
/// `sa` is `*mut SaSint`; `n`, `m`, `f`, `threads` are scalars. No return value.
12718        fn probe_libsais16x64_merge_nonunique_lms_suffixes_32s_omp(
12719            sa: *mut SaSint,
12720            n: SaSint,
12721            m: SaSint,
12722            f: SaSint,
12723            threads: SaSint,
12724        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_merge_compacted_lms_suffixes_32s_omp` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; `n`, `m`, `f`, `threads` are scalars. No return value.
12725        fn probe_libsais16x64_merge_compacted_lms_suffixes_32s_omp(
12726            t: *mut SaSint,
12727            sa: *mut SaSint,
12728            n: SaSint,
12729            m: SaSint,
12730            f: SaSint,
12731            threads: SaSint,
12732        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_lms_suffixes_32s_6k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12733        fn probe_libsais16x64_radix_sort_lms_suffixes_32s_6k(
12734            t: *const SaSint,
12735            sa: *mut SaSint,
12736            induction_bucket: *mut SaSint,
12737            omp_block_start: SaSint,
12738            omp_block_size: SaSint,
12739        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_lms_suffixes_32s_2k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12740        fn probe_libsais16x64_radix_sort_lms_suffixes_32s_2k(
12741            t: *const SaSint,
12742            sa: *mut SaSint,
12743            induction_bucket: *mut SaSint,
12744            omp_block_start: SaSint,
12745            omp_block_size: SaSint,
12746        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_radix_sort_lms_suffixes_32s_6k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12747        fn probe_libsais16x64_radix_sort_lms_suffixes_32s_6k_omp(
12748            t: *const SaSint,
12749            sa: *mut SaSint,
12750            n: SaSint,
12751            m: SaSint,
12752            induction_bucket: *mut SaSint,
12753            threads: SaSint,
12754        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_radix_sort_lms_suffixes_32s_2k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12755        fn probe_libsais16x64_radix_sort_lms_suffixes_32s_2k_omp(
12756            t: *const SaSint,
12757            sa: *mut SaSint,
12758            n: SaSint,
12759            m: SaSint,
12760            induction_bucket: *mut SaSint,
12761            threads: SaSint,
12762        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_lms_suffixes_32s_1k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`.
/// Unlike the 6k/2k radix-sort probes, this one returns `SaSint` and takes `n` instead of a block range.
12763        fn probe_libsais16x64_radix_sort_lms_suffixes_32s_1k(
12764            t: *const SaSint,
12765            sa: *mut SaSint,
12766            n: SaSint,
12767            buckets: *mut SaSint,
12768        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_set_markers_32s_6k` (TODO confirm).
/// Both `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12769        fn probe_libsais16x64_radix_sort_set_markers_32s_6k(
12770            sa: *mut SaSint,
12771            induction_bucket: *mut SaSint,
12772            omp_block_start: SaSint,
12773            omp_block_size: SaSint,
12774        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_radix_sort_set_markers_32s_4k` (TODO confirm).
/// Both `sa` and `induction_bucket` are `*mut SaSint`. No return value.
12775        fn probe_libsais16x64_radix_sort_set_markers_32s_4k(
12776            sa: *mut SaSint,
12777            induction_bucket: *mut SaSint,
12778            omp_block_start: SaSint,
12779            omp_block_size: SaSint,
12780        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_radix_sort_set_markers_32s_6k_omp` (TODO confirm).
/// Both `sa` and `induction_bucket` are `*mut SaSint`; `k` and `threads` are scalars. No return value.
12781        fn probe_libsais16x64_radix_sort_set_markers_32s_6k_omp(
12782            sa: *mut SaSint,
12783            k: SaSint,
12784            induction_bucket: *mut SaSint,
12785            threads: SaSint,
12786        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_radix_sort_set_markers_32s_4k_omp` (TODO confirm).
/// Both `sa` and `induction_bucket` are `*mut SaSint`; `k` and `threads` are scalars. No return value.
12787        fn probe_libsais16x64_radix_sort_set_markers_32s_4k_omp(
12788            sa: *mut SaSint,
12789            k: SaSint,
12790            induction_bucket: *mut SaSint,
12791            threads: SaSint,
12792        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_histogram_32s_6k` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12793        fn probe_libsais16x64_place_lms_suffixes_histogram_32s_6k(
12794            sa: *mut SaSint,
12795            n: SaSint,
12796            k: SaSint,
12797            m: SaSint,
12798            buckets: *const SaSint,
12799        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_histogram_32s_4k` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12800        fn probe_libsais16x64_place_lms_suffixes_histogram_32s_4k(
12801            sa: *mut SaSint,
12802            n: SaSint,
12803            k: SaSint,
12804            m: SaSint,
12805            buckets: *const SaSint,
12806        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_histogram_32s_2k` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12807        fn probe_libsais16x64_place_lms_suffixes_histogram_32s_2k(
12808            sa: *mut SaSint,
12809            n: SaSint,
12810            k: SaSint,
12811            m: SaSint,
12812            buckets: *const SaSint,
12813        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_gather_lms_suffixes_32s` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `sa` is `*mut SaSint`. Returns `SaSint`.
12814        fn probe_libsais16x64_gather_lms_suffixes_32s(
12815            t: *const SaSint,
12816            sa: *mut SaSint,
12817            n: SaSint,
12818        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_gather_compacted_lms_suffixes_32s` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `sa` is `*mut SaSint`. Returns `SaSint`.
12819        fn probe_libsais16x64_gather_compacted_lms_suffixes_32s(
12820            t: *const SaSint,
12821            sa: *mut SaSint,
12822            n: SaSint,
12823        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_count_lms_suffixes_32s_2k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `buckets` is `*mut SaSint`; there is no `sa` parameter. No return value.
12824        fn probe_libsais16x64_count_lms_suffixes_32s_2k(
12825            t: *const SaSint,
12826            n: SaSint,
12827            k: SaSint,
12828            buckets: *mut SaSint,
12829        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_count_and_gather_lms_suffixes_32s_4k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12830        fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
12831            t: *const SaSint,
12832            sa: *mut SaSint,
12833            n: SaSint,
12834            k: SaSint,
12835            buckets: *mut SaSint,
12836            omp_block_start: SaSint,
12837            omp_block_size: SaSint,
12838        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_count_and_gather_lms_suffixes_32s_4k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
/// Note that `local_buckets` is a plain `SaSint` scalar here, not a pointer.
12839        fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k_omp(
12840            t: *const SaSint,
12841            sa: *mut SaSint,
12842            n: SaSint,
12843            k: SaSint,
12844            buckets: *mut SaSint,
12845            local_buckets: SaSint,
12846            threads: SaSint,
12847        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream `libsais16x64_count_suffixes_32s` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `buckets` is `*mut SaSint`. No return value.
12848        fn probe_libsais16x64_count_suffixes_32s(
12849            t: *const SaSint,
12850            n: SaSint,
12851            k: SaSint,
12852            buckets: *mut SaSint,
12853        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_start_and_end_32s_6k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12854        fn probe_libsais16x64_initialize_buckets_start_and_end_32s_6k(
12855            k: SaSint,
12856            buckets: *mut SaSint,
12857        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_start_and_end_32s_4k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12858        fn probe_libsais16x64_initialize_buckets_start_and_end_32s_4k(
12859            k: SaSint,
12860            buckets: *mut SaSint,
12861        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_initialize_buckets_end_32s_2k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12862        fn probe_libsais16x64_initialize_buckets_end_32s_2k(k: SaSint, buckets: *mut SaSint);
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_start_and_end_32s_2k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12863        fn probe_libsais16x64_initialize_buckets_start_and_end_32s_2k(
12864            k: SaSint,
12865            buckets: *mut SaSint,
12866        );
/// Body-less declaration; presumably a shim over upstream `libsais16x64_initialize_buckets_start_32s_1k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12867        fn probe_libsais16x64_initialize_buckets_start_32s_1k(k: SaSint, buckets: *mut SaSint);
/// Body-less declaration; presumably a shim over upstream `libsais16x64_initialize_buckets_end_32s_1k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12868        fn probe_libsais16x64_initialize_buckets_end_32s_1k(k: SaSint, buckets: *mut SaSint);
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `buckets` is `*mut SaSint`. No return value (the 6k sibling returns `SaSint`).
12869        fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
12870            t: *const SaSint,
12871            k: SaSint,
12872            buckets: *mut SaSint,
12873            first_lms_suffix: SaSint,
12874        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `buckets` is `*mut SaSint`. Returns `SaSint`.
12875        fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
12876            t: *const SaSint,
12877            k: SaSint,
12878            buckets: *mut SaSint,
12879            first_lms_suffix: SaSint,
12880        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_initialize_buckets_for_radix_and_partial_sorting_32s_4k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`, `buckets` is `*mut SaSint`. No return value.
12881        fn probe_libsais16x64_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
12882            t: *const SaSint,
12883            k: SaSint,
12884            buckets: *mut SaSint,
12885            first_lms_suffix: SaSint,
12886        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_interval_32s_4k` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12887        fn probe_libsais16x64_place_lms_suffixes_interval_32s_4k(
12888            sa: *mut SaSint,
12889            n: SaSint,
12890            k: SaSint,
12891            m: SaSint,
12892            buckets: *const SaSint,
12893        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_interval_32s_2k` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12894        fn probe_libsais16x64_place_lms_suffixes_interval_32s_2k(
12895            sa: *mut SaSint,
12896            n: SaSint,
12897            k: SaSint,
12898            m: SaSint,
12899            buckets: *const SaSint,
12900        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_place_lms_suffixes_interval_32s_1k` (TODO confirm).
/// Differs from the 4k/2k siblings: takes `t` (`*const SaSint`) instead of `n`, and `buckets`
/// is `*mut SaSint` rather than `*const`. `sa` is `*mut SaSint`. No return value.
12901        fn probe_libsais16x64_place_lms_suffixes_interval_32s_1k(
12902            t: *const SaSint,
12903            sa: *mut SaSint,
12904            k: SaSint,
12905            m: SaSint,
12906            buckets: *mut SaSint,
12907        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp` (TODO confirm).
/// Both `t` and `sa` are `*mut SaSint`; `n`, `m`, `threads` are scalars. Returns `SaSint`.
12908        fn probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
12909            t: *mut SaSint,
12910            sa: *mut SaSint,
12911            n: SaSint,
12912            m: SaSint,
12913            threads: SaSint,
12914        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_shift_markers_32s_6k_omp` (TODO confirm).
/// Pointers: `sa` is `*mut SaSint`; `buckets` is `*const SaSint` (read-only). No return value.
12915        fn probe_libsais16x64_partial_sorting_shift_markers_32s_6k_omp(
12916            sa: *mut SaSint,
12917            k: SaSint,
12918            buckets: *const SaSint,
12919            threads: SaSint,
12920        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_shift_markers_32s_4k` (TODO confirm).
/// `sa` is `*mut SaSint`; `n` is a scalar. No return value.
12921        fn probe_libsais16x64_partial_sorting_shift_markers_32s_4k(sa: *mut SaSint, n: SaSint);
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_shift_buckets_32s_6k` (TODO confirm).
/// `k` is a scalar; `buckets` is `*mut SaSint`. No return value.
12922        fn probe_libsais16x64_partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: *mut SaSint);
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_6k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Takes scalar `d`; returns `SaSint`.
12923        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k(
12924            t: *const SaSint,
12925            sa: *mut SaSint,
12926            buckets: *mut SaSint,
12927            d: SaSint,
12928            omp_block_start: SaSint,
12929            omp_block_size: SaSint,
12930        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_4k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Takes an extra scalar `k`
/// compared with the 6k variant; returns `SaSint`.
12931        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k(
12932            t: *const SaSint,
12933            sa: *mut SaSint,
12934            k: SaSint,
12935            buckets: *mut SaSint,
12936            d: SaSint,
12937            omp_block_start: SaSint,
12938            omp_block_size: SaSint,
12939        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_1k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No `d` parameter and no return value.
12940        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k(
12941            t: *const SaSint,
12942            sa: *mut SaSint,
12943            buckets: *mut SaSint,
12944            omp_block_start: SaSint,
12945            omp_block_size: SaSint,
12946        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_6k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12947        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k_omp(
12948            t: *const SaSint,
12949            sa: *mut SaSint,
12950            n: SaSint,
12951            buckets: *mut SaSint,
12952            left_suffixes_count: SaSint,
12953            d: SaSint,
12954            threads: SaSint,
12955        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_4k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Returns `SaSint`.
12956        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k_omp(
12957            t: *const SaSint,
12958            sa: *mut SaSint,
12959            n: SaSint,
12960            k: SaSint,
12961            buckets: *mut SaSint,
12962            d: SaSint,
12963            threads: SaSint,
12964        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_left_to_right_32s_1k_omp` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No return value.
12965        fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k_omp(
12966            t: *const SaSint,
12967            sa: *mut SaSint,
12968            n: SaSint,
12969            buckets: *mut SaSint,
12970            threads: SaSint,
12971        );
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_right_to_left_32s_6k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Takes scalar `d`; returns `SaSint`.
12972        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k(
12973            t: *const SaSint,
12974            sa: *mut SaSint,
12975            buckets: *mut SaSint,
12976            d: SaSint,
12977            omp_block_start: SaSint,
12978            omp_block_size: SaSint,
12979        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_right_to_left_32s_4k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. Takes scalars `k` and `d`;
/// returns `SaSint`.
12980        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k(
12981            t: *const SaSint,
12982            sa: *mut SaSint,
12983            k: SaSint,
12984            buckets: *mut SaSint,
12985            d: SaSint,
12986            omp_block_start: SaSint,
12987            omp_block_size: SaSint,
12988        ) -> SaSint;
/// Body-less declaration; presumably a shim over upstream
/// `libsais16x64_partial_sorting_scan_right_to_left_32s_1k` (TODO confirm).
/// Pointers: `t` is `*const SaSint`; `sa` and `buckets` are `*mut SaSint`. No `d` parameter and no return value.
12989        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k(
12990            t: *const SaSint,
12991            sa: *mut SaSint,
12992            buckets: *mut SaSint,
12993            omp_block_start: SaSint,
12994            omp_block_size: SaSint,
12995        );
12996        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k_omp(
12997            t: *const SaSint,
12998            sa: *mut SaSint,
12999            n: SaSint,
13000            buckets: *mut SaSint,
13001            first_lms_suffix: SaSint,
13002            left_suffixes_count: SaSint,
13003            d: SaSint,
13004            threads: SaSint,
13005        ) -> SaSint;
13006        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k_omp(
13007            t: *const SaSint,
13008            sa: *mut SaSint,
13009            n: SaSint,
13010            k: SaSint,
13011            buckets: *mut SaSint,
13012            d: SaSint,
13013            threads: SaSint,
13014        ) -> SaSint;
13015        fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k_omp(
13016            t: *const SaSint,
13017            sa: *mut SaSint,
13018            n: SaSint,
13019            buckets: *mut SaSint,
13020            threads: SaSint,
13021        );
13022        fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k(
13023            sa: *mut SaSint,
13024            omp_block_start: SaSint,
13025            omp_block_size: SaSint,
13026        ) -> SaSint;
13027        fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k(
13028            sa: *mut SaSint,
13029            omp_block_start: SaSint,
13030            omp_block_size: SaSint,
13031        ) -> SaSint;
13032        fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k_omp(
13033            sa: *mut SaSint,
13034            n: SaSint,
13035            threads: SaSint,
13036        );
13037        fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k_omp(
13038            sa: *mut SaSint,
13039            n: SaSint,
13040            threads: SaSint,
13041        );
13042        fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k(
13043            t: *const SaSint,
13044            sa: *mut SaSint,
13045            n: SaSint,
13046            k: SaSint,
13047            buckets: *mut SaSint,
13048            omp_block_start: SaSint,
13049            omp_block_size: SaSint,
13050        ) -> SaSint;
13051        fn probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k(
13052            t: *const SaSint,
13053            sa: *mut SaSint,
13054            n: SaSint,
13055            k: SaSint,
13056            buckets: *mut SaSint,
13057            omp_block_start: SaSint,
13058            omp_block_size: SaSint,
13059        ) -> SaSint;
13060        fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k_omp(
13061            t: *const SaSint,
13062            sa: *mut SaSint,
13063            n: SaSint,
13064            k: SaSint,
13065            buckets: *mut SaSint,
13066            local_buckets: SaSint,
13067            threads: SaSint,
13068        ) -> SaSint;
13069        fn probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
13070            t: *const SaSint,
13071            sa: *mut SaSint,
13072            n: SaSint,
13073            k: SaSint,
13074            buckets: *mut SaSint,
13075            local_buckets: SaSint,
13076            threads: SaSint,
13077        );
13078        fn probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_2k_omp(
13079            t: *mut SaSint,
13080            sa: *mut SaSint,
13081            n: SaSint,
13082            k: SaSint,
13083            m: SaSint,
13084            fs: SaSint,
13085            f: SaSint,
13086            buckets: *mut SaSint,
13087            local_buckets: SaSint,
13088            threads: SaSint,
13089        );
13090        fn probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_1k_omp(
13091            t: *mut SaSint,
13092            sa: *mut SaSint,
13093            n: SaSint,
13094            m: SaSint,
13095            fs: SaSint,
13096            f: SaSint,
13097            threads: SaSint,
13098        );
13099    }
13100
13101    fn brute_sa(t: &[u16]) -> Vec<SaSint> {
13102        let mut sa: Vec<_> = (0..t.len() as SaSint).collect();
13103        sa.sort_by(|&a, &b| t[a as usize..].cmp(&t[b as usize..]));
13104        sa
13105    }
13106
/// Runs `gather_lms_suffixes_16u` and the `probe_libsais16x64_gather_lms_suffixes_16u`
/// foreign function on the same short texts and requires identical suffix-array contents.
#[test]
fn libsais16x64_gather_lms_suffixes_16u_matches_c() {
    let texts: &[&[u16]] = &[
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for text in texts.iter().copied() {
        let len = text.len();
        let n = len as SaSint;
        // Both outputs start from the same -99 fill so untouched slots still compare equal.
        let mut rust_sa = vec![-99; len];
        let mut c_sa = rust_sa.clone();

        gather_lms_suffixes_16u(text, &mut rust_sa, n, n - 1, 0, n);
        // SAFETY: every pointer comes from a live buffer owned by this test.
        // NOTE(review): assumes the probe stays inside those buffers for these arguments —
        // confirm against the shim.
        unsafe {
            probe_libsais16x64_gather_lms_suffixes_16u(
                text.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                n - 1,
                0,
                n,
            );
        }

        assert_eq!(rust_sa, c_sa);
    }
}
13136
/// Compares `count_and_gather_lms_suffixes_16u` with its `probe_…` foreign counterpart:
/// the returned count, the suffix array and the bucket table must all agree.
#[test]
fn libsais16x64_count_and_gather_lms_suffixes_16u_matches_c() {
    let texts: &[&[u16]] = &[
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for text in texts.iter().copied() {
        let n = text.len() as SaSint;

        // Identical starting state for both implementations (-99 in SA, -1 in buckets).
        let mut rust_sa = vec![-99; text.len()];
        let mut rust_buckets = vec![-1; 4 * ALPHABET_SIZE];
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();

        let rust_m =
            count_and_gather_lms_suffixes_16u(text, &mut rust_sa, n, &mut rust_buckets, 0, n);
        // SAFETY: every pointer comes from a live buffer owned by this test.
        // NOTE(review): assumes the probe stays inside those buffers for these arguments —
        // confirm against the shim.
        let c_m = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_16u(
                text.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                c_buckets.as_mut_ptr(),
                0,
                n,
            )
        };

        assert_eq!(rust_m, c_m);
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);
    }
}
13171
/// Compares `initialize_buckets_start_and_end_16u` with its `probe_…` foreign counterpart,
/// once with a frequency output buffer and once without (`None` / null pointer).
#[test]
fn libsais16x64_initialize_buckets_start_and_end_16u_matches_c() {
    // (symbol, value written at `buckets_index4(symbol, state)` for state 0..4)
    let seeded_counts = [
        (0usize, [1, 0, 0, 2]),
        (1, [0, 3, 1, 0]),
        (7, [2, 1, 0, 1]),
        (1024, [0, 0, 5, 0]),
    ];

    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    for (symbol, counts) in seeded_counts {
        for (state, count) in counts.iter().copied().enumerate() {
            rust_buckets[buckets_index4(symbol, state)] = count;
        }
    }
    let mut c_buckets = rust_buckets.clone();
    let mut rust_freq = vec![-1; ALPHABET_SIZE];
    let mut c_freq = rust_freq.clone();

    let rust_k = initialize_buckets_start_and_end_16u(&mut rust_buckets, Some(&mut rust_freq));
    // SAFETY: both pointers come from live buffers owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the shim.
    let c_k = unsafe {
        probe_libsais16x64_initialize_buckets_start_and_end_16u(
            c_buckets.as_mut_ptr(),
            c_freq.as_mut_ptr(),
        )
    };

    assert_eq!(rust_k, c_k);
    assert_eq!(rust_buckets, c_buckets);
    assert_eq!(rust_freq, c_freq);

    // Second pass without a frequency buffer: keep the first 4 * ALPHABET_SIZE entries of the
    // already-processed Rust table and zero everything after them.
    let kept_len = 4 * ALPHABET_SIZE;
    let mut rust_buckets_no_freq = rust_buckets[..kept_len].to_vec();
    rust_buckets_no_freq.resize(8 * ALPHABET_SIZE, 0);
    let mut c_buckets_no_freq = rust_buckets_no_freq.clone();

    let rust_k_no_freq = initialize_buckets_start_and_end_16u(&mut rust_buckets_no_freq, None);
    // SAFETY: the bucket pointer comes from a live buffer; the frequency pointer is
    // deliberately null. NOTE(review): assumes the probe accepts a null frequency pointer —
    // confirm against the shim.
    let c_k_no_freq = unsafe {
        probe_libsais16x64_initialize_buckets_start_and_end_16u(
            c_buckets_no_freq.as_mut_ptr(),
            std::ptr::null_mut(),
        )
    };

    assert_eq!(rust_k_no_freq, c_k_no_freq);
    assert_eq!(rust_buckets_no_freq, c_buckets_no_freq);
}
13217
/// Prepares a bucket table with the Rust helpers, then checks that
/// `initialize_buckets_for_lms_suffixes_radix_sort_16u` and its `probe_…` foreign counterpart
/// return the same value and leave the same table behind.
#[test]
fn libsais16x64_lms_radix_bucket_initialization_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;

    // Shared setup: count/gather into the first 4 * ALPHABET_SIZE entries, then derive the
    // start/end tables.
    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let mut gathered_sa = vec![-99; text.len()];
    let lms_count = count_and_gather_lms_suffixes_16u(
        &text,
        &mut gathered_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = gathered_sa[(n - lms_count) as usize];

    // Snapshot taken before the function under test runs, so both sides see equal input.
    let mut c_buckets = rust_buckets.clone();

    let rust_count =
        initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut rust_buckets, first_lms_suffix);
    // SAFETY: both pointers come from live buffers owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the shim.
    let c_count = unsafe {
        probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_16u(
            text.as_ptr(),
            c_buckets.as_mut_ptr(),
            first_lms_suffix,
        )
    };

    assert_eq!(rust_count, c_count);
    assert_eq!(rust_buckets, c_buckets);
}
13252
/// Prepares suffix array and buckets with the Rust helpers, then checks that
/// `radix_sort_lms_suffixes_16u` and its `probe_…` foreign counterpart produce the same
/// suffix array and bucket table.
#[test]
fn libsais16x64_radix_sort_lms_suffixes_16u_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let induction_offset = 4 * ALPHABET_SIZE;

    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let mut rust_sa = vec![-99; text.len()];
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets[..induction_offset],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];
    initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut rust_buckets, first_lms_suffix);

    // Snapshots taken right before the function under test, so both sides see equal input.
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();

    // Both implementations receive the table starting at offset 4 * ALPHABET_SIZE.
    radix_sort_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        &mut rust_buckets[induction_offset..],
        n - m + 1,
        m - 1,
    );
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_radix_sort_lms_suffixes_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets[induction_offset..].as_mut_ptr(),
            n - m + 1,
            m - 1,
        );
    }

    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13294
/// Prepares a bucket table with the Rust helpers, then checks that
/// `initialize_buckets_for_partial_sorting_16u` and its `probe_…` foreign counterpart leave
/// identical bucket tables.
#[test]
fn libsais16x64_initialize_buckets_for_partial_sorting_16u_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;

    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let mut gathered_sa = vec![-99; text.len()];
    let lms_count = count_and_gather_lms_suffixes_16u(
        &text,
        &mut gathered_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = gathered_sa[(n - lms_count) as usize];
    let left_suffixes_count =
        initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut rust_buckets, first_lms_suffix);

    // Snapshot taken before the function under test runs, so both sides see equal input.
    let mut c_buckets = rust_buckets.clone();

    initialize_buckets_for_partial_sorting_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
        left_suffixes_count,
    );
    // SAFETY: both pointers come from live buffers owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the shim.
    unsafe {
        probe_libsais16x64_initialize_buckets_for_partial_sorting_16u(
            text.as_ptr(),
            c_buckets.as_mut_ptr(),
            first_lms_suffix,
            left_suffixes_count,
        );
    }

    assert_eq!(rust_buckets, c_buckets);
}
13335
13336    fn partial_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13337        let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13338        let mut sa = vec![0; 128];
13339        sa[..5].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13340
13341        let mut buckets = vec![0; 6 * ALPHABET_SIZE];
13342        for v in 0..32 {
13343            buckets[v] = 80 + (v as SaSint) * 4;
13344            buckets[2 * ALPHABET_SIZE + v] = if v % 3 == 0 { 2 } else { 0 };
13345            buckets[4 * ALPHABET_SIZE + v] = 20 + (v as SaSint) * 4;
13346        }
13347
13348        (text, sa, buckets)
13349    }
13350
/// Runs `partial_sorting_scan_left_to_right_16u` and its `probe_…` foreign counterpart on the
/// shared partial-scan fixture; return value, suffix array and buckets must agree.
#[test]
fn libsais16x64_partial_sorting_scan_left_to_right_16u_matches_c() {
    let (text, fixture_sa, fixture_buckets) = partial_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_buckets, mut c_buckets) = (fixture_buckets.clone(), fixture_buckets);

    let rust_d =
        partial_sorting_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    let c_d = unsafe {
        probe_libsais16x64_partial_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(rust_d, c_d);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13374
/// Runs `partial_sorting_scan_right_to_left_16u` and its `probe_…` foreign counterpart on the
/// shared partial-scan fixture; return value, suffix array and buckets must agree.
#[test]
fn libsais16x64_partial_sorting_scan_right_to_left_16u_matches_c() {
    let (text, fixture_sa, fixture_buckets) = partial_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_buckets, mut c_buckets) = (fixture_buckets.clone(), fixture_buckets);

    let rust_d =
        partial_sorting_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    let c_d = unsafe {
        probe_libsais16x64_partial_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(rust_d, c_d);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13398
/// Runs `partial_gsa_scan_right_to_left_16u` and its `probe_…` foreign counterpart on the
/// shared partial-scan fixture; return value, suffix array and buckets must agree.
#[test]
fn libsais16x64_partial_gsa_scan_right_to_left_16u_matches_c() {
    let (text, fixture_sa, fixture_buckets) = partial_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_buckets, mut c_buckets) = (fixture_buckets.clone(), fixture_buckets);

    let rust_d =
        partial_gsa_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    let c_d = unsafe {
        probe_libsais16x64_partial_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(rust_d, c_d);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13422
/// Runs `partial_sorting_shift_markers_16u_omp` and its `probe_…` foreign counterpart on a
/// 16-entry suffix array with two seeded runs and requires identical results.
#[test]
fn libsais16x64_partial_sorting_shift_markers_16u_matches_c() {
    let mut rust_sa = vec![0; 16];
    // Two four-entry runs; some entries carry the `SAINT_MIN` bit.
    for (range, values) in [
        (2..6, [1, 2 | SAINT_MIN, 3 | SAINT_MIN, 4]),
        (8..12, [5 | SAINT_MIN, 6, 7 | SAINT_MIN, 8]),
    ] {
        rust_sa[range].copy_from_slice(&values);
    }
    let mut c_sa = rust_sa.clone();

    // Read-only bucket table shared by both implementations.
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (slot, value) in [
        (0, 2),
        (2, 8),
        (4 * ALPHABET_SIZE + 2, 6),
        (4 * ALPHABET_SIZE + 4, 12),
    ] {
        buckets[slot] = value;
    }

    let n = rust_sa.len() as SaSint;
    partial_sorting_shift_markers_16u_omp(&mut rust_sa, n, &buckets, 1);
    // SAFETY: both pointers come from live buffers owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the shim.
    unsafe {
        probe_libsais16x64_partial_sorting_shift_markers_16u_omp(
            c_sa.as_mut_ptr(),
            c_sa.len() as SaSint,
            buckets.as_ptr(),
            1,
        );
    }

    assert_eq!(rust_sa, c_sa);
}
13449
/// Checks that `partial_sorting_scan_left_to_right_16u_block_omp` called with 4 threads and
/// an allocated thread state gives the same return value, suffix array and buckets as
/// `partial_sorting_scan_left_to_right_16u` on a 65 536-entry block.
#[test]
fn libsais16x64_partial_left_to_right_16u_block_omp_uses_cache_pipeline() {
    let k = 512usize;
    let block_size = 65_536usize;
    let text: Vec<u16> = (0..block_size + 2)
        .map(|i| {
            let symbol = (i * 17 + i / 7) % (k - 1);
            1 + symbol as u16
        })
        .collect();

    // The block holds the values 2, 3, 4, …; everything after it starts as zero.
    let sa_len = block_size + 2 * k * 100;
    let mut base_sa: Vec<SaSint> = (0..block_size).map(|i| (i + 2) as SaSint).collect();
    base_sa.resize(sa_len, 0);

    // Slots at offset 4 * ALPHABET_SIZE get targets spaced 100 apart, starting at `block_size`.
    let mut base_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    let targets = &mut base_buckets[4 * ALPHABET_SIZE..][..2 * k];
    for (v, slot) in targets.iter_mut().enumerate() {
        *slot = (block_size + v * 100) as SaSint;
    }

    let mut thread_state = alloc_thread_state(4).unwrap();
    let (mut scalar_sa, mut threaded_sa) = (base_sa.clone(), base_sa);
    let (mut scalar_buckets, mut threaded_buckets) = (base_buckets.clone(), base_buckets);

    let scalar_d = partial_sorting_scan_left_to_right_16u(
        &text,
        &mut scalar_sa,
        &mut scalar_buckets,
        0,
        0,
        block_size as SaSint,
    );
    let threaded_d = partial_sorting_scan_left_to_right_16u_block_omp(
        &text,
        &mut threaded_sa,
        k as SaSint,
        &mut threaded_buckets,
        0,
        0,
        block_size as SaSint,
        4,
        &mut thread_state,
    );

    assert_eq!(threaded_d, scalar_d);
    assert_eq!(threaded_sa, scalar_sa);
    assert_eq!(threaded_buckets, scalar_buckets);
}
13496
/// Checks that `partial_sorting_scan_left_to_right_16u_omp` yields the same return value,
/// suffix array and buckets whether it is called with 1 thread or with 4.
#[test]
fn libsais16x64_partial_left_to_right_16u_omp_uses_block_pipeline() {
    let k = 512usize;
    let block_size = 65_536usize;
    let text: Vec<u16> = (0..block_size + 2)
        .map(|i| {
            let symbol = (i * 17 + i / 7) % (k - 1);
            1 + symbol as u16
        })
        .collect();

    // Block values are 2, 3, 4, …; every 17th one additionally carries the `SAINT_MIN` bit.
    let sa_len = block_size + 2 * k * 100;
    let mut base_sa: Vec<SaSint> = (0..block_size)
        .map(|i| {
            let value = (i + 2) as SaSint;
            if i % 17 == 0 {
                value | SAINT_MIN
            } else {
                value
            }
        })
        .collect();
    base_sa.resize(sa_len, 0);

    let mut base_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..2 * k {
        base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
        base_buckets[4 * ALPHABET_SIZE + v] = (block_size + v * 100) as SaSint;
    }

    let n = text.len() as SaSint;
    let (mut scalar_sa, mut threaded_sa) = (base_sa.clone(), base_sa);
    let (mut scalar_buckets, mut threaded_buckets) = (base_buckets.clone(), base_buckets);

    // Same call twice; only the trailing thread count differs (1 vs 4).
    let scalar_d = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut scalar_sa,
        n,
        k as SaSint,
        &mut scalar_buckets,
        block_size as SaSint,
        7,
        1,
    );
    let threaded_d = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut threaded_sa,
        n,
        k as SaSint,
        &mut threaded_buckets,
        block_size as SaSint,
        7,
        4,
    );

    assert_eq!(threaded_d, scalar_d);
    assert_eq!(threaded_sa, scalar_sa);
    assert_eq!(threaded_buckets, scalar_buckets);
}
13549
/// Checks the 4-thread `…_block_omp` right-to-left scans against their single-block
/// counterparts, first for the plain partial-sorting scan and then for the GSA scan. Each
/// pair must agree on return value, suffix array and buckets.
#[test]
fn libsais16x64_partial_right_to_left_16u_block_omp_uses_cache_pipeline() {
    let k = 512usize;
    let block_size = 65_536usize;
    let width = 2 * k;
    let block_start = width * 200 + 1024;
    let block_end = block_start + block_size;
    let text: Vec<u16> = (0..block_size + 2)
        .map(|i| {
            let symbol = (i * 17 + i / 7) % (k - 1);
            1 + symbol as u16
        })
        .collect();

    // Only the block itself is populated; every 17th entry carries the `SAINT_MIN` bit.
    let mut base_sa: Vec<SaSint> = vec![0; block_end + 1];
    for (i, slot) in base_sa[block_start..block_end].iter_mut().enumerate() {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 == 0 {
            value | SAINT_MIN
        } else {
            value
        };
    }
    let mut base_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..width {
        base_buckets[v] = ((v + 1) * 200) as SaSint;
        base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
    }
    let mut thread_state = alloc_thread_state(4).unwrap();

    // Plain partial-sorting scan.
    let mut scalar_sa = base_sa.clone();
    let mut threaded_sa = base_sa.clone();
    let mut scalar_buckets = base_buckets.clone();
    let mut threaded_buckets = base_buckets.clone();
    let scalar_d = partial_sorting_scan_right_to_left_16u(
        &text,
        &mut scalar_sa,
        &mut scalar_buckets,
        7,
        block_start as SaSint,
        block_size as SaSint,
    );
    let threaded_d = partial_sorting_scan_right_to_left_16u_block_omp(
        &text,
        &mut threaded_sa,
        k as SaSint,
        &mut threaded_buckets,
        7,
        block_start as SaSint,
        block_size as SaSint,
        4,
        &mut thread_state,
    );
    assert_eq!(threaded_d, scalar_d);
    assert_eq!(threaded_sa, scalar_sa);
    assert_eq!(threaded_buckets, scalar_buckets);

    // GSA scan, starting again from the pristine fixture and reusing the thread state.
    let mut gsa_scalar_sa = base_sa.clone();
    let mut gsa_threaded_sa = base_sa;
    let mut gsa_scalar_buckets = base_buckets.clone();
    let mut gsa_threaded_buckets = base_buckets;
    let gsa_scalar_d = partial_gsa_scan_right_to_left_16u(
        &text,
        &mut gsa_scalar_sa,
        &mut gsa_scalar_buckets,
        7,
        block_start as SaSint,
        block_size as SaSint,
    );
    let gsa_threaded_d = partial_gsa_scan_right_to_left_16u_block_omp(
        &text,
        &mut gsa_threaded_sa,
        k as SaSint,
        &mut gsa_threaded_buckets,
        7,
        block_start as SaSint,
        block_size as SaSint,
        4,
        &mut thread_state,
    );
    assert_eq!(gsa_threaded_d, gsa_scalar_d);
    assert_eq!(gsa_threaded_sa, gsa_scalar_sa);
    assert_eq!(gsa_threaded_buckets, gsa_scalar_buckets);
}
13630
/// Checks that the `…_omp` right-to-left scans (plain partial sorting, then GSA) leave the
/// same suffix array and buckets whether they are called with 1 thread or with 4.
#[test]
fn libsais16x64_partial_right_to_left_16u_omp_uses_block_pipeline() {
    let k = 512usize;
    let block_size = 65_536usize;
    let width = 2 * k;
    let block_start = width * 200 + 1024;
    let block_end = block_start + block_size;
    let text: Vec<u16> = (0..block_size + 2)
        .map(|i| {
            let symbol = (i * 17 + i / 7) % (k - 1);
            1 + symbol as u16
        })
        .collect();

    let sa_len = block_end + 1;
    let n = sa_len as SaSint;
    let first_lms_suffix = n - block_end as SaSint;
    let left_suffixes_count = block_start as SaSint - 1;

    // Only the block itself is populated; every 17th entry carries the `SAINT_MIN` bit.
    let mut base_sa: Vec<SaSint> = vec![0; sa_len];
    for (i, slot) in base_sa[block_start..block_end].iter_mut().enumerate() {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 == 0 {
            value | SAINT_MIN
        } else {
            value
        };
    }
    let mut base_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..width {
        base_buckets[v] = ((v + 1) * 200) as SaSint;
        base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
    }

    // Plain partial-sorting scan: 1 thread first, then 4 threads.
    let mut scalar_sa = base_sa.clone();
    let mut threaded_sa = base_sa.clone();
    let mut scalar_buckets = base_buckets.clone();
    let mut threaded_buckets = base_buckets.clone();
    partial_sorting_scan_right_to_left_16u_omp(
        &text,
        &mut scalar_sa,
        n,
        k as SaSint,
        &mut scalar_buckets,
        first_lms_suffix,
        left_suffixes_count,
        7,
        1,
    );
    partial_sorting_scan_right_to_left_16u_omp(
        &text,
        &mut threaded_sa,
        n,
        k as SaSint,
        &mut threaded_buckets,
        first_lms_suffix,
        left_suffixes_count,
        7,
        4,
    );
    assert_eq!(threaded_sa, scalar_sa);
    assert_eq!(threaded_buckets, scalar_buckets);

    // GSA scan, starting again from the pristine fixture.
    let mut gsa_scalar_sa = base_sa.clone();
    let mut gsa_threaded_sa = base_sa;
    let mut gsa_scalar_buckets = base_buckets.clone();
    let mut gsa_threaded_buckets = base_buckets;
    partial_gsa_scan_right_to_left_16u_omp(
        &text,
        &mut gsa_scalar_sa,
        n,
        k as SaSint,
        &mut gsa_scalar_buckets,
        first_lms_suffix,
        left_suffixes_count,
        7,
        1,
    );
    partial_gsa_scan_right_to_left_16u_omp(
        &text,
        &mut gsa_threaded_sa,
        n,
        k as SaSint,
        &mut gsa_threaded_buckets,
        first_lms_suffix,
        left_suffixes_count,
        7,
        4,
    );
    assert_eq!(gsa_threaded_sa, gsa_scalar_sa);
    assert_eq!(gsa_threaded_buckets, gsa_scalar_buckets);
}
13717
13718    fn final_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13719        let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13720        let mut sa = vec![0; 96];
13721        sa[..6].copy_from_slice(&[3, 0, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13722
13723        let mut induction_bucket = vec![0; ALPHABET_SIZE];
13724        for c in 0..8 {
13725            induction_bucket[c] = 24 + (c as SaSint) * 6;
13726        }
13727
13728        (text, sa, induction_bucket)
13729    }
13730
13731    fn final_order_buckets(induction_bucket: &[SaSint]) -> Vec<SaSint> {
13732        let mut buckets = vec![0; 8 * ALPHABET_SIZE];
13733        buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13734        buckets[7 * ALPHABET_SIZE..8 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13735        buckets
13736    }
13737
/// Runs `final_sorting_scan_left_to_right_16u` and its `probe_…` foreign counterpart on the
/// shared final-scan fixture; suffix array and induction bucket must agree.
#[test]
fn libsais16x64_final_sorting_scan_left_to_right_16u_matches_c() {
    let (text, fixture_sa, fixture_bucket) = final_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_bucket, mut c_bucket) = (fixture_bucket.clone(), fixture_bucket);

    final_sorting_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_final_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_bucket, c_bucket);
}
13758
/// Runs `final_sorting_scan_right_to_left_16u` and its `probe_…` foreign counterpart on the
/// shared final-scan fixture; suffix array and induction bucket must agree.
#[test]
fn libsais16x64_final_sorting_scan_right_to_left_16u_matches_c() {
    let (text, fixture_sa, fixture_bucket) = final_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_bucket, mut c_bucket) = (fixture_bucket.clone(), fixture_bucket);

    final_sorting_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_final_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_bucket, c_bucket);
}
13779
/// Runs `final_gsa_scan_right_to_left_16u` and its `probe_…` foreign counterpart on the
/// shared final-scan fixture; suffix array and induction bucket must agree.
#[test]
fn libsais16x64_final_gsa_scan_right_to_left_16u_matches_c() {
    let (text, fixture_sa, fixture_bucket) = final_scan_fixture();
    let (mut rust_sa, mut c_sa) = (fixture_sa.clone(), fixture_sa);
    let (mut rust_bucket, mut c_bucket) = (fixture_bucket.clone(), fixture_bucket);

    final_gsa_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_final_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_bucket, c_bucket);
}
13800
/// Exercises the 32s final-sorting helpers in four steps:
///
/// 1. left-to-right scan compared against its `probe_…` foreign counterpart,
/// 2. right-to-left scan compared against its `probe_…` foreign counterpart,
/// 3. left-to-right `…_block_omp` variant checked against hard-coded expectations,
/// 4. right-to-left `…_block_omp` variant checked against hard-coded expectations.
#[test]
fn libsais16x64_final_sorting_32s_helpers_behave_like_upstream_shapes() {
    let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];

    // Step 1: left-to-right scan over one entry.
    let mut ltr_rust_sa = vec![1, 0, 0];
    let mut ltr_rust_bucket = vec![0, 1, 3];
    let mut ltr_c_sa = ltr_rust_sa.clone();
    let mut ltr_c_bucket = ltr_rust_bucket.clone();
    final_sorting_scan_left_to_right_32s(&t, &mut ltr_rust_sa, &mut ltr_rust_bucket, 0, 1);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_final_sorting_scan_left_to_right_32s(
            t.as_ptr(),
            ltr_c_sa.as_mut_ptr(),
            ltr_c_bucket.as_mut_ptr(),
            0,
            1,
        );
    }
    assert_eq!(ltr_rust_sa, ltr_c_sa);
    assert_eq!(ltr_rust_bucket, ltr_c_bucket);

    // Step 2: right-to-left scan over two entries.
    let mut rtl_rust_sa = vec![0, 2, 0];
    let mut rtl_rust_bucket = vec![1, 2, 3];
    let mut rtl_c_sa = rtl_rust_sa.clone();
    let mut rtl_c_bucket = rtl_rust_bucket.clone();
    final_sorting_scan_right_to_left_32s(&t, &mut rtl_rust_sa, &mut rtl_rust_bucket, 0, 2);
    // SAFETY: every pointer comes from a live buffer owned by this test.
    // NOTE(review): assumes the probe stays inside those buffers for these arguments —
    // confirm against the shim.
    unsafe {
        probe_libsais16x64_final_sorting_scan_right_to_left_32s(
            t.as_ptr(),
            rtl_c_sa.as_mut_ptr(),
            rtl_c_bucket.as_mut_ptr(),
            0,
            2,
        );
    }
    assert_eq!(rtl_rust_sa, rtl_c_sa);
    assert_eq!(rtl_rust_bucket, rtl_c_bucket);

    // Step 3: left-to-right block variant with a fresh default-initialised cache.
    let mut ltr_block_sa = vec![1, 2, 0, 0];
    let mut ltr_block_bucket = vec![0, 1, 3];
    let mut ltr_cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
    final_sorting_scan_left_to_right_32s_block_omp(
        &t,
        &mut ltr_block_sa,
        &mut ltr_block_bucket,
        &mut ltr_cache,
        0,
        2,
        2,
    );
    // The top bit is masked off before comparing the stored values.
    assert_eq!(ltr_block_sa[0] & SAINT_MAX, 0);
    assert_eq!(ltr_block_sa[1] & SAINT_MAX, 1);
    assert_eq!(ltr_block_bucket[0], 1);
    assert_eq!(ltr_block_bucket[1], 2);

    // Step 4: right-to-left block variant, again with its own fresh cache.
    let mut rtl_block_sa = vec![0, 2, 0, 0];
    let mut rtl_block_bucket = vec![1, 2, 3];
    let mut rtl_cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
    final_sorting_scan_right_to_left_32s_block_omp(
        &t,
        &mut rtl_block_sa,
        &mut rtl_block_bucket,
        &mut rtl_cache,
        0,
        2,
        2,
    );
    assert_eq!(rtl_block_sa[1] & SAINT_MAX, 1);
    assert_eq!(rtl_block_bucket[1], 1);
}
13871
13872    #[test]
13873    fn libsais16x64_final_left_to_right_16u_block_omp_uses_cache_pipeline() {
13874        let block_size = 65_536usize;
13875        let k = 512usize;
13876        let text: Vec<u16> = (0..=block_size).map(|i| 1 + (i % (k - 1)) as u16).collect();
13877        let sa_len = block_size + k * 200;
13878        let mut base_sa = vec![0; sa_len];
13879        for (i, slot) in base_sa.iter_mut().take(block_size).enumerate() {
13880            *slot = (i + 1) as SaSint;
13881        }
13882        let mut base_bucket = vec![0; k];
13883        for c in 0..k {
13884            base_bucket[c] = (block_size + c * 200) as SaSint;
13885        }
13886
13887        let mut scalar_sa = base_sa.clone();
13888        let mut threaded_sa = base_sa.clone();
13889        let mut scalar_bucket = base_bucket.clone();
13890        let mut threaded_bucket = base_bucket.clone();
13891        let mut thread_state = alloc_thread_state(4).unwrap();
13892        final_bwt_scan_left_to_right_16u(
13893            &text,
13894            &mut scalar_sa,
13895            &mut scalar_bucket,
13896            0,
13897            block_size as SaSint,
13898        );
13899        final_bwt_scan_left_to_right_16u_block_omp(
13900            &text,
13901            &mut threaded_sa,
13902            k as SaSint,
13903            &mut threaded_bucket,
13904            0,
13905            block_size as SaSint,
13906            4,
13907            &mut thread_state,
13908        );
13909        assert_eq!(threaded_sa, scalar_sa);
13910        assert_eq!(threaded_bucket, scalar_bucket);
13911
13912        let rm = 3;
13913        let mut scalar_sa = base_sa.clone();
13914        let mut threaded_sa = base_sa.clone();
13915        let mut scalar_bucket = base_bucket.clone();
13916        let mut threaded_bucket = base_bucket.clone();
13917        let mut scalar_i = vec![-1; (block_size / (rm as usize + 1)) + 2];
13918        let mut threaded_i = scalar_i.clone();
13919        final_bwt_aux_scan_left_to_right_16u(
13920            &text,
13921            &mut scalar_sa,
13922            rm,
13923            &mut scalar_i,
13924            &mut scalar_bucket,
13925            0,
13926            block_size as SaSint,
13927        );
13928        final_bwt_aux_scan_left_to_right_16u_block_omp(
13929            &text,
13930            &mut threaded_sa,
13931            k as SaSint,
13932            rm,
13933            &mut threaded_i,
13934            &mut threaded_bucket,
13935            0,
13936            block_size as SaSint,
13937            4,
13938            &mut thread_state,
13939        );
13940        assert_eq!(threaded_sa, scalar_sa);
13941        assert_eq!(threaded_i, scalar_i);
13942        assert_eq!(threaded_bucket, scalar_bucket);
13943
13944        let mut scalar_sa = base_sa;
13945        let mut threaded_sa = scalar_sa.clone();
13946        let mut scalar_bucket = base_bucket.clone();
13947        let mut threaded_bucket = base_bucket;
13948        final_sorting_scan_left_to_right_16u(
13949            &text,
13950            &mut scalar_sa,
13951            &mut scalar_bucket,
13952            0,
13953            block_size as SaSint,
13954        );
13955        final_sorting_scan_left_to_right_16u_block_omp(
13956            &text,
13957            &mut threaded_sa,
13958            k as SaSint,
13959            &mut threaded_bucket,
13960            0,
13961            block_size as SaSint,
13962            4,
13963            &mut thread_state,
13964        );
13965        assert_eq!(threaded_sa, scalar_sa);
13966        assert_eq!(threaded_bucket, scalar_bucket);
13967    }
13968
13969    #[test]
13970    fn libsais16x64_final_right_to_left_16u_block_omp_uses_cache_pipeline() {
13971        let block_size = 65_536usize;
13972        let k = 512usize;
13973        let block_start = k * 200 + 1024;
13974        let text: Vec<u16> = (0..=block_size + 1)
13975            .map(|i| 1 + (i % (k - 1)) as u16)
13976            .collect();
13977        let sa_len = block_start + block_size + 1;
13978        let mut base_sa = vec![0; sa_len];
13979        for i in 0..block_size {
13980            base_sa[block_start + i] = (i + 1) as SaSint;
13981        }
13982        let mut base_bucket = vec![0; k];
13983        for c in 0..k {
13984            base_bucket[c] = ((c + 1) * 200) as SaSint;
13985        }
13986
13987        let mut scalar_sa = base_sa.clone();
13988        let mut threaded_sa = base_sa.clone();
13989        let mut scalar_bucket = base_bucket.clone();
13990        let mut threaded_bucket = base_bucket.clone();
13991        let mut thread_state = alloc_thread_state(4).unwrap();
13992        final_bwt_scan_right_to_left_16u(
13993            &text,
13994            &mut scalar_sa,
13995            &mut scalar_bucket,
13996            block_start as SaSint,
13997            block_size as SaSint,
13998        );
13999        final_bwt_scan_right_to_left_16u_block_omp(
14000            &text,
14001            &mut threaded_sa,
14002            k as SaSint,
14003            &mut threaded_bucket,
14004            block_start as SaSint,
14005            block_size as SaSint,
14006            4,
14007            &mut thread_state,
14008        );
14009        assert_eq!(threaded_sa, scalar_sa);
14010        assert_eq!(threaded_bucket, scalar_bucket);
14011
14012        let rm = 3;
14013        let mut scalar_sa = base_sa.clone();
14014        let mut threaded_sa = base_sa.clone();
14015        let mut scalar_bucket = base_bucket.clone();
14016        let mut threaded_bucket = base_bucket.clone();
14017        let mut scalar_i = vec![-1; (block_size / (rm as usize + 1)) + 2];
14018        let mut threaded_i = scalar_i.clone();
14019        final_bwt_aux_scan_right_to_left_16u(
14020            &text,
14021            &mut scalar_sa,
14022            rm,
14023            &mut scalar_i,
14024            &mut scalar_bucket,
14025            block_start as SaSint,
14026            block_size as SaSint,
14027        );
14028        final_bwt_aux_scan_right_to_left_16u_block_omp(
14029            &text,
14030            &mut threaded_sa,
14031            k as SaSint,
14032            rm,
14033            &mut threaded_i,
14034            &mut threaded_bucket,
14035            block_start as SaSint,
14036            block_size as SaSint,
14037            4,
14038            &mut thread_state,
14039        );
14040        assert_eq!(threaded_sa, scalar_sa);
14041        assert_eq!(threaded_i, scalar_i);
14042        assert_eq!(threaded_bucket, scalar_bucket);
14043
14044        let mut scalar_sa = base_sa.clone();
14045        let mut threaded_sa = base_sa.clone();
14046        let mut scalar_bucket = base_bucket.clone();
14047        let mut threaded_bucket = base_bucket.clone();
14048        final_sorting_scan_right_to_left_16u(
14049            &text,
14050            &mut scalar_sa,
14051            &mut scalar_bucket,
14052            block_start as SaSint,
14053            block_size as SaSint,
14054        );
14055        final_sorting_scan_right_to_left_16u_block_omp(
14056            &text,
14057            &mut threaded_sa,
14058            k as SaSint,
14059            &mut threaded_bucket,
14060            block_start as SaSint,
14061            block_size as SaSint,
14062            4,
14063            &mut thread_state,
14064        );
14065        assert_eq!(threaded_sa, scalar_sa);
14066        assert_eq!(threaded_bucket, scalar_bucket);
14067
14068        let mut scalar_sa = base_sa;
14069        let mut threaded_sa = scalar_sa.clone();
14070        let mut scalar_bucket = base_bucket.clone();
14071        let mut threaded_bucket = base_bucket;
14072        final_gsa_scan_right_to_left_16u(
14073            &text,
14074            &mut scalar_sa,
14075            &mut scalar_bucket,
14076            block_start as SaSint,
14077            block_size as SaSint,
14078        );
14079        final_gsa_scan_right_to_left_16u_block_omp(
14080            &text,
14081            &mut threaded_sa,
14082            k as SaSint,
14083            &mut threaded_bucket,
14084            block_start as SaSint,
14085            block_size as SaSint,
14086            4,
14087            &mut thread_state,
14088        );
14089        assert_eq!(threaded_sa, scalar_sa);
14090        assert_eq!(threaded_bucket, scalar_bucket);
14091    }
14092
14093    #[test]
14094    fn libsais16x64_clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
14095        let mut rust_sa = vec![5, 4, 3, 2, 1, 9];
14096        let mut c_sa = rust_sa.clone();
14097        let n = rust_sa.len() as SaSint;
14098        let mut bucket_start = vec![1, 4, 5];
14099        let mut bucket_end = vec![3, 5, 5];
14100
14101        clear_lms_suffixes_omp(&mut rust_sa, n, 3, &bucket_start, &bucket_end, 2);
14102        unsafe {
14103            probe_libsais16x64_clear_lms_suffixes_omp(
14104                c_sa.as_mut_ptr(),
14105                n,
14106                3,
14107                bucket_start.as_mut_ptr(),
14108                bucket_end.as_mut_ptr(),
14109                2,
14110            );
14111        }
14112
14113        assert_eq!(rust_sa, c_sa);
14114    }
14115
14116    #[test]
14117    fn libsais16x64_partial_order_wrapper_helpers_match_manual_sequence() {
14118        let mut rust_sa = vec![1, 2, 3, 4];
14119        let mut c_sa = rust_sa.clone();
14120        flip_suffix_markers_omp(&mut rust_sa, 3, 2);
14121        unsafe {
14122            probe_libsais16x64_flip_suffix_markers_omp(c_sa.as_mut_ptr(), 3, 2);
14123        }
14124        assert_eq!(rust_sa, c_sa);
14125
14126        let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];
14127        let n = t.len() as SaSint;
14128        let k = 3;
14129        let mut wrapped_sa = vec![0; t.len()];
14130        let mut wrapped_buckets = vec![0; k as usize];
14131        let mut wrapped_state = alloc_thread_state(1).unwrap();
14132        induce_partial_order_32s_1k_omp(
14133            &t,
14134            &mut wrapped_sa,
14135            n,
14136            k,
14137            &mut wrapped_buckets,
14138            1,
14139            &mut wrapped_state,
14140        );
14141
14142        let mut manual_sa = vec![0; t.len()];
14143        let mut manual_buckets = vec![0; k as usize];
14144        let mut manual_state = alloc_thread_state(1).unwrap();
14145        count_suffixes_32s(&t, n, k, &mut manual_buckets);
14146        initialize_buckets_start_32s_1k(k, &mut manual_buckets);
14147        partial_sorting_scan_left_to_right_32s_1k_omp(
14148            &t,
14149            &mut manual_sa,
14150            n,
14151            &mut manual_buckets,
14152            1,
14153            &mut manual_state,
14154        );
14155        count_suffixes_32s(&t, n, k, &mut manual_buckets);
14156        initialize_buckets_end_32s_1k(k, &mut manual_buckets);
14157        partial_sorting_scan_right_to_left_32s_1k_omp(
14158            &t,
14159            &mut manual_sa,
14160            n,
14161            &mut manual_buckets,
14162            1,
14163            &mut manual_state,
14164        );
14165        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut manual_sa, n, 1, &mut manual_state);
14166
14167        assert_eq!(wrapped_sa, manual_sa);
14168        assert_eq!(wrapped_buckets, manual_buckets);
14169    }
14170
    /// Compares the four `induce_partial_order_32s_{6k,4k,2k,1k}_omp` wrappers
    /// against their C probes.
    ///
    /// Each section prepares the suffix array and buckets with the Rust helpers,
    /// snapshots that state for the C side, runs both implementations, and then
    /// requires identical suffix arrays and bucket contents.
    #[test]
    fn libsais16x64_induce_partial_order_32s_wrappers_match_c() {
        let t = make_main_32s_stress_text(128, 24);
        let n = t.len() as SaSint;
        let k = 24;
        let threads = 1;

        // ---- 6k variant: bucket array of 6 * k entries ----
        let mut rust_sa = vec![0; t.len()];
        let mut rust_buckets = vec![0; 6 * k as usize];
        let mut rust_state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_4k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            1,
            threads,
            &mut rust_state,
        );
        // The fixture must yield more than one LMS suffix for the rest to be meaningful.
        assert!(m > 1);
        // Zero everything in front of the last `m` slots of SA.
        rust_sa[..(n - m) as usize].fill(0);
        let first_lms_suffix = rust_sa[(n - m) as usize];
        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
            &t,
            k,
            &mut rust_buckets,
            first_lms_suffix,
        );
        // The radix-sort helpers operate on the bucket section starting at offset 4 * k.
        let (_, induction_bucket) = rust_buckets.split_at_mut(4 * k as usize);
        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_6k_omp(&mut rust_sa, k, induction_bucket, threads);
        initialize_buckets_for_partial_sorting_32s_6k(
            &t,
            k,
            &mut rust_buckets,
            first_lms_suffix,
            left_suffixes_count,
        );
        // Snapshot the prepared state so the C probe starts from the same input.
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();
        induce_partial_order_32s_6k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            first_lms_suffix,
            left_suffixes_count,
            threads,
            &mut rust_state,
        );
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_6k_omp(
                t.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                k,
                c_buckets.as_mut_ptr(),
                first_lms_suffix,
                left_suffixes_count,
                threads,
            );
        }
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);

        // ---- 4k variant: bucket array of 4 * k entries ----
        let mut rust_sa = vec![0; t.len()];
        let mut rust_buckets = vec![0; 4 * k as usize];
        let mut rust_state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            1,
            threads,
            &mut rust_state,
        );
        assert!(m > 1);
        let first_lms_suffix = rust_sa[(n - m) as usize];
        initialize_buckets_for_radix_and_partial_sorting_32s_4k(
            &t,
            k,
            &mut rust_buckets,
            first_lms_suffix,
        );
        // The radix-sort helpers receive the bucket array from offset 1 onwards.
        let (_, induction_bucket) = rust_buckets.split_at_mut(1);
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_4k_omp(&mut rust_sa, k, induction_bucket, threads);
        place_lms_suffixes_interval_32s_4k(&mut rust_sa, n, k, m - 1, &rust_buckets);
        // Snapshot the prepared state so the C probe starts from the same input.
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();
        induce_partial_order_32s_4k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            threads,
            &mut rust_state,
        );
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_4k_omp(
                t.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                k,
                c_buckets.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);

        // ---- 2k variant: bucket array of 2 * k entries ----
        let mut rust_sa = vec![0; t.len()];
        let mut rust_buckets = vec![0; 2 * k as usize];
        let mut rust_state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            1,
            threads,
            &mut rust_state,
        );
        assert!(m > 1);
        let first_lms_suffix = rust_sa[(n - m) as usize];
        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
            &t,
            k,
            &mut rust_buckets,
            first_lms_suffix,
        );
        // The radix-sort helper receives the bucket array from offset 1 onwards.
        let (_, induction_bucket) = rust_buckets.split_at_mut(1);
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
        place_lms_suffixes_interval_32s_2k(&mut rust_sa, n, k, m - 1, &rust_buckets);
        initialize_buckets_start_and_end_32s_2k(k, &mut rust_buckets);
        // Snapshot the prepared state so the C probe starts from the same input.
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();
        induce_partial_order_32s_2k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            threads,
            &mut rust_state,
        );
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_2k_omp(
                t.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                k,
                c_buckets.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);

        // ---- 1k variant: bucket array of k entries ----
        let mut rust_sa = vec![0; t.len()];
        let mut rust_buckets = vec![0; k as usize];
        let mut rust_state = alloc_thread_state(threads).unwrap();
        count_suffixes_32s(&t, n, k, &mut rust_buckets);
        initialize_buckets_end_32s_1k(k, &mut rust_buckets);
        let m = radix_sort_lms_suffixes_32s_1k(&t, &mut rust_sa, n, &mut rust_buckets);
        assert!(m > 1);
        // Snapshot the prepared state so the C probe starts from the same input.
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();
        induce_partial_order_32s_1k_omp(
            &t,
            &mut rust_sa,
            n,
            k,
            &mut rust_buckets,
            threads,
            &mut rust_state,
        );
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_1k_omp(
                t.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                k,
                c_buckets.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);
    }
14367
14368    #[test]
14369    fn libsais16x64_induce_partial_order_16u_omp_matches_c() {
14370        let text = [3, 1, 2, 1, 0, 4, 1, 0];
14371        let n = text.len() as SaSint;
14372        let flags = 0;
14373        let threads = 1;
14374        let mut rust_sa = vec![0; text.len()];
14375        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14376
14377        let m = count_and_gather_lms_suffixes_16u_omp(
14378            &text,
14379            &mut rust_sa,
14380            n,
14381            &mut rust_buckets[..4 * ALPHABET_SIZE],
14382            threads,
14383            &mut [],
14384        );
14385        let k = initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
14386        assert!(m > 0);
14387        let first_lms_suffix = rust_sa[(n - m) as usize];
14388        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
14389            &text,
14390            &mut rust_buckets,
14391            first_lms_suffix,
14392        );
14393        radix_sort_lms_suffixes_16u_omp(
14394            &text,
14395            &mut rust_sa,
14396            n,
14397            m,
14398            flags,
14399            &mut rust_buckets,
14400            threads,
14401            &mut [],
14402        );
14403        initialize_buckets_for_partial_sorting_16u(
14404            &text,
14405            &mut rust_buckets,
14406            first_lms_suffix,
14407            left_suffixes_count,
14408        );
14409
14410        let mut c_sa = rust_sa.clone();
14411        let mut c_buckets = rust_buckets.clone();
14412        induce_partial_order_16u_omp(
14413            &text,
14414            &mut rust_sa,
14415            n,
14416            k,
14417            flags,
14418            &mut rust_buckets,
14419            first_lms_suffix,
14420            left_suffixes_count,
14421            threads,
14422        );
14423        unsafe {
14424            probe_libsais16x64_induce_partial_order_16u_omp(
14425                text.as_ptr(),
14426                c_sa.as_mut_ptr(),
14427                n,
14428                k,
14429                flags,
14430                c_buckets.as_mut_ptr(),
14431                first_lms_suffix,
14432                left_suffixes_count,
14433                threads,
14434            );
14435        }
14436
14437        assert_eq!(rust_sa, c_sa);
14438        assert_eq!(rust_buckets, c_buckets);
14439    }
14440
14441    fn final_order_32s_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14442        (
14443            vec![0, 1, 2, 1, 0, 1, 2, 1, 0],
14444            vec![1, 0, 2, 0, 0, 0, 0, 0, 0],
14445        )
14446    }
14447
14448    fn seed_final_order_bucket_sections(buckets: &mut [SaSint], k: usize, branch_k: usize) {
14449        let left = [0, 1, 3];
14450        let right = [1, 2, 3];
14451        let left_section = match branch_k {
14452            6 => 4 * k,
14453            4 => 2 * k,
14454            2 => k,
14455            _ => 0,
14456        };
14457        let right_section = match branch_k {
14458            6 => 5 * k,
14459            4 => 3 * k,
14460            2 => 0,
14461            _ => 0,
14462        };
14463        buckets[left_section..left_section + k].copy_from_slice(&left);
14464        buckets[right_section..right_section + k].copy_from_slice(&right);
14465    }
14466
14467    #[test]
14468    fn libsais16x64_induce_final_order_32s_wrappers_match_c() {
14469        let (t, sa) = final_order_32s_fixture();
14470        let n = t.len() as SaSint;
14471        let k = 3;
14472        let threads = 1;
14473
14474        let mut rust_sa = sa.clone();
14475        let mut rust_buckets = vec![0; 6 * k as usize];
14476        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 6);
14477        let mut c_sa = rust_sa.clone();
14478        let mut c_buckets = rust_buckets.clone();
14479        let mut rust_state = alloc_thread_state(threads).unwrap();
14480        induce_final_order_32s_6k(
14481            &t,
14482            &mut rust_sa,
14483            n,
14484            k,
14485            &mut rust_buckets,
14486            threads,
14487            &mut rust_state,
14488        );
14489        unsafe {
14490            probe_libsais16x64_induce_final_order_32s_6k(
14491                t.as_ptr(),
14492                c_sa.as_mut_ptr(),
14493                n,
14494                k,
14495                c_buckets.as_mut_ptr(),
14496                threads,
14497            );
14498        }
14499        assert_eq!(rust_sa, c_sa);
14500        assert_eq!(rust_buckets, c_buckets);
14501
14502        let mut rust_sa = sa.clone();
14503        let mut rust_buckets = vec![0; 4 * k as usize];
14504        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 4);
14505        let mut c_sa = rust_sa.clone();
14506        let mut c_buckets = rust_buckets.clone();
14507        let mut rust_state = alloc_thread_state(threads).unwrap();
14508        induce_final_order_32s_4k(
14509            &t,
14510            &mut rust_sa,
14511            n,
14512            k,
14513            &mut rust_buckets,
14514            threads,
14515            &mut rust_state,
14516        );
14517        unsafe {
14518            probe_libsais16x64_induce_final_order_32s_4k(
14519                t.as_ptr(),
14520                c_sa.as_mut_ptr(),
14521                n,
14522                k,
14523                c_buckets.as_mut_ptr(),
14524                threads,
14525            );
14526        }
14527        assert_eq!(rust_sa, c_sa);
14528        assert_eq!(rust_buckets, c_buckets);
14529
14530        let mut rust_sa = sa.clone();
14531        let mut rust_buckets = vec![0; 2 * k as usize];
14532        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 2);
14533        let mut c_sa = rust_sa.clone();
14534        let mut c_buckets = rust_buckets.clone();
14535        let mut rust_state = alloc_thread_state(threads).unwrap();
14536        induce_final_order_32s_2k(
14537            &t,
14538            &mut rust_sa,
14539            n,
14540            k,
14541            &mut rust_buckets,
14542            threads,
14543            &mut rust_state,
14544        );
14545        unsafe {
14546            probe_libsais16x64_induce_final_order_32s_2k(
14547                t.as_ptr(),
14548                c_sa.as_mut_ptr(),
14549                n,
14550                k,
14551                c_buckets.as_mut_ptr(),
14552                threads,
14553            );
14554        }
14555        assert_eq!(rust_sa, c_sa);
14556        assert_eq!(rust_buckets, c_buckets);
14557
14558        let mut rust_sa = sa;
14559        let mut rust_buckets = vec![0; k as usize];
14560        let mut c_sa = rust_sa.clone();
14561        let mut c_buckets = rust_buckets.clone();
14562        let mut rust_state = alloc_thread_state(threads).unwrap();
14563        induce_final_order_32s_1k(
14564            &t,
14565            &mut rust_sa,
14566            n,
14567            k,
14568            &mut rust_buckets,
14569            threads,
14570            &mut rust_state,
14571        );
14572        unsafe {
14573            probe_libsais16x64_induce_final_order_32s_1k(
14574                t.as_ptr(),
14575                c_sa.as_mut_ptr(),
14576                n,
14577                k,
14578                c_buckets.as_mut_ptr(),
14579                threads,
14580            );
14581        }
14582        assert_eq!(rust_sa, c_sa);
14583        assert_eq!(rust_buckets, c_buckets);
14584    }
14585
14586    #[test]
14587    fn libsais16x64_induce_final_order_16u_omp_matches_manual_sequence() {
14588        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14589        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14590        let mut c_sa = wrapped_sa.clone();
14591        let mut c_buckets = wrapped_buckets.clone();
14592        let mut wrapped_state = alloc_thread_state(1).unwrap();
14593        let wrapped_index = induce_final_order_16u_omp(
14594            &text,
14595            &mut wrapped_sa,
14596            text.len() as SaSint,
14597            8,
14598            0,
14599            0,
14600            None,
14601            &mut wrapped_buckets,
14602            1,
14603            &mut wrapped_state,
14604        );
14605        let c_index = unsafe {
14606            probe_libsais16x64_induce_final_order_16u_omp(
14607                text.as_ptr(),
14608                c_sa.as_mut_ptr(),
14609                text.len() as SaSint,
14610                8,
14611                0,
14612                0,
14613                std::ptr::null_mut(),
14614                c_buckets.as_mut_ptr(),
14615                1,
14616            )
14617        };
14618
14619        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14620        let mut manual_buckets = final_order_buckets(&induction_bucket);
14621        {
14622            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14623            final_sorting_scan_left_to_right_16u_omp(
14624                &text,
14625                &mut manual_sa,
14626                text.len() as SaSint,
14627                8,
14628                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14629                1,
14630            );
14631            final_sorting_scan_right_to_left_16u_omp(
14632                &text,
14633                &mut manual_sa,
14634                0,
14635                text.len() as SaSint,
14636                8,
14637                &mut right_tail[..ALPHABET_SIZE],
14638                1,
14639            );
14640        }
14641
14642        assert_eq!(wrapped_index, 0);
14643        assert_eq!(wrapped_index, c_index);
14644        assert_eq!(wrapped_sa, manual_sa);
14645        assert_eq!(wrapped_sa, c_sa);
14646        assert_eq!(wrapped_buckets, manual_buckets);
14647        assert_eq!(wrapped_buckets, c_buckets);
14648
14649        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14650        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14651        let mut c_sa = wrapped_sa.clone();
14652        let mut c_buckets = wrapped_buckets.clone();
14653        let mut wrapped_state = alloc_thread_state(1).unwrap();
14654        let wrapped_index = induce_final_order_16u_omp(
14655            &text,
14656            &mut wrapped_sa,
14657            text.len() as SaSint,
14658            8,
14659            LIBSAIS_FLAGS_BWT,
14660            0,
14661            None,
14662            &mut wrapped_buckets,
14663            1,
14664            &mut wrapped_state,
14665        );
14666        let c_index = unsafe {
14667            probe_libsais16x64_induce_final_order_16u_omp(
14668                text.as_ptr(),
14669                c_sa.as_mut_ptr(),
14670                text.len() as SaSint,
14671                8,
14672                LIBSAIS_FLAGS_BWT,
14673                0,
14674                std::ptr::null_mut(),
14675                c_buckets.as_mut_ptr(),
14676                1,
14677            )
14678        };
14679
14680        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14681        let mut manual_buckets = final_order_buckets(&induction_bucket);
14682        let manual_index = {
14683            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14684            final_bwt_scan_left_to_right_16u_omp(
14685                &text,
14686                &mut manual_sa,
14687                text.len() as SaSint,
14688                8,
14689                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14690                1,
14691            );
14692            final_bwt_scan_right_to_left_16u_omp(
14693                &text,
14694                &mut manual_sa,
14695                text.len() as SaSint,
14696                8,
14697                &mut right_tail[..ALPHABET_SIZE],
14698                1,
14699            )
14700        };
14701
14702        assert_eq!(wrapped_index, manual_index);
14703        assert_eq!(wrapped_index, c_index);
14704        assert_eq!(wrapped_sa, manual_sa);
14705        assert_eq!(wrapped_sa, c_sa);
14706        assert_eq!(wrapped_buckets, manual_buckets);
14707        assert_eq!(wrapped_buckets, c_buckets);
14708
14709        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14710        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14711        let mut c_sa = wrapped_sa.clone();
14712        let mut c_buckets = wrapped_buckets.clone();
14713        let mut wrapped_state = alloc_thread_state(1).unwrap();
14714        let mut wrapped_i = vec![-1; 8];
14715        let mut c_i = wrapped_i.clone();
14716        let wrapped_index = induce_final_order_16u_omp(
14717            &text,
14718            &mut wrapped_sa,
14719            text.len() as SaSint,
14720            8,
14721            LIBSAIS_FLAGS_BWT,
14722            2,
14723            Some(&mut wrapped_i),
14724            &mut wrapped_buckets,
14725            1,
14726            &mut wrapped_state,
14727        );
14728        let c_index = unsafe {
14729            probe_libsais16x64_induce_final_order_16u_omp(
14730                text.as_ptr(),
14731                c_sa.as_mut_ptr(),
14732                text.len() as SaSint,
14733                8,
14734                LIBSAIS_FLAGS_BWT,
14735                2,
14736                c_i.as_mut_ptr(),
14737                c_buckets.as_mut_ptr(),
14738                1,
14739            )
14740        };
14741
14742        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14743        let mut manual_buckets = final_order_buckets(&induction_bucket);
14744        let mut manual_i = vec![-1; 8];
14745        {
14746            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14747            final_bwt_aux_scan_left_to_right_16u_omp(
14748                &text,
14749                &mut manual_sa,
14750                text.len() as SaSint,
14751                8,
14752                1,
14753                &mut manual_i,
14754                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14755                1,
14756            );
14757            final_bwt_aux_scan_right_to_left_16u_omp(
14758                &text,
14759                &mut manual_sa,
14760                text.len() as SaSint,
14761                8,
14762                1,
14763                &mut manual_i,
14764                &mut right_tail[..ALPHABET_SIZE],
14765                1,
14766            );
14767        }
14768
14769        assert_eq!(wrapped_index, 0);
14770        assert_eq!(wrapped_index, c_index);
14771        assert_eq!(wrapped_sa, manual_sa);
14772        assert_eq!(wrapped_sa, c_sa);
14773        assert_eq!(wrapped_buckets, manual_buckets);
14774        assert_eq!(wrapped_buckets, c_buckets);
14775        assert_eq!(wrapped_i, manual_i);
14776        assert_eq!(wrapped_i, c_i);
14777    }
14778
14779    #[test]
14780    fn libsais16x64_main_16u_matches_public_c_suffix_array_paths() {
14781        let text = [3, 1, 4, 1, 5, 9, 0, 2];
14782        let n = text.len() as SaSint;
14783        let fs = 32;
14784        let mut rust_sa = vec![0; text.len() + fs as usize];
14785        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14786        let mut rust_freq = vec![0; ALPHABET_SIZE];
14787        let mut rust_state = alloc_thread_state(1).unwrap();
14788        let rust_index = main_16u(
14789            &text,
14790            &mut rust_sa,
14791            n,
14792            &mut rust_buckets,
14793            0,
14794            0,
14795            None,
14796            fs,
14797            Some(&mut rust_freq),
14798            1,
14799            &mut rust_state,
14800        );
14801
14802        let mut c_sa = vec![0; text.len() + fs as usize];
14803        let mut c_freq = vec![0; ALPHABET_SIZE];
14804        let c_index = unsafe {
14805            probe_public_libsais16x64_freq(
14806                text.as_ptr(),
14807                c_sa.as_mut_ptr(),
14808                n,
14809                fs,
14810                c_freq.as_mut_ptr(),
14811            )
14812        };
14813
14814        assert_eq!(rust_index, c_index);
14815        assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
14816        assert_eq!(rust_freq, c_freq);
14817
14818        let text = [2, 1, 0, 2, 0];
14819        let n = text.len() as SaSint;
14820        let fs = 24;
14821        let mut rust_sa = vec![0; text.len() + fs as usize];
14822        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14823        let mut rust_freq = vec![0; ALPHABET_SIZE];
14824        let mut rust_state = alloc_thread_state(1).unwrap();
14825        let rust_index = main_16u(
14826            &text,
14827            &mut rust_sa,
14828            n,
14829            &mut rust_buckets,
14830            LIBSAIS_FLAGS_GSA,
14831            0,
14832            None,
14833            fs,
14834            Some(&mut rust_freq),
14835            1,
14836            &mut rust_state,
14837        );
14838
14839        let mut c_sa = vec![0; text.len() + fs as usize];
14840        let mut c_freq = vec![0; ALPHABET_SIZE];
14841        let c_index = unsafe {
14842            probe_public_libsais16x64_gsa_freq(
14843                text.as_ptr(),
14844                c_sa.as_mut_ptr(),
14845                n,
14846                fs,
14847                c_freq.as_mut_ptr(),
14848            )
14849        };
14850
14851        assert_eq!(rust_index, c_index);
14852        assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
14853        assert_eq!(rust_freq, c_freq);
14854    }
14855
14856    #[test]
14857    fn libsais16x64_final_bwt_scan_left_to_right_16u_matches_c() {
14858        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14859        let mut c_sa = rust_sa.clone();
14860        let mut c_bucket = rust_bucket.clone();
14861
14862        final_bwt_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
14863        unsafe {
14864            probe_libsais16x64_final_bwt_scan_left_to_right_16u(
14865                text.as_ptr(),
14866                c_sa.as_mut_ptr(),
14867                c_bucket.as_mut_ptr(),
14868                0,
14869                6,
14870            );
14871        }
14872
14873        assert_eq!(rust_sa, c_sa);
14874        assert_eq!(rust_bucket, c_bucket);
14875    }
14876
14877    #[test]
14878    fn libsais16x64_final_bwt_scan_right_to_left_16u_matches_c() {
14879        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14880        let mut c_sa = rust_sa.clone();
14881        let mut c_bucket = rust_bucket.clone();
14882
14883        let rust_index =
14884            final_bwt_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
14885        let c_index = unsafe {
14886            probe_libsais16x64_final_bwt_scan_right_to_left_16u(
14887                text.as_ptr(),
14888                c_sa.as_mut_ptr(),
14889                c_bucket.as_mut_ptr(),
14890                0,
14891                6,
14892            )
14893        };
14894
14895        assert_eq!(rust_index, c_index);
14896        assert_eq!(rust_sa, c_sa);
14897        assert_eq!(rust_bucket, c_bucket);
14898    }
14899
14900    #[test]
14901    fn libsais16x64_final_bwt_aux_scan_left_to_right_16u_matches_c() {
14902        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14903        let mut c_sa = rust_sa.clone();
14904        let mut c_bucket = rust_bucket.clone();
14905        let mut rust_i = vec![-1; 8];
14906        let mut c_i = rust_i.clone();
14907
14908        final_bwt_aux_scan_left_to_right_16u(
14909            &text,
14910            &mut rust_sa,
14911            1,
14912            &mut rust_i,
14913            &mut rust_bucket,
14914            0,
14915            6,
14916        );
14917        unsafe {
14918            probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u(
14919                text.as_ptr(),
14920                c_sa.as_mut_ptr(),
14921                1,
14922                c_i.as_mut_ptr(),
14923                c_bucket.as_mut_ptr(),
14924                0,
14925                6,
14926            );
14927        }
14928
14929        assert_eq!(rust_sa, c_sa);
14930        assert_eq!(rust_bucket, c_bucket);
14931        assert_eq!(rust_i, c_i);
14932    }
14933
14934    #[test]
14935    fn libsais16x64_final_bwt_aux_scan_right_to_left_16u_matches_c() {
14936        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14937        let mut c_sa = rust_sa.clone();
14938        let mut c_bucket = rust_bucket.clone();
14939        let mut rust_i = vec![-1; 8];
14940        let mut c_i = rust_i.clone();
14941
14942        final_bwt_aux_scan_right_to_left_16u(
14943            &text,
14944            &mut rust_sa,
14945            1,
14946            &mut rust_i,
14947            &mut rust_bucket,
14948            0,
14949            6,
14950        );
14951        unsafe {
14952            probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u(
14953                text.as_ptr(),
14954                c_sa.as_mut_ptr(),
14955                1,
14956                c_i.as_mut_ptr(),
14957                c_bucket.as_mut_ptr(),
14958                0,
14959                6,
14960            );
14961        }
14962
14963        assert_eq!(rust_sa, c_sa);
14964        assert_eq!(rust_bucket, c_bucket);
14965        assert_eq!(rust_i, c_i);
14966    }
14967
14968    #[test]
14969    fn libsais16x64_renumber_lms_suffixes_16u_matches_c() {
14970        let m = 6;
14971        let mut rust_sa = vec![0; 20];
14972        rust_sa[..m].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN]);
14973        let mut c_sa = rust_sa.clone();
14974
14975        let rust_name = renumber_lms_suffixes_16u(&mut rust_sa, m as SaSint, 5, 0, m as SaSint);
14976        let c_name = unsafe {
14977            probe_libsais16x64_renumber_lms_suffixes_16u(
14978                c_sa.as_mut_ptr(),
14979                m as SaSint,
14980                5,
14981                0,
14982                m as SaSint,
14983            )
14984        };
14985
14986        assert_eq!(rust_name, c_name);
14987        assert_eq!(rust_sa, c_sa);
14988    }
14989
14990    fn lms_interval_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14991        let mut sa = vec![-7; 16];
14992        sa[4..8].copy_from_slice(&[41, 42, 61, 62]);
14993
14994        let mut buckets = vec![0; 8 * ALPHABET_SIZE];
14995        buckets[buckets_index2(2, 1)] = 0;
14996        buckets[buckets_index2(3, 1)] = 2;
14997        buckets[buckets_index2(4, 1)] = 2;
14998        buckets[buckets_index2(5, 1)] = 2;
14999        buckets[buckets_index2(6, 1)] = 4;
15000        buckets[buckets_index2(7, 1)] = 4;
15001        buckets[7 * ALPHABET_SIZE + 2] = 6;
15002        buckets[7 * ALPHABET_SIZE + 5] = 12;
15003
15004        (sa, buckets)
15005    }
15006
15007    #[test]
15008    fn libsais16x64_place_lms_suffixes_interval_16u_matches_c() {
15009        for flags in [0, LIBSAIS_FLAGS_GSA] {
15010            let (mut rust_sa, mut rust_buckets) = lms_interval_fixture();
15011            let mut c_sa = rust_sa.clone();
15012            let mut c_buckets = rust_buckets.clone();
15013
15014            place_lms_suffixes_interval_16u(&mut rust_sa, 16, 8, flags, &mut rust_buckets);
15015            unsafe {
15016                probe_libsais16x64_place_lms_suffixes_interval_16u(
15017                    c_sa.as_mut_ptr(),
15018                    16,
15019                    8,
15020                    flags,
15021                    c_buckets.as_mut_ptr(),
15022                );
15023            }
15024
15025            assert_eq!(rust_sa, c_sa);
15026            assert_eq!(rust_buckets, c_buckets);
15027        }
15028    }
15029
15030    #[test]
15031    fn libsais16x64_bwt_copy_16u_matches_c() {
15032        let mut a = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
15033        let mut rust_u = vec![999; a.len()];
15034        let mut c_u = rust_u.clone();
15035
15036        bwt_copy_16u(&mut rust_u, &a, a.len() as SaSint);
15037        unsafe {
15038            probe_libsais16x64_bwt_copy_16u(c_u.as_mut_ptr(), a.as_mut_ptr(), a.len() as SaSint);
15039        }
15040
15041        assert_eq!(rust_u, c_u);
15042    }
15043
15044    #[test]
15045    fn libsais16x64_early_omp_wrappers_match_c() {
15046        let text = [3, 1, 2, 1, 0, 4, 1, 0];
15047        let n = text.len() as SaSint;
15048
15049        let mut rust_sa = vec![-99; text.len()];
15050        let mut c_sa = rust_sa.clone();
15051        gather_lms_suffixes_16u_omp(&text, &mut rust_sa, n, 1, &mut []);
15052        unsafe {
15053            probe_libsais16x64_gather_lms_suffixes_16u_omp(text.as_ptr(), c_sa.as_mut_ptr(), n, 1);
15054        }
15055        assert_eq!(rust_sa, c_sa);
15056
15057        let mut rust_sa = vec![-99; text.len()];
15058        let mut c_sa = rust_sa.clone();
15059        let mut rust_buckets = vec![-1; 4 * ALPHABET_SIZE];
15060        let mut c_buckets = rust_buckets.clone();
15061        let rust_m = count_and_gather_lms_suffixes_16u_omp(
15062            &text,
15063            &mut rust_sa,
15064            n,
15065            &mut rust_buckets,
15066            1,
15067            &mut [],
15068        );
15069        let c_m = unsafe {
15070            probe_libsais16x64_count_and_gather_lms_suffixes_16u_omp(
15071                text.as_ptr(),
15072                c_sa.as_mut_ptr(),
15073                n,
15074                c_buckets.as_mut_ptr(),
15075                1,
15076            )
15077        };
15078        assert_eq!(rust_m, c_m);
15079        assert_eq!(rust_sa, c_sa);
15080        assert_eq!(rust_buckets, c_buckets);
15081
15082        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
15083        let m = count_and_gather_lms_suffixes_16u(
15084            &text,
15085            &mut rust_sa,
15086            n,
15087            &mut rust_buckets[..4 * ALPHABET_SIZE],
15088            0,
15089            n,
15090        );
15091        initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
15092        let first_lms_suffix = rust_sa[(n - m) as usize];
15093        initialize_buckets_for_lms_suffixes_radix_sort_16u(
15094            &text,
15095            &mut rust_buckets,
15096            first_lms_suffix,
15097        );
15098        let mut c_sa = rust_sa.clone();
15099        let mut c_buckets = rust_buckets.clone();
15100        radix_sort_lms_suffixes_16u_omp(
15101            &text,
15102            &mut rust_sa,
15103            n,
15104            m,
15105            0,
15106            &mut rust_buckets,
15107            1,
15108            &mut [],
15109        );
15110        unsafe {
15111            probe_libsais16x64_radix_sort_lms_suffixes_16u_omp(
15112                text.as_ptr(),
15113                c_sa.as_mut_ptr(),
15114                n,
15115                m,
15116                0,
15117                c_buckets.as_mut_ptr(),
15118                1,
15119            );
15120        }
15121        assert_eq!(rust_sa, c_sa);
15122        assert_eq!(rust_buckets, c_buckets);
15123    }
15124
15125    #[test]
15126    fn libsais16x64_early_omp_wrappers_use_block_partition_for_large_inputs() {
15127        let n = 65_600usize;
15128        let text: Vec<u16> = (0..n)
15129            .map(|i| 1 + ((i * 37 + i / 17) % 509) as u16)
15130            .collect();
15131
15132        let mut gathered_threaded = vec![-99; n];
15133        let mut gathered_scalar = vec![-99; n];
15134        let mut thread_state = alloc_thread_state(4).unwrap();
15135        let mut count_sa = vec![-99; n];
15136        let mut count_buckets = vec![0; 4 * ALPHABET_SIZE];
15137        count_and_gather_lms_suffixes_16u_omp(
15138            &text,
15139            &mut count_sa,
15140            n as SaSint,
15141            &mut count_buckets,
15142            4,
15143            &mut thread_state,
15144        );
15145        gather_lms_suffixes_16u_omp(
15146            &text,
15147            &mut gathered_threaded,
15148            n as SaSint,
15149            4,
15150            &mut thread_state,
15151        );
15152        gather_lms_suffixes_16u(
15153            &text,
15154            &mut gathered_scalar,
15155            n as SaSint,
15156            n as SaSint - 1,
15157            0,
15158            n as SaSint,
15159        );
15160        assert_eq!(gathered_threaded, gathered_scalar);
15161
15162        let mut sa_threaded = vec![-99; n];
15163        let mut sa_scalar = vec![-99; n];
15164        let mut buckets_threaded = vec![0; 4 * ALPHABET_SIZE];
15165        let mut buckets_scalar = vec![0; 4 * ALPHABET_SIZE];
15166        let m_threaded = count_and_gather_lms_suffixes_16u_omp(
15167            &text,
15168            &mut sa_threaded,
15169            n as SaSint,
15170            &mut buckets_threaded,
15171            4,
15172            &mut thread_state,
15173        );
15174        let m_scalar = count_and_gather_lms_suffixes_16u(
15175            &text,
15176            &mut sa_scalar,
15177            n as SaSint,
15178            &mut buckets_scalar,
15179            0,
15180            n as SaSint,
15181        );
15182        assert_eq!(m_threaded, m_scalar);
15183        assert_eq!(
15184            &sa_threaded[n - m_threaded as usize..],
15185            &sa_scalar[n - m_scalar as usize..]
15186        );
15187        assert_eq!(buckets_threaded, buckets_scalar);
15188    }
15189
15190    #[test]
15191    fn libsais16x64_late_omp_wrappers_match_c() {
15192        let m = 6;
15193        let mut rust_sa = vec![0; 20];
15194        rust_sa[..m].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN]);
15195        let mut c_sa = rust_sa.clone();
15196        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15197        let rust_name =
15198            renumber_lms_suffixes_16u_omp(&mut rust_sa, m as SaSint, 1, &mut rust_thread_state);
15199        let c_name = unsafe {
15200            probe_libsais16x64_renumber_lms_suffixes_16u_omp(c_sa.as_mut_ptr(), m as SaSint, 1)
15201        };
15202        assert_eq!(rust_name, c_name);
15203        assert_eq!(rust_sa, c_sa);
15204
15205        let mut a = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
15206        let mut rust_u = vec![999; a.len()];
15207        let mut c_u = rust_u.clone();
15208        bwt_copy_16u_omp(&mut rust_u, &a, a.len() as SaSint, 1);
15209        unsafe {
15210            probe_libsais16x64_bwt_copy_16u_omp(
15211                c_u.as_mut_ptr(),
15212                a.as_mut_ptr(),
15213                a.len() as SaSint,
15214                1,
15215            );
15216        }
15217        assert_eq!(rust_u, c_u);
15218    }
15219
15220    #[test]
15221    fn libsais16x64_gather_marked_lms_suffixes_matches_c() {
15222        let mut rust_sa = vec![0, 0, 3 | SAINT_MIN, 4, 5 | SAINT_MIN, 6, -7, 8];
15223        let mut c_sa = rust_sa.clone();
15224
15225        let rust_l = gather_marked_lms_suffixes(&mut rust_sa, 2, 8, 0, 4) as SaSint;
15226        let c_l =
15227            unsafe { probe_libsais16x64_gather_marked_lms_suffixes(c_sa.as_mut_ptr(), 2, 8, 0, 4) };
15228
15229        assert_eq!(rust_l, c_l);
15230        assert_eq!(rust_sa, c_sa);
15231    }
15232
15233    #[test]
15234    fn libsais16x64_gather_marked_lms_suffixes_omp_matches_c() {
15235        let mut rust_sa = vec![0; 10];
15236        rust_sa[4..8].copy_from_slice(&[2 | SAINT_MIN, 4, 6 | SAINT_MIN, 8]);
15237        let mut c_sa = rust_sa.clone();
15238
15239        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15240        gather_marked_lms_suffixes_omp(&mut rust_sa, 8, 4, 2, 1, &mut rust_thread_state);
15241        unsafe {
15242            probe_libsais16x64_gather_marked_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 4, 2, 1);
15243        }
15244
15245        assert_eq!(rust_sa, c_sa);
15246    }
15247
15248    #[test]
15249    fn libsais16x64_renumber_and_gather_lms_suffixes_omp_matches_c() {
15250        let mut rust_sa = vec![0; 10];
15251        rust_sa[..4].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN]);
15252        let mut c_sa = rust_sa.clone();
15253
15254        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15255        let rust_name =
15256            renumber_and_gather_lms_suffixes_omp(&mut rust_sa, 8, 4, 2, 1, &mut rust_thread_state);
15257        let c_name = unsafe {
15258            probe_libsais16x64_renumber_and_gather_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 4, 2, 1)
15259        };
15260
15261        assert_eq!(rust_name, c_name);
15262        assert_eq!(rust_sa, c_sa);
15263    }
15264
15265    #[test]
15266    fn libsais16x64_reconstruct_lms_suffixes_matches_c() {
15267        let mut rust_sa = vec![2, 0, 1, 77, 88, 10, 11, 12];
15268        let mut c_sa = rust_sa.clone();
15269
15270        reconstruct_lms_suffixes(&mut rust_sa, 8, 3, 0, 3);
15271        unsafe {
15272            probe_libsais16x64_reconstruct_lms_suffixes(c_sa.as_mut_ptr(), 8, 3, 0, 3);
15273        }
15274
15275        assert_eq!(rust_sa, c_sa);
15276
15277        let mut rust_sa = vec![2, 0, 1, 77, 88, 10, 11, 12];
15278        let mut c_sa = rust_sa.clone();
15279        reconstruct_lms_suffixes_omp(&mut rust_sa, 8, 3, 1);
15280        unsafe {
15281            probe_libsais16x64_reconstruct_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 3, 1);
15282        }
15283
15284        assert_eq!(rust_sa, c_sa);
15285    }
15286
15287    #[test]
15288    fn libsais16x64_lms_late_omp_wrappers_use_block_partition() {
15289        let m = 65_536usize;
15290        let mut scalar = vec![0; 2 * m + 8];
15291        for i in 0..m {
15292            let value = (2 * i) as SaSint;
15293            scalar[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15294        }
15295        let mut threaded = scalar.clone();
15296
15297        let mut scalar_state = alloc_thread_state(1).unwrap();
15298        let mut threaded_state = alloc_thread_state(4).unwrap();
15299        let scalar_name =
15300            renumber_lms_suffixes_16u_omp(&mut scalar, m as SaSint, 1, &mut scalar_state);
15301        let threaded_name =
15302            renumber_lms_suffixes_16u_omp(&mut threaded, m as SaSint, 4, &mut threaded_state);
15303        assert_eq!(threaded_name, scalar_name);
15304        assert_eq!(threaded, scalar);
15305
15306        let n = 131_072usize;
15307        let m = 65_536usize;
15308        let fs = 128usize;
15309        let mut scalar = vec![0; n + fs];
15310        for i in 0..(n >> 1) {
15311            let value = (i as SaSint + 1) & SAINT_MAX;
15312            scalar[m + i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15313        }
15314        let marked_count = (0..(n >> 1)).filter(|i| i % 7 == 0).count();
15315        let mut threaded = scalar.clone();
15316
15317        let mut scalar_state = alloc_thread_state(1).unwrap();
15318        let mut threaded_state = alloc_thread_state(4).unwrap();
15319        gather_marked_lms_suffixes_omp(
15320            &mut scalar,
15321            n as SaSint,
15322            m as SaSint,
15323            fs as SaSint,
15324            1,
15325            &mut scalar_state,
15326        );
15327        gather_marked_lms_suffixes_omp(
15328            &mut threaded,
15329            n as SaSint,
15330            m as SaSint,
15331            fs as SaSint,
15332            4,
15333            &mut threaded_state,
15334        );
15335        assert_eq!(
15336            &threaded[n + fs - marked_count..n + fs],
15337            &scalar[n + fs - marked_count..n + fs]
15338        );
15339
15340        let m = 65_536usize;
15341        let n = 2 * m;
15342        let mut scalar = vec![0; n];
15343        for i in 0..m {
15344            scalar[i] = i as SaSint;
15345            scalar[n - m + i] = 1_000_000 + i as SaSint;
15346        }
15347        let mut threaded = scalar.clone();
15348
15349        reconstruct_lms_suffixes_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15350        reconstruct_lms_suffixes_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15351        assert_eq!(threaded, scalar);
15352    }
15353
15354    #[test]
15355    fn libsais16x64_distinct_lms_helpers_match_c() {
15356        let m = 6;
15357        let mut rust_sa = vec![0; 18];
15358        rust_sa[..m].copy_from_slice(&[
15359            2 | SAINT_MIN,
15360            4 | SAINT_MIN,
15361            6,
15362            8 | SAINT_MIN,
15363            10,
15364            12 | SAINT_MIN,
15365        ]);
15366        let mut c_sa = rust_sa.clone();
15367        let rust_name =
15368            renumber_distinct_lms_suffixes_32s_4k(&mut rust_sa, m as SaSint, 1, 0, m as isize);
15369        let c_name = unsafe {
15370            probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k(
15371                c_sa.as_mut_ptr(),
15372                m as SaSint,
15373                1,
15374                0,
15375                m as SaSint,
15376            )
15377        };
15378        assert_eq!(rust_name, c_name);
15379        assert_eq!(rust_sa, c_sa);
15380
15381        let mut rust_sa = vec![0; 12];
15382        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
15383        let mut c_sa = rust_sa.clone();
15384        mark_distinct_lms_suffixes_32s(&mut rust_sa, m as SaSint, 0, 6);
15385        unsafe {
15386            probe_libsais16x64_mark_distinct_lms_suffixes_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
15387        }
15388        assert_eq!(rust_sa, c_sa);
15389
15390        let mut rust_sa = vec![0; 12];
15391        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
15392        let mut c_sa = rust_sa.clone();
15393        clamp_lms_suffixes_length_32s(&mut rust_sa, m as SaSint, 0, 6);
15394        unsafe {
15395            probe_libsais16x64_clamp_lms_suffixes_length_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
15396        }
15397        assert_eq!(rust_sa, c_sa);
15398    }
15399
15400    #[test]
15401    fn libsais16x64_distinct_lms_omp_wrappers_match_c() {
15402        let n = 12;
15403        let m = 6;
15404        let mut rust_sa = vec![0; 18];
15405        rust_sa[..m].copy_from_slice(&[
15406            2 | SAINT_MIN,
15407            4 | SAINT_MIN,
15408            6,
15409            8 | SAINT_MIN,
15410            10,
15411            12 | SAINT_MIN,
15412        ]);
15413        let mut c_sa = rust_sa.clone();
15414        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15415        let rust_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15416            &mut rust_sa,
15417            m as SaSint,
15418            1,
15419            &mut rust_thread_state,
15420        );
15421        let c_name = unsafe {
15422            probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k_omp(
15423                c_sa.as_mut_ptr(),
15424                m as SaSint,
15425                1,
15426            )
15427        };
15428        assert_eq!(rust_name, c_name);
15429        assert_eq!(rust_sa, c_sa);
15430
15431        let mut rust_sa = vec![0; 18];
15432        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
15433        let mut c_sa = rust_sa.clone();
15434        mark_distinct_lms_suffixes_32s_omp(&mut rust_sa, n, m as SaSint, 1);
15435        unsafe {
15436            probe_libsais16x64_mark_distinct_lms_suffixes_32s_omp(
15437                c_sa.as_mut_ptr(),
15438                n,
15439                m as SaSint,
15440                1,
15441            );
15442        }
15443        assert_eq!(rust_sa, c_sa);
15444
15445        let mut rust_sa = vec![0; 18];
15446        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
15447        let mut c_sa = rust_sa.clone();
15448        clamp_lms_suffixes_length_32s_omp(&mut rust_sa, n, m as SaSint, 1);
15449        unsafe {
15450            probe_libsais16x64_clamp_lms_suffixes_length_32s_omp(
15451                c_sa.as_mut_ptr(),
15452                n,
15453                m as SaSint,
15454                1,
15455            );
15456        }
15457        assert_eq!(rust_sa, c_sa);
15458
15459        let mut rust_sa = vec![0; 18];
15460        rust_sa[..m].copy_from_slice(&[
15461            2 | SAINT_MIN,
15462            4 | SAINT_MIN,
15463            6,
15464            8 | SAINT_MIN,
15465            10,
15466            12 | SAINT_MIN,
15467        ]);
15468        let mut c_sa = rust_sa.clone();
15469        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15470        let rust_name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
15471            &mut rust_sa,
15472            n,
15473            m as SaSint,
15474            1,
15475            &mut rust_thread_state,
15476        );
15477        let c_name = unsafe {
15478            probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
15479                c_sa.as_mut_ptr(),
15480                n,
15481                m as SaSint,
15482                1,
15483            )
15484        };
15485        assert_eq!(rust_name, c_name);
15486        assert_eq!(rust_sa, c_sa);
15487    }
15488
15489    #[test]
15490    fn libsais16x64_distinct_lms_omp_wrappers_use_block_partition() {
15491        let m = 65_536usize;
15492        let mut scalar = vec![0; 2 * m];
15493        for i in 0..m {
15494            let value = (2 * i) as SaSint;
15495            scalar[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15496        }
15497        let mut threaded = scalar.clone();
15498
15499        let mut scalar_state = alloc_thread_state(1).unwrap();
15500        let mut threaded_state = alloc_thread_state(4).unwrap();
15501        let scalar_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15502            &mut scalar,
15503            m as SaSint,
15504            1,
15505            &mut scalar_state,
15506        );
15507        let threaded_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15508            &mut threaded,
15509            m as SaSint,
15510            4,
15511            &mut threaded_state,
15512        );
15513        assert_eq!(threaded_name, scalar_name);
15514        assert_eq!(threaded, scalar);
15515
15516        let n = 131_072usize;
15517        let m = 65_536usize;
15518        let mut scalar = vec![0; n];
15519        for i in 0..(n >> 1) {
15520            scalar[m + i] = if i % 5 == 0 {
15521                SAINT_MIN | (i as SaSint + 1)
15522            } else if i % 11 == 0 {
15523                0
15524            } else {
15525                i as SaSint + 1
15526            };
15527        }
15528        let mut threaded = scalar.clone();
15529        mark_distinct_lms_suffixes_32s_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15530        mark_distinct_lms_suffixes_32s_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15531        assert_eq!(&threaded[m..n], &scalar[m..n]);
15532
15533        let mut scalar = vec![0; n];
15534        for i in 0..(n >> 1) {
15535            scalar[m + i] = if i % 5 == 0 {
15536                SAINT_MIN | (i as SaSint + 1)
15537            } else {
15538                i as SaSint + 1
15539            };
15540        }
15541        let mut threaded = scalar.clone();
15542        clamp_lms_suffixes_length_32s_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15543        clamp_lms_suffixes_length_32s_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15544        assert_eq!(&threaded[m..n], &scalar[m..n]);
15545    }
15546
15547    #[test]
15548    fn libsais16x64_unique_nonunique_lms_helpers_match_c() {
15549        let m = 4;
15550        let mut rust_t = vec![0; 12];
15551        let mut rust_sa = vec![0; 12];
15552        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15553        rust_sa[m + 1] = SAINT_MIN | 11;
15554        rust_sa[m + 2] = 22;
15555        rust_sa[m + 3] = SAINT_MIN | 33;
15556        rust_sa[m + 4] = 44;
15557        let mut c_t = rust_t.clone();
15558        let mut c_sa = rust_sa.clone();
15559
15560        let rust_f = renumber_unique_and_nonunique_lms_suffixes_32s(
15561            &mut rust_t,
15562            &mut rust_sa,
15563            m as SaSint,
15564            0,
15565            0,
15566            m as isize,
15567        );
15568        let c_f = unsafe {
15569            probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s(
15570                c_t.as_mut_ptr(),
15571                c_sa.as_mut_ptr(),
15572                m as SaSint,
15573                0,
15574                0,
15575                m as SaSint,
15576            )
15577        };
15578        assert_eq!(rust_f, c_f);
15579        assert_eq!(rust_t, c_t);
15580        assert_eq!(rust_sa, c_sa);
15581
15582        let mut rust_sa = vec![0; 10];
15583        rust_sa[m..m + 4].copy_from_slice(&[SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
15584        let mut c_sa = rust_sa.clone();
15585        let mut rust_l = m as isize;
15586        let mut rust_r = 10isize;
15587        let mut c_l = rust_l as SaSint;
15588        let mut c_r = rust_r as SaSint;
15589        compact_unique_and_nonunique_lms_suffixes_32s(
15590            &mut rust_sa,
15591            m as SaSint,
15592            &mut rust_l,
15593            &mut rust_r,
15594            0,
15595            4,
15596        );
15597        unsafe {
15598            probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s(
15599                c_sa.as_mut_ptr(),
15600                m as SaSint,
15601                &mut c_l,
15602                &mut c_r,
15603                0,
15604                4,
15605            );
15606        }
15607        assert_eq!(rust_l as SaSint, c_l);
15608        assert_eq!(rust_r as SaSint, c_r);
15609        assert_eq!(rust_sa, c_sa);
15610    }
15611
15612    #[test]
15613    fn libsais16x64_unique_nonunique_lms_omp_wrappers_match_c() {
15614        let n = 8;
15615        let m = 4;
15616        let fs = 4;
15617        let mut rust_t = vec![0; 12];
15618        let mut rust_sa = vec![0; 12];
15619        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15620        rust_sa[m + 1] = SAINT_MIN | 11;
15621        rust_sa[m + 2] = 22;
15622        rust_sa[m + 3] = SAINT_MIN | 33;
15623        rust_sa[m + 4] = 44;
15624        let mut c_t = rust_t.clone();
15625        let mut c_sa = rust_sa.clone();
15626
15627        let rust_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15628            &mut rust_t,
15629            &mut rust_sa,
15630            m as SaSint,
15631            1,
15632        );
15633        let c_f = unsafe {
15634            probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15635                c_t.as_mut_ptr(),
15636                c_sa.as_mut_ptr(),
15637                m as SaSint,
15638                1,
15639            )
15640        };
15641        assert_eq!(rust_f, c_f);
15642        assert_eq!(rust_t, c_t);
15643        assert_eq!(rust_sa, c_sa);
15644
15645        let mut rust_sa = vec![0; 12];
15646        rust_sa[m..m + 4].copy_from_slice(&[SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
15647        rust_sa[m - 2..m].copy_from_slice(&[101, 102]);
15648        let mut c_sa = rust_sa.clone();
15649        compact_unique_and_nonunique_lms_suffixes_32s_omp(&mut rust_sa, n, m as SaSint, fs, 2, 1);
15650        unsafe {
15651            probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s_omp(
15652                c_sa.as_mut_ptr(),
15653                n,
15654                m as SaSint,
15655                fs,
15656                2,
15657                1,
15658            );
15659        }
15660        assert_eq!(rust_sa, c_sa);
15661
15662        let mut rust_t = vec![0; 12];
15663        let mut rust_sa = vec![0; 12];
15664        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15665        rust_sa[m + 1] = SAINT_MIN | 11;
15666        rust_sa[m + 2] = 22;
15667        rust_sa[m + 3] = SAINT_MIN | 33;
15668        rust_sa[m + 4] = 44;
15669        let mut c_t = rust_t.clone();
15670        let mut c_sa = rust_sa.clone();
15671        let rust_f = compact_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m as SaSint, fs, 1);
15672        let c_f = unsafe {
15673            probe_libsais16x64_compact_lms_suffixes_32s_omp(
15674                c_t.as_mut_ptr(),
15675                c_sa.as_mut_ptr(),
15676                n,
15677                m as SaSint,
15678                fs,
15679                1,
15680            )
15681        };
15682        assert_eq!(rust_f, c_f);
15683        assert_eq!(rust_t, c_t);
15684        assert_eq!(rust_sa, c_sa);
15685    }
15686
15687    #[test]
15688    fn libsais16x64_unique_nonunique_lms_omp_wrappers_use_block_partition() {
15689        let m = 65_536usize;
15690        let mut scalar_t = vec![0; 2 * m];
15691        let mut scalar_sa = vec![0; 2 * m];
15692        for i in 0..m {
15693            scalar_sa[i] = (2 * i) as SaSint;
15694            scalar_sa[m + i] = if i % 5 == 0 {
15695                SAINT_MIN | (i as SaSint + 3)
15696            } else {
15697                i as SaSint + 3
15698            };
15699        }
15700        let mut threaded_t = scalar_t.clone();
15701        let mut threaded_sa = scalar_sa.clone();
15702
15703        let scalar_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15704            &mut scalar_t,
15705            &mut scalar_sa,
15706            m as SaSint,
15707            1,
15708        );
15709        let threaded_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15710            &mut threaded_t,
15711            &mut threaded_sa,
15712            m as SaSint,
15713            4,
15714        );
15715        assert_eq!(threaded_f, scalar_f);
15716        assert_eq!(threaded_t, scalar_t);
15717        assert_eq!(threaded_sa, scalar_sa);
15718
15719        let n = 131_072usize;
15720        let m = 4_096usize;
15721        let fs = 8_192usize;
15722        let mut scalar_sa = vec![0; n + fs];
15723        for i in 0..(n >> 1) {
15724            scalar_sa[m + i] = if i % 32 == 0 {
15725                SAINT_MIN | (i as SaSint + 1)
15726            } else {
15727                i as SaSint + 1
15728            };
15729        }
15730        let f = 1_024usize;
15731        for i in 0..f {
15732            scalar_sa[m - f + i] = 1_000_000 + i as SaSint;
15733        }
15734        let mut threaded_sa = scalar_sa.clone();
15735
15736        compact_unique_and_nonunique_lms_suffixes_32s_omp(
15737            &mut scalar_sa,
15738            n as SaSint,
15739            m as SaSint,
15740            fs as SaSint,
15741            f as SaSint,
15742            1,
15743        );
15744        compact_unique_and_nonunique_lms_suffixes_32s_omp(
15745            &mut threaded_sa,
15746            n as SaSint,
15747            m as SaSint,
15748            fs as SaSint,
15749            f as SaSint,
15750            4,
15751        );
15752        assert_eq!(&threaded_sa[..m], &scalar_sa[..m]);
15753        assert_eq!(
15754            &threaded_sa[n + fs - m..n + fs],
15755            &scalar_sa[n + fs - m..n + fs]
15756        );
15757    }
15758
15759    #[test]
15760    fn libsais16x64_merge_lms_helpers_match_c() {
15761        let n = 10;
15762        let m = 3;
15763        let mut rust_t = vec![0; n as usize];
15764        rust_t[1] = SAINT_MIN | 11;
15765        rust_t[3] = SAINT_MIN | 22;
15766        rust_t[7] = SAINT_MIN | 33;
15767        let mut rust_sa = vec![0; n as usize];
15768        rust_sa[6..10].copy_from_slice(&[2, 5, 8, 9]);
15769        let mut c_t = rust_t.clone();
15770        let mut c_sa = rust_sa.clone();
15771        merge_unique_lms_suffixes_32s(&mut rust_t, &mut rust_sa, n, m, 0, 0, n as isize);
15772        unsafe {
15773            probe_libsais16x64_merge_unique_lms_suffixes_32s(
15774                c_t.as_mut_ptr(),
15775                c_sa.as_mut_ptr(),
15776                n,
15777                m,
15778                0,
15779                0,
15780                n,
15781            );
15782        }
15783        assert_eq!(rust_t, c_t);
15784        assert_eq!(rust_sa, c_sa);
15785
15786        let n = 10;
15787        let m = 5;
15788        let mut rust_sa = vec![9, 0, 8, 0, 0, 7, 31, 32, 33, 34];
15789        let mut c_sa = rust_sa.clone();
15790        merge_nonunique_lms_suffixes_32s(&mut rust_sa, n, m, 2, 0, m as isize);
15791        unsafe {
15792            probe_libsais16x64_merge_nonunique_lms_suffixes_32s(c_sa.as_mut_ptr(), n, m, 2, 0, m);
15793        }
15794        assert_eq!(rust_sa, c_sa);
15795    }
15796
15797    #[test]
15798    fn libsais16x64_merge_lms_omp_wrappers_match_c() {
15799        let n = 12;
15800        let m = 4;
15801        let f = 2;
15802        let mut rust_t = vec![0; n as usize];
15803        rust_t[1] = SAINT_MIN | 11;
15804        rust_t[5] = SAINT_MIN | 22;
15805        let mut rust_sa = vec![0; n as usize];
15806        rust_sa[1] = 41;
15807        rust_sa[7..12].copy_from_slice(&[2, 6, 21, 22, 23]);
15808        let mut c_t = rust_t.clone();
15809        let mut c_sa = rust_sa.clone();
15810        merge_unique_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m, 1);
15811        unsafe {
15812            probe_libsais16x64_merge_unique_lms_suffixes_32s_omp(
15813                c_t.as_mut_ptr(),
15814                c_sa.as_mut_ptr(),
15815                n,
15816                m,
15817                1,
15818            );
15819        }
15820        assert_eq!(rust_t, c_t);
15821        assert_eq!(rust_sa, c_sa);
15822
15823        let mut rust_sa = vec![0, 41, 1, 0, 55, 66, 77, 2, 6, 21, 22, 23];
15824        let mut c_sa = rust_sa.clone();
15825        merge_nonunique_lms_suffixes_32s_omp(&mut rust_sa, n, m, f, 1);
15826        unsafe {
15827            probe_libsais16x64_merge_nonunique_lms_suffixes_32s_omp(c_sa.as_mut_ptr(), n, m, f, 1);
15828        }
15829        assert_eq!(rust_sa, c_sa);
15830
15831        let mut rust_t = vec![0; n as usize];
15832        rust_t[1] = SAINT_MIN | 11;
15833        rust_t[5] = SAINT_MIN | 22;
15834        let mut rust_sa = vec![0; n as usize];
15835        rust_sa[1] = 41;
15836        rust_sa[7..12].copy_from_slice(&[2, 6, 21, 22, 23]);
15837        let mut c_t = rust_t.clone();
15838        let mut c_sa = rust_sa.clone();
15839        merge_compacted_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m, f, 1);
15840        unsafe {
15841            probe_libsais16x64_merge_compacted_lms_suffixes_32s_omp(
15842                c_t.as_mut_ptr(),
15843                c_sa.as_mut_ptr(),
15844                n,
15845                m,
15846                f,
15847                1,
15848            );
15849        }
15850        assert_eq!(rust_t, c_t);
15851        assert_eq!(rust_sa, c_sa);
15852    }
15853
15854    #[test]
15855    fn libsais16x64_merge_lms_omp_wrappers_use_block_partition() {
15856        let n = 65_536usize;
15857        let m = 10_000usize;
15858        let mut scalar_t = vec![0; n];
15859        for i in (0..n).step_by(17) {
15860            scalar_t[i] = SAINT_MIN | (i as SaSint + 1);
15861        }
15862        let unique_count = scalar_t.iter().filter(|&&value| value < 0).count();
15863        let mut scalar_sa = vec![0; n];
15864        let source = n - m - 1;
15865        for i in 0..=unique_count {
15866            scalar_sa[source + i] = ((i * 13 + 7) % n) as SaSint;
15867        }
15868        let mut threaded_t = scalar_t.clone();
15869        let mut threaded_sa = scalar_sa.clone();
15870
15871        merge_unique_lms_suffixes_32s_omp(
15872            &mut scalar_t,
15873            &mut scalar_sa,
15874            n as SaSint,
15875            m as SaSint,
15876            1,
15877        );
15878        merge_unique_lms_suffixes_32s_omp(
15879            &mut threaded_t,
15880            &mut threaded_sa,
15881            n as SaSint,
15882            m as SaSint,
15883            4,
15884        );
15885        assert_eq!(threaded_t, scalar_t);
15886        assert_eq!(threaded_sa, scalar_sa);
15887
15888        let n = 131_072usize;
15889        let m = 65_536usize;
15890        let f = 100usize;
15891        let mut scalar_sa = vec![1; n];
15892        for i in (0..m).step_by(9) {
15893            scalar_sa[i] = 0;
15894        }
15895        let zero_count = scalar_sa[..m].iter().filter(|&&value| value == 0).count();
15896        let source = n - m - 1 + f;
15897        for i in 0..=zero_count {
15898            scalar_sa[source + i] = 2_000_000 + i as SaSint;
15899        }
15900        let mut threaded_sa = scalar_sa.clone();
15901
15902        merge_nonunique_lms_suffixes_32s_omp(
15903            &mut scalar_sa,
15904            n as SaSint,
15905            m as SaSint,
15906            f as SaSint,
15907            1,
15908        );
15909        merge_nonunique_lms_suffixes_32s_omp(
15910            &mut threaded_sa,
15911            n as SaSint,
15912            m as SaSint,
15913            f as SaSint,
15914            4,
15915        );
15916        assert_eq!(threaded_sa, scalar_sa);
15917    }
15918
15919    #[test]
15920    fn libsais16x64_radix_sort_lms_suffixes_32s_match_c() {
15921        let t = vec![0, 1, 2, 3, 1, 2, 3, 0];
15922        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15923        let mut c_sa = rust_sa.clone();
15924        let mut rust_bucket = vec![0, 6, 7, 8];
15925        let mut c_bucket = rust_bucket.clone();
15926        radix_sort_lms_suffixes_32s_6k(&t, &mut rust_sa, &mut rust_bucket, 5, 3);
15927        unsafe {
15928            probe_libsais16x64_radix_sort_lms_suffixes_32s_6k(
15929                t.as_ptr(),
15930                c_sa.as_mut_ptr(),
15931                c_bucket.as_mut_ptr(),
15932                5,
15933                3,
15934            );
15935        }
15936        assert_eq!(rust_sa, c_sa);
15937        assert_eq!(rust_bucket, c_bucket);
15938
15939        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15940        let mut c_sa = rust_sa.clone();
15941        let mut rust_bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
15942        let mut c_bucket = rust_bucket.clone();
15943        radix_sort_lms_suffixes_32s_2k(&t, &mut rust_sa, &mut rust_bucket, 5, 3);
15944        unsafe {
15945            probe_libsais16x64_radix_sort_lms_suffixes_32s_2k(
15946                t.as_ptr(),
15947                c_sa.as_mut_ptr(),
15948                c_bucket.as_mut_ptr(),
15949                5,
15950                3,
15951            );
15952        }
15953        assert_eq!(rust_sa, c_sa);
15954        assert_eq!(rust_bucket, c_bucket);
15955
15956        let mut cache = vec![ThreadCache::default(); 8];
15957        let sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15958        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
15959        assert_eq!(cache[5].index, 1);
15960        assert_eq!(cache[5].symbol, 1);
15961        assert_eq!(cache[6].index, 2);
15962        assert_eq!(cache[6].symbol, 2);
15963        assert_eq!(cache[7].index, 3);
15964        assert_eq!(cache[7].symbol, 3);
15965
15966        let mut bucket = vec![0, 6, 7, 8];
15967        radix_sort_lms_suffixes_32s_6k_block_sort(&mut bucket, &mut cache, 5, 3);
15968        assert_eq!(bucket, vec![0, 5, 6, 7]);
15969        assert_eq!(cache[5].symbol, 5);
15970        assert_eq!(cache[6].symbol, 6);
15971        assert_eq!(cache[7].symbol, 7);
15972
15973        let mut cache = vec![ThreadCache::default(); 8];
15974        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
15975        let mut bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
15976        radix_sort_lms_suffixes_32s_2k_block_sort(&mut bucket, &mut cache, 5, 3);
15977        assert_eq!(bucket, vec![0, 0, 5, 0, 6, 0, 7, 0]);
15978        assert_eq!(cache[5].symbol, 5);
15979        assert_eq!(cache[6].symbol, 6);
15980        assert_eq!(cache[7].symbol, 7);
15981
15982        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15983        let mut c_sa = rust_sa.clone();
15984        let mut rust_bucket = vec![0, 6, 7, 8];
15985        let mut c_bucket = rust_bucket.clone();
15986        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut rust_sa, 8, 4, &mut rust_bucket, 1);
15987        unsafe {
15988            probe_libsais16x64_radix_sort_lms_suffixes_32s_6k_omp(
15989                t.as_ptr(),
15990                c_sa.as_mut_ptr(),
15991                8,
15992                4,
15993                c_bucket.as_mut_ptr(),
15994                1,
15995            );
15996        }
15997        assert_eq!(rust_sa, c_sa);
15998        assert_eq!(rust_bucket, c_bucket);
15999
16000        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
16001        let mut c_sa = rust_sa.clone();
16002        let mut rust_bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
16003        let mut c_bucket = rust_bucket.clone();
16004        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, 8, 4, &mut rust_bucket, 1);
16005        unsafe {
16006            probe_libsais16x64_radix_sort_lms_suffixes_32s_2k_omp(
16007                t.as_ptr(),
16008                c_sa.as_mut_ptr(),
16009                8,
16010                4,
16011                c_bucket.as_mut_ptr(),
16012                1,
16013            );
16014        }
16015        assert_eq!(rust_sa, c_sa);
16016        assert_eq!(rust_bucket, c_bucket);
16017
16018        let t = vec![2, 1, 3, 1, 0];
16019        let mut rust_sa = vec![0; t.len()];
16020        let mut c_sa = rust_sa.clone();
16021        let mut rust_bucket = vec![0, 2, 4, 5];
16022        let mut c_bucket = rust_bucket.clone();
16023        let rust_m =
16024            radix_sort_lms_suffixes_32s_1k(&t, &mut rust_sa, t.len() as SaSint, &mut rust_bucket);
16025        let c_m = unsafe {
16026            probe_libsais16x64_radix_sort_lms_suffixes_32s_1k(
16027                t.as_ptr(),
16028                c_sa.as_mut_ptr(),
16029                t.len() as SaSint,
16030                c_bucket.as_mut_ptr(),
16031            )
16032        };
16033        assert_eq!(rust_m, c_m);
16034        assert_eq!(rust_sa, c_sa);
16035        assert_eq!(rust_bucket, c_bucket);
16036    }
16037
16038    #[test]
16039    fn libsais16x64_radix_sort_set_markers_32s_match_c() {
16040        let mut rust_sa = vec![0; 8];
16041        let mut c_sa = rust_sa.clone();
16042        let mut induction_bucket = vec![1, 3, 5, 7];
16043        radix_sort_set_markers_32s_6k(&mut rust_sa, &induction_bucket, 0, 4);
16044        unsafe {
16045            probe_libsais16x64_radix_sort_set_markers_32s_6k(
16046                c_sa.as_mut_ptr(),
16047                induction_bucket.as_mut_ptr(),
16048                0,
16049                4,
16050            );
16051        }
16052        assert_eq!(rust_sa, c_sa);
16053
16054        let mut rust_sa = vec![0; 8];
16055        let mut c_sa = rust_sa.clone();
16056        radix_sort_set_markers_32s_6k_omp(&mut rust_sa, 5, &induction_bucket, 1);
16057        unsafe {
16058            probe_libsais16x64_radix_sort_set_markers_32s_6k_omp(
16059                c_sa.as_mut_ptr(),
16060                5,
16061                induction_bucket.as_mut_ptr(),
16062                1,
16063            );
16064        }
16065        assert_eq!(rust_sa, c_sa);
16066
16067        let mut rust_sa = vec![0; 8];
16068        let mut c_sa = rust_sa.clone();
16069        let mut induction_bucket = vec![1, 0, 3, 0, 5, 0, 7, 0];
16070        radix_sort_set_markers_32s_4k(&mut rust_sa, &induction_bucket, 0, 4);
16071        unsafe {
16072            probe_libsais16x64_radix_sort_set_markers_32s_4k(
16073                c_sa.as_mut_ptr(),
16074                induction_bucket.as_mut_ptr(),
16075                0,
16076                4,
16077            );
16078        }
16079        assert_eq!(rust_sa, c_sa);
16080
16081        let mut rust_sa = vec![0; 8];
16082        let mut c_sa = rust_sa.clone();
16083        radix_sort_set_markers_32s_4k_omp(&mut rust_sa, 5, &induction_bucket, 1);
16084        unsafe {
16085            probe_libsais16x64_radix_sort_set_markers_32s_4k_omp(
16086                c_sa.as_mut_ptr(),
16087                5,
16088                induction_bucket.as_mut_ptr(),
16089                1,
16090            );
16091        }
16092        assert_eq!(rust_sa, c_sa);
16093    }
16094
16095    #[test]
16096    fn libsais16x64_radix_sort_set_markers_32s_omp_partitions_large_inputs() {
16097        let k = 65_600usize;
16098        let induction_bucket_6k: Vec<SaSint> = (0..k).map(|i| i as SaSint).collect();
16099        let mut single = vec![0; k];
16100        let mut threaded = vec![0; k];
16101        radix_sort_set_markers_32s_6k_omp(&mut single, k as SaSint, &induction_bucket_6k, 1);
16102        radix_sort_set_markers_32s_6k_omp(&mut threaded, k as SaSint, &induction_bucket_6k, 4);
16103        assert_eq!(threaded, single);
16104
16105        let mut induction_bucket_4k = vec![0; 2 * k];
16106        for i in 0..k {
16107            induction_bucket_4k[buckets_index2(i, 0)] = i as SaSint;
16108        }
16109        let mut single = vec![0; k];
16110        let mut threaded = vec![0; k];
16111        radix_sort_set_markers_32s_4k_omp(&mut single, k as SaSint, &induction_bucket_4k, 1);
16112        radix_sort_set_markers_32s_4k_omp(&mut threaded, k as SaSint, &induction_bucket_4k, 4);
16113        assert_eq!(threaded, single);
16114    }
16115
16116    #[test]
16117    fn libsais16x64_partial_sorting_32s_helpers_match_c() {
16118        let k = 3;
16119        let mut rust_sa = vec![0, SAINT_MIN, 2, SAINT_MIN, 4, SAINT_MIN];
16120        let mut c_sa = rust_sa.clone();
16121        let mut buckets = vec![0; 6 * k as usize];
16122        buckets[buckets_index4(1, 0)] = 3;
16123        buckets[buckets_index4(2, 0)] = 6;
16124        buckets[4 * k as usize + buckets_index2(0, 0)] = 0;
16125        buckets[4 * k as usize + buckets_index2(1, 0)] = 1;
16126        partial_sorting_shift_markers_32s_6k_omp(&mut rust_sa, k, &buckets, 1);
16127        unsafe {
16128            probe_libsais16x64_partial_sorting_shift_markers_32s_6k_omp(
16129                c_sa.as_mut_ptr(),
16130                k,
16131                buckets.as_ptr(),
16132                1,
16133            );
16134        }
16135        assert_eq!(rust_sa, c_sa);
16136
16137        let mut rust_sa = vec![
16138            1 | SUFFIX_GROUP_MARKER,
16139            2,
16140            3 | SUFFIX_GROUP_MARKER,
16141            4 | SUFFIX_GROUP_MARKER,
16142            5,
16143            6,
16144        ];
16145        let mut c_sa = rust_sa.clone();
16146        partial_sorting_shift_markers_32s_4k(&mut rust_sa, 6);
16147        unsafe { probe_libsais16x64_partial_sorting_shift_markers_32s_4k(c_sa.as_mut_ptr(), 6) };
16148        assert_eq!(rust_sa, c_sa);
16149
16150        let mut rust_buckets = vec![0; 6 * k as usize];
16151        for (i, value) in rust_buckets[4 * k as usize..].iter_mut().enumerate() {
16152            *value = 100 + i as SaSint;
16153        }
16154        let mut c_buckets = rust_buckets.clone();
16155        partial_sorting_shift_buckets_32s_6k(k, &mut rust_buckets);
16156        unsafe {
16157            probe_libsais16x64_partial_sorting_shift_buckets_32s_6k(k, c_buckets.as_mut_ptr())
16158        };
16159        assert_eq!(rust_buckets, c_buckets);
16160
16161        let mut rust_sa = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
16162        let mut c_sa = rust_sa.clone();
16163        let rust_l = partial_sorting_gather_lms_suffixes_32s_4k(&mut rust_sa, 0, 4);
16164        let c_l = unsafe {
16165            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k(c_sa.as_mut_ptr(), 0, 4)
16166        };
16167        assert_eq!(rust_l, c_l);
16168        assert_eq!(rust_sa, c_sa);
16169
16170        let mut rust_sa = vec![1, -3, 5, -7];
16171        let mut c_sa = rust_sa.clone();
16172        let rust_l = partial_sorting_gather_lms_suffixes_32s_1k(&mut rust_sa, 0, 4);
16173        let c_l = unsafe {
16174            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k(c_sa.as_mut_ptr(), 0, 4)
16175        };
16176        assert_eq!(rust_l, c_l);
16177        assert_eq!(rust_sa, c_sa);
16178
16179        let mut rust_state = alloc_thread_state(1).unwrap();
16180        let mut rust_sa = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
16181        let mut c_sa = rust_sa.clone();
16182        partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut rust_sa, 4, 1, &mut rust_state);
16183        unsafe {
16184            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k_omp(
16185                c_sa.as_mut_ptr(),
16186                4,
16187                1,
16188            );
16189        }
16190        assert_eq!(rust_sa, c_sa);
16191
16192        let mut rust_state = alloc_thread_state(1).unwrap();
16193        let mut rust_sa = vec![1, -3, 5, -7];
16194        let mut c_sa = rust_sa.clone();
16195        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut rust_sa, 4, 1, &mut rust_state);
16196        unsafe {
16197            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k_omp(
16198                c_sa.as_mut_ptr(),
16199                4,
16200                1,
16201            );
16202        }
16203        assert_eq!(rust_sa, c_sa);
16204    }
16205
16206    #[test]
16207    fn libsais16x64_partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
16208        let n = 65_536usize;
16209        let mut base_4k = vec![0; n];
16210        let mut base_1k = vec![0; n];
16211        for i in 0..n {
16212            let value = (i as SaSint + 1) & SAINT_MAX;
16213            base_4k[i] = if i % 7 == 0 {
16214                value | SAINT_MIN | SUFFIX_GROUP_MARKER
16215            } else if i % 11 == 0 {
16216                value | SUFFIX_GROUP_MARKER
16217            } else {
16218                value
16219            };
16220            base_1k[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
16221        }
16222        let lms_count = base_1k.iter().filter(|&&v| v < 0).count();
16223
16224        let mut scalar = base_4k.clone();
16225        let mut threaded = base_4k;
16226        let mut scalar_state = alloc_thread_state(1).unwrap();
16227        let mut threaded_state = alloc_thread_state(4).unwrap();
16228        partial_sorting_gather_lms_suffixes_32s_4k_omp(
16229            &mut scalar,
16230            n as SaSint,
16231            1,
16232            &mut scalar_state,
16233        );
16234        partial_sorting_gather_lms_suffixes_32s_4k_omp(
16235            &mut threaded,
16236            n as SaSint,
16237            4,
16238            &mut threaded_state,
16239        );
16240        assert_eq!(&threaded[..lms_count], &scalar[..lms_count]);
16241
16242        let mut scalar = base_1k.clone();
16243        let mut threaded = base_1k;
16244        partial_sorting_gather_lms_suffixes_32s_1k_omp(
16245            &mut scalar,
16246            n as SaSint,
16247            1,
16248            &mut scalar_state,
16249        );
16250        partial_sorting_gather_lms_suffixes_32s_1k_omp(
16251            &mut threaded,
16252            n as SaSint,
16253            4,
16254            &mut threaded_state,
16255        );
16256        assert_eq!(&threaded[..lms_count], &scalar[..lms_count]);
16257    }
16258
16259    #[test]
16260    fn libsais16x64_partial_sorting_32s_block_helpers_behave_like_upstream_shapes() {
16261        let t = vec![0, 1, 2, 1, 0];
16262        let k = 3;
16263
16264        let mut sa = vec![0, 4 | SAINT_MIN, 0];
16265        let mut cache = vec![ThreadCache::default(); sa.len()];
16266        partial_sorting_scan_right_to_left_32s_6k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16267        assert_eq!(cache[1].index, 4 | SAINT_MIN);
16268        assert_eq!(cache[1].symbol, buckets_index4(1, 1) as SaSint);
16269
16270        let mut sa = vec![0, 4 | SUFFIX_GROUP_MARKER, 0];
16271        let mut cache = vec![ThreadCache::default(); sa.len()];
16272        partial_sorting_scan_right_to_left_32s_4k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16273        assert_eq!(sa[1], 0);
16274        assert_eq!(cache[1].index, 4 | SUFFIX_GROUP_MARKER);
16275        assert_eq!(cache[1].symbol, buckets_index2(1, 1) as SaSint);
16276
16277        let mut sa = vec![0, 4, 0];
16278        let mut cache = vec![ThreadCache::default(); sa.len()];
16279        partial_sorting_scan_right_to_left_32s_1k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16280        assert_eq!(sa[1], 0);
16281        assert_eq!(cache[1].index, 3 | SAINT_MIN);
16282        assert_eq!(cache[1].symbol, 1);
16283
16284        let mut sa = vec![4 | SAINT_MIN, 0, 0];
16285        let mut cache = vec![ThreadCache::default(); sa.len()];
16286        partial_sorting_scan_left_to_right_32s_6k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16287        assert_eq!(cache[0].index, 4 | SAINT_MIN);
16288        assert_eq!(cache[0].symbol, buckets_index4(1, 1) as SaSint);
16289
16290        let mut sa = vec![4 | SUFFIX_GROUP_MARKER, 0, 0];
16291        let mut cache = vec![ThreadCache::default(); sa.len()];
16292        partial_sorting_scan_left_to_right_32s_4k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16293        assert_eq!(sa[0], 0);
16294        assert_eq!(cache[0].index, 4 | SUFFIX_GROUP_MARKER);
16295        assert_eq!(cache[0].symbol, buckets_index2(1, 0) as SaSint);
16296
16297        let mut sa = vec![4, 0, 0];
16298        let mut cache = vec![ThreadCache::default(); sa.len()];
16299        partial_sorting_scan_left_to_right_32s_1k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16300        assert_eq!(sa[0], 0);
16301        assert_eq!(cache[0].index, 3);
16302        assert_eq!(cache[0].symbol, 1);
16303
16304        let mut cache = vec![ThreadCache::default(); 3];
16305        cache[1].index = 4 | SAINT_MIN;
16306        cache[1].symbol = buckets_index4(1, 1) as SaSint;
16307        let mut buckets = vec![0; 4 * k];
16308        buckets[buckets_index4(1, 1)] = 2;
16309        let d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
16310            &t,
16311            &mut buckets,
16312            0,
16313            &mut cache,
16314            1,
16315            1,
16316        );
16317        assert_eq!(d, 1);
16318        assert_eq!(cache[1].index, 3 | SAINT_MIN);
16319        assert_eq!(buckets[buckets_index4(1, 1)], 1);
16320        assert_eq!(buckets[buckets_index4(1, 1) + 2], 1);
16321
16322        let mut cache = vec![ThreadCache::default(); 3];
16323        cache[0].index = 4 | SAINT_MIN;
16324        cache[0].symbol = buckets_index4(1, 1) as SaSint;
16325        let mut buckets = vec![0; 4 * k];
16326        buckets[buckets_index4(1, 1)] = 1;
16327        let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
16328            &t,
16329            &mut buckets,
16330            0,
16331            &mut cache,
16332            0,
16333            1,
16334        );
16335        assert_eq!(d, 1);
16336        assert_eq!(cache[0].index, 3 | SAINT_MIN);
16337        assert_eq!(buckets[buckets_index4(1, 1)], 2);
16338        assert_eq!(buckets[buckets_index4(1, 1) + 2], 1);
16339
16340        let mut cache = vec![ThreadCache::default(); 3];
16341        cache[1].index = 4 | SUFFIX_GROUP_MARKER;
16342        cache[1].symbol = buckets_index2(1, 1) as SaSint;
16343        let mut buckets = vec![0; 4 * k];
16344        buckets[3 * k + 1] = 2;
16345        let d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
16346            &t,
16347            k as SaSint,
16348            &mut buckets,
16349            0,
16350            &mut cache,
16351            1,
16352            1,
16353        );
16354        assert_eq!(d, 1);
16355        assert_eq!(cache[1].symbol, 1);
16356        assert_eq!(buckets[3 * k + 1], 1);
16357
16358        let mut cache = vec![ThreadCache::default(); 3];
16359        cache[0].index = 4 | SUFFIX_GROUP_MARKER;
16360        cache[0].symbol = buckets_index2(1, 0) as SaSint;
16361        let mut buckets = vec![0; 4 * k];
16362        buckets[2 * k + 1] = 1;
16363        let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
16364            &t,
16365            k as SaSint,
16366            &mut buckets,
16367            0,
16368            &mut cache,
16369            0,
16370            1,
16371        );
16372        assert_eq!(d, 1);
16373        assert_eq!(cache[0].symbol, 1);
16374        assert_eq!(buckets[2 * k + 1], 2);
16375
16376        let mut cache = vec![ThreadCache::default(); 3];
16377        cache[1].index = 4;
16378        cache[1].symbol = 1;
16379        let mut buckets = vec![0; k];
16380        buckets[1] = 2;
16381        partial_sorting_scan_right_to_left_32s_1k_block_sort(&t, &mut buckets, &mut cache, 1, 1);
16382        assert_eq!(cache[1].symbol, 1);
16383        assert_eq!(buckets[1], 1);
16384
16385        let mut cache = vec![ThreadCache::default(); 3];
16386        cache[0].index = 4;
16387        cache[0].symbol = 1;
16388        let mut buckets = vec![0; k];
16389        buckets[1] = 1;
16390        partial_sorting_scan_left_to_right_32s_1k_block_sort(&t, &mut buckets, &mut cache, 0, 1);
16391        assert_eq!(cache[0].symbol, 1);
16392        assert_eq!(buckets[1], 2);
16393    }
16394
16395    #[test]
16396    fn libsais16x64_partial_sorting_scan_32s_match_c() {
16397        let t = vec![0, 1, 2, 1, 3, 0];
16398        let k = 4;
16399
16400        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16401        let mut c_sa = rust_sa.clone();
16402        let mut rust_buckets = vec![0; 6 * k as usize];
16403        rust_buckets[buckets_index4(2, 0)] = 4;
16404        rust_buckets[buckets_index4(1, 1)] = 5;
16405        let mut c_buckets = rust_buckets.clone();
16406        let rust_d =
16407            partial_sorting_scan_left_to_right_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16408        let c_d = unsafe {
16409            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k(
16410                t.as_ptr(),
16411                c_sa.as_mut_ptr(),
16412                c_buckets.as_mut_ptr(),
16413                0,
16414                0,
16415                2,
16416            )
16417        };
16418        assert_eq!(rust_d, c_d);
16419        assert_eq!(rust_sa, c_sa);
16420        assert_eq!(rust_buckets, c_buckets);
16421
16422        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16423        let mut c_sa = rust_sa.clone();
16424        let mut rust_buckets = vec![0; 4 * k as usize];
16425        rust_buckets[2 * k as usize + 2] = 4;
16426        rust_buckets[2 * k as usize + 1] = 5;
16427        let mut c_buckets = rust_buckets.clone();
16428        let rust_d = partial_sorting_scan_left_to_right_32s_4k(
16429            &t,
16430            &mut rust_sa,
16431            k,
16432            &mut rust_buckets,
16433            0,
16434            0,
16435            2,
16436        );
16437        let c_d = unsafe {
16438            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k(
16439                t.as_ptr(),
16440                c_sa.as_mut_ptr(),
16441                k,
16442                c_buckets.as_mut_ptr(),
16443                0,
16444                0,
16445                2,
16446            )
16447        };
16448        assert_eq!(rust_d, c_d);
16449        assert_eq!(rust_sa, c_sa);
16450        assert_eq!(rust_buckets, c_buckets);
16451
16452        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16453        let mut c_sa = rust_sa.clone();
16454        let mut rust_buckets = vec![0, 5, 4, 0];
16455        let mut c_buckets = rust_buckets.clone();
16456        partial_sorting_scan_left_to_right_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16457        unsafe {
16458            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k(
16459                t.as_ptr(),
16460                c_sa.as_mut_ptr(),
16461                c_buckets.as_mut_ptr(),
16462                0,
16463                2,
16464            );
16465        }
16466        assert_eq!(rust_sa, c_sa);
16467        assert_eq!(rust_buckets, c_buckets);
16468
16469        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16470        let mut c_sa = rust_sa.clone();
16471        let mut rust_buckets = vec![0; 6 * k as usize];
16472        rust_buckets[buckets_index4(2, 0)] = 7;
16473        rust_buckets[buckets_index4(1, 1)] = 6;
16474        let mut c_buckets = rust_buckets.clone();
16475        let rust_d =
16476            partial_sorting_scan_right_to_left_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16477        let c_d = unsafe {
16478            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k(
16479                t.as_ptr(),
16480                c_sa.as_mut_ptr(),
16481                c_buckets.as_mut_ptr(),
16482                0,
16483                0,
16484                2,
16485            )
16486        };
16487        assert_eq!(rust_d, c_d);
16488        assert_eq!(rust_sa, c_sa);
16489        assert_eq!(rust_buckets, c_buckets);
16490
16491        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16492        let mut c_sa = rust_sa.clone();
16493        let mut rust_buckets = vec![0; 4 * k as usize];
16494        rust_buckets[3 * k as usize + 2] = 7;
16495        rust_buckets[3 * k as usize + 1] = 6;
16496        let mut c_buckets = rust_buckets.clone();
16497        let rust_d = partial_sorting_scan_right_to_left_32s_4k(
16498            &t,
16499            &mut rust_sa,
16500            k,
16501            &mut rust_buckets,
16502            0,
16503            0,
16504            2,
16505        );
16506        let c_d = unsafe {
16507            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k(
16508                t.as_ptr(),
16509                c_sa.as_mut_ptr(),
16510                k,
16511                c_buckets.as_mut_ptr(),
16512                0,
16513                0,
16514                2,
16515            )
16516        };
16517        assert_eq!(rust_d, c_d);
16518        assert_eq!(rust_sa, c_sa);
16519        assert_eq!(rust_buckets, c_buckets);
16520
16521        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16522        let mut c_sa = rust_sa.clone();
16523        let mut rust_buckets = vec![0, 6, 7, 0];
16524        let mut c_buckets = rust_buckets.clone();
16525        partial_sorting_scan_right_to_left_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16526        unsafe {
16527            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k(
16528                t.as_ptr(),
16529                c_sa.as_mut_ptr(),
16530                c_buckets.as_mut_ptr(),
16531                0,
16532                2,
16533            );
16534        }
16535        assert_eq!(rust_sa, c_sa);
16536        assert_eq!(rust_buckets, c_buckets);
16537
16538        let mut state = alloc_thread_state(1).unwrap();
16539        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16540        let mut c_sa = rust_sa.clone();
16541        let mut rust_buckets = vec![0; 6 * k as usize];
16542        rust_buckets[buckets_index4(2, 0)] = 4;
16543        rust_buckets[buckets_index4(1, 1)] = 5;
16544        rust_buckets[buckets_index4(3, 0)] = 6;
16545        let mut c_buckets = rust_buckets.clone();
16546        let rust_d = partial_sorting_scan_left_to_right_32s_6k_omp(
16547            &t,
16548            &mut rust_sa,
16549            5,
16550            &mut rust_buckets,
16551            2,
16552            0,
16553            1,
16554            &mut state,
16555        );
16556        let c_d = unsafe {
16557            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k_omp(
16558                t.as_ptr(),
16559                c_sa.as_mut_ptr(),
16560                5,
16561                c_buckets.as_mut_ptr(),
16562                2,
16563                0,
16564                1,
16565            )
16566        };
16567        assert_eq!(rust_d, c_d);
16568        assert_eq!(rust_sa, c_sa);
16569        assert_eq!(rust_buckets, c_buckets);
16570
16571        let mut state = alloc_thread_state(1).unwrap();
16572        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16573        let mut c_sa = rust_sa.clone();
16574        let mut rust_buckets = vec![0; 4 * k as usize];
16575        rust_buckets[2 * k as usize + 2] = 4;
16576        rust_buckets[2 * k as usize + 1] = 5;
16577        rust_buckets[2 * k as usize + 3] = 6;
16578        let mut c_buckets = rust_buckets.clone();
16579        let rust_d = partial_sorting_scan_left_to_right_32s_4k_omp(
16580            &t,
16581            &mut rust_sa,
16582            5,
16583            k,
16584            &mut rust_buckets,
16585            0,
16586            1,
16587            &mut state,
16588        );
16589        let c_d = unsafe {
16590            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k_omp(
16591                t.as_ptr(),
16592                c_sa.as_mut_ptr(),
16593                5,
16594                k,
16595                c_buckets.as_mut_ptr(),
16596                0,
16597                1,
16598            )
16599        };
16600        assert_eq!(rust_d, c_d);
16601        assert_eq!(rust_sa, c_sa);
16602        assert_eq!(rust_buckets, c_buckets);
16603
16604        let mut state = alloc_thread_state(1).unwrap();
16605        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16606        let mut c_sa = rust_sa.clone();
16607        let mut rust_buckets = vec![0, 5, 4, 6];
16608        let mut c_buckets = rust_buckets.clone();
16609        partial_sorting_scan_left_to_right_32s_1k_omp(
16610            &t,
16611            &mut rust_sa,
16612            5,
16613            &mut rust_buckets,
16614            1,
16615            &mut state,
16616        );
16617        unsafe {
16618            probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k_omp(
16619                t.as_ptr(),
16620                c_sa.as_mut_ptr(),
16621                5,
16622                c_buckets.as_mut_ptr(),
16623                1,
16624            );
16625        }
16626        assert_eq!(rust_sa, c_sa);
16627        assert_eq!(rust_buckets, c_buckets);
16628
16629        let mut state = alloc_thread_state(1).unwrap();
16630        let mut rust_sa = vec![0, 0, 3, 4, 9, 9, 9, 9];
16631        let mut c_sa = rust_sa.clone();
16632        let mut rust_buckets = vec![0; 6 * k as usize];
16633        rust_buckets[buckets_index4(2, 0)] = 7;
16634        rust_buckets[buckets_index4(1, 1)] = 6;
16635        let mut c_buckets = rust_buckets.clone();
16636        let rust_d = partial_sorting_scan_right_to_left_32s_6k_omp(
16637            &t,
16638            &mut rust_sa,
16639            5,
16640            &mut rust_buckets,
16641            1,
16642            1,
16643            0,
16644            1,
16645            &mut state,
16646        );
16647        let c_d = unsafe {
16648            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k_omp(
16649                t.as_ptr(),
16650                c_sa.as_mut_ptr(),
16651                5,
16652                c_buckets.as_mut_ptr(),
16653                1,
16654                1,
16655                0,
16656                1,
16657            )
16658        };
16659        assert_eq!(rust_d, c_d);
16660        assert_eq!(rust_sa, c_sa);
16661        assert_eq!(rust_buckets, c_buckets);
16662
16663        let mut state = alloc_thread_state(1).unwrap();
16664        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16665        let mut c_sa = rust_sa.clone();
16666        let mut rust_buckets = vec![0; 4 * k as usize];
16667        rust_buckets[3 * k as usize + 2] = 7;
16668        rust_buckets[3 * k as usize + 1] = 6;
16669        let mut c_buckets = rust_buckets.clone();
16670        let rust_d = partial_sorting_scan_right_to_left_32s_4k_omp(
16671            &t,
16672            &mut rust_sa,
16673            2,
16674            k,
16675            &mut rust_buckets,
16676            0,
16677            1,
16678            &mut state,
16679        );
16680        let c_d = unsafe {
16681            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k_omp(
16682                t.as_ptr(),
16683                c_sa.as_mut_ptr(),
16684                2,
16685                k,
16686                c_buckets.as_mut_ptr(),
16687                0,
16688                1,
16689            )
16690        };
16691        assert_eq!(rust_d, c_d);
16692        assert_eq!(rust_sa, c_sa);
16693        assert_eq!(rust_buckets, c_buckets);
16694
16695        let mut state = alloc_thread_state(1).unwrap();
16696        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16697        let mut c_sa = rust_sa.clone();
16698        let mut rust_buckets = vec![0, 6, 7, 0];
16699        let mut c_buckets = rust_buckets.clone();
16700        partial_sorting_scan_right_to_left_32s_1k_omp(
16701            &t,
16702            &mut rust_sa,
16703            2,
16704            &mut rust_buckets,
16705            1,
16706            &mut state,
16707        );
16708        unsafe {
16709            probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k_omp(
16710                t.as_ptr(),
16711                c_sa.as_mut_ptr(),
16712                2,
16713                c_buckets.as_mut_ptr(),
16714                1,
16715            );
16716        }
16717        assert_eq!(rust_sa, c_sa);
16718        assert_eq!(rust_buckets, c_buckets);
16719    }
16720
16721    #[test]
16722    fn libsais16x64_place_lms_suffixes_histogram_32s_match_c() {
16723        let n = 12;
16724        let k = 4;
16725        let m = 4;
16726        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16727        let mut c_sa = rust_sa.clone();
16728        let mut buckets = vec![0; 2 * k as usize];
16729        buckets[buckets_index2(1, 0)] = 7;
16730        buckets[buckets_index2(1, 1)] = 2;
16731        buckets[buckets_index2(2, 0)] = 10;
16732        buckets[buckets_index2(2, 1)] = 1;
16733        place_lms_suffixes_histogram_32s_2k(&mut rust_sa, n, k, m, &buckets);
16734        unsafe {
16735            probe_libsais16x64_place_lms_suffixes_histogram_32s_2k(
16736                c_sa.as_mut_ptr(),
16737                n,
16738                k,
16739                m,
16740                buckets.as_ptr(),
16741            );
16742        }
16743        assert_eq!(rust_sa, c_sa);
16744
16745        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16746        let mut c_sa = rust_sa.clone();
16747        let mut buckets = vec![0; 4 * k as usize];
16748        buckets[buckets_index2(1, 1)] = 2;
16749        buckets[buckets_index2(2, 1)] = 1;
16750        buckets[3 * k as usize + 1] = 7;
16751        buckets[3 * k as usize + 2] = 10;
16752        place_lms_suffixes_histogram_32s_4k(&mut rust_sa, n, k, m, &buckets);
16753        unsafe {
16754            probe_libsais16x64_place_lms_suffixes_histogram_32s_4k(
16755                c_sa.as_mut_ptr(),
16756                n,
16757                k,
16758                m,
16759                buckets.as_ptr(),
16760            );
16761        }
16762        assert_eq!(rust_sa, c_sa);
16763
16764        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16765        let mut c_sa = rust_sa.clone();
16766        let mut buckets = vec![0; 6 * k as usize];
16767        buckets[buckets_index4(1, 1)] = 2;
16768        buckets[buckets_index4(2, 1)] = 1;
16769        buckets[5 * k as usize + 1] = 7;
16770        buckets[5 * k as usize + 2] = 10;
16771        place_lms_suffixes_histogram_32s_6k(&mut rust_sa, n, k, m, &buckets);
16772        unsafe {
16773            probe_libsais16x64_place_lms_suffixes_histogram_32s_6k(
16774                c_sa.as_mut_ptr(),
16775                n,
16776                k,
16777                m,
16778                buckets.as_ptr(),
16779            );
16780        }
16781        assert_eq!(rust_sa, c_sa);
16782    }
16783
16784    #[test]
16785    fn libsais16x64_count_gather_lms_suffixes_32s_match_c() {
16786        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16787        let n = t.len() as SaSint;
16788        let k = 4;
16789
16790        let mut rust_sa = vec![0; t.len()];
16791        let mut c_sa = rust_sa.clone();
16792        let rust_m = gather_lms_suffixes_32s(&t, &mut rust_sa, n);
16793        let c_m =
16794            unsafe { probe_libsais16x64_gather_lms_suffixes_32s(t.as_ptr(), c_sa.as_mut_ptr(), n) };
16795        assert_eq!(rust_m, c_m);
16796        assert_eq!(rust_sa, c_sa);
16797
16798        let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16799        let mut rust_sa = vec![0; compact_t.len()];
16800        let mut c_sa = rust_sa.clone();
16801        let rust_m = gather_compacted_lms_suffixes_32s(&compact_t, &mut rust_sa, n);
16802        let c_m = unsafe {
16803            probe_libsais16x64_gather_compacted_lms_suffixes_32s(
16804                compact_t.as_ptr(),
16805                c_sa.as_mut_ptr(),
16806                n,
16807            )
16808        };
16809        assert_eq!(rust_m, c_m);
16810        assert_eq!(rust_sa, c_sa);
16811
16812        let mut rust_buckets = vec![99; 2 * k as usize];
16813        let mut c_buckets = rust_buckets.clone();
16814        count_lms_suffixes_32s_2k(&t, n, k, &mut rust_buckets);
16815        unsafe {
16816            probe_libsais16x64_count_lms_suffixes_32s_2k(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
16817        }
16818        assert_eq!(rust_buckets, c_buckets);
16819
16820        let mut rust_sa = vec![0; t.len()];
16821        let mut c_sa = rust_sa.clone();
16822        let mut rust_buckets = vec![0; 2 * k as usize];
16823        let mut c_buckets = rust_buckets.clone();
16824        let rust_m = count_and_gather_lms_suffixes_32s_2k(
16825            &t,
16826            &mut rust_sa,
16827            n,
16828            k,
16829            &mut rust_buckets,
16830            0,
16831            n as isize,
16832        );
16833        let c_m = unsafe {
16834            probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k(
16835                t.as_ptr(),
16836                c_sa.as_mut_ptr(),
16837                n,
16838                k,
16839                c_buckets.as_mut_ptr(),
16840                0,
16841                n,
16842            )
16843        };
16844        assert_eq!(rust_m, c_m);
16845        assert_eq!(rust_sa, c_sa);
16846        assert_eq!(rust_buckets, c_buckets);
16847
16848        let mut rust_sa = vec![0; compact_t.len()];
16849        let mut c_sa = rust_sa.clone();
16850        let mut rust_buckets = vec![0; 2 * k as usize];
16851        let mut c_buckets = rust_buckets.clone();
16852        let rust_m = count_and_gather_compacted_lms_suffixes_32s_2k(
16853            &compact_t,
16854            &mut rust_sa,
16855            n,
16856            k,
16857            &mut rust_buckets,
16858            0,
16859            n as isize,
16860        );
16861        let c_m = unsafe {
16862            probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k(
16863                compact_t.as_ptr(),
16864                c_sa.as_mut_ptr(),
16865                n,
16866                k,
16867                c_buckets.as_mut_ptr(),
16868                0,
16869                n,
16870            )
16871        };
16872        assert_eq!(rust_m, c_m);
16873        assert_eq!(rust_sa, c_sa);
16874        assert_eq!(rust_buckets, c_buckets);
16875    }
16876
16877    #[test]
16878    fn libsais16x64_small_openmp_leaf_helpers_match_upstream_shapes() {
16879        let sa = [-1, 0, 3, SAINT_MIN, 0, 7, -5];
16880        assert_eq!(count_negative_marked_suffixes(&sa, 1, 5), 1);
16881        assert_eq!(count_zero_marked_suffixes(&sa, 1, 5), 2);
16882
16883        let mut buckets = vec![1, 2, 3, 0, 4, 5, 6, 0, 7, 8, 9, 0, 10, 11, 12, 0];
16884        accumulate_counts_s32_4(&mut buckets, 12, 3, 4);
16885        assert_eq!(&buckets[12..15], &[22, 26, 30]);
16886
16887        let mut many = Vec::new();
16888        for bucket in 0..10 {
16889            many.extend([bucket, bucket + 1, bucket + 2, 0]);
16890        }
16891        accumulate_counts_s32(&mut many, 36, 3, 4, 10);
16892        assert_eq!(&many[36..39], &[45, 55, 65]);
16893
16894        let t = [1, SAINT_MIN | 2, 0];
16895        let mut compacted_buckets = vec![0; 6];
16896        count_compacted_lms_suffixes_32s_2k(&t, t.len() as SaSint, 3, &mut compacted_buckets);
16897        assert_eq!(compacted_buckets, vec![1, 0, 1, 0, 0, 1]);
16898
16899        let unique_sa = [0, 2, 4, 6, 0, -10, 20, -30];
16900        assert_eq!(count_unique_suffixes(&unique_sa, 4, 0, 4), 2);
16901
16902        let s = [10u32, 11, 12, 13];
16903        let mut d = [0u64; 4];
16904        convert_32u_to_64u(&s, &mut d, 1, 2);
16905        assert_eq!(d, [0, 11, 12, 0]);
16906
16907        let mut words = [10u32, 11, 12, 13, 99, 99, 99, 99];
16908        convert_inplace_32u_to_64u(&mut words, 0, 4);
16909        assert_eq!(words, [10, 0, 11, 0, 12, 0, 13, 0]);
16910        convert_inplace_64u_to_32u(&mut words, 0, 4);
16911        assert_eq!(&words[..4], &[10, 11, 12, 13]);
16912
16913        let mut words = [20u32, 21, 22, 23, 99, 99, 99, 99];
16914        convert_inplace_32u_to_64u_omp(&mut words, 4, 2);
16915        assert_eq!(words, [20, 0, 21, 0, 22, 0, 23, 0]);
16916
16917        assert_eq!(get_bucket_stride(20_000, 1000, 4), 1024);
16918        assert_eq!(get_bucket_stride(3024, 1001, 4), 1008);
16919        assert_eq!(get_bucket_stride(3000, 1001, 4), 1001);
16920    }
16921
16922    #[test]
16923    fn libsais16x64_count_gather_lms_suffixes_32s_omp_wrappers_match_c() {
16924        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16925        let n = t.len() as SaSint;
16926        let k = 4;
16927        let mut rust_sa = vec![0; t.len()];
16928        let mut c_sa = rust_sa.clone();
16929        let mut rust_buckets = vec![0; 2 * k as usize];
16930        let mut c_buckets = rust_buckets.clone();
16931        let mut rust_state = alloc_thread_state(1).unwrap();
16932        let rust_m = count_and_gather_lms_suffixes_32s_2k_omp(
16933            &t,
16934            &mut rust_sa,
16935            n,
16936            k,
16937            &mut rust_buckets,
16938            0,
16939            1,
16940            &mut rust_state,
16941        );
16942        let c_m = unsafe {
16943            probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k_omp(
16944                t.as_ptr(),
16945                c_sa.as_mut_ptr(),
16946                n,
16947                k,
16948                c_buckets.as_mut_ptr(),
16949                0,
16950                1,
16951            )
16952        };
16953        assert_eq!(rust_m, c_m);
16954        assert_eq!(rust_sa, c_sa);
16955        assert_eq!(rust_buckets, c_buckets);
16956
16957        let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16958        let mut rust_sa = vec![0; compact_t.len()];
16959        let mut c_sa = rust_sa.clone();
16960        let mut rust_buckets = vec![0; 2 * k as usize];
16961        let mut c_buckets = rust_buckets.clone();
16962        let mut rust_state = alloc_thread_state(1).unwrap();
16963        count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16964            &compact_t,
16965            &mut rust_sa,
16966            n,
16967            k,
16968            &mut rust_buckets,
16969            0,
16970            1,
16971            &mut rust_state,
16972        );
16973        unsafe {
16974            probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16975                compact_t.as_ptr(),
16976                c_sa.as_mut_ptr(),
16977                n,
16978                k,
16979                c_buckets.as_mut_ptr(),
16980                0,
16981                1,
16982            );
16983        }
16984        assert_eq!(rust_sa, c_sa);
16985        assert_eq!(rust_buckets, c_buckets);
16986    }
16987
16988    #[test]
16989    fn libsais16x64_count_gather_lms_suffixes_32s_4k_match_c() {
16990        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16991        let n = t.len() as SaSint;
16992        let k = 4;
16993
16994        let mut rust_buckets = vec![77; 4 * k as usize];
16995        let mut c_buckets = vec![0; 4 * k as usize];
16996        let mut c_sa_for_count = vec![0; t.len()];
16997        count_lms_suffixes_32s_4k(&t, n, k, &mut rust_buckets);
16998        unsafe {
16999            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
17000                t.as_ptr(),
17001                c_sa_for_count.as_mut_ptr(),
17002                n,
17003                k,
17004                c_buckets.as_mut_ptr(),
17005                0,
17006                n,
17007            );
17008        }
17009        assert_eq!(rust_buckets, c_buckets);
17010
17011        let mut rust_sa = vec![0; t.len()];
17012        let mut c_sa = rust_sa.clone();
17013        let mut rust_buckets = vec![0; 4 * k as usize];
17014        let mut c_buckets = rust_buckets.clone();
17015        let rust_m = count_and_gather_lms_suffixes_32s_4k(
17016            &t,
17017            &mut rust_sa,
17018            n,
17019            k,
17020            &mut rust_buckets,
17021            0,
17022            n as isize,
17023        );
17024        let c_m = unsafe {
17025            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
17026                t.as_ptr(),
17027                c_sa.as_mut_ptr(),
17028                n,
17029                k,
17030                c_buckets.as_mut_ptr(),
17031                0,
17032                n,
17033            )
17034        };
17035        assert_eq!(rust_m, c_m);
17036        assert_eq!(rust_sa, c_sa);
17037        assert_eq!(rust_buckets, c_buckets);
17038
17039        let mut rust_sa = vec![0; t.len()];
17040        let mut c_sa = rust_sa.clone();
17041        let mut rust_buckets = vec![0; 4 * k as usize];
17042        let mut c_buckets = rust_buckets.clone();
17043        let mut rust_state = alloc_thread_state(1).unwrap();
17044        let rust_m = count_and_gather_lms_suffixes_32s_4k_omp(
17045            &t,
17046            &mut rust_sa,
17047            n,
17048            k,
17049            &mut rust_buckets,
17050            0,
17051            1,
17052            &mut rust_state,
17053        );
17054        let c_m = unsafe {
17055            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k_omp(
17056                t.as_ptr(),
17057                c_sa.as_mut_ptr(),
17058                n,
17059                k,
17060                c_buckets.as_mut_ptr(),
17061                0,
17062                1,
17063            )
17064        };
17065        assert_eq!(rust_m, c_m);
17066        assert_eq!(rust_sa, c_sa);
17067        assert_eq!(rust_buckets, c_buckets);
17068
17069        let mut rust_buckets = vec![91; k as usize];
17070        let mut c_buckets = rust_buckets.clone();
17071        count_suffixes_32s(&t, n, k, &mut rust_buckets);
17072        unsafe {
17073            probe_libsais16x64_count_suffixes_32s(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
17074        }
17075        assert_eq!(rust_buckets, c_buckets);
17076    }
17077
17078    #[test]
17079    fn libsais16x64_initialize_buckets_32s_match_c() {
17080        let k = 4;
17081
17082        let base_6k = vec![
17083            1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 1, 2, 1, 0, 0, 9, 9, 9, 9, 8, 8, 8, 8,
17084        ];
17085        let mut rust = base_6k.clone();
17086        let mut c = base_6k.clone();
17087        initialize_buckets_start_and_end_32s_6k(k, &mut rust);
17088        unsafe { probe_libsais16x64_initialize_buckets_start_and_end_32s_6k(k, c.as_mut_ptr()) };
17089        assert_eq!(rust, c);
17090
17091        let base_4k = vec![1, 2, 0, 1, 3, 0, 2, 1, 9, 9, 9, 9, 8, 8, 8, 8];
17092        let mut rust = base_4k.clone();
17093        let mut c = base_4k.clone();
17094        initialize_buckets_start_and_end_32s_4k(k, &mut rust);
17095        unsafe { probe_libsais16x64_initialize_buckets_start_and_end_32s_4k(k, c.as_mut_ptr()) };
17096        assert_eq!(rust, c);
17097
17098        let base_2k = vec![1, 2, 0, 1, 3, 0, 2, 1];
17099        let mut rust = base_2k.clone();
17100        let mut c = base_2k.clone();
17101        initialize_buckets_end_32s_2k(k, &mut rust);
17102        unsafe { probe_libsais16x64_initialize_buckets_end_32s_2k(k, c.as_mut_ptr()) };
17103        assert_eq!(rust, c);
17104
17105        let mut rust = base_2k.clone();
17106        let mut c = base_2k.clone();
17107        initialize_buckets_start_and_end_32s_2k(k, &mut rust);
17108        unsafe { probe_libsais16x64_initialize_buckets_start_and_end_32s_2k(k, c.as_mut_ptr()) };
17109        assert_eq!(rust, c);
17110
17111        let base_1k = vec![2, 1, 3, 2];
17112        let mut rust = base_1k.clone();
17113        let mut c = base_1k.clone();
17114        initialize_buckets_start_32s_1k(k, &mut rust);
17115        unsafe { probe_libsais16x64_initialize_buckets_start_32s_1k(k, c.as_mut_ptr()) };
17116        assert_eq!(rust, c);
17117
17118        let mut rust = base_1k.clone();
17119        let mut c = base_1k.clone();
17120        initialize_buckets_end_32s_1k(k, &mut rust);
17121        unsafe { probe_libsais16x64_initialize_buckets_end_32s_1k(k, c.as_mut_ptr()) };
17122        assert_eq!(rust, c);
17123
17124        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17125        let mut rust = vec![1, 2, 0, 1, 3, 0, 2, 1];
17126        let mut c = rust.clone();
17127        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&t, k, &mut rust, 4);
17128        unsafe {
17129            probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
17130                t.as_ptr(),
17131                k,
17132                c.as_mut_ptr(),
17133                4,
17134            );
17135        }
17136        assert_eq!(rust, c);
17137
17138        let mut rust = vec![
17139            1, 2, 0, 1, 3, 0, 2, 1, 1, 0, 2, 0, 0, 1, 1, 0, 9, 9, 9, 9, 8, 8, 8, 8,
17140        ];
17141        let mut c = rust.clone();
17142        let rust_sum = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&t, k, &mut rust, 4);
17143        let c_sum = unsafe {
17144            probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
17145                t.as_ptr(),
17146                k,
17147                c.as_mut_ptr(),
17148                4,
17149            )
17150        };
17151        assert_eq!(rust_sum, c_sum);
17152        assert_eq!(rust, c);
17153
17154        let mut rust = base_4k.clone();
17155        let mut c = base_4k;
17156        initialize_buckets_for_radix_and_partial_sorting_32s_4k(&t, k, &mut rust, 4);
17157        unsafe {
17158            probe_libsais16x64_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
17159                t.as_ptr(),
17160                k,
17161                c.as_mut_ptr(),
17162                4,
17163            );
17164        }
17165        assert_eq!(rust, c);
17166    }
17167
17168    #[test]
17169    fn libsais16x64_place_lms_suffixes_interval_32s_match_c() {
17170        let n = 12;
17171        let k = 4;
17172        let m = 4;
17173
17174        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17175        let mut c_sa = rust_sa.clone();
17176        let mut buckets = vec![0; 4 * k as usize];
17177        buckets[buckets_index2(0, 1)] = 2;
17178        buckets[buckets_index2(1, 1)] = 2;
17179        buckets[buckets_index2(2, 1)] = 3;
17180        buckets[buckets_index2(2, 1) + buckets_index2(1, 0)] = 4;
17181        buckets[3 * k as usize + 1] = 7;
17182        buckets[3 * k as usize + 2] = 10;
17183        place_lms_suffixes_interval_32s_4k(&mut rust_sa, n, k, m, &buckets);
17184        unsafe {
17185            probe_libsais16x64_place_lms_suffixes_interval_32s_4k(
17186                c_sa.as_mut_ptr(),
17187                n,
17188                k,
17189                m,
17190                buckets.as_ptr(),
17191            );
17192        }
17193        assert_eq!(rust_sa, c_sa);
17194
17195        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17196        let mut c_sa = rust_sa.clone();
17197        let mut buckets = vec![0; 2 * k as usize];
17198        buckets[buckets_index2(1, 0)] = 7;
17199        buckets[buckets_index2(0, 1)] = 1;
17200        buckets[buckets_index2(1, 1)] = 1;
17201        buckets[buckets_index2(2, 0)] = 10;
17202        buckets[buckets_index2(2, 1)] = 2;
17203        buckets[buckets_index2(3, 1)] = 3;
17204        place_lms_suffixes_interval_32s_2k(&mut rust_sa, n, k, m, &buckets);
17205        unsafe {
17206            probe_libsais16x64_place_lms_suffixes_interval_32s_2k(
17207                c_sa.as_mut_ptr(),
17208                n,
17209                k,
17210                m,
17211                buckets.as_ptr(),
17212            );
17213        }
17214        assert_eq!(rust_sa, c_sa);
17215
17216        let t = vec![0, 1, 2, 1, 2, 3, 1, 3, 0, 0, 0, 0];
17217        let mut rust_sa = vec![1, 3, 4, 7, 9, 9, 9, 9, 9, 9, 9, 9];
17218        let mut c_sa = rust_sa.clone();
17219        let rust_buckets = vec![0, 3, 6, 10];
17220        let mut c_buckets = rust_buckets.clone();
17221        place_lms_suffixes_interval_32s_1k(&t, &mut rust_sa, k, m, &rust_buckets);
17222        unsafe {
17223            probe_libsais16x64_place_lms_suffixes_interval_32s_1k(
17224                t.as_ptr(),
17225                c_sa.as_mut_ptr(),
17226                k,
17227                m,
17228                c_buckets.as_mut_ptr(),
17229            );
17230        }
17231        assert_eq!(rust_sa, c_sa);
17232        assert_eq!(rust_buckets, c_buckets);
17233    }
17234
17235    #[test]
17236    fn libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_matches_c() {
17237        let rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17238        let n = rust_t.len() as SaSint;
17239        let mut probe_sa = vec![0; rust_t.len()];
17240        let m = gather_lms_suffixes_32s(&rust_t, &mut probe_sa, n);
17241        let mut rust_sa = vec![0; rust_t.len()];
17242        let mut c_t = rust_t.clone();
17243        let mut c_sa = rust_sa.clone();
17244
17245        let rust_name =
17246            renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&rust_t, &mut rust_sa, n, m, 1);
17247        let c_name = unsafe {
17248            probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
17249                c_t.as_mut_ptr(),
17250                c_sa.as_mut_ptr(),
17251                n,
17252                m,
17253                1,
17254            )
17255        };
17256        assert_eq!(rust_name, c_name);
17257        assert_eq!(rust_t, c_t);
17258        assert_eq!(rust_sa, c_sa);
17259    }
17260
17261    #[test]
17262    fn libsais16x64_reconstruct_compacted_lms_suffixes_32s_match_c() {
17263        let n = 8;
17264        let k = 4;
17265        let fs = 0;
17266        let f = 0;
17267        let mut m_probe_sa = vec![0; n as usize];
17268        let m = gather_lms_suffixes_32s(&[2, 1, 3, 1, 2, 0, 1, 0], &mut m_probe_sa, n);
17269
17270        let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17271        let mut c_t = rust_t.clone();
17272        let mut rust_sa = vec![0; n as usize];
17273        let mut c_sa = rust_sa.clone();
17274        let mut rust_buckets = vec![0; 2 * k as usize];
17275        let mut c_buckets = rust_buckets.clone();
17276        let mut rust_thread_state = alloc_thread_state(1).unwrap();
17277        reconstruct_compacted_lms_suffixes_32s_2k_omp(
17278            &mut rust_t,
17279            &mut rust_sa,
17280            n,
17281            k,
17282            m,
17283            fs,
17284            f,
17285            &mut rust_buckets,
17286            0,
17287            1,
17288            &mut rust_thread_state,
17289        );
17290        unsafe {
17291            probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_2k_omp(
17292                c_t.as_mut_ptr(),
17293                c_sa.as_mut_ptr(),
17294                n,
17295                k,
17296                m,
17297                fs,
17298                f,
17299                c_buckets.as_mut_ptr(),
17300                0,
17301                1,
17302            );
17303        }
17304        assert_eq!(rust_t, c_t);
17305        assert_eq!(rust_sa, c_sa);
17306        assert_eq!(rust_buckets, c_buckets);
17307
17308        let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17309        let mut c_t = rust_t.clone();
17310        let mut rust_sa = vec![0; n as usize];
17311        let mut c_sa = rust_sa.clone();
17312        reconstruct_compacted_lms_suffixes_32s_1k_omp(&mut rust_t, &mut rust_sa, n, m, fs, f, 1);
17313        unsafe {
17314            probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_1k_omp(
17315                c_t.as_mut_ptr(),
17316                c_sa.as_mut_ptr(),
17317                n,
17318                m,
17319                fs,
17320                f,
17321                1,
17322            );
17323        }
17324        assert_eq!(rust_t, c_t);
17325        assert_eq!(rust_sa, c_sa);
17326    }
17327
17328    #[test]
17329    fn libsais16x64_partial_omp_wrappers_match_c() {
17330        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17331        let mut c_sa = rust_sa.clone();
17332        let mut c_buckets = rust_buckets.clone();
17333
17334        let rust_d = partial_sorting_scan_left_to_right_16u_omp(
17335            &text,
17336            &mut rust_sa,
17337            text.len() as SaSint,
17338            8,
17339            &mut rust_buckets,
17340            5,
17341            3,
17342            1,
17343        );
17344        let c_d = unsafe {
17345            probe_libsais16x64_partial_sorting_scan_left_to_right_16u_omp(
17346                text.as_ptr(),
17347                c_sa.as_mut_ptr(),
17348                text.len() as SaSint,
17349                8,
17350                c_buckets.as_mut_ptr(),
17351                5,
17352                3,
17353                1,
17354            )
17355        };
17356        assert_eq!(rust_d, c_d);
17357        assert_eq!(rust_sa, c_sa);
17358        assert_eq!(rust_buckets, c_buckets);
17359
17360        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17361        rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17362        let mut c_sa = rust_sa.clone();
17363        let mut c_buckets = rust_buckets.clone();
17364        partial_sorting_scan_right_to_left_16u_omp(
17365            &text,
17366            &mut rust_sa,
17367            text.len() as SaSint,
17368            8,
17369            &mut rust_buckets,
17370            0,
17371            5,
17372            3,
17373            1,
17374        );
17375        unsafe {
17376            probe_libsais16x64_partial_sorting_scan_right_to_left_16u_omp(
17377                text.as_ptr(),
17378                c_sa.as_mut_ptr(),
17379                text.len() as SaSint,
17380                8,
17381                c_buckets.as_mut_ptr(),
17382                0,
17383                5,
17384                3,
17385                1,
17386            );
17387        }
17388        assert_eq!(rust_sa, c_sa);
17389        assert_eq!(rust_buckets, c_buckets);
17390
17391        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17392        rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17393        let mut c_sa = rust_sa.clone();
17394        let mut c_buckets = rust_buckets.clone();
17395        partial_gsa_scan_right_to_left_16u_omp(
17396            &text,
17397            &mut rust_sa,
17398            text.len() as SaSint,
17399            8,
17400            &mut rust_buckets,
17401            0,
17402            5,
17403            3,
17404            1,
17405        );
17406        unsafe {
17407            probe_libsais16x64_partial_gsa_scan_right_to_left_16u_omp(
17408                text.as_ptr(),
17409                c_sa.as_mut_ptr(),
17410                text.len() as SaSint,
17411                8,
17412                c_buckets.as_mut_ptr(),
17413                0,
17414                5,
17415                3,
17416                1,
17417            );
17418        }
17419        assert_eq!(rust_sa, c_sa);
17420        assert_eq!(rust_buckets, c_buckets);
17421    }
17422
17423    #[test]
17424    fn libsais16x64_final_omp_wrappers_match_c() {
17425        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17426        let mut c_sa = rust_sa.clone();
17427        let mut c_bucket = rust_bucket.clone();
17428        final_bwt_scan_left_to_right_16u_omp(
17429            &text,
17430            &mut rust_sa,
17431            text.len() as SaSint,
17432            8,
17433            &mut rust_bucket,
17434            1,
17435        );
17436        unsafe {
17437            probe_libsais16x64_final_bwt_scan_left_to_right_16u_omp(
17438                text.as_ptr(),
17439                c_sa.as_mut_ptr(),
17440                text.len() as SaSint,
17441                8,
17442                c_bucket.as_mut_ptr(),
17443                1,
17444            );
17445        }
17446        assert_eq!(rust_sa, c_sa);
17447        assert_eq!(rust_bucket, c_bucket);
17448
17449        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17450        let mut c_sa = rust_sa.clone();
17451        let mut c_bucket = rust_bucket.clone();
17452        let mut rust_i = vec![-1; 8];
17453        let mut c_i = rust_i.clone();
17454        final_bwt_aux_scan_left_to_right_16u_omp(
17455            &text,
17456            &mut rust_sa,
17457            text.len() as SaSint,
17458            8,
17459            1,
17460            &mut rust_i,
17461            &mut rust_bucket,
17462            1,
17463        );
17464        unsafe {
17465            probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u_omp(
17466                text.as_ptr(),
17467                c_sa.as_mut_ptr(),
17468                text.len() as SaSint,
17469                8,
17470                1,
17471                c_i.as_mut_ptr(),
17472                c_bucket.as_mut_ptr(),
17473                1,
17474            );
17475        }
17476        assert_eq!(rust_sa, c_sa);
17477        assert_eq!(rust_bucket, c_bucket);
17478        assert_eq!(rust_i, c_i);
17479
17480        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17481        let mut c_sa = rust_sa.clone();
17482        let mut c_bucket = rust_bucket.clone();
17483        final_sorting_scan_left_to_right_16u_omp(
17484            &text,
17485            &mut rust_sa,
17486            text.len() as SaSint,
17487            8,
17488            &mut rust_bucket,
17489            1,
17490        );
17491        unsafe {
17492            probe_libsais16x64_final_sorting_scan_left_to_right_16u_omp(
17493                text.as_ptr(),
17494                c_sa.as_mut_ptr(),
17495                text.len() as SaSint,
17496                8,
17497                c_bucket.as_mut_ptr(),
17498                1,
17499            );
17500        }
17501        assert_eq!(rust_sa, c_sa);
17502        assert_eq!(rust_bucket, c_bucket);
17503
17504        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17505        let mut c_sa = rust_sa.clone();
17506        let mut c_bucket = rust_bucket.clone();
17507        let rust_index = final_bwt_scan_right_to_left_16u_omp(
17508            &text,
17509            &mut rust_sa,
17510            text.len() as SaSint,
17511            8,
17512            &mut rust_bucket,
17513            1,
17514        );
17515        let c_index = unsafe {
17516            probe_libsais16x64_final_bwt_scan_right_to_left_16u_omp(
17517                text.as_ptr(),
17518                c_sa.as_mut_ptr(),
17519                text.len() as SaSint,
17520                8,
17521                c_bucket.as_mut_ptr(),
17522                1,
17523            )
17524        };
17525        assert_eq!(rust_index, c_index);
17526        assert_eq!(rust_sa, c_sa);
17527        assert_eq!(rust_bucket, c_bucket);
17528
17529        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17530        let mut c_sa = rust_sa.clone();
17531        let mut c_bucket = rust_bucket.clone();
17532        let mut rust_i = vec![-1; 8];
17533        let mut c_i = rust_i.clone();
17534        final_bwt_aux_scan_right_to_left_16u_omp(
17535            &text,
17536            &mut rust_sa,
17537            text.len() as SaSint,
17538            8,
17539            1,
17540            &mut rust_i,
17541            &mut rust_bucket,
17542            1,
17543        );
17544        unsafe {
17545            probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u_omp(
17546                text.as_ptr(),
17547                c_sa.as_mut_ptr(),
17548                text.len() as SaSint,
17549                8,
17550                1,
17551                c_i.as_mut_ptr(),
17552                c_bucket.as_mut_ptr(),
17553                1,
17554            );
17555        }
17556        assert_eq!(rust_sa, c_sa);
17557        assert_eq!(rust_bucket, c_bucket);
17558        assert_eq!(rust_i, c_i);
17559
17560        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17561        let mut c_sa = rust_sa.clone();
17562        let mut c_bucket = rust_bucket.clone();
17563        final_sorting_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17564        unsafe {
17565            probe_libsais16x64_final_sorting_scan_right_to_left_16u_omp(
17566                text.as_ptr(),
17567                c_sa.as_mut_ptr(),
17568                0,
17569                6,
17570                8,
17571                c_bucket.as_mut_ptr(),
17572                1,
17573            );
17574        }
17575        assert_eq!(rust_sa, c_sa);
17576        assert_eq!(rust_bucket, c_bucket);
17577
17578        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17579        let mut c_sa = rust_sa.clone();
17580        let mut c_bucket = rust_bucket.clone();
17581        final_gsa_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17582        unsafe {
17583            probe_libsais16x64_final_gsa_scan_right_to_left_16u_omp(
17584                text.as_ptr(),
17585                c_sa.as_mut_ptr(),
17586                0,
17587                6,
17588                8,
17589                c_bucket.as_mut_ptr(),
17590                1,
17591            );
17592        }
17593        assert_eq!(rust_sa, c_sa);
17594        assert_eq!(rust_bucket, c_bucket);
17595    }
17596
17597    #[test]
17598    fn libsais16x64_matches_bruteforce() {
17599        let t = [3, 1, 4, 1, 5, 9, 0, 2];
17600        let mut sa = vec![0; t.len()];
17601        let mut freq = vec![0; ALPHABET_SIZE];
17602        assert_eq!(libsais16x64(&t, &mut sa, 0, Some(&mut freq)), 0);
17603        assert_eq!(sa, brute_sa(&t));
17604        assert_eq!(freq[1], 2);
17605        assert_eq!(freq[9], 1);
17606    }
17607
17608    #[test]
17609    fn libsais16x64_bwt_round_trips() {
17610        let t = [2, 1, 3, 1, 2, 4, 1, 0];
17611        let mut bwt = vec![0; t.len()];
17612        let mut work = vec![0; t.len()];
17613        let primary = libsais16x64_bwt(&t, &mut bwt, &mut work, 0, None);
17614        assert!(primary > 0);
17615
17616        let mut restored = vec![0; t.len()];
17617        assert_eq!(
17618            libsais16x64_unbwt(&bwt, &mut restored, &mut work, None, primary),
17619            0
17620        );
17621        assert_eq!(restored, t);
17622    }
17623
17624    #[test]
17625    fn libsais16x64_plcp_lcp_are_consistent() {
17626        let t = [2, 1, 2, 1, 0];
17627        let sa = brute_sa(&t);
17628        let mut plcp = vec![0; t.len()];
17629        let mut lcp = vec![0; t.len()];
17630        assert_eq!(libsais16x64_plcp(&t, &sa, &mut plcp), 0);
17631        assert_eq!(libsais16x64_lcp(&plcp, &sa, &mut lcp), 0);
17632        assert_eq!(lcp[0], 0);
17633
17634        let mut named_plcp = vec![0; t.len()];
17635        assert_eq!(
17636            compute_phi_omp(&sa, &mut named_plcp, t.len() as SaSint, 1),
17637            0
17638        );
17639        assert_eq!(
17640            compute_plcp_omp(&t, &mut named_plcp, t.len() as SaSint, 1),
17641            0
17642        );
17643        assert_eq!(named_plcp, plcp);
17644
17645        let mut named_lcp = vec![0; t.len()];
17646        assert_eq!(
17647            compute_lcp_omp(&named_plcp, &sa, &mut named_lcp, t.len() as SaSint, 1),
17648            0
17649        );
17650        assert_eq!(named_lcp, lcp);
17651
17652        let mut gsa_plcp = vec![0; t.len()];
17653        let mut named_gsa_plcp = vec![0; t.len()];
17654        assert_eq!(libsais16x64_plcp_gsa(&t, &sa, &mut gsa_plcp), 0);
17655        assert_eq!(
17656            compute_phi_omp(&sa, &mut named_gsa_plcp, t.len() as SaSint, 1),
17657            0
17658        );
17659        assert_eq!(
17660            compute_plcp_gsa_omp(&t, &mut named_gsa_plcp, t.len() as SaSint, 1),
17661            0
17662        );
17663        assert_eq!(named_gsa_plcp, gsa_plcp);
17664    }
17665
17666    #[test]
17667    fn libsais16x64_bwt_copy_16u_omp_uses_block_partition_for_large_inputs() {
17668        let n = 65_600usize;
17669        let a: Vec<SaSint> = (0..n).map(|i| (i * 17) as SaSint).collect();
17670        let mut threaded = vec![0; n];
17671        let mut sequential = vec![0; n];
17672
17673        bwt_copy_16u_omp(&mut threaded, &a, n as SaSint, 4);
17674        bwt_copy_16u(&mut sequential, &a, n as SaSint);
17675
17676        assert_eq!(threaded, sequential);
17677    }
17678
17679    #[test]
17680    fn libsais16x64_plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
17681        let n = 65_600usize;
17682        let text: Vec<u16> = (0..n).map(|i| 1 + (i % 251) as u16).collect();
17683        let sa: Vec<SaSint> = (0..n as SaSint).collect();
17684
17685        let mut plcp_single = vec![0; n];
17686        let mut plcp_threaded = vec![0; n];
17687        assert_eq!(compute_phi_omp(&sa, &mut plcp_single, n as SaSint, 1), 0);
17688        assert_eq!(compute_phi_omp(&sa, &mut plcp_threaded, n as SaSint, 4), 0);
17689        assert_eq!(plcp_threaded, plcp_single);
17690
17691        assert_eq!(compute_plcp_omp(&text, &mut plcp_single, n as SaSint, 1), 0);
17692        assert_eq!(
17693            compute_plcp_omp(&text, &mut plcp_threaded, n as SaSint, 4),
17694            0
17695        );
17696        assert_eq!(plcp_threaded, plcp_single);
17697
17698        let mut lcp_single = vec![0; n];
17699        let mut lcp_threaded = vec![0; n];
17700        assert_eq!(
17701            compute_lcp_omp(&plcp_single, &sa, &mut lcp_single, n as SaSint, 1),
17702            0
17703        );
17704        assert_eq!(
17705            compute_lcp_omp(&plcp_threaded, &sa, &mut lcp_threaded, n as SaSint, 4),
17706            0
17707        );
17708        assert_eq!(lcp_threaded, lcp_single);
17709    }
17710
17711    #[test]
17712    fn libsais16x64_context_allocates_upstream_shaped_buffers() {
17713        let ctx = create_ctx().unwrap();
17714        assert_eq!(ctx.threads, 1);
17715        assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17716        assert!(ctx.thread_state.is_none());
17717
17718        let ctx = create_ctx_omp(2).unwrap();
17719        assert_eq!(ctx.threads, 2);
17720        assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17721        let thread_state = ctx.thread_state.as_ref().unwrap();
17722        assert_eq!(thread_state.len(), 2);
17723        assert_eq!(thread_state[0].buckets.len(), 4 * ALPHABET_SIZE);
17724        assert_eq!(thread_state[0].cache_entries, PER_THREAD_CACHE_SIZE);
17725
17726        let ctx = create_ctx_omp(0).unwrap();
17727        assert_eq!(ctx.threads, 1);
17728        assert!(ctx.thread_state.is_none());
17729    }
17730
17731    #[test]
17732    fn libsais16x64_unbwt_context_allocates_upstream_shaped_buffers() {
17733        let ctx = unbwt_create_ctx().unwrap();
17734        assert_eq!(ctx.threads, 1);
17735        assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17736        assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17737        assert!(ctx.buckets.is_none());
17738
17739        let ctx = unbwt_create_ctx_omp(3).unwrap();
17740        assert_eq!(ctx.threads, 3);
17741        assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17742        assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17743        assert_eq!(ctx.buckets.as_ref().unwrap().len(), 3 * ALPHABET_SIZE);
17744    }
17745
17746    #[test]
17747    fn libsais16x64_named_unbwt_helpers_follow_decode_shapes() {
17748        let t = [0, 1, 2];
17749        let mut p = vec![usize::MAX; 4];
17750        let mut bucket2 = vec![0; ALPHABET_SIZE];
17751        bucket2[0] = 1;
17752        bucket2[1] = 2;
17753        bucket2[2] = 3;
17754        unbwt_calculate_P(&t, &mut p, &mut bucket2, 2, 1, 3);
17755        assert_eq!(p[2], 1);
17756        assert_eq!(p[3], 3);
17757
17758        let p = [1usize, 2, 0];
17759        let mut bucket2 = vec![3; ALPHABET_SIZE];
17760        bucket2[0] = 1;
17761        bucket2[1] = 2;
17762        bucket2[2] = 3;
17763        let fastbits = vec![0; 3];
17764
17765        let mut u = vec![99; 3];
17766        let mut i0 = 0;
17767        unbwt_decode_1(&mut u, &p, &bucket2, &fastbits, 0, &mut i0, 3);
17768        assert_eq!(u, vec![0, 1, 2]);
17769        assert_eq!(i0, 0);
17770
17771        let mut u = vec![99; 6];
17772        let (mut i0, mut i1) = (0, 1);
17773        unbwt_decode_2(&mut u, &p, &bucket2, &fastbits, 0, 3, &mut i0, &mut i1, 2);
17774        assert_eq!(&u[..2], &[0, 1]);
17775        assert_eq!(&u[3..5], &[1, 2]);
17776        assert_eq!((i0, i1), (2, 0));
17777
17778        let mut u = vec![99; 8];
17779        let mut cursors = [0; 8];
17780        unbwt_decode_8(&mut u, &p, &bucket2, &fastbits, 0, 1, &mut cursors, 1);
17781        assert_eq!(u, vec![0; 8]);
17782        assert_eq!(cursors, [1; 8]);
17783    }
17784
17785    #[test]
17786    fn libsais16x64_unbwt_init_parallel_uses_block_partition() {
17787        let n = 70_003usize;
17788        let t: Vec<u16> = (0..n)
17789            .map(|i| ((i.wrapping_mul(37).wrapping_add(i >> 3)) % 251) as u16)
17790            .collect();
17791        let i = [12_345];
17792
17793        let mut single_p = vec![0; n + 1];
17794        let mut threaded_p = vec![0; n + 1];
17795        let mut single_bucket2 = vec![0; ALPHABET_SIZE];
17796        let mut threaded_bucket2 = vec![0; ALPHABET_SIZE];
17797        let mut single_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17798        let mut threaded_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17799        let mut buckets = vec![0; 4 * ALPHABET_SIZE];
17800
17801        unbwt_init_single(
17802            &t,
17803            &mut single_p,
17804            None,
17805            &i,
17806            &mut single_bucket2,
17807            &mut single_fastbits,
17808        );
17809        unbwt_init_parallel(
17810            &t,
17811            &mut threaded_p,
17812            None,
17813            &i,
17814            &mut threaded_bucket2,
17815            &mut threaded_fastbits,
17816            &mut buckets,
17817            4,
17818        );
17819
17820        assert_eq!(threaded_p, single_p);
17821        assert_eq!(threaded_bucket2, single_bucket2);
17822        assert_eq!(threaded_fastbits, single_fastbits);
17823    }
17824
17825    fn assert_libsais16x64_matches_c(text: &[u16]) {
17826        let mut rust_sa = vec![0; text.len()];
17827        let mut c_sa = vec![0; text.len()];
17828
17829        let rust_rc = libsais16x64(text, &mut rust_sa, 0, None);
17830        let c_rc = unsafe {
17831            probe_public_libsais16x64(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17832        };
17833
17834        assert_eq!(rust_rc, c_rc);
17835        assert_eq!(rust_sa, c_sa);
17836    }
17837
17838    fn assert_libsais16x64_gsa_matches_c(text: &[u16]) {
17839        let mut rust_sa = vec![0; text.len()];
17840        let mut c_sa = vec![0; text.len()];
17841
17842        let rust_rc = libsais16x64_gsa(text, &mut rust_sa, 0, None);
17843        let c_rc = unsafe {
17844            probe_public_libsais16x64_gsa(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17845        };
17846
17847        assert_eq!(rust_rc, c_rc);
17848        assert_eq!(rust_sa, c_sa);
17849    }
17850
17851    fn assert_libsais16x64_long_matches_c_with_fs(text: &[SaSint], k: SaSint, fs: SaSint) {
17852        let mut rust_t = text.to_vec();
17853        let mut c_t = text.to_vec();
17854        let mut rust_sa = vec![0; text.len() + fs as usize];
17855        let mut c_sa = vec![0; text.len() + fs as usize];
17856
17857        let rust_rc = libsais16x64_long(&mut rust_t, &mut rust_sa, k, fs);
17858        let c_rc = unsafe {
17859            probe_public_libsais16x64_long(
17860                c_t.as_mut_ptr(),
17861                c_sa.as_mut_ptr(),
17862                c_t.len() as SaSint,
17863                k,
17864                fs,
17865            )
17866        };
17867
17868        assert_eq!(rust_rc, c_rc);
17869        assert_eq!(rust_t, c_t);
17870        assert_eq!(rust_sa, c_sa);
17871    }
17872
17873    fn assert_libsais16x64_long_matches_c(text: &[SaSint], k: SaSint) {
17874        assert_libsais16x64_long_matches_c_with_fs(text, k, 0);
17875    }
17876
17877    fn make_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
17878        let mut state: u32 = 0x1357_9bdf;
17879        let mut t = Vec::with_capacity(len + 1);
17880
17881        for i in 0..len {
17882            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
17883            let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
17884            if i % 17 < 8 {
17885                value = ((i / 17) as SaSint % 11) + 1;
17886            }
17887            if i % 29 < 10 {
17888                value = (((i / 29) as SaSint * 3) % 19) + 1;
17889            }
17890            if i % 64 >= 48 {
17891                value = t[i - 48];
17892            }
17893            t.push(value);
17894        }
17895
17896        t.push(0);
17897        t
17898    }
17899
17900    fn make_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
17901        let motif = [9, 4, 9, 2, 9, 4, 9, 1];
17902        let mut t = Vec::with_capacity(repeats * motif.len() + 1);
17903        for _ in 0..repeats {
17904            t.extend_from_slice(&motif);
17905        }
17906        t.push(0);
17907        t
17908    }
17909
17910    fn assert_main_32s_entry_matches_c(mut t: Vec<SaSint>, k: SaSint, fs: SaSint) {
17911        let n = t.len() as SaSint;
17912        let threads = 1;
17913        let mut sa = vec![0; t.len() + fs as usize];
17914        let initial_t = t.clone();
17915        let initial_sa = sa.clone();
17916
17917        let c_result = unsafe {
17918            probe_libsais16x64_main_32s_entry(t.as_mut_ptr(), sa.as_mut_ptr(), n, k, fs, threads)
17919        };
17920        let c_t = t.clone();
17921        let c_sa = sa.clone();
17922
17923        t.copy_from_slice(&initial_t);
17924        sa.copy_from_slice(&initial_sa);
17925
17926        let mut thread_state = alloc_thread_state(threads).unwrap();
17927        let rust_result = main_32s_entry(
17928            t.as_mut_ptr(),
17929            &mut sa,
17930            n,
17931            k,
17932            fs,
17933            threads,
17934            &mut thread_state,
17935        );
17936
17937        assert_eq!(rust_result, c_result);
17938        assert_eq!(t, c_t);
17939        assert_eq!(&sa[..n as usize], &c_sa[..n as usize]);
17940        if fs == 0 {
17941            assert_eq!(sa, c_sa);
17942        }
17943    }
17944
17945    #[test]
17946    fn libsais16x64_main_32s_entry_matches_c_for_local_32s_paths() {
17947        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 300), 300, 2048);
17948        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 400), 400, 2048);
17949        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 700), 700, 2048);
17950        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 1501), 1501, 2048);
17951        assert_main_32s_entry_matches_c(make_recursive_main_32s_text(24), 300, 0);
17952        assert_main_32s_entry_matches_c(make_recursive_main_32s_text(24), 1501, 0);
17953    }
17954
17955    fn assert_libsais16x64_bwt_matches_c(text: &[u16]) {
17956        let mut rust_u = vec![0; text.len()];
17957        let mut rust_a = vec![0; text.len()];
17958        let mut c_u = vec![0; text.len()];
17959        let mut c_a = vec![0; text.len()];
17960
17961        let rust_rc = libsais16x64_bwt(text, &mut rust_u, &mut rust_a, 0, None);
17962        let c_rc = unsafe {
17963            probe_public_libsais16x64_bwt(
17964                text.as_ptr(),
17965                c_u.as_mut_ptr(),
17966                c_a.as_mut_ptr(),
17967                text.len() as SaSint,
17968                0,
17969            )
17970        };
17971
17972        assert_eq!(rust_rc, c_rc);
17973        assert_eq!(rust_u, c_u);
17974    }
17975
17976    fn assert_libsais16x64_bwt_aux_matches_c(text: &[u16], r: SaSint) {
17977        let aux_len = if text.is_empty() {
17978            0
17979        } else {
17980            (text.len() - 1) / r as usize + 1
17981        };
17982        let mut rust_u = vec![0; text.len()];
17983        let mut rust_a = vec![0; text.len()];
17984        let mut rust_i = vec![0; aux_len];
17985        let mut c_u = vec![0; text.len()];
17986        let mut c_a = vec![0; text.len()];
17987        let mut c_i = vec![0; aux_len];
17988
17989        let rust_rc = libsais16x64_bwt_aux(text, &mut rust_u, &mut rust_a, 0, None, r, &mut rust_i);
17990        let c_rc = unsafe {
17991            probe_public_libsais16x64_bwt_aux(
17992                text.as_ptr(),
17993                c_u.as_mut_ptr(),
17994                c_a.as_mut_ptr(),
17995                text.len() as SaSint,
17996                0,
17997                r,
17998                c_i.as_mut_ptr(),
17999            )
18000        };
18001
18002        assert_eq!(rust_rc, c_rc);
18003        assert_eq!(rust_u, c_u);
18004        assert_eq!(rust_i, c_i);
18005    }
18006
18007    fn assert_libsais16x64_freq_outputs_match_c(text: &[u16], gsa_text: &[u16]) {
18008        let mut rust_sa = vec![0; text.len()];
18009        let mut c_sa = vec![0; text.len()];
18010        let mut rust_freq = vec![-1; ALPHABET_SIZE];
18011        let mut c_freq = vec![-1; ALPHABET_SIZE];
18012
18013        let rust_rc = libsais16x64(text, &mut rust_sa, 0, Some(&mut rust_freq));
18014        let c_rc = unsafe {
18015            probe_public_libsais16x64_freq(
18016                text.as_ptr(),
18017                c_sa.as_mut_ptr(),
18018                text.len() as SaSint,
18019                0,
18020                c_freq.as_mut_ptr(),
18021            )
18022        };
18023        assert_eq!(rust_rc, c_rc);
18024        assert_eq!(rust_sa, c_sa);
18025        assert_eq!(rust_freq, c_freq);
18026
18027        let mut rust_gsa = vec![0; gsa_text.len()];
18028        let mut c_gsa = vec![0; gsa_text.len()];
18029        rust_freq.fill(-1);
18030        c_freq.fill(-1);
18031        let rust_rc = libsais16x64_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
18032        let c_rc = unsafe {
18033            probe_public_libsais16x64_gsa_freq(
18034                gsa_text.as_ptr(),
18035                c_gsa.as_mut_ptr(),
18036                gsa_text.len() as SaSint,
18037                0,
18038                c_freq.as_mut_ptr(),
18039            )
18040        };
18041        assert_eq!(rust_rc, c_rc);
18042        assert_eq!(rust_gsa, c_gsa);
18043        assert_eq!(rust_freq, c_freq);
18044
18045        let mut rust_u = vec![0; text.len()];
18046        let mut rust_a = vec![0; text.len()];
18047        let mut c_u = vec![0; text.len()];
18048        let mut c_a = vec![0; text.len()];
18049        rust_freq.fill(-1);
18050        c_freq.fill(-1);
18051        let rust_rc = libsais16x64_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
18052        let c_rc = unsafe {
18053            probe_public_libsais16x64_bwt_freq(
18054                text.as_ptr(),
18055                c_u.as_mut_ptr(),
18056                c_a.as_mut_ptr(),
18057                text.len() as SaSint,
18058                0,
18059                c_freq.as_mut_ptr(),
18060            )
18061        };
18062        assert_eq!(rust_rc, c_rc);
18063        assert_eq!(rust_u, c_u);
18064        assert_eq!(rust_freq, c_freq);
18065
18066        let r = 4;
18067        let aux_len = (text.len() - 1) / r as usize + 1;
18068        let mut rust_i = vec![0; aux_len];
18069        let mut c_i = vec![0; aux_len];
18070        rust_freq.fill(-1);
18071        c_freq.fill(-1);
18072        let rust_rc = libsais16x64_bwt_aux(
18073            text,
18074            &mut rust_u,
18075            &mut rust_a,
18076            0,
18077            Some(&mut rust_freq),
18078            r,
18079            &mut rust_i,
18080        );
18081        let c_rc = unsafe {
18082            probe_public_libsais16x64_bwt_aux_freq(
18083                text.as_ptr(),
18084                c_u.as_mut_ptr(),
18085                c_a.as_mut_ptr(),
18086                text.len() as SaSint,
18087                0,
18088                c_freq.as_mut_ptr(),
18089                r,
18090                c_i.as_mut_ptr(),
18091            )
18092        };
18093        assert_eq!(rust_rc, c_rc);
18094        assert_eq!(rust_u, c_u);
18095        assert_eq!(rust_i, c_i);
18096        assert_eq!(rust_freq, c_freq);
18097    }
18098
18099    fn assert_libsais16x64_unbwt_matches_c(text: &[u16]) {
18100        let mut bwt = vec![0; text.len()];
18101        let mut work = vec![0; text.len()];
18102        let primary = libsais16x64_bwt(text, &mut bwt, &mut work, 0, None);
18103        assert!(primary >= 0);
18104
18105        let mut rust_u = vec![0; text.len()];
18106        let mut rust_a = vec![0; text.len() + 1];
18107        let mut c_u = vec![0; text.len()];
18108        let mut c_a = vec![0; text.len() + 1];
18109
18110        let rust_rc = libsais16x64_unbwt(&bwt, &mut rust_u, &mut rust_a, None, primary);
18111        let c_rc = unsafe {
18112            probe_public_libsais16x64_unbwt(
18113                bwt.as_ptr(),
18114                c_u.as_mut_ptr(),
18115                c_a.as_mut_ptr(),
18116                bwt.len() as SaSint,
18117                primary,
18118            )
18119        };
18120
18121        assert_eq!(rust_rc, c_rc);
18122        assert_eq!(rust_u, c_u);
18123        assert_eq!(rust_u, text);
18124    }
18125
18126    fn assert_libsais16x64_unbwt_aux_matches_c(text: &[u16], r: SaSint) {
18127        let mut bwt = vec![0; text.len()];
18128        let mut work = vec![0; text.len()];
18129        let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
18130        let bwt_rc = libsais16x64_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
18131        assert_eq!(bwt_rc, 0);
18132
18133        let mut rust_u = vec![0; text.len()];
18134        let mut rust_a = vec![0; text.len() + 1];
18135        let mut c_u = vec![0; text.len()];
18136        let mut c_a = vec![0; text.len() + 1];
18137
18138        let rust_rc = libsais16x64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, None, r, &aux);
18139        let c_rc = unsafe {
18140            probe_public_libsais16x64_unbwt_aux(
18141                bwt.as_ptr(),
18142                c_u.as_mut_ptr(),
18143                c_a.as_mut_ptr(),
18144                bwt.len() as SaSint,
18145                r,
18146                aux.as_ptr(),
18147            )
18148        };
18149
18150        assert_eq!(rust_rc, c_rc);
18151        assert_eq!(rust_u, c_u);
18152        assert_eq!(rust_u, text);
18153    }
18154
18155    fn assert_libsais16x64_unbwt_freq_matches_c(text: &[u16]) {
18156        let mut freq = vec![0; ALPHABET_SIZE];
18157        let mut bwt = vec![0; text.len()];
18158        let mut work = vec![0; text.len()];
18159        let primary = libsais16x64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
18160        assert!(primary >= 0);
18161
18162        let mut rust_u = vec![0; text.len()];
18163        let mut rust_a = vec![0; text.len() + 1];
18164        let mut c_u = vec![0; text.len()];
18165        let mut c_a = vec![0; text.len() + 1];
18166
18167        let rust_rc = libsais16x64_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
18168        let c_rc = unsafe {
18169            probe_public_libsais16x64_unbwt_freq(
18170                bwt.as_ptr(),
18171                c_u.as_mut_ptr(),
18172                c_a.as_mut_ptr(),
18173                bwt.len() as SaSint,
18174                freq.as_ptr(),
18175                primary,
18176            )
18177        };
18178        assert_eq!(rust_rc, c_rc);
18179        assert_eq!(rust_u, c_u);
18180        assert_eq!(rust_u, text);
18181
18182        let r = 4;
18183        let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
18184        let bwt_rc =
18185            libsais16x64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
18186        assert_eq!(bwt_rc, 0);
18187
18188        rust_u.fill(0);
18189        rust_a.fill(0);
18190        c_u.fill(0);
18191        c_a.fill(0);
18192        let rust_rc = libsais16x64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
18193        let c_rc = unsafe {
18194            probe_public_libsais16x64_unbwt_aux_freq(
18195                bwt.as_ptr(),
18196                c_u.as_mut_ptr(),
18197                c_a.as_mut_ptr(),
18198                bwt.len() as SaSint,
18199                freq.as_ptr(),
18200                r,
18201                aux.as_ptr(),
18202            )
18203        };
18204        assert_eq!(rust_rc, c_rc);
18205        assert_eq!(rust_u, c_u);
18206        assert_eq!(rust_u, text);
18207    }
18208
18209    fn assert_libsais16x64_plcp_lcp_matches_c(text: &[u16]) {
18210        let mut sa = vec![0; text.len()];
18211        assert_eq!(libsais16x64(text, &mut sa, 0, None), 0);
18212
18213        let mut rust_plcp = vec![0; text.len()];
18214        let mut c_plcp = vec![0; text.len()];
18215        let rust_rc = libsais16x64_plcp(text, &sa, &mut rust_plcp);
18216        let c_rc = unsafe {
18217            probe_public_libsais16x64_plcp(
18218                text.as_ptr(),
18219                sa.as_ptr(),
18220                c_plcp.as_mut_ptr(),
18221                text.len() as SaSint,
18222            )
18223        };
18224        assert_eq!(rust_rc, c_rc);
18225        assert_eq!(rust_plcp, c_plcp);
18226
18227        let mut rust_lcp = vec![0; text.len()];
18228        let mut c_lcp = vec![0; text.len()];
18229        let rust_rc = libsais16x64_lcp(&rust_plcp, &sa, &mut rust_lcp);
18230        let c_rc = unsafe {
18231            probe_public_libsais16x64_lcp(
18232                c_plcp.as_ptr(),
18233                sa.as_ptr(),
18234                c_lcp.as_mut_ptr(),
18235                text.len() as SaSint,
18236            )
18237        };
18238        assert_eq!(rust_rc, c_rc);
18239        assert_eq!(rust_lcp, c_lcp);
18240    }
18241
18242    fn assert_libsais16x64_plcp_gsa_matches_c(text: &[u16]) {
18243        let mut sa = vec![0; text.len()];
18244        assert_eq!(libsais16x64_gsa(text, &mut sa, 0, None), 0);
18245
18246        let mut rust_plcp = vec![0; text.len()];
18247        let mut c_plcp = vec![0; text.len()];
18248        let rust_rc = libsais16x64_plcp_gsa(text, &sa, &mut rust_plcp);
18249        let c_rc = unsafe {
18250            probe_public_libsais16x64_plcp_gsa(
18251                text.as_ptr(),
18252                sa.as_ptr(),
18253                c_plcp.as_mut_ptr(),
18254                text.len() as SaSint,
18255            )
18256        };
18257        assert_eq!(rust_rc, c_rc);
18258        assert_eq!(rust_plcp, c_plcp);
18259    }
18260
18261    #[test]
18262    fn public_libsais16x64_matches_upstream_c() {
18263        for text in [
18264            [].as_slice(),
18265            &[1][..],
18266            &[2, 1, 3, 1, 2, 0],
18267            &[2, 1, 3, 1, 2, 4, 1, 0],
18268            &[65_535, 1, 65_534, 1, 0],
18269            &[7, 7, 7, 7, 7, 0],
18270        ] {
18271            assert_libsais16x64_matches_c(text);
18272        }
18273    }
18274
18275    #[test]
18276    fn public_libsais16x64_bwt_matches_upstream_c() {
18277        for text in [
18278            [].as_slice(),
18279            &[1][..],
18280            &[2, 1, 3, 1, 2, 0],
18281            &[2, 1, 3, 1, 2, 4, 1, 0],
18282            &[65_535, 1, 65_534, 1, 0],
18283            &[7, 7, 7, 7, 7, 0],
18284        ] {
18285            assert_libsais16x64_bwt_matches_c(text);
18286        }
18287    }
18288
18289    #[test]
18290    fn public_libsais16x64_gsa_matches_upstream_c() {
18291        for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18292            assert_libsais16x64_gsa_matches_c(text);
18293        }
18294    }
18295
18296    #[test]
18297    fn public_libsais16x64_long_matches_upstream_c() {
18298        for (text, k) in [
18299            (&[][..], 0),
18300            (&[0][..], 1),
18301            (&[1, 2, 1, 0][..], 3),
18302            (&[2, 1, 2, 1, 0][..], 3),
18303            (&[3, 3, 3, 2, 1, 0][..], 4),
18304        ] {
18305            assert_libsais16x64_long_matches_c(text, k);
18306        }
18307
18308        assert_libsais16x64_long_matches_c_with_fs(&[2, 1, 3, 1, 2, 0], 4, 64);
18309    }
18310
18311    #[test]
18312    fn public_libsais16x64_plcp_lcp_matches_upstream_c() {
18313        for text in [
18314            &[2, 1, 3, 1, 2, 0][..],
18315            &[2, 1, 3, 1, 2, 4, 1, 0],
18316            &[65_535, 1, 65_534, 1, 0],
18317            &[7, 7, 7, 7, 7, 0],
18318        ] {
18319            assert_libsais16x64_plcp_lcp_matches_c(text);
18320        }
18321    }
18322
18323    #[test]
18324    fn public_libsais16x64_plcp_gsa_matches_upstream_c() {
18325        for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18326            assert_libsais16x64_plcp_gsa_matches_c(text);
18327        }
18328    }
18329
18330    #[test]
18331    fn public_libsais16x64_bwt_aux_matches_upstream_c() {
18332        for text in [
18333            &[2, 1, 3, 1, 2, 0][..],
18334            &[2, 1, 3, 1, 2, 4, 1, 0],
18335            &[65_535, 1, 65_534, 1, 0],
18336            &[7, 7, 7, 7, 7, 0],
18337        ] {
18338            assert_libsais16x64_bwt_aux_matches_c(text, 4);
18339        }
18340    }
18341
18342    #[test]
18343    fn public_libsais16x64_frequency_outputs_match_upstream_c() {
18344        assert_libsais16x64_freq_outputs_match_c(&[65_535, 1, 2, 1, 0], &[65_535, 1, 0, 2, 1, 0]);
18345    }
18346
18347    #[test]
18348    fn public_libsais16x64_unbwt_with_frequency_matches_upstream_c() {
18349        assert_libsais16x64_unbwt_freq_matches_c(&[65_535, 1, 2, 1, 0]);
18350    }
18351
18352    #[test]
18353    fn public_libsais16x64_unbwt_matches_upstream_c() {
18354        for text in [
18355            &[1][..],
18356            &[2, 1, 3, 1, 2, 0],
18357            &[2, 1, 3, 1, 2, 4, 1, 0],
18358            &[65_535, 1, 65_534, 1, 0],
18359            &[7, 7, 7, 7, 7, 0],
18360        ] {
18361            assert_libsais16x64_unbwt_matches_c(text);
18362        }
18363    }
18364
18365    #[test]
18366    fn public_libsais16x64_unbwt_aux_matches_upstream_c() {
18367        for text in [
18368            &[2, 1, 3, 1, 2, 0][..],
18369            &[2, 1, 3, 1, 2, 4, 1, 0],
18370            &[65_535, 1, 65_534, 1, 0],
18371            &[7, 7, 7, 7, 7, 0],
18372        ] {
18373            assert_libsais16x64_unbwt_aux_matches_c(text, 4);
18374        }
18375    }
18376
18377    #[test]
18378    fn public_libsais16x64_unbwt_aux_exercises_decode_dispatch_cases() {
18379        for len in [2usize, 5, 9, 13, 17, 21, 25, 29, 33, 37] {
18380            let text = (0..len)
18381                .map(|i| ((i * 37 + 11) % 65_535 + 1) as u16)
18382                .collect::<Vec<_>>();
18383            assert_libsais16x64_unbwt_aux_matches_c(&text, 4);
18384        }
18385    }
18386
18387    #[test]
18388    fn libsais16x64_lcp_helpers_reject_invalid_suffix_entries() {
18389        let text = [2, 1, 2, 1, 0];
18390        let mut plcp = vec![0; text.len()];
18391        let mut lcp = vec![0; text.len()];
18392
18393        assert_eq!(libsais16x64_plcp(&text, &[0, 1, -1, 3, 4], &mut plcp), -1);
18394        assert_eq!(libsais16x64_plcp(&text, &[0, 1, 2, 3, 5], &mut plcp), -1);
18395        assert_eq!(libsais16x64_lcp(&plcp, &[0, 1, -1, 3, 4], &mut lcp), -1);
18396        assert_eq!(libsais16x64_lcp(&plcp, &[0, 1, 2, 3, 5], &mut lcp), -1);
18397    }
18398
18399    #[test]
18400    fn libsais16x64_rejects_invalid_public_arguments() {
18401        let text = [2, 1, 3, 1, 2, 0];
18402        let int_text = [1, 2, 1, 0];
18403        let mut int_text_for_short_sa = int_text.to_vec();
18404        let mut int_text_for_negative_fs = int_text.to_vec();
18405        let mut int_text_for_alias = int_text.to_vec();
18406        let mut sa = vec![0; text.len() - 1];
18407        let mut int_sa = vec![0; int_text.len() - 1];
18408        let mut full_int_sa = vec![0; int_text.len()];
18409        let mut freq = vec![0; ALPHABET_SIZE - 1];
18410        let mut u = vec![0; text.len() - 1];
18411        let mut a = vec![0; text.len() - 1];
18412        let mut full_u = vec![0; text.len()];
18413        let mut full_a = vec![0; text.len()];
18414        let mut aux = vec![0; 1];
18415
18416        assert_eq!(libsais16x64(&text, &mut sa, 0, None), -1);
18417        assert_eq!(libsais16x64(&text, &mut full_a, 0, Some(&mut freq)), -1);
18418        assert_eq!(libsais16x64_gsa(&[1, 2, 3], &mut full_a[..3], 0, None), -1);
18419        assert_eq!(
18420            libsais16x64_long(&mut int_text_for_short_sa, &mut int_sa, 3, 0),
18421            -1
18422        );
18423        assert_eq!(
18424            libsais16x64_long(&mut int_text_for_negative_fs, &mut full_int_sa, 3, -1),
18425            -1
18426        );
18427        assert_eq!(
18428            libsais16x64_int(&mut int_text_for_alias, &mut full_int_sa, 3, -1),
18429            -1
18430        );
18431        assert_eq!(libsais16x64_bwt(&text, &mut u, &mut full_a, 0, None), -1);
18432        assert_eq!(libsais16x64_bwt(&text, &mut full_u, &mut a, 0, None), -1);
18433        assert_eq!(
18434            libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 0, &mut aux),
18435            -1
18436        );
18437        assert_eq!(
18438            libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 3, &mut aux),
18439            -1
18440        );
18441        assert_eq!(
18442            libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 4, &mut aux),
18443            -1
18444        );
18445        assert_eq!(create_ctx_omp(-1), None);
18446        assert_eq!(unbwt_create_ctx_omp(-1), None);
18447    }
18448
18449    #[test]
18450    fn libsais16x64_unbwt_rejects_invalid_public_arguments() {
18451        let text = [2, 1, 3, 1, 2, 0];
18452        let mut bwt = vec![0; text.len()];
18453        let mut work = vec![0; text.len()];
18454        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);
18455
18456        let mut short_u = vec![0; text.len() - 1];
18457        let mut short_a = vec![0; text.len() - 1];
18458        let mut full_u = vec![0; text.len()];
18459        let mut full_a = vec![0; text.len()];
18460        let short_freq = vec![0; ALPHABET_SIZE - 1];
18461        let short_aux = vec![primary];
18462        let bad_aux = vec![0, 0];
18463        let good_aux = vec![primary, 4];
18464
18465        assert_eq!(
18466            libsais16x64_unbwt(&bwt, &mut short_u, &mut full_a, None, primary),
18467            -1
18468        );
18469        assert_eq!(
18470            libsais16x64_unbwt(&bwt, &mut full_u, &mut short_a, None, primary),
18471            -1
18472        );
18473        assert_eq!(
18474            libsais16x64_unbwt(&bwt, &mut full_u, &mut full_a, Some(&short_freq), primary),
18475            -1
18476        );
18477        assert_eq!(
18478            libsais16x64_unbwt(&bwt, &mut full_u, &mut full_a, None, 0),
18479            -1
18480        );
18481        assert_eq!(
18482            libsais16x64_unbwt(
18483                &bwt,
18484                &mut full_u,
18485                &mut full_a,
18486                None,
18487                text.len() as SaSint + 1
18488            ),
18489            -1
18490        );
18491        assert_eq!(
18492            libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
18493            -1
18494        );
18495        assert_eq!(
18496            libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
18497            -1
18498        );
18499        assert_eq!(
18500            libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &short_aux),
18501            -1
18502        );
18503        assert_eq!(
18504            libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18505            -1
18506        );
18507    }
18508
18509    #[test]
18510    fn libsais16x64_ctx_rejects_invalid_public_arguments() {
18511        let text = [2, 1, 3, 1, 2, 0];
18512        let mut ctx = create_ctx().unwrap();
18513        let mut sa = vec![0; text.len() - 1];
18514        let mut freq = vec![0; ALPHABET_SIZE - 1];
18515        let mut u = vec![0; text.len() - 1];
18516        let mut a = vec![0; text.len() - 1];
18517        let mut full_u = vec![0; text.len()];
18518        let mut full_a = vec![0; text.len()];
18519        let mut aux = vec![0; 1];
18520
18521        assert_eq!(libsais16x64_ctx(&mut ctx, &text, &mut sa, 0, None), -1);
18522        assert_eq!(
18523            libsais16x64_ctx(&mut ctx, &text, &mut full_a, 0, Some(&mut freq)),
18524            -1
18525        );
18526        assert_eq!(
18527            libsais16x64_gsa_ctx(&mut ctx, &[1, 2, 3], &mut full_a[..3], 0, None),
18528            -1
18529        );
18530        assert_eq!(
18531            libsais16x64_bwt_ctx(&mut ctx, &text, &mut u, &mut full_a, 0, None),
18532            -1
18533        );
18534        assert_eq!(
18535            libsais16x64_bwt_ctx(&mut ctx, &text, &mut full_u, &mut a, 0, None),
18536            -1
18537        );
18538        assert_eq!(
18539            libsais16x64_bwt_aux_ctx(
18540                &mut ctx,
18541                &text,
18542                &mut full_u,
18543                &mut full_a,
18544                0,
18545                None,
18546                0,
18547                &mut aux
18548            ),
18549            -1
18550        );
18551        assert_eq!(
18552            libsais16x64_bwt_aux_ctx(
18553                &mut ctx,
18554                &text,
18555                &mut full_u,
18556                &mut full_a,
18557                0,
18558                None,
18559                3,
18560                &mut aux
18561            ),
18562            -1
18563        );
18564        assert_eq!(
18565            libsais16x64_bwt_aux_ctx(
18566                &mut ctx,
18567                &text,
18568                &mut full_u,
18569                &mut full_a,
18570                0,
18571                None,
18572                4,
18573                &mut aux
18574            ),
18575            -1
18576        );
18577
18578        let mut default_ctx = Context::default();
18579        assert_eq!(
18580            libsais16x64_ctx(&mut default_ctx, &text, &mut full_a, 0, None),
18581            -2
18582        );
18583
18584        let mut bad_bucket_ctx = create_ctx().unwrap();
18585        bad_bucket_ctx.buckets.clear();
18586        assert_eq!(
18587            libsais16x64_ctx(&mut bad_bucket_ctx, &text, &mut full_a, 0, None),
18588            -2
18589        );
18590
18591        let mut short_thread_state_ctx = create_ctx_omp(2).unwrap();
18592        short_thread_state_ctx
18593            .thread_state
18594            .as_mut()
18595            .unwrap()
18596            .truncate(1);
18597        assert_eq!(
18598            libsais16x64_ctx(&mut short_thread_state_ctx, &text, &mut full_a, 0, None),
18599            -2
18600        );
18601    }
18602
18603    #[test]
18604    fn libsais16x64_unbwt_ctx_rejects_invalid_public_arguments() {
18605        let text = [2, 1, 3, 1, 2, 0];
18606        let mut bwt = vec![0; text.len()];
18607        let mut work = vec![0; text.len()];
18608        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);
18609        let mut ctx = unbwt_create_ctx().unwrap();
18610
18611        let mut short_u = vec![0; text.len() - 1];
18612        let mut short_a = vec![0; text.len() - 1];
18613        let mut full_u = vec![0; text.len()];
18614        let mut full_a = vec![0; text.len()];
18615        let short_freq = vec![0; ALPHABET_SIZE - 1];
18616        let short_aux = vec![primary];
18617        let bad_aux = vec![0, 0];
18618        let good_aux = vec![primary, 4];
18619
18620        assert_eq!(
18621            libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
18622            -1
18623        );
18624        assert_eq!(
18625            libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
18626            -1
18627        );
18628        assert_eq!(
18629            libsais16x64_unbwt_ctx(
18630                &mut ctx,
18631                &bwt,
18632                &mut full_u,
18633                &mut full_a,
18634                Some(&short_freq),
18635                primary
18636            ),
18637            -1
18638        );
18639        assert_eq!(
18640            libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
18641            -1
18642        );
18643        assert_eq!(
18644            libsais16x64_unbwt_aux_ctx(
18645                &mut ctx,
18646                &bwt,
18647                &mut full_u,
18648                &mut full_a,
18649                None,
18650                0,
18651                &good_aux
18652            ),
18653            -1
18654        );
18655        assert_eq!(
18656            libsais16x64_unbwt_aux_ctx(
18657                &mut ctx,
18658                &bwt,
18659                &mut full_u,
18660                &mut full_a,
18661                None,
18662                3,
18663                &good_aux
18664            ),
18665            -1
18666        );
18667        assert_eq!(
18668            libsais16x64_unbwt_aux_ctx(
18669                &mut ctx,
18670                &bwt,
18671                &mut full_u,
18672                &mut full_a,
18673                None,
18674                4,
18675                &short_aux
18676            ),
18677            -1
18678        );
18679        assert_eq!(
18680            libsais16x64_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18681            -1
18682        );
18683    }
18684
18685    #[test]
18686    fn libsais16x64_context_wrappers_match_direct_calls() {
18687        let text = [2, 1, 3, 1, 2, 0];
18688        let mut ctx = create_ctx().unwrap();
18689
18690        let mut direct_sa = vec![0; text.len()];
18691        let mut ctx_sa = vec![0; text.len()];
18692        assert_eq!(libsais16x64(&text, &mut direct_sa, 0, None), 0);
18693        assert_eq!(libsais16x64_ctx(&mut ctx, &text, &mut ctx_sa, 0, None), 0);
18694        assert_eq!(ctx_sa, direct_sa);
18695
18696        let mut direct_bwt = vec![0; text.len()];
18697        let mut direct_work = vec![0; text.len()];
18698        let mut ctx_bwt = vec![0; text.len()];
18699        let mut ctx_work = vec![0; text.len()];
18700        assert_eq!(
18701            libsais16x64_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18702            libsais16x64_bwt_ctx(&mut ctx, &text, &mut ctx_bwt, &mut ctx_work, 0, None)
18703        );
18704        assert_eq!(ctx_bwt, direct_bwt);
18705
18706        let mut direct_aux = vec![0; 2];
18707        let mut ctx_aux = vec![0; 2];
18708        assert_eq!(
18709            libsais16x64_bwt_aux(
18710                &text,
18711                &mut direct_bwt,
18712                &mut direct_work,
18713                0,
18714                None,
18715                4,
18716                &mut direct_aux
18717            ),
18718            libsais16x64_bwt_aux_ctx(
18719                &mut ctx,
18720                &text,
18721                &mut ctx_bwt,
18722                &mut ctx_work,
18723                0,
18724                None,
18725                4,
18726                &mut ctx_aux
18727            )
18728        );
18729        assert_eq!(ctx_bwt, direct_bwt);
18730        assert_eq!(ctx_aux, direct_aux);
18731    }
18732
18733    #[test]
18734    fn libsais16x64_unbwt_context_wrappers_match_direct_calls() {
18735        let text = [2, 1, 3, 1, 2, 0];
18736        let mut bwt = vec![0; text.len()];
18737        let mut work = vec![0; text.len()];
18738        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);
18739
18740        let mut ctx = unbwt_create_ctx().unwrap();
18741        let mut direct = vec![0; text.len()];
18742        let mut direct_work = vec![0; text.len()];
18743        let mut via_ctx = vec![0; text.len()];
18744        let mut ctx_work = vec![0; text.len()];
18745
18746        assert_eq!(
18747            libsais16x64_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
18748            0
18749        );
18750        assert_eq!(
18751            libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, primary),
18752            0
18753        );
18754        assert_eq!(via_ctx, direct);
18755
18756        let mut aux = vec![0; 2];
18757        assert_eq!(
18758            libsais16x64_bwt_aux(&text, &mut bwt, &mut work, 0, None, 4, &mut aux),
18759            0
18760        );
18761        assert_eq!(
18762            libsais16x64_unbwt_aux(&bwt, &mut direct, &mut direct_work, None, 4, &aux),
18763            0
18764        );
18765        assert_eq!(
18766            libsais16x64_unbwt_aux_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, 4, &aux),
18767            0
18768        );
18769        assert_eq!(via_ctx, direct);
18770    }
18771
18772    #[test]
18773    fn libsais16x64_ctx_frequency_wrappers_match_direct_calls() {
18774        let text = [2, 1, 3, 1, 2, 0];
18775        let gsa_text = [2, 1, 0, 3, 1, 0];
18776        let mut ctx = create_ctx().unwrap();
18777
18778        let mut direct_sa = vec![0; text.len()];
18779        let mut ctx_sa = vec![0; text.len()];
18780        let mut direct_freq = vec![-1; ALPHABET_SIZE];
18781        let mut ctx_freq = vec![-1; ALPHABET_SIZE];
18782        assert_eq!(
18783            libsais16x64(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
18784            0
18785        );
18786        assert_eq!(
18787            libsais16x64_ctx(&mut ctx, &text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
18788            0
18789        );
18790        assert_eq!(ctx_sa, direct_sa);
18791        assert_eq!(ctx_freq, direct_freq);
18792
18793        let mut direct_gsa = vec![0; gsa_text.len()];
18794        let mut ctx_gsa = vec![0; gsa_text.len()];
18795        direct_freq.fill(-1);
18796        ctx_freq.fill(-1);
18797        assert_eq!(
18798            libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
18799            0
18800        );
18801        assert_eq!(
18802            libsais16x64_gsa_ctx(&mut ctx, &gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
18803            0
18804        );
18805        assert_eq!(ctx_gsa, direct_gsa);
18806        assert_eq!(ctx_freq, direct_freq);
18807
18808        let mut direct_bwt = vec![0; text.len()];
18809        let mut direct_work = vec![0; text.len()];
18810        let mut ctx_bwt = vec![0; text.len()];
18811        let mut ctx_work = vec![0; text.len()];
18812        direct_freq.fill(-1);
18813        ctx_freq.fill(-1);
18814        assert_eq!(
18815            libsais16x64_bwt(
18816                &text,
18817                &mut direct_bwt,
18818                &mut direct_work,
18819                0,
18820                Some(&mut direct_freq)
18821            ),
18822            libsais16x64_bwt_ctx(
18823                &mut ctx,
18824                &text,
18825                &mut ctx_bwt,
18826                &mut ctx_work,
18827                0,
18828                Some(&mut ctx_freq)
18829            )
18830        );
18831        assert_eq!(ctx_bwt, direct_bwt);
18832        assert_eq!(ctx_freq, direct_freq);
18833
18834        let mut direct_aux = vec![0; 2];
18835        let mut ctx_aux = vec![0; 2];
18836        direct_freq.fill(-1);
18837        ctx_freq.fill(-1);
18838        assert_eq!(
18839            libsais16x64_bwt_aux(
18840                &text,
18841                &mut direct_bwt,
18842                &mut direct_work,
18843                0,
18844                Some(&mut direct_freq),
18845                4,
18846                &mut direct_aux
18847            ),
18848            libsais16x64_bwt_aux_ctx(
18849                &mut ctx,
18850                &text,
18851                &mut ctx_bwt,
18852                &mut ctx_work,
18853                0,
18854                Some(&mut ctx_freq),
18855                4,
18856                &mut ctx_aux
18857            )
18858        );
18859        assert_eq!(ctx_bwt, direct_bwt);
18860        assert_eq!(ctx_aux, direct_aux);
18861        assert_eq!(ctx_freq, direct_freq);
18862    }
18863
18864    #[test]
18865    fn libsais16x64_unbwt_ctx_frequency_wrappers_match_direct_calls() {
18866        let text = [2, 1, 3, 1, 2, 0];
18867        let mut freq = vec![0; ALPHABET_SIZE];
18868        let mut bwt = vec![0; text.len()];
18869        let mut work = vec![0; text.len()];
18870        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
18871        assert!(primary >= 0);
18872
18873        let mut ctx = unbwt_create_ctx().unwrap();
18874        let mut direct = vec![0; text.len()];
18875        let mut direct_work = vec![0; text.len() + 1];
18876        let mut via_ctx = vec![0; text.len()];
18877        let mut ctx_work = vec![0; text.len() + 1];
18878        assert_eq!(
18879            libsais16x64_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
18880            libsais16x64_unbwt_ctx(
18881                &mut ctx,
18882                &bwt,
18883                &mut via_ctx,
18884                &mut ctx_work,
18885                Some(&freq),
18886                primary
18887            )
18888        );
18889        assert_eq!(via_ctx, direct);
18890        assert_eq!(via_ctx, text);
18891
18892        let mut aux = vec![0; (text.len() - 1) / 4 + 1];
18893        assert_eq!(
18894            libsais16x64_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
18895            0
18896        );
18897        direct.fill(0);
18898        direct_work.fill(0);
18899        via_ctx.fill(0);
18900        ctx_work.fill(0);
18901        assert_eq!(
18902            libsais16x64_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
18903            libsais16x64_unbwt_aux_ctx(
18904                &mut ctx,
18905                &bwt,
18906                &mut via_ctx,
18907                &mut ctx_work,
18908                Some(&freq),
18909                4,
18910                &aux
18911            )
18912        );
18913        assert_eq!(via_ctx, direct);
18914        assert_eq!(via_ctx, text);
18915    }
18916
18917    #[test]
18918    fn libsais16x64_omp_wrappers_match_direct_calls_and_reject_negative_threads() {
18919        let text = [2, 1, 3, 1, 2, 0];
18920        let gsa_text = [2, 1, 0, 3, 1, 0];
18921        let mut direct_sa = vec![0; text.len()];
18922        let mut omp_sa = vec![0; text.len()];
18923        assert_eq!(libsais16x64(&text, &mut direct_sa, 0, None), 0);
18924        assert_eq!(libsais16x64_omp(&text, &mut omp_sa, 0, None, 2), 0);
18925        assert_eq!(omp_sa, direct_sa);
18926        assert_eq!(libsais16x64_omp(&text, &mut omp_sa, 0, None, -1), -1);
18927
18928        let mut direct_gsa = vec![0; gsa_text.len()];
18929        let mut omp_gsa = vec![0; gsa_text.len()];
18930        assert_eq!(libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, None), 0);
18931        assert_eq!(libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, 2), 0);
18932        assert_eq!(omp_gsa, direct_gsa);
18933        assert_eq!(
18934            libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, -1),
18935            -1
18936        );
18937
18938        let int_text = [1, 2, 1, 0];
18939        let mut direct_int_text = int_text.to_vec();
18940        let mut omp_int_text = int_text.to_vec();
18941        let mut direct_int_sa = vec![0; int_text.len()];
18942        let mut omp_int_sa = vec![0; int_text.len()];
18943        assert_eq!(
18944            libsais16x64_long(&mut direct_int_text, &mut direct_int_sa, 3, 0),
18945            0
18946        );
18947        assert_eq!(
18948            libsais16x64_long_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, 2),
18949            0
18950        );
18951        assert_eq!(omp_int_text, direct_int_text);
18952        assert_eq!(omp_int_sa, direct_int_sa);
18953        assert_eq!(
18954            libsais16x64_long_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, -1),
18955            -1
18956        );
18957
18958        let mut direct_bwt = vec![0; text.len()];
18959        let mut direct_work = vec![0; text.len()];
18960        let mut omp_bwt = vec![0; text.len()];
18961        let mut omp_work = vec![0; text.len()];
18962        assert_eq!(
18963            libsais16x64_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18964            libsais16x64_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, 2)
18965        );
18966        assert_eq!(omp_bwt, direct_bwt);
18967        assert_eq!(
18968            libsais16x64_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, -1),
18969            -1
18970        );
18971
18972        let mut direct_aux = vec![0; 2];
18973        let mut omp_aux = vec![0; 2];
18974        assert_eq!(
18975            libsais16x64_bwt_aux(
18976                &text,
18977                &mut direct_bwt,
18978                &mut direct_work,
18979                0,
18980                None,
18981                4,
18982                &mut direct_aux
18983            ),
18984            libsais16x64_bwt_aux_omp(
18985                &text,
18986                &mut omp_bwt,
18987                &mut omp_work,
18988                0,
18989                None,
18990                4,
18991                &mut omp_aux,
18992                2
18993            )
18994        );
18995        assert_eq!(omp_bwt, direct_bwt);
18996        assert_eq!(omp_aux, direct_aux);
18997        assert_eq!(
18998            libsais16x64_bwt_aux_omp(
18999                &text,
19000                &mut omp_bwt,
19001                &mut omp_work,
19002                0,
19003                None,
19004                4,
19005                &mut omp_aux,
19006                -1
19007            ),
19008            -1
19009        );
19010    }
19011
19012    #[test]
19013    fn libsais16x64_omp_frequency_wrappers_match_direct_calls() {
19014        let text = [2, 1, 3, 1, 2, 0];
19015        let gsa_text = [2, 1, 0, 3, 1, 0];
19016        let mut direct_sa = vec![0; text.len()];
19017        let mut omp_sa = vec![0; text.len()];
19018        let mut direct_freq = vec![-1; ALPHABET_SIZE];
19019        let mut omp_freq = vec![-1; ALPHABET_SIZE];
19020        assert_eq!(
19021            libsais16x64(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
19022            0
19023        );
19024        assert_eq!(
19025            libsais16x64_omp(&text, &mut omp_sa, 0, Some(&mut omp_freq), 2),
19026            0
19027        );
19028        assert_eq!(omp_sa, direct_sa);
19029        assert_eq!(omp_freq, direct_freq);
19030
19031        let mut direct_gsa = vec![0; gsa_text.len()];
19032        let mut omp_gsa = vec![0; gsa_text.len()];
19033        direct_freq.fill(-1);
19034        omp_freq.fill(-1);
19035        assert_eq!(
19036            libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
19037            0
19038        );
19039        assert_eq!(
19040            libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
19041            0
19042        );
19043        assert_eq!(omp_gsa, direct_gsa);
19044        assert_eq!(omp_freq, direct_freq);
19045
19046        let mut direct_bwt = vec![0; text.len()];
19047        let mut direct_work = vec![0; text.len()];
19048        let mut omp_bwt = vec![0; text.len()];
19049        let mut omp_work = vec![0; text.len()];
19050        direct_freq.fill(-1);
19051        omp_freq.fill(-1);
19052        assert_eq!(
19053            libsais16x64_bwt(
19054                &text,
19055                &mut direct_bwt,
19056                &mut direct_work,
19057                0,
19058                Some(&mut direct_freq)
19059            ),
19060            libsais16x64_bwt_omp(
19061                &text,
19062                &mut omp_bwt,
19063                &mut omp_work,
19064                0,
19065                Some(&mut omp_freq),
19066                2
19067            )
19068        );
19069        assert_eq!(omp_bwt, direct_bwt);
19070        assert_eq!(omp_freq, direct_freq);
19071
19072        let mut direct_aux = vec![0; 2];
19073        let mut omp_aux = vec![0; 2];
19074        direct_freq.fill(-1);
19075        omp_freq.fill(-1);
19076        assert_eq!(
19077            libsais16x64_bwt_aux(
19078                &text,
19079                &mut direct_bwt,
19080                &mut direct_work,
19081                0,
19082                Some(&mut direct_freq),
19083                4,
19084                &mut direct_aux
19085            ),
19086            libsais16x64_bwt_aux_omp(
19087                &text,
19088                &mut omp_bwt,
19089                &mut omp_work,
19090                0,
19091                Some(&mut omp_freq),
19092                4,
19093                &mut omp_aux,
19094                2
19095            )
19096        );
19097        assert_eq!(omp_bwt, direct_bwt);
19098        assert_eq!(omp_aux, direct_aux);
19099        assert_eq!(omp_freq, direct_freq);
19100    }
19101
19102    #[test]
19103    fn libsais16x64_unbwt_omp_frequency_wrappers_match_direct_calls() {
19104        let text = [2, 1, 3, 1, 2, 0];
19105        let mut freq = vec![0; ALPHABET_SIZE];
19106        let mut bwt = vec![0; text.len()];
19107        let mut work = vec![0; text.len()];
19108        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
19109        assert!(primary >= 0);
19110
19111        let mut direct = vec![0; text.len()];
19112        let mut direct_work = vec![0; text.len() + 1];
19113        let mut omp = vec![0; text.len()];
19114        let mut omp_work = vec![0; text.len() + 1];
19115        assert_eq!(
19116            libsais16x64_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
19117            libsais16x64_unbwt_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), primary, 2)
19118        );
19119        assert_eq!(omp, direct);
19120        assert_eq!(omp, text);
19121
19122        let mut aux = vec![0; (text.len() - 1) / 4 + 1];
19123        assert_eq!(
19124            libsais16x64_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
19125            0
19126        );
19127        direct.fill(0);
19128        direct_work.fill(0);
19129        omp.fill(0);
19130        omp_work.fill(0);
19131        assert_eq!(
19132            libsais16x64_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
19133            libsais16x64_unbwt_aux_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), 4, &aux, 2)
19134        );
19135        assert_eq!(omp, direct);
19136        assert_eq!(omp, text);
19137    }
19138
19139    #[test]
19140    fn libsais16x64_lcp_and_unbwt_omp_wrappers_match_direct_calls() {
19141        let text = [2, 1, 3, 1, 2, 0];
19142        let mut sa = vec![0; text.len()];
19143        assert_eq!(libsais16x64(&text, &mut sa, 0, None), 0);
19144
19145        let mut direct_plcp = vec![0; text.len()];
19146        let mut omp_plcp = vec![0; text.len()];
19147        assert_eq!(libsais16x64_plcp(&text, &sa, &mut direct_plcp), 0);
19148        assert_eq!(libsais16x64_plcp_omp(&text, &sa, &mut omp_plcp, 2), 0);
19149        assert_eq!(omp_plcp, direct_plcp);
19150        assert_eq!(libsais16x64_plcp_omp(&text, &sa, &mut omp_plcp, -1), -1);
19151
19152        let gsa_text = [2, 1, 0, 1, 2, 0];
19153        let mut gsa = vec![0; gsa_text.len()];
19154        assert_eq!(libsais16x64_gsa(&gsa_text, &mut gsa, 0, None), 0);
19155        let mut direct_gsa_plcp = vec![0; gsa_text.len()];
19156        let mut omp_gsa_plcp = vec![0; gsa_text.len()];
19157        assert_eq!(
19158            libsais16x64_plcp_gsa(&gsa_text, &gsa, &mut direct_gsa_plcp),
19159            0
19160        );
19161        assert_eq!(
19162            libsais16x64_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, 2),
19163            0
19164        );
19165        assert_eq!(omp_gsa_plcp, direct_gsa_plcp);
19166        assert_eq!(
19167            libsais16x64_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, -1),
19168            -1
19169        );
19170
19171        let mut direct_lcp = vec![0; text.len()];
19172        let mut omp_lcp = vec![0; text.len()];
19173        assert_eq!(libsais16x64_lcp(&direct_plcp, &sa, &mut direct_lcp), 0);
19174        assert_eq!(libsais16x64_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, 2), 0);
19175        assert_eq!(omp_lcp, direct_lcp);
19176        assert_eq!(
19177            libsais16x64_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, -1),
19178            -1
19179        );
19180
19181        let mut bwt = vec![0; text.len()];
19182        let mut work = vec![0; text.len()];
19183        let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);
19184        let mut direct = vec![0; text.len()];
19185        let mut omp = vec![0; text.len()];
19186        let mut direct_work = vec![0; text.len()];
19187        let mut omp_work = vec![0; text.len()];
19188        assert_eq!(
19189            libsais16x64_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
19190            0
19191        );
19192        assert_eq!(
19193            libsais16x64_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, 2),
19194            0
19195        );
19196        assert_eq!(omp, direct);
19197        assert_eq!(
19198            libsais16x64_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, -1),
19199            -1
19200        );
19201    }
19202}